package net.doo.datamining.language;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import net.doo.datamining.ClassificationResults;
import net.doo.datamining.io.BinaryChunk;
import net.doo.datamining.io.ClassifyingParams;
import net.doo.datamining.io.IO;
import net.doo.datamining.io.InfoChunkData;
import net.doo.datamining.util.Pair;
import org.apache.log4j.Logger;

/* loaded from: input_file:net/doo/datamining/language/LanguageClassifier.class */
/**
 * Static entry points for guessing the language of a text with a set of Markov classifiers
 * that were previously written to a binary model file.
 *
 * <p>Typical use: load the model once with {@link #readClassifiers(File)}, then call
 * {@link #classify(LanguageLearningResult, String)} or
 * {@link #classifyFile(LanguageLearningResult, File)} for each input. {@link #main(String[])}
 * is a small command line driver that classifies whole directory trees and logs a histogram.
 */
public class LanguageClassifier implements Serializable {
    private static final Logger log = Logger.getLogger(LanguageClassifier.class);

    /** Category key used when no language can be determined. */
    private static final String UNDETERMINED = "und";

    /** Filtered input shorter than this many characters is not classified at all. */
    private static final int MIN_FILTERED_LENGTH = 75;

    /**
     * Smallest value of {@code ClassificationResults.winnerBySecond} (logged as "Ratio") for which
     * a winner other than "deu" or "eng" is accepted.
     */
    private static final double MIN_WINNER_RATIO = 1.5d;

    /**
     * Loads the language model stored in {@code file}.
     *
     * @param file binary model file
     * @return the learning result decoded from the file
     * @throws IOException if {@code file} is not a regular file, or if reading fails
     */
    public static LanguageLearningResult readClassifiers(File file) throws IOException {
        if (!file.isFile()) {
            throw new IOException("Not a file: '" + file + "'");
        }
        final BinaryChunk chunk;
        // The stream is only needed to pull the chunk into memory; try-with-resources closes it
        // exactly once, whether or not reading succeeds.
        try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
            // The second argument is presumably the expected file magic -- confirm against IO.
            chunk = IO.readBinaryChunk(in, "doo\u001a");
        }
        // Validate the info chunk before decoding; the validated info is what gets logged.
        Object info = new InfoChunkData().fromChunk(chunk).assertValidity(7, 2, "markov  ");
        log.info("'" + file + "' contains language classification data with info " + info + ".");
        return new LanguageLearningResult().fromChunk(chunk);
    }

    /**
     * Reads {@code file} as UTF-8 text and classifies its content.
     *
     * @param languageLearningResult model obtained from {@link #readClassifiers(File)}
     * @param file text file to classify
     * @return the result of {@link #classify(LanguageLearningResult, String)} for the file content
     * @throws IOException if the file cannot be read
     */
    public static String classifyFile(LanguageLearningResult languageLearningResult, File file) throws IOException {
        log.debug(file);
        return classify(languageLearningResult, Files.toString(file, Charsets.UTF_8));
    }

    /**
     * Determines the language of {@code str}.
     *
     * <p>The best scoring category is returned when it is "deu" or "eng", or when the
     * winner/runner-up value is at least {@value #MIN_WINNER_RATIO}. Otherwise the result is
     * considered uncertain and {@code "0"} is returned. When there is no runner-up to compare
     * against (for example because the input was too short to classify) the only available
     * category key is returned, or "und" if there is none.
     *
     * @param languageLearningResult model obtained from {@link #readClassifiers(File)}
     * @param str text to classify
     * @return a category key, {@code "und"}, or {@code "0"} for an uncertain result
     * @throws IOException declared for compatibility with existing callers
     */
    public static String classify(LanguageLearningResult languageLearningResult, String str) throws IOException {
        List<ClassificationResults.ClassificationResult> ranked = classifySorted(languageLearningResult, str);
        if (log.isTraceEnabled()) {
            for (ClassificationResults.ClassificationResult candidate : ranked) {
                log.trace(String.format("%s: %5.4f", candidate.categoryKey.fst, Double.valueOf(candidate.score)));
            }
        }
        // classifySorted yields a single "und" entry for short input, so a runner-up is not
        // guaranteed to exist; asking for index 1 here used to fail with IndexOutOfBoundsException.
        if (ranked.size() < 2) {
            return ranked.isEmpty() ? UNDETERMINED : (String) ranked.get(0).categoryKey.fst;
        }

        double ratio = ClassificationResults.winnerBySecond(ranked);
        String winner = (String) ranked.get(0).categoryKey.fst;
        String runnerUp = (String) ranked.get(1).categoryKey.fst;
        log.debug(String.format("1st: %s, 2nd: %s, Ratio: %4.3f, text length: %d", winner, runnerUp, Double.valueOf(ratio), Integer.valueOf(str.length())));

        final String result;
        if ("deu".equals(winner) || "eng".equals(winner) || ratio >= MIN_WINNER_RATIO) {
            log.debug("Result: " + winner);
            result = winner;
        } else {
            log.debug("Result: und, ratio too small (uncertain)");
            // NOTE(review): the message says "und" but "0" is what callers receive; kept as is
            // because existing callers may compare against "0" -- confirm the intended value.
            result = "0";
        }
        log.debug("");
        return result;
    }

    /**
     * Scores the text against every classifier of the model.
     *
     * <p>The text is first passed through the filter of the model's Markov configuration. If fewer
     * than {@value #MIN_FILTERED_LENGTH} characters remain, no classifier is run and a single
     * "und" entry with score 0 is returned.
     *
     * @param languageLearningResult model obtained from {@link #readClassifiers(File)}
     * @param str text to classify
     * @return one result per classifier as ordered by {@code ClassificationResults.byScoreDesc},
     *         or a one-element list keyed "und" for too short input
     * @throws IOException declared for compatibility with existing callers
     */
    public static List<ClassificationResults.ClassificationResult> classifySorted(LanguageLearningResult languageLearningResult, String str) throws IOException {
        String filtered = languageLearningResult.getMarkovConfiguration().getFilter().filterString(str);
        log.trace(filtered);
        if (filtered.length() < MIN_FILTERED_LENGTH) {
            log.debug("Result: und, input too short.");
            log.debug("");
            return Arrays.asList(new ClassificationResults.ClassificationResult(Pair.of(UNDETERMINED, ""), 0.0d));
        }
        ArrayList<ClassificationResults.ClassificationResult> scored =
                new ArrayList<ClassificationResults.ClassificationResult>(languageLearningResult.getClassifiers().size());
        // The element type of getClassifiers() is not visible from this file, hence the cast.
        for (Object element : languageLearningResult.getClassifiers()) {
            MarkovClassifier classifier = (MarkovClassifier) element;
            scored.add(new ClassificationResults.ClassificationResult(Pair.of(classifier.getIsoCode(), ""), classifier.classifyFiltered(filtered)));
        }
        return ClassificationResults.byScoreDesc(scored);
    }

    /**
     * Command line driver: loads the model named by the parsed parameters, classifies every file
     * below the given roots that might be text, and logs how many files fell into each result.
     *
     * @param strArr command line arguments, parsed into {@link ClassifyingParams}
     * @throws Exception if argument handling, loading the model or reading an input file fails
     */
    public static void main(String[] strArr) throws Exception {
        ClassifyingParams classifyingParams = new ClassifyingParams();
        IO.performJCommanderInitialization(strArr, Arrays.asList(classifyingParams));
        LanguageLearningResult model = readClassifiers(classifyingParams.getBinaryFile());
        log.info(model.getMarkovConfiguration());

        StringBuilder summary = new StringBuilder("Classifiers: ");
        for (Object element : model.getClassifiers()) {
            MarkovClassifier classifier = (MarkovClassifier) element;
            summary.append(classifier.getIsoCode()).append(' ');
            log.trace(classifier);
        }
        log.info(summary.toString());

        // Sorted by result key so the final report is stable.
        Map<String, Integer> countsByLanguage = new TreeMap<String, Integer>();
        int visited = 0;
        for (Object root : classifyingParams.getFilesToClassify()) {
            for (Pair<File, String> entry : IO.listFilesRecursive((File) root)) {
                File candidate = (File) entry.fst;
                if (IO.mightBeText(candidate)) {
                    String language = classifyFile(model, candidate);
                    Integer previous = countsByLanguage.get(language);
                    countsByLanguage.put(language, Integer.valueOf(previous == null ? 1 : previous.intValue() + 1));
                }
                // Progress counts every listed file, not only the ones that were classified.
                visited++;
                if (visited % 1000 == 0) {
                    log.info(Integer.valueOf(visited));
                }
            }
        }

        log.info("----------------------------------------");
        for (Map.Entry<String, Integer> count : countsByLanguage.entrySet()) {
            log.info(count.getKey() + " : " + count.getValue());
        }
    }
}
