/*
 * Decompiled with CFR 0.152.
 */
package net.doo.datamining.language;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import net.doo.datamining.ClassificationResults;
import net.doo.datamining.io.BinaryChunk;
import net.doo.datamining.io.ClassifyingParams;
import net.doo.datamining.io.IO;
import net.doo.datamining.io.InfoChunkData;
import net.doo.datamining.language.LanguageLearningResult;
import net.doo.datamining.language.MarkovClassifier;
import net.doo.datamining.util.Pair;
import org.apache.log4j.Logger;

/**
 * Static entry points for language classification based on the
 * {@link MarkovClassifier}s held by a {@link LanguageLearningResult}.
 *
 * <p>Typical use: load the trained data with {@link #readClassifiers(File)} and pass it
 * to {@link #classify(LanguageLearningResult, String)} or
 * {@link #classifyFile(LanguageLearningResult, File)}. {@link #main(String[])} runs the
 * classifier over files given on the command line and logs a per-result tally.
 */
public class LanguageClassifier {
    private static final Logger log = Logger.getLogger(LanguageClassifier.class);

    /** Value returned by the {@code classify} methods when no language is reported. */
    public static final String UNDEFINED_LANGUAGE = "0";

    /** Filtered input shorter than this many characters is not scored at all. */
    private static final int MIN_FILTERED_LENGTH = 75;

    /**
     * Lower bound on the value of {@code ClassificationResults.winnerBySecond} below which
     * a winner other than "deu" or "eng" is rejected as uncertain.
     */
    private static final double MIN_WINNER_RATIO = 1.5;

    /**
     * Loads trained classification data from a binary file.
     *
     * @param binFile file holding the binary chunk to read
     * @return the learning result built from the chunk
     * @throws IOException if {@code binFile} is not a regular file or reading it fails
     */
    public static LanguageLearningResult readClassifiers(File binFile) throws IOException {
        if (!binFile.isFile()) {
            throw new IOException("Not a file: '" + binFile + "'");
        }
        BinaryChunk chunk;
        try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(binFile))) {
            chunk = IO.readBinaryChunk(in, "doo\u001a");
        }
        // NOTE(review): the meaning of (7, 2, "markov  ") is defined by
        // InfoChunkData.assertValidity, which is not visible here - presumably
        // expected version numbers and a type tag; confirm before changing.
        InfoChunkData infoChunkData = new InfoChunkData().fromChunk(chunk).assertValidity(7, 2, "markov  ");
        log.info("'" + binFile + "' contains language classification data with info " + infoChunkData + ".");
        return new LanguageLearningResult().fromChunk(chunk);
    }

    /**
     * Reads a file as UTF-8 text and classifies its content.
     *
     * @param learned trained classification data
     * @param toClassify file whose content is classified
     * @return the result of {@link #classify(LanguageLearningResult, String)} for the file content
     * @throws IOException if the file cannot be read
     */
    public static String classifyFile(LanguageLearningResult learned, File toClassify) throws IOException {
        log.debug(toClassify);
        String content = Files.toString(toClassify, Charsets.UTF_8);
        return LanguageClassifier.classify(learned, content);
    }

    /**
     * Classifies a text and returns the key of the best-scoring classifier.
     *
     * <p>{@link #UNDEFINED_LANGUAGE} is returned when
     * {@link #classifySorted(LanguageLearningResult, String)} yields fewer than two results
     * (which includes the "input too short" case), or when the winner is neither "deu" nor
     * "eng" and the winner-to-second ratio is below 1.5. Note that the debug log prints
     * "und" in those cases while the returned value is {@link #UNDEFINED_LANGUAGE}.
     *
     * @param learned trained classification data
     * @param toClassify text to classify
     * @return the winning category key, or {@link #UNDEFINED_LANGUAGE}
     * @throws IOException declared for callers; see {@code classifySorted}
     */
    public static String classify(LanguageLearningResult learned, String toClassify) throws IOException {
        List<ClassificationResults.ClassificationResult> sortedResults = LanguageClassifier.classifySorted(learned, toClassify);
        if (sortedResults.size() < 2) {
            return UNDEFINED_LANGUAGE;
        }
        double ratio = ClassificationResults.winnerBySecond(sortedResults);
        if (log.isTraceEnabled()) {
            for (ClassificationResults.ClassificationResult scored : sortedResults) {
                log.trace(String.format("%s: %5.4f", scored.categoryKey.fst, scored.score));
            }
        }
        // Casts on categoryKey.fst are retained: the declared field type is not visible here.
        String first = (String) sortedResults.get(0).categoryKey.fst;
        String second = (String) sortedResults.get(1).categoryKey.fst;
        log.debug(String.format("1st: %s, 2nd: %s, Ratio: %4.3f, text length: %d", first, second, ratio, toClassify.length()));

        // "deu" and "eng" winners are accepted regardless of the ratio; any other
        // winner must beat the runner-up by at least MIN_WINNER_RATIO.
        boolean uncertain = !"deu".equals(first) && !"eng".equals(first) && ratio < MIN_WINNER_RATIO;
        String result;
        if (uncertain) {
            log.debug("Result: und, ratio too small (uncertain)");
            result = UNDEFINED_LANGUAGE;
        } else {
            log.debug("Result: " + first);
            result = first;
        }
        log.debug("");
        return result;
    }

    /**
     * Scores a text against every classifier in {@code learned}.
     *
     * <p>The text is first passed through the filter of the learned Markov configuration.
     * If the filtered text has fewer than 75 characters, no classifier is run and a
     * single placeholder result with key "und" and score 0.0 is returned.
     *
     * @param learned trained classification data
     * @param toClassify text to classify
     * @return one result per classifier as ordered by {@code ClassificationResults.byScoreDesc}
     *         (presumably highest score first), or the single "und" placeholder
     * @throws IOException declared for callers; nothing visible in this method throws it directly
     */
    public static List<ClassificationResults.ClassificationResult> classifySorted(LanguageLearningResult learned, String toClassify) throws IOException {
        String filtered = learned.getMarkovConfiguration().getFilter().filterString(toClassify);
        log.trace(filtered);
        if (filtered.length() < MIN_FILTERED_LENGTH) {
            log.debug("Result: und, input too short.");
            log.debug("");
            return Arrays.asList(new ClassificationResults.ClassificationResult(Pair.of("und", ""), 0.0));
        }
        ArrayList<ClassificationResults.ClassificationResult> results = new ArrayList<>(learned.getClassifiers().size());
        for (MarkovClassifier classifier : learned.getClassifiers()) {
            double score = classifier.classifyFiltered(filtered);
            results.add(new ClassificationResults.ClassificationResult(Pair.of(classifier.getIsoCode(), ""), score));
        }
        return ClassificationResults.byScoreDesc(results);
    }

    /**
     * Command-line driver: loads the classifiers, classifies every file found under the
     * given paths that {@code IO.mightBeText} accepts, and logs how often each result
     * occurred (sorted by result key).
     *
     * @param args command-line arguments, parsed into {@link ClassifyingParams}
     * @throws Exception if initialization, loading or classification fails
     */
    public static void main(String[] args) throws Exception {
        ClassifyingParams params = new ClassifyingParams();
        IO.performJCommanderInitialization(args, Arrays.asList(params));
        LanguageLearningResult learned = LanguageClassifier.readClassifiers(params.getBinaryFile());
        log.info(learned.getMarkovConfiguration());

        StringBuilder classifiersString = new StringBuilder("Classifiers: ");
        for (MarkovClassifier c : learned.getClassifiers()) {
            classifiersString.append(c.getIsoCode()).append(' ');
            log.trace(c);
        }
        log.info(classifiersString.toString());

        Map<String, Integer> resultCount = new TreeMap<>();
        int count = 0;
        for (File toClassify : params.getFilesToClassify()) {
            for (Pair<File, String> f : IO.listFilesRecursive(toClassify)) {
                // Cast on fst is retained: the declared field type of Pair is not visible here.
                File candidate = (File) f.fst;
                if (IO.mightBeText(candidate)) {
                    String result = LanguageClassifier.classifyFile(learned, candidate);
                    Integer seen = resultCount.get(result);
                    resultCount.put(result, seen == null ? 1 : seen + 1);
                }
                // Progress output: counts every visited file, text or not.
                if (++count % 1000 == 0) {
                    log.info(count);
                }
            }
        }

        log.info("----------------------------------------");
        for (Map.Entry<String, Integer> entry : resultCount.entrySet()) {
            log.info(entry.getKey() + " : " + entry.getValue());
        }
    }
}

