/*
 * Decompiled with CFR 0.152.
 */
package net.doo.datamining.natalie;

import com.beust.jcommander.Parameter;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;
import java.io.IOException;
import java.util.Set;
import net.doo.datamining.AggregatedClassificationResults;
import net.doo.datamining.ClassificationResults;
import net.doo.datamining.io.BinaryChunk;
import net.doo.datamining.natalie.BinaryDmnbClassifier;
import net.doo.datamining.natalie.NatalieLearningResult;
import net.doo.datamining.preprocessing.WordVector;
import org.apache.log4j.Logger;

/**
 * Configuration holder and classification entry point for the Natalie classifier.
 *
 * <p>The fields annotated with {@link Parameter} are populated by JCommander from the command
 * line; the same settings can alternatively be restored from a serialized {@link BinaryChunk}
 * via {@link #fromChunk(BinaryChunk)}. The fields are therefore intentionally mutable.
 */
public class Natalie {
    private static final Logger log = Logger.getLogger(Natalie.class);

    /** Number of learning passes over each data point. */
    @Parameter(names={"-iterations"}, description="The number of times the algorithm learns on a data point. More iterations can improve the result to same extends, but makes learning a lot slower.For testing, one iteration is sufficient, for live releases use three or more.")
    private int iterations = 3;

    /** Whether subclass information is used; stored as flag value {@code 1} in the "nata" chunk. */
    @Parameter(names={"-subClasses"}, description="Whether to use subclass information. When this is switched on, the subclasses present in the learning data are used to create additional classifiers. Good subclasses can improve the precision of the result. Shouldbe turned off unless you know what you are doing.", arity=1)
    private boolean subClasses = false;

    /** Whether the document prior is passed on to the classifiers; stored as flag value {@code 2}. */
    @Parameter(names={"-documentPrior"}, description="Whether to use the document prior in classification. When learning, the probability of a document type occuring (e.g. the relative amount of invoices) is learned for each document type. If this flag is set, this probability is factored into the result. Only use this if the document distribution you have is realistic. Generally you do not want to set this flag.", arity=1)
    private boolean documentPrior = false;

    /** Assumed dictionary size for the laplacian smoother. */
    @Parameter(names={"-laplaceDefault"}, description="Assumed Dictionary size for the laplacian smoother. Normally the laplace smoothing should be determined by the total resulting dictionary size. However, assuming a larger dictionary size can cause the algorithm to learn more `eagerly` which can improve results. A good starting value is the total final dictionary size, larger vales might improve the result.")
    private int laplaceDefault = 15000;

    /** Beta value used for the f-measure. */
    @Parameter(names={"-beta"}, description="\u03b2 value to calculate f-Measure with. \u03b2 < 1 value precision over recall, \u03b2 > 1 value recall over precision.")
    private double beta = 0.25;

    /** Number of threads to use for learning. */
    @Parameter(names={"-threadCount"}, description="Number of threads to use for learning.")
    private int threadCount = 4;

    /**
     * Overwrites this instance's settings with the values stored in the "nata" sub-chunk of
     * the given chunk.
     *
     * <p>The values are read in a fixed order: the 64-bit flag set, then {@code iterations},
     * {@code laplaceDefault} and {@code threadCount} as 32-bit integers, and finally
     * {@code beta} as a double. This order must not be changed without changing the writer.
     *
     * @param b the chunk containing a "nata" sub-chunk
     * @return this instance, to allow call chaining
     * @throws IOException if reading from the chunk fails
     */
    public Natalie fromChunk(BinaryChunk b) throws IOException {
        BinaryChunk n = b.readChunk("nata");
        Set<Long> flags = n.readFlags64();
        this.subClasses = flags.contains(1L);
        this.documentPrior = flags.contains(2L);
        this.iterations = n.readI32();
        this.laplaceDefault = n.readI32();
        this.threadCount = n.readI32();
        this.beta = n.readDouble();
        return this;
    }

    /**
     * Returns the assumed dictionary size for the laplacian smoother.
     *
     * @return the configured {@code laplaceDefault} value
     */
    public int getLaplaceDefault() {
        return this.laplaceDefault;
    }

    /**
     * Scores the given word vector with every classifier of a learning result and aggregates
     * the individual scores.
     *
     * <p>Each classifier contributes one {@link ClassificationResults.ClassificationResult}
     * made of its category key and its score; the {@code documentPrior} setting of this
     * instance is passed to every score computation. The results are collected in the
     * iteration order of {@code learned.classifiers.values()}.
     *
     * @param learned the learning result providing the classifiers
     * @param vector the word vector to classify
     * @return the aggregation built by {@code AggregatedClassificationResults.buildNatalieScores}
     */
    public AggregatedClassificationResults classify(NatalieLearningResult learned, WordVector vector) {
        // Typed builder instead of a raw one: no unchecked cast is needed when building the list.
        ImmutableList.Builder<ClassificationResults.ClassificationResult> results = ImmutableList.builder();
        for (BinaryDmnbClassifier nc : learned.classifiers.values()) {
            double score = nc.getScore(vector, this.documentPrior);
            results.add(new ClassificationResults.ClassificationResult(nc.categoryKey, score));
        }
        return AggregatedClassificationResults.buildNatalieScores(vector, results.build(), learned.classifiers);
    }

    /**
     * Returns a diagnostic description listing every configuration value of this instance,
     * i.e. all settings that {@link #fromChunk(BinaryChunk)} restores.
     *
     * @return a human-readable representation of the configuration
     */
    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this)
                .add("iterations", this.iterations)
                .add("subClasses?", this.subClasses)
                .add("documentPrior?", this.documentPrior)
                .add("laplaceDefault", this.laplaceDefault)
                .add("threadCount", this.threadCount)
                .add("beta", this.beta)
                .toString();
    }
}

