/*
 * Decompiled with CFR 0.152.
 */
package net.doo.datamining.preprocessing;

import com.beust.jcommander.ParametersDelegate;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import net.doo.datamining.io.BinaryChunk;
import net.doo.datamining.io.IO;
import net.doo.datamining.preprocessing.BagOfWords;
import net.doo.datamining.preprocessing.BagOfWordsFactory;
import net.doo.datamining.preprocessing.Dictionary;
import net.doo.datamining.preprocessing.DictionaryFactory;
import net.doo.datamining.preprocessing.StringFilter;
import net.doo.datamining.preprocessing.WordVector;
import net.doo.datamining.util.Pair;
import org.apache.log4j.Logger;

/**
 * Bundles the three preprocessing components (string filter, bag-of-words
 * factory and dictionary factory) so they can be configured, restored from a
 * {@link BinaryChunk} and applied to input files together.
 */
public class PreprocessingConfiguration {
    private static final Logger log = Logger.getLogger(PreprocessingConfiguration.class);

    /** Category given to files that are the input itself or lie directly inside it. */
    private static final String DEFAULT_CATEGORY = "0";

    /** String filter handed to the bag-of-words factory; exposed to JCommander as a delegate. */
    @ParametersDelegate
    public final StringFilter filter = new StringFilter();

    /** Factory that turns files into {@link BagOfWords}; exposed to JCommander as a delegate. */
    @ParametersDelegate
    public final BagOfWordsFactory bowFactory = new BagOfWordsFactory();

    /** Factory that builds {@link WordVector}s from bags of words; exposed to JCommander as a delegate. */
    @ParametersDelegate
    public final DictionaryFactory dictionaryFactory = new DictionaryFactory();

    /**
     * Restores all three components from the given chunk, in the order
     * filter, bag-of-words factory, dictionary factory.
     *
     * @param bc chunk to read the component state from
     * @return this instance, to allow call chaining
     * @throws IOException if one of the components fails to read from the chunk
     */
    public PreprocessingConfiguration fromChunk(BinaryChunk bc) throws IOException {
        this.filter.fromChunk(bc);
        this.bowFactory.fromChunk(bc);
        this.dictionaryFactory.fromChunk(bc);
        return this;
    }

    /**
     * Reads the given files as bags of words and converts each one into a word
     * vector against {@code dictionary}. Vectors are normalized when the
     * dictionary factory reports {@code isNormalizeWordVector()}.
     *
     * @param files      files or directories to read (see {@link #readBagsOfWords(Collection)})
     * @param dictionary dictionary passed to the dictionary factory for each vector
     * @return one word vector per bag of words, in reading order
     * @throws IOException if reading the input fails
     */
    public ImmutableList<WordVector> getClassificationData(Collection<File> files, Dictionary dictionary) throws IOException {
        ImmutableList.Builder<WordVector> result = ImmutableList.builder();
        for (BagOfWords bow : this.readBagsOfWords(files)) {
            WordVector wv = this.dictionaryFactory.buildWordVector(bow, dictionary);
            if (this.dictionaryFactory.isNormalizeWordVector()) {
                wv = wv.normalize();
            }
            result.add(wv);
        }
        return result.build();
    }

    /**
     * Walks every entry of {@code files} with {@code IO.listFilesRecursive} and
     * preprocesses each file that {@code IO.mightBeText} accepts into a
     * {@link BagOfWords}.
     *
     * <p>The category of a file is {@code "0"} when the file is the entry itself
     * or lies directly inside it; otherwise it is the name of the first
     * directory below the entry on the way to the file.
     *
     * @param files files or directories to read
     * @return the bags of words of all accepted files, in traversal order
     * @throws IOException if listing, probing or preprocessing a file fails
     */
    public ImmutableList<BagOfWords> readBagsOfWords(Collection<File> files) throws IOException {
        ImmutableList.Builder<BagOfWords> result = ImmutableList.builder();
        for (File toClassify : files) {
            for (Pair<File, String> pair : IO.listFilesRecursive(toClassify)) {
                File file = pair.fst;
                // Filter first so no category is derived for files that are skipped anyway.
                if (!IO.mightBeText(file)) {
                    continue;
                }
                String category = categoryOf(toClassify, file);
                // NOTE(review): the meaning of the trailing 'true' flag is defined by
                // BagOfWordsFactory.preprocess and is not visible here.
                BagOfWords bow = this.bowFactory.preprocess(Pair.of(category, ""), file, pair.snd + "/" + file.getName(), this.filter, true);
                result.add(bow);
            }
        }
        return result.build();
    }

    /**
     * Derives the category of {@code file} relative to the input location {@code root}.
     *
     * <p>Uses {@link File#separatorChar} rather than a hard-coded {@code '/'} so
     * the lookup also works where absolute paths use a different separator.
     * Assumes {@code file}'s absolute path starts with {@code root}'s absolute
     * path, as is expected for results of listing {@code root} recursively.
     */
    private static String categoryOf(File root, File file) {
        // root.equals(...) is null-safe should the file have no parent.
        if (file.equals(root) || root.equals(file.getParentFile())) {
            return DEFAULT_CATEGORY;
        }
        String relativePath = file.getAbsolutePath().substring(root.getAbsolutePath().length());
        // The relative part normally starts with a separator; it does not when the
        // root's absolute path already ends with one (e.g. a filesystem root).
        int start = !relativePath.isEmpty() && relativePath.charAt(0) == File.separatorChar ? 1 : 0;
        int end = relativePath.indexOf(File.separatorChar, start);
        // No further separator means there is no intermediate directory to name.
        return end < 0 ? DEFAULT_CATEGORY : relativePath.substring(start, end);
    }

    /** Returns a string listing the three components via Guava's {@code ToStringHelper}. */
    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this)
                .addValue(this.filter)
                .addValue(this.bowFactory)
                .addValue(this.dictionaryFactory)
                .toString();
    }
}

