/*
 * Decompiled with CFR 0.152.
 */
package net.doo.datamining.preprocessing;

import com.google.common.collect.ImmutableList;
import java.io.File;
import net.doo.datamining.preprocessing.BagOfWordsFactory;
import net.doo.datamining.preprocessing.StringFilter;
import net.doo.datamining.util.Collections2;

/**
 * Splits text into space-separated words and filters them according to the
 * settings of a {@link BagOfWordsFactory}.
 *
 * <p>NOTE(review): this class declares {@code Iterable<String>} but no
 * {@code iterator()} method is present in this decompiled source, and the
 * {@code filter} and {@code file} fields are stored but never read here.
 * Presumably the iterator was lost during decompilation -- restore it from the
 * original sources.
 */
public class WordReader
implements Iterable<String> {
    /** Base count passed to {@code Collections2.take} before the logarithmic drop-off term is added. */
    private static final int BASE_WORD_COUNT = 100;
    /** Drop-off factor value for which {@link #tokenize(String)} returns the word list untruncated. */
    private static final int NO_DROPOFF = -1;

    private final BagOfWordsFactory bowFactory;
    private final StringFilter filter;
    private final File file;

    /**
     * Creates a reader; the arguments are stored as given, without validation.
     *
     * @param bowFactory supplies the minimum word length, stopwords and drop-off factor used by {@link #tokenize(String)}
     * @param filter stored only; not used by the code visible in this class
     * @param file stored only; not used by the code visible in this class
     */
    public WordReader(BagOfWordsFactory bowFactory, StringFilter filter, File file) {
        this.bowFactory = bowFactory;
        this.filter = filter;
        this.file = file;
    }

    /**
     * Splits {@code c} on the space character ({@code ' '} only; runs of spaces
     * produce no empty words) and keeps, in order of appearance, every word that
     * is at least {@code bowFactory.getMinWordLength()} characters long and is not
     * contained in {@code bowFactory.getStopwords()}.
     *
     * <p>If the drop-off factor is {@code -1} the full list is returned. Otherwise
     * the list is handed to {@code Collections2.take} with a count of
     * {@code 100 + (int) (factor * ln(max(1, size - 100)))}.
     *
     * @param c the text to tokenize
     * @return the accepted words, possibly truncated as described above
     * @throws NullPointerException if {@code c} is {@code null}
     */
    public ImmutableList<String> tokenize(String c) {
        ImmutableList.Builder<String> accepted = ImmutableList.builder();
        int length = c.length();
        int pos = 0;
        while (pos < length) {
            // Skip separators one at a time so consecutive spaces never yield empty words.
            if (c.charAt(pos) == ' ') {
                ++pos;
                continue;
            }
            int start = pos;
            while (pos < length && c.charAt(pos) != ' ') {
                ++pos;
            }
            String word = c.substring(start, pos);
            if (word.length() >= this.bowFactory.getMinWordLength() && !this.bowFactory.getStopwords().contains(word)) {
                accepted.add(word);
            }
        }
        ImmutableList<String> words = accepted.build();
        if (this.bowFactory.getDropoffFactor() == NO_DROPOFF) {
            return words;
        }
        // max(1, ...) keeps the logarithm at zero (never negative or undefined) for lists of
        // at most BASE_WORD_COUNT + 1 words, so the requested count is then exactly BASE_WORD_COUNT.
        int extraWords = (int)((double)this.bowFactory.getDropoffFactor() * Math.log(Math.max(1, words.size() - BASE_WORD_COUNT)));
        return Collections2.take(words, BASE_WORD_COUNT + extraWords);
    }
}

