package net.doo.datamining.preprocessing;

import com.google.common.collect.ImmutableList;
import java.io.File;
import net.doo.datamining.util.Collections2;

/* loaded from: input_file:net/doo/datamining/preprocessing/WordReader.class */
/**
 * Splits a string into space-separated words and filters them according to the
 * settings exposed by a {@link BagOfWordsFactory}.
 *
 * <p>The {@code filter} and {@code file} constructor arguments are stored but are
 * not read by any method visible in this class.
 */
public class WordReader {
    /** The only character treated as a word separator by {@link #tokenize(String)}. */
    private static final char WORD_SEPARATOR = ' ';

    /** Value of {@code getDropoffFactor()} for which the word list is returned untruncated. */
    private static final int NO_DROPOFF = -1;

    /** Base of the truncation limit; also subtracted from the word count before taking the logarithm. */
    private static final int DROPOFF_BASE = 100;

    private final BagOfWordsFactory bowFactory;
    private final StringFilter filter;
    private final File file;

    /**
     * Creates a reader backed by the given configuration.
     *
     * @param bagOfWordsFactory supplies the minimum word length, the stopwords and the dropoff factor
     * @param stringFilter stored as-is; not used by {@link #tokenize(String)}
     * @param file stored as-is; not used by {@link #tokenize(String)}
     */
    public WordReader(BagOfWordsFactory bagOfWordsFactory, StringFilter stringFilter, File file) {
        this.bowFactory = bagOfWordsFactory;
        this.filter = stringFilter;
        this.file = file;
    }

    /**
     * Tokenizes {@code str} on the space character and returns the accepted words in
     * their order of appearance.
     *
     * <p>A word is a maximal run of characters other than {@code ' '}; other whitespace
     * (tabs, newlines) is NOT a separator. A word is kept when its length is at least
     * {@code bowFactory.getMinWordLength()} and it is not contained in
     * {@code bowFactory.getStopwords()}.
     *
     * <p>If {@code bowFactory.getDropoffFactor()} equals {@code -1} the full list is
     * returned. Otherwise the list is passed through {@code Collections2.take} with the
     * limit {@code 100 + (int) (dropoffFactor * ln(max(1, size - 100)))}, so for lists of
     * up to 101 words the limit is exactly 100.
     * NOTE(review): {@code Collections2.take} presumably returns the first {@code limit}
     * elements and tolerates a limit larger than the list; confirm against its source.
     *
     * @param str the text to split; must not be {@code null}
     * @return the filtered (and possibly truncated) words
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public ImmutableList<String> tokenize(String str) {
        // Parameterized builder: the raw type previously used here made add()/build() unchecked.
        ImmutableList.Builder<String> words = ImmutableList.builder();
        final int length = str.length();
        int pos = 0;
        while (pos < length) {
            if (str.charAt(pos) == WORD_SEPARATOR) {
                // Skip separators one at a time; consecutive spaces never yield empty words.
                pos++;
                continue;
            }
            final int start = pos;
            while (pos < length && str.charAt(pos) != WORD_SEPARATOR) {
                pos++;
            }
            String word = str.substring(start, pos);
            if (word.length() >= this.bowFactory.getMinWordLength()
                    && !this.bowFactory.getStopwords().contains(word)) {
                words.add(word);
            }
        }
        ImmutableList<String> accepted = words.build();
        if (this.bowFactory.getDropoffFactor() == NO_DROPOFF) {
            return accepted;
        }
        // Math.max(1, ...) keeps the logarithm's argument >= 1, so the logarithm is never negative.
        int limit = DROPOFF_BASE
                + ((int) (this.bowFactory.getDropoffFactor() * Math.log(Math.max(1, accepted.size() - DROPOFF_BASE))));
        return Collections2.take(accepted, limit);
    }
}
