/*
 * Decompiled with CFR 0.152.
 */
package net.doo.datamining.preprocessing;

import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.io.IOException;
import java.util.HashSet;
import net.doo.datamining.io.BinaryChunk;
import net.doo.datamining.preprocessing.BagOfWords;
import net.doo.datamining.util.HashMapInt;

/**
 * Vocabulary with a fixed word order, a reverse lookup from word to position,
 * and one inverse-document-frequency (IDF) weight per word.
 *
 * <p>An instance is either fully built by
 * {@link #Dictionary(ImmutableCollection, Iterable)} or created empty with
 * {@link #Dictionary()} and then populated through {@link #fromChunk(BinaryChunk)}.
 * Until one of those has completed, all three getters return {@code null}.
 */
public class Dictionary {
    /** Words in index order; position {@code n} is the index of the word. */
    private ImmutableList<String> indexedWords;
    /** Reverse lookup of {@link #indexedWords}: word to its position. */
    private ImmutableMap<String, Integer> wordToIndex;
    /** IDF weight per word, parallel to {@link #indexedWords}. */
    private ImmutableList<Double> indexedIdf;

    /**
     * Builds a dictionary from a vocabulary and a document collection.
     *
     * @param words the vocabulary; its iteration order defines the word indices
     * @param documents the documents used to compute the IDF weights
     * @throws IllegalArgumentException if {@code words} contains the same word
     *     more than once (the word-to-index map cannot hold duplicate keys)
     */
    public Dictionary(ImmutableCollection<String> words, Iterable<BagOfWords> documents) {
        this.indexedWords = ImmutableList.copyOf(words);
        this.wordToIndex = Dictionary.buildWordToIndex(this.indexedWords);
        this.indexedIdf = Dictionary.buildIdf(this.indexedWords, documents);
    }

    /**
     * Creates an empty dictionary whose fields are all {@code null}; intended to
     * be populated with {@link #fromChunk(BinaryChunk)}.
     */
    public Dictionary() {
    }

    /**
     * @return the words in index order, or {@code null} if not yet populated
     */
    public ImmutableList<String> getIndexedWords() {
        return this.indexedWords;
    }

    /**
     * @return the map from word to its index, or {@code null} if not yet populated
     */
    public ImmutableMap<String, Integer> getWordToIndex() {
        return this.wordToIndex;
    }

    /**
     * @return the IDF weights in index order, or {@code null} if not yet populated
     */
    public ImmutableList<Double> getIndexedIdf() {
        return this.indexedIdf;
    }

    /**
     * Maps every word to its position in the given list.
     *
     * @param words the words in index order
     * @return an immutable map from word to position
     * @throws IllegalArgumentException if a word occurs more than once
     */
    private static ImmutableMap<String, Integer> buildWordToIndex(ImmutableList<String> words) {
        ImmutableMap.Builder<String, Integer> builder = ImmutableMap.builder();
        for (int n = 0; n < words.size(); ++n) {
            builder.put(words.get(n), n);
        }
        return builder.build();
    }

    /**
     * Computes one IDF weight per indexed word as
     * {@code ln(documentCount / (documentFrequency + 1))}, where the document
     * frequency is the number of documents in which the word occurs at least once.
     *
     * @param indexedWords the words to compute weights for, in index order
     * @param documents the documents to count word occurrences in
     * @return the weights, parallel to {@code indexedWords}
     */
    private static ImmutableList<Double> buildIdf(ImmutableList<String> indexedWords, Iterable<BagOfWords> documents) {
        HashMapInt<String> documentCounter = new HashMapInt<String>();
        int documentCount = 0;
        for (BagOfWords document : documents) {
            // De-duplicate first so a word counts once per document no matter
            // how often it occurs inside that document.
            HashSet<String> wordSet = new HashSet<String>();
            for (String word : document.getWords()) {
                wordSet.add(word);
            }
            for (String word : wordSet) {
                // NOTE(review): presumably adds 1 to the counter stored for the
                // word - confirm against HashMapInt.sum.
                documentCounter.sum(word, 1);
            }
            ++documentCount;
        }
        ImmutableList.Builder<Double> idf = ImmutableList.builder();
        for (String word : indexedWords) {
            // The +1.0 keeps the denominator non-zero.
            // NOTE(review): relies on HashMapInt.get yielding a usable count for
            // words that never occurred in any document - confirm.
            idf.add(Math.log((double)documentCount / ((double)documentCounter.get(word) + 1.0)));
        }
        return idf.build();
    }

    /**
     * Replaces the contents of this dictionary with the vocabulary stored in the
     * {@code "vocb"} sub-chunk and the IDF weights stored in the {@code "idf "}
     * sub-chunk of the given chunk.
     *
     * <p>The fields of this instance are only assigned after both sub-chunks
     * have been read and validated, so a failure leaves the previous state
     * untouched.
     *
     * @param bc the chunk containing the {@code "vocb"} and {@code "idf "} sub-chunks
     * @return this instance
     * @throws IOException if reading fails, if the declared number of IDF entries
     *     differs from the vocabulary size, or if the vocabulary contains the
     *     same word more than once
     */
    public Dictionary fromChunk(BinaryChunk bc) throws IOException {
        BinaryChunk v = bc.readChunk("vocb");
        int vocabularySize = v.readI32();
        ImmutableList.Builder<String> indexedWordsBuilder = ImmutableList.builder();
        for (int n = 0; n < vocabularySize; ++n) {
            indexedWordsBuilder.add(v.readString());
        }
        BinaryChunk i = bc.readChunk("idf ");
        int idfSize = i.readI32();
        if (idfSize != vocabularySize) {
            throw new IOException("Broken IDF chunk. Expected " + vocabularySize + " entries matching vocabulary but " + idfSize + " entries were declared at the beginning of the chunk.");
        }
        ImmutableList.Builder<Double> indexedIdfBuilder = ImmutableList.builder();
        for (int n = 0; n < idfSize; ++n) {
            indexedIdfBuilder.add(i.readDouble());
        }

        // Build everything in locals first; the fields are touched only once all
        // three structures exist, so this instance is never left half-updated.
        ImmutableList<String> words = indexedWordsBuilder.build();
        ImmutableMap<String, Integer> index;
        try {
            index = Dictionary.buildWordToIndex(words);
        } catch (IllegalArgumentException e) {
            // Duplicate keys mean the stored vocabulary is corrupt; report it the
            // same way as the other malformed-chunk condition.
            throw new IOException("Broken vocabulary chunk. The same word is stored more than once.", e);
        }
        ImmutableList<Double> idf = indexedIdfBuilder.build();

        this.indexedWords = words;
        this.wordToIndex = index;
        this.indexedIdf = idf;
        return this;
    }
}

