package com.hankcs.hanlp.mining.word2vec;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Map;

/* loaded from: classes.dex */
/**
 * Abstract base for a word2vec training corpus.
 *
 * <p>The class keeps a growable vocabulary array ({@link #vocab}) together with a
 * word-to-index map, can sort the vocabulary and drop words whose count is below the
 * configured minimum ({@link #sortVocab()}), and can assign every remaining word a binary
 * code and tree path ({@link #createBinaryTree()}). Subclasses supply the actual token
 * stream through {@link #nextWord()}.
 *
 * <p>Nothing in this class allocates {@link #vocab} or {@link #vocabIndexMap}; they must be
 * set up (presumably by a subclass) before {@link #addWordToVocab(String)} is called.
 */
public abstract class Corpus {
    /** Initial value of {@link #vocabMaxSize} and the amount it grows by on each enlargement. */
    private static final int VOCAB_GROWTH_STEP = 1000;

    /**
     * Size of the scratch buffers used while walking from a leaf towards the root in
     * {@link #createBinaryTree()}. NOTE(review): presumably has to match the length of
     * {@code VocabWord.code} / {@code VocabWord.point} - confirm against VocabWord.
     */
    private static final int MAX_CODE_LENGTH = 40;

    /** Returned by {@link #searchVocab(String)} when the word is null or not in the index map. */
    private static final int WORD_NOT_FOUND = -1;

    /** Returned by {@link #readWordIndex()} when no word was read and {@link #eoc} is set. */
    private static final int WORD_NULL_AT_END_OF_CORPUS = -2;

    /** Returned by {@link #readWordIndex()} when no word was read and {@link #eoc} is not set. */
    private static final int WORD_NULL_NOT_AT_END = -3;

    /** Marker stored in {@link #table} for words dropped by {@link #sortVocab()}. */
    private static final int TABLE_DISCARDED = -4;

    /** File removed by {@link #close()}; never assigned inside this class. */
    public File cacheFile;

    /** Training configuration; this class only reads its minimum word count. */
    public Config config;

    /** Charset for corpus text; defaults to UTF-8 and is not read inside this class. */
    public Charset encoding = Charset.forName("UTF-8");

    /** End-of-corpus flag: starts {@code true}, cleared by {@link #rewind(int, int)}. */
    public boolean eoc = true;

    /**
     * Built by {@link #sortVocab()}: indexed by a word's pre-sort vocabulary index, holds its
     * post-sort index, or {@code -4} if the word was discarded.
     */
    public int[] table;

    /** Sum of the counts of all words kept by {@link #sortVocab()}. */
    public int trainWords = 0;

    /** Vocabulary entries; only the first {@link #vocabSize} slots are in use. */
    public VocabWord[] vocab;

    /** Maps a word to its index in {@link #vocab}; set to {@code null} by {@link #sortVocab()}. */
    public Map<String, Integer> vocabIndexMap;

    /** Capacity that {@link #addWordToVocab(String)} assumes for {@link #vocab}. */
    public int vocabMaxSize = VOCAB_GROWTH_STEP;

    /** Number of words currently in the vocabulary. */
    public int vocabSize;

    /**
     * Creates an empty corpus bound to the given configuration.
     *
     * @param config training configuration
     * @throws IOException never thrown here; declared so subclasses doing I/O can chain to it
     */
    public Corpus(Config config) throws IOException {
        this.config = config;
    }

    /**
     * Creates a corpus that shares vocabulary state with another one.
     *
     * <p>{@code trainWords}, {@code vocabSize}, {@code vocab}, {@code vocabIndexMap} and
     * {@code table} are copied by reference/value. {@code config}, {@code cacheFile} and
     * {@code vocabMaxSize} are <em>not</em> copied and keep their defaults.
     *
     * @param corpus corpus to share state with; must not be null
     * @throws IOException never thrown here; declared so subclasses doing I/O can chain to it
     */
    public Corpus(Corpus corpus) throws IOException {
        this.trainWords = corpus.trainWords;
        this.vocabSize = corpus.vocabSize;
        this.vocab = corpus.vocab;
        this.vocabIndexMap = corpus.vocabIndexMap;
        this.table = corpus.table;
    }

    /**
     * Appends a new word to the vocabulary and registers it in the index map.
     *
     * <p>No duplicate check is performed. When fewer than three free slots would remain
     * (relative to {@link #vocabMaxSize}), the backing array is enlarged by 1000 slots.
     *
     * @param str the word to add
     * @return the index assigned to the new word
     */
    public int addWordToVocab(String str) {
        final int index = this.vocabSize;
        this.vocab[index] = new VocabWord(str);
        this.vocabSize = index + 1;

        if (this.vocabSize + 2 >= this.vocabMaxSize) {
            this.vocabMaxSize += VOCAB_GROWTH_STEP;
            VocabWord[] enlarged = new VocabWord[this.vocabMaxSize];
            System.arraycopy(this.vocab, 0, enlarged, 0, this.vocabSize);
            this.vocab = enlarged;
        }

        this.vocabIndexMap.put(str, Integer.valueOf(index));
        return index;
    }

    /**
     * Shuts the corpus down and removes its cache file, if one was set.
     *
     * @throws IOException if {@link #shutdown()} fails
     */
    public void close() throws IOException {
        shutdown();
        // cacheFile is never assigned in this class, so it may legitimately be null here.
        if (this.cacheFile != null) {
            // Best-effort cleanup: a failed delete is deliberately not reported.
            this.cacheFile.delete();
        }
    }

    /**
     * Builds a Huffman-style binary tree over the word counts and stores, for each word, its
     * code length, its binary code and the inner nodes on its path ({@code codelen},
     * {@code code}, {@code point}).
     *
     * <p>The two-cursor merge below picks the two smallest remaining nodes only if
     * {@code vocab[0..vocabSize)} is ordered by non-increasing count.
     * NOTE(review): presumably guaranteed by calling {@link #sortVocab()} first - confirm
     * against {@code VocabWord.compareTo}.
     */
    public void createBinaryTree() {
        final int size = this.vocabSize;
        final int nodeSlots = (size * 2) + 1;

        int[] pathScratch = new int[MAX_CODE_LENGTH];
        char[] codeScratch = new char[MAX_CODE_LENGTH];
        int[] count = new int[nodeSlots];
        char[] branchBit = new char[nodeSlots];
        int[] parent = new int[nodeSlots];

        // Leaves occupy [0, size); inner nodes are created in [size, 2 * size - 1).
        for (int i = 0; i < size; i++) {
            count[i] = this.vocab[i].cn;
        }
        // Not-yet-created inner nodes compare as "infinitely large".
        for (int i = size; i < size * 2; i++) {
            count[i] = Integer.MAX_VALUE;
        }

        // leafCursor walks the leaves from the last one backwards; innerCursor walks the
        // inner nodes forwards in creation order.
        int leafCursor = size - 1;
        int innerCursor = size;
        for (int merged = 0; merged < size - 1; merged++) {
            final int first;
            if (leafCursor >= 0 && count[leafCursor] < count[innerCursor]) {
                first = leafCursor--;
            } else {
                first = innerCursor++;
            }

            final int second;
            if (leafCursor >= 0 && count[leafCursor] < count[innerCursor]) {
                second = leafCursor--;
            } else {
                second = innerCursor++;
            }

            final int parentIndex = size + merged;
            count[parentIndex] = count[first] + count[second];
            parent[first] = parentIndex;
            parent[second] = parentIndex;
            // The second node picked gets branch bit 1; the first keeps the default 0.
            branchBit[second] = 1;
        }

        // Walk from every leaf up to the root (the last inner node created) and store the
        // collected bits and nodes in reverse, i.e. root-to-leaf order.
        final int root = (size * 2) - 2;
        for (int wordIndex = 0; wordIndex < size; wordIndex++) {
            int node = wordIndex;
            int length = 0;
            do {
                codeScratch[length] = branchBit[node];
                pathScratch[length] = node;
                length++;
                node = parent[node];
            } while (node != root);

            VocabWord entry = this.vocab[wordIndex];
            entry.codelen = length;
            // Path entries are inner-node indices shifted down by size; the root is size - 2.
            entry.point[0] = size - 2;
            for (int k = 0; k < length; k++) {
                entry.code[length - k - 1] = codeScratch[k];
                entry.point[length - k] = pathScratch[k] - size;
            }
        }
    }

    /**
     * @return the current value of the end-of-corpus flag
     */
    public boolean endOfCorpus() {
        return this.eoc;
    }

    /**
     * @return the accumulated count of all retained words
     */
    public int getTrainWords() {
        return this.trainWords;
    }

    /**
     * @return the vocabulary array itself (not a copy)
     */
    public VocabWord[] getVocab() {
        return this.vocab;
    }

    /**
     * @return the word-to-index map itself (not a copy); {@code null} after {@link #sortVocab()}
     */
    public Map<String, Integer> getVocabIndexMap() {
        return this.vocabIndexMap;
    }

    /**
     * @return the number of words currently in the vocabulary
     */
    public int getVocabSize() {
        return this.vocabSize;
    }

    /**
     * Reads the next word from the corpus.
     *
     * @return the next word, or {@code null} when none is available
     * @throws IOException if reading fails
     */
    public abstract String nextWord() throws IOException;

    /**
     * Reads the next word and translates it to a vocabulary index.
     *
     * @return the word's index, {@code -1} if the word is not in the vocabulary, {@code -2}
     *         if no word was read and the end-of-corpus flag is set, or {@code -3} if no
     *         word was read and the flag is not set
     * @throws IOException if reading fails
     */
    public int readWordIndex() throws IOException {
        String word = nextWord();
        if (word != null) {
            return searchVocab(word);
        }
        return this.eoc ? WORD_NULL_AT_END_OF_CORPUS : WORD_NULL_NOT_AT_END;
    }

    /**
     * Clears the end-of-corpus flag. Both arguments are ignored by this base implementation.
     *
     * @param i2 unused here
     * @param i3 unused here
     * @throws IOException never thrown here; declared for overriding implementations
     */
    public void rewind(int i2, int i3) throws IOException {
        this.eoc = false;
    }

    /**
     * Looks a word up in the index map.
     *
     * <p>The map is dereferenced unconditionally for a non-null word, so this throws
     * {@link NullPointerException} once {@link #sortVocab()} has released the map.
     *
     * @param str the word to look up; may be null
     * @return the word's index, or {@code -1} if {@code str} is null or unknown
     */
    public int searchVocab(String str) {
        if (str == null) {
            return WORD_NOT_FOUND;
        }
        Integer index = this.vocabIndexMap.get(str);
        return index == null ? WORD_NOT_FOUND : index.intValue();
    }

    /**
     * Adds the word's count to {@link #trainWords}. Despite its name, this implementation
     * does not touch the index map, and the position argument is ignored.
     *
     * @param vocabWord the retained word
     * @param i2 unused here
     */
    public void setVocabIndexMap(VocabWord vocabWord, int i2) {
        this.trainWords += vocabWord.cn;
    }

    /**
     * Releases the index translation table.
     *
     * @throws IOException never thrown here; declared for overriding implementations
     */
    public void shutdown() throws IOException {
        this.table = null;
    }

    /**
     * Sorts the vocabulary, drops words rarer than the configured minimum count, and
     * rebuilds the bookkeeping around it.
     *
     * <p>Afterwards {@link #table} maps each pre-sort index to the post-sort index (or
     * {@code -4} for a dropped word), {@link #trainWords} holds the total count of the
     * retained words, {@link #vocabIndexMap} is released, and {@link #vocab} is trimmed to
     * exactly {@link #vocabSize} entries.
     *
     * <p>NOTE(review): trimming assumes the sort places the dropped (low-count) words after
     * all retained ones - confirm against {@code VocabWord.compareTo}.
     */
    public void sortVocab() {
        Arrays.sort(this.vocab, 0, this.vocabSize);

        final int sortedCount = this.vocabSize;
        this.trainWords = 0;
        this.table = new int[sortedCount];
        for (int newIndex = 0; newIndex < sortedCount; newIndex++) {
            VocabWord entry = this.vocab[newIndex];
            if (entry.cn < this.config.getMinCount()) {
                this.table[this.vocabIndexMap.get(entry.word).intValue()] = TABLE_DISCARDED;
                this.vocabSize--;
            } else {
                this.table[this.vocabIndexMap.get(entry.word).intValue()] = newIndex;
                setVocabIndexMap(entry, newIndex);
            }
        }

        // The map is only needed to translate old indices above; release it.
        this.vocabIndexMap = null;
        // Keep only the retained prefix. Previously the trimmed copy was built and then
        // thrown away, so the array kept its discarded tail and spare capacity.
        this.vocab = Arrays.copyOf(this.vocab, this.vocabSize);
    }
}
