package com.hankcs.hanlp.mining.word;

import com.hankcs.hanlp.algorithm.MaxHeap;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.summary.KeywordExtractor;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import com.hankcs.hanlp.utility.Predefine;
import e.b.a.a.a;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: classes.dex */
/**
 * Accumulates per-document term frequencies and derives TF-IDF scores from them.
 *
 * <p>Documents are registered with one of the {@code add} overloads. {@link #compute()} then
 * builds the IDF table (unless one was loaded with {@link #loadIdfFile(String)}) and the
 * per-document TF-IDF maps. The cached TF-IDF maps are discarded whenever a document is added or
 * a new IDF table is loaded, and are rebuilt on demand.
 *
 * <p>This class performs no synchronization of its own.
 */
public class TfIdfCounter extends KeywordExtractor {
    /** Whether {@link #preprocess(String)} runs the inherited {@code filter} on segmented text. */
    private final boolean filterStopWord;
    /** Term to IDF weight; {@code null} means it has to be (re)built from {@link #tfMap}. */
    private Map<String, Double> idf;
    /** Document id to the term-frequency map produced by {@code TfIdf.tf}. */
    private final Map<Object, Map<String, Double>> tfMap;
    /** Document id to TF-IDF map; {@code null} means it has to be (re)built by {@link #compute()}. */
    private Map<Object, Map<String, Double>> tfidfMap;

    /** Creates a counter using {@code StandardTokenizer.SEGMENT} with filtering enabled. */
    public TfIdfCounter() {
        this(true);
    }

    /**
     * Creates a counter using the given segmenter with filtering enabled.
     *
     * @param segment segmenter handed to the superclass
     */
    public TfIdfCounter(Segment segment) {
        this(segment, true);
    }

    /**
     * Creates a counter using the given segmenter.
     *
     * @param segment segmenter handed to the superclass
     * @param z       {@code true} to run the inherited {@code filter} on segmented text
     */
    public TfIdfCounter(Segment segment, boolean z) {
        super(segment);
        this.filterStopWord = z;
        this.tfMap = new HashMap<Object, Map<String, Double>>();
    }

    /**
     * Creates a counter using {@code StandardTokenizer.SEGMENT}.
     *
     * @param z {@code true} to run the inherited {@code filter} on segmented text
     */
    public TfIdfCounter(boolean z) {
        this(StandardTokenizer.SEGMENT, z);
    }

    /** Extracts the {@code word} field of every term, preserving order. */
    private static List<String> convert(List<Term> list) {
        List<String> words = new ArrayList<String>(list.size());
        for (Term term : list) {
            words.add(term.word);
        }
        return words;
    }

    /** Copies the entries, truncating each value towards zero via {@link Double#intValue()}. */
    private static List<Map.Entry<String, Integer>> doubleToInteger(List<Map.Entry<String, Double>> list) {
        List<Map.Entry<String, Integer>> result = new ArrayList<Map.Entry<String, Integer>>(list.size());
        for (Map.Entry<String, Double> entry : list) {
            result.add(new AbstractMap.SimpleEntry<String, Integer>(
                    entry.getKey(), Integer.valueOf(entry.getValue().intValue())));
        }
        return result;
    }

    /** Segments the text and, when filtering is enabled, applies the inherited {@code filter}. */
    private List<Term> preprocess(String str) {
        List<Term> terms = this.defaultSegment.seg(str);
        if (this.filterStopWord) {
            filter(terms);
        }
        return terms;
    }

    /** Returns the entries of {@code map} ordered by value, largest first. */
    private static List<Map.Entry<String, Double>> sort(Map<String, Double> map) {
        List<Map.Entry<String, Double>> entries = new ArrayList<Map.Entry<String, Double>>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Double>>() {
            @Override
            public int compare(Map.Entry<String, Double> left, Map.Entry<String, Double> right) {
                // Operands are swapped to obtain descending order.
                return right.getValue().compareTo(left.getValue());
            }
        });
        return entries;
    }

    /**
     * Feeds every entry of {@code map} through a {@code MaxHeap} of capacity {@code i2} that
     * compares entries by value, and returns the heap's list view.
     */
    private List<Map.Entry<String, Double>> topN(Map<String, Double> map, int i2) {
        MaxHeap<Map.Entry<String, Double>> heap = new MaxHeap<Map.Entry<String, Double>>(
                i2, new Comparator<Map.Entry<String, Double>>() {
                    @Override
                    public int compare(Map.Entry<String, Double> left, Map.Entry<String, Double> right) {
                        return left.getValue().compareTo(right.getValue());
                    }
                });
        heap.addAll(map.entrySet());
        return heap.toList();
    }

    /**
     * Adds a document, using the current number of stored documents as its id.
     *
     * @param str raw document text
     * @return the id assigned to the document
     */
    public int add(String str) {
        int id = this.tfMap.size();
        add(Integer.valueOf(id), str);
        return id;
    }

    /**
     * Adds a document from raw text under the given id.
     *
     * @param obj document id
     * @param str raw document text
     */
    public void add(Object obj, String str) {
        add(obj, preprocess(str));
    }

    /**
     * Adds an already segmented document under the given id, replacing any document stored under
     * the same id. Both derived tables are discarded because their input has changed.
     *
     * @param obj  document id
     * @param list terms of the document
     */
    public void add(Object obj, List<Term> list) {
        this.tfMap.put(obj, TfIdf.tf(convert(list)));
        this.idf = null;
        // Without this reset getKeywordsOf would keep serving scores computed before this add.
        this.tfidfMap = null;
    }

    /**
     * Adds an already segmented document, using the current number of stored documents as its id.
     *
     * @param list terms of the document
     */
    public void add(List<Term> list) {
        add(Integer.valueOf(this.tfMap.size()), list);
    }

    /**
     * Sums the term frequencies of all stored documents.
     *
     * @return a new map from term to the sum of its per-document frequencies
     */
    public Map<String, Double> allTf() {
        Map<String, Double> totals = new HashMap<String, Double>();
        for (Map<String, Double> documentTf : this.tfMap.values()) {
            for (Map.Entry<String, Double> entry : documentTf.entrySet()) {
                Double previous = totals.get(entry.getKey());
                if (previous == null) {
                    totals.put(entry.getKey(), entry.getValue());
                } else {
                    totals.put(entry.getKey(),
                            Double.valueOf(entry.getValue().doubleValue() + previous.doubleValue()));
                }
            }
        }
        return totals;
    }

    /**
     * Builds the TF-IDF map of every stored document. The IDF table is derived from the stored
     * documents unless one is already present (for example loaded from a file).
     *
     * @return document id to TF-IDF map; the same instance is kept as this counter's cache
     */
    public Map<Object, Map<String, Double>> compute() {
        if (this.idf == null) {
            this.idf = TfIdf.idfFromTfs(this.tfMap.values());
        }
        // One entry per document, so presize from the document count rather than the vocabulary.
        Map<Object, Map<String, Double>> result =
                new HashMap<Object, Map<String, Double>>(this.tfMap.size());
        for (Map.Entry<Object, Map<String, Double>> entry : this.tfMap.entrySet()) {
            result.put(entry.getKey(), TfIdf.tfIdf(entry.getValue(), this.idf));
        }
        this.tfidfMap = result;
        return result;
    }

    /**
     * @return the ids of all stored documents (a live view of the internal map's key set)
     */
    public Set<Object> documents() {
        return this.tfMap.keySet();
    }

    /**
     * Returns the terms selected by {@link #getKeywordsWithTfIdf(List, int)}, without scores.
     *
     * @param list terms of the document to analyse
     * @param i2   maximum number of keywords
     * @return the selected terms
     */
    @Override
    public List<String> getKeywords(List<Term> list, int i2) {
        List<Map.Entry<String, Double>> scored = getKeywordsWithTfIdf(list, i2);
        List<String> keywords = new ArrayList<String>(scored.size());
        for (Map.Entry<String, Double> entry : scored) {
            keywords.add(entry.getKey());
        }
        return keywords;
    }

    /**
     * Returns up to 10 keywords of a stored document.
     *
     * @param obj document id
     * @return scored keywords, or {@code null} if no document is stored under that id
     */
    public List<Map.Entry<String, Double>> getKeywordsOf(Object obj) {
        return getKeywordsOf(obj, 10);
    }

    /**
     * Returns up to {@code i2} keywords of a stored document, computing the TF-IDF tables first
     * if they are missing or were invalidated.
     *
     * @param obj document id
     * @param i2  maximum number of keywords
     * @return scored keywords, or {@code null} if no document is stored under that id
     */
    public List<Map.Entry<String, Double>> getKeywordsOf(Object obj, int i2) {
        if (this.tfidfMap == null) {
            // Previously this dereferenced a null map when compute() had not been called yet.
            compute();
        }
        Map<String, Double> scores = this.tfidfMap.get(obj);
        if (scores == null) {
            return null;
        }
        return topN(scores, i2);
    }

    /**
     * Scores the given text against the current IDF table.
     *
     * @param str raw text
     * @param i2  maximum number of keywords
     * @return scored keywords
     */
    public List<Map.Entry<String, Double>> getKeywordsWithTfIdf(String str, int i2) {
        return getKeywordsWithTfIdf(preprocess(str), i2);
    }

    /**
     * Scores the given terms against the current IDF table, building the table from the stored
     * documents first if none is present.
     *
     * @param list terms to score
     * @param i2   maximum number of keywords
     * @return scored keywords
     */
    public List<Map.Entry<String, Double>> getKeywordsWithTfIdf(List<Term> list, int i2) {
        if (this.idf == null) {
            compute();
        }
        return topN(TfIdf.tfIdf(TfIdf.tf(convert(list)), this.idf), i2);
    }

    /**
     * @return the internal document id to term-frequency map (not a copy)
     */
    public Map<Object, Map<String, Double>> getTfMap() {
        return this.tfMap;
    }

    /**
     * Replaces the IDF table with the contents of a UTF-8 text file holding one
     * {@code term<space>weight} pair per line. A leading byte-order mark is stripped and blank
     * lines are skipped. The current table is left untouched if loading fails.
     *
     * @param str path passed to {@code IOUtil.newInputStream}
     * @throws RuntimeException if the file cannot be read or a non-blank line is malformed; the
     *                          underlying exception is attached as the cause
     */
    public void loadIdfFile(String str) {
        // Load into a local map so a failure cannot leave a half-filled table behind.
        Map<String, Double> loaded = new HashMap<String, Double>();
        try {
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(IOUtil.newInputStream(str), "UTF-8"));
            try {
                boolean firstLine = true;
                String line;
                while ((line = reader.readLine()) != null) {
                    if (firstLine) {
                        firstLine = false;
                        // U+FEFF is the byte-order mark some editors put at the start of UTF-8 files.
                        if (!line.isEmpty() && line.charAt(0) == '\uFEFF') {
                            line = line.substring(1);
                        }
                    }
                    if (line.isEmpty()) {
                        // A blank line (typically at the end of the file) carries no entry.
                        continue;
                    }
                    String[] pair = line.split(" ");
                    loaded.put(pair[0], Double.valueOf(pair[1]));
                }
            } finally {
                // Close on every path; before, a read or parse error leaked the stream.
                reader.close();
            }
        } catch (Exception e2) {
            Predefine.logger.warning("加载" + str + "失败，" + e2);
            // NOTE(review): a.u looks like a decompiler-generated string-concatenation helper;
            // it is kept so the message stays identical - confirm and inline if so.
            throw new RuntimeException(a.u("载入反文档词频文件", str, "失败"), e2);
        }
        this.idf = loaded;
        // Scores cached from the previous IDF table no longer apply.
        this.tfidfMap = null;
    }

    /**
     * @return the entries of {@link #allTf()} ordered by frequency, largest first
     */
    public List<Map.Entry<String, Double>> sortedAllTf() {
        return sort(allTf());
    }

    /**
     * @return {@link #sortedAllTf()} with every frequency truncated to an {@code Integer}
     */
    public List<Map.Entry<String, Integer>> sortedAllTfInt() {
        return doubleToInteger(sortedAllTf());
    }
}
