package com.hankcs.hanlp.mining.cluster;

import com.google.android.material.shadow.ShadowDrawableWrapper;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.classification.utilities.io.ConsoleLogger;
import com.hankcs.hanlp.classification.utilities.io.ILogger;
import com.hankcs.hanlp.collection.trie.datrie.MutableDoubleArrayTrieInteger;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.MathUtility;
import e.b.a.a.a;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;

/* loaded from: classes.dex */
/**
 * Clusters text documents represented as sparse term-frequency vectors.
 *
 * <p>Documents are registered with {@link #addDocument(Object, String)} (raw text, segmented
 * and filtered by {@link #preprocess(String)}) or {@link #addDocument(Object, List)} (already
 * tokenised), and are then grouped with {@link #kmeans(int)} or one of the
 * {@code repeatedBisection} overloads. Both strategies delegate the initial partitioning to
 * {@code Cluster.section} and then improve it with {@link #refine_clusters(List)}.
 *
 * @param <K> type of the identifier attached to every document
 */
public class ClusterAnalyzer<K> {
    /** Maximum number of refinement passes performed by {@link #refine_clusters(List)}. */
    public static final int NUM_REFINE_LOOP = 30;

    /** Every registered document, keyed by its identifier. */
    public HashMap<K, Document<K>> documents_ = new HashMap<>();

    /** Segmenter used by {@link #preprocess(String)} to split raw text into terms. */
    public Segment segment = HanLP.newSegment();

    /** Word to feature-id mapping filled lazily by {@link #id(String)}. */
    public MutableDoubleArrayTrieInteger vocabulary = new MutableDoubleArrayTrieInteger();

    /**
     * Runs a clustering algorithm over a labelled corpus and scores the result.
     *
     * <p>Every sub-directory of {@code folderPath} is one category and every entry inside it
     * is one document of that category. After clustering, each category is given the best
     * F-measure it reaches against any single cluster (precision = share of the cluster that
     * belongs to the category, recall = share of the category found in the cluster). The
     * returned score is the sum of those per-category values weighted by the category's share
     * of all documents.
     *
     * @param folderPath root directory of the corpus
     * @param algorithm  algorithm name; after removing {@code '-'} and whitespace and
     *                   lower-casing, {@code "kmeans"} selects {@link #kmeans(int)}, anything
     *                   else selects {@link #repeatedBisection(int)}
     * @return the weighted F-measure, or {@code 1.0} when the directory cannot be listed
     * @throws IllegalArgumentException if {@code folderPath} is {@code null}, does not exist
     *                                  or is not a directory
     */
    public static double evaluate(String folderPath, String algorithm) {
        if (folderPath == null) {
            throw new IllegalArgumentException("参数 folderPath == null");
        }
        File root = new File(folderPath);
        if (!root.exists()) {
            throw new IllegalArgumentException(String.format("目录 %s 不存在", root.getAbsolutePath()));
        }
        if (!root.isDirectory()) {
            throw new IllegalArgumentException(String.format("目录 %s 不是一个目录", root.getAbsolutePath()));
        }
        ClusterAnalyzer<String> analyzer = new ClusterAnalyzer<>();
        File[] folders = root.listFiles();
        if (folders == null) {
            return 1.0d;
        }

        final ILogger logger = ConsoleLogger.logger;
        logger.start("根目录:%s\n加载中...\n", folderPath);

        // Sized for the worst case (every entry is a category); only the first
        // categoryCount slots are ever used.
        int[] docsPerCategory = new int[folders.length];
        // Remembers the category index of every document id so that scoring does not have to
        // guess it back from the id text (a category name may be a prefix of another one).
        Map<String, Integer> categoryOfDocument = new HashMap<>();
        int categoryCount = 0;
        int totalDocs = 0;

        for (File folder : folders) {
            if (folder.isFile()) {
                continue;
            }
            File[] files = folder.listFiles();
            if (files == null) {
                continue;
            }
            String category = folder.getName();
            logger.out("[%s]...", category);
            int fileCount = files.length;
            // Progress is reported roughly 10000 times per category at most.
            int logEvery = (int) Math.ceil(fileCount / 10000.0f);
            for (int i = 0; i < fileCount; i++) {
                String docId = category + " " + files[i].getName();
                analyzer.addDocument(docId, IOUtil.readTxt(files[i].getAbsolutePath()));
                categoryOfDocument.put(docId, categoryCount);
                if (i % logEvery == 0) {
                    // '\r' rewinds to the start of the console line so the percentage updates in place.
                    logger.out("%c[%s]...%.2f%%", '\r', category, Double.valueOf(MathUtility.percentage(i + 1, fileCount)));
                }
                ++totalDocs;
                ++docsPerCategory[categoryCount];
            }
            logger.out(" %d 篇文档\n", fileCount);
            ++categoryCount;
        }
        logger.finish(" 加载了 %d 个类目,共 %d 篇文档\n", categoryCount, totalDocs);

        // The name is passed as an argument so that a '%' inside it cannot corrupt the format.
        logger.start("%s聚类中...", algorithm);
        boolean useKMeans = algorithm.replaceAll("[-\\s]", "").toLowerCase().equals("kmeans");
        // Ask for one cluster per category that was actually loaded; plain files and
        // unreadable entries in the root directory must not inflate that number.
        List<Set<String>> clusters = useKMeans
                ? analyzer.kmeans(categoryCount)
                : analyzer.repeatedBisection(categoryCount);
        logger.finish(" 完毕。\n");

        double[] bestF = new double[categoryCount];
        for (Set<String> cluster : clusters) {
            int[] hits = new int[categoryCount];
            for (String docId : cluster) {
                ++hits[categoryOfDocument.get(docId)];
            }
            for (int c = 0; c < categoryCount; c++) {
                if (hits[c] == 0) {
                    continue;
                }
                double precision = hits[c] / (double) cluster.size();
                double recall = hits[c] / (double) docsPerCategory[c];
                bestF[c] = Math.max(bestF[c], 2.0d * precision * recall / (precision + recall));
            }
        }

        double weightedF = 0.0d;
        for (int c = 0; c < categoryCount; c++) {
            weightedF += bestF[c] * docsPerCategory[c] / totalDocs;
        }
        return weightedF;
    }

    /**
     * Converts clusters into sets of document identifiers.
     *
     * @param clusters clusters to convert
     * @return one set of ids per cluster, in the same order as {@code clusters}
     */
    private List<Set<K>> toResult(List<Cluster<K>> clusters) {
        List<Set<K>> result = new ArrayList<>(clusters.size());
        for (Cluster<K> cluster : clusters) {
            Set<K> ids = new HashSet<>();
            for (Document<K> document : cluster.documents_) {
                ids.add(document.id_);
            }
            result.add(ids);
        }
        return result;
    }

    /**
     * Registers a raw-text document after running it through {@link #preprocess(String)}.
     *
     * @param id       identifier of the document
     * @param document raw text of the document
     * @return the document previously registered under {@code id}, or {@code null} if none
     */
    public Document<K> addDocument(K id, String document) {
        return addDocument(id, preprocess(document));
    }

    /**
     * Registers an already tokenised document.
     *
     * @param id       identifier of the document
     * @param document words of the document; repeated words increase the word's weight
     * @return the document previously registered under {@code id}, or {@code null} if none
     */
    public Document<K> addDocument(K id, List<String> document) {
        SparseVector vector = toVector(document);
        return this.documents_.put(id, new Document<K>(id, vector));
    }

    /**
     * Returns the feature id of a word, assigning the next free id on first sight.
     *
     * @param word the word to look up
     * @return the id stored in {@link #vocabulary}; new words receive the vocabulary size
     *         at the time of insertion
     */
    public int id(String word) {
        int known = this.vocabulary.get(word);
        if (known != -1) {
            return known;
        }
        int assigned = this.vocabulary.size();
        this.vocabulary.put(word, assigned);
        return assigned;
    }

    /**
     * Clusters all registered documents into {@code nclusters} groups.
     *
     * <p>All documents are placed in one cluster, which is split by {@code Cluster.section}
     * and then improved by {@link #refine_clusters(List)}.
     *
     * @param nclusters requested number of clusters; reduced to the number of documents
     *                  (with an error log line) when larger
     * @return the ids of the documents of each cluster
     */
    public List<Set<K>> kmeans(int nclusters) {
        if (nclusters > size()) {
            ConsoleLogger.logger.err("传入聚类数目%d大于文档数量%d，已纠正为文档数量\n", Integer.valueOf(nclusters), Integer.valueOf(size()));
            nclusters = size();
        }
        Cluster<K> root = new Cluster<K>();
        for (Document<K> document : this.documents_.values()) {
            root.add_document(document);
        }
        root.section(nclusters);
        refine_clusters(root.sectioned_clusters());
        List<Cluster<K>> clusters = new ArrayList<>(nclusters);
        for (Cluster<K> section : root.sectioned_clusters()) {
            section.refresh();
            clusters.add(section);
        }
        return toResult(clusters);
    }

    /**
     * Turns raw text into the list of words used as features.
     *
     * <p>The text is segmented with {@link #segment}; terms found in the core stop-word
     * dictionary and terms whose nature starts with {@code "w"} are dropped.
     *
     * @param document raw text
     * @return the surface forms of the remaining terms, in order
     */
    public List<String> preprocess(String document) {
        List<Term> terms = this.segment.seg(document);
        ListIterator<Term> cursor = terms.listIterator();
        while (cursor.hasNext()) {
            Term term = cursor.next();
            if (CoreStopWordDictionary.contains(term.word) || term.nature.startsWith("w")) {
                cursor.remove();
            }
        }
        List<String> words = new ArrayList<>(terms.size());
        for (Term term : terms) {
            words.add(term.word);
        }
        return words;
    }

    /**
     * Iteratively moves documents between clusters while doing so increases the sum of the
     * norms of the clusters' composite vectors.
     *
     * <p>Each pass visits every document in random order, evaluates the gain of moving it to
     * every other cluster and performs the best move when its gain is positive. Passes stop
     * after {@link #NUM_REFINE_LOOP} rounds or as soon as a pass moves nothing.
     *
     * @param clusters clusters to refine; they are modified in place
     * @return the accumulated gain of all moves that were performed
     */
    public double refine_clusters(List<Cluster<K>> clusters) {
        // Cached norm of every cluster's composite vector, kept up to date as documents move.
        double[] norms = new double[clusters.size()];
        int offset = 0;
        for (Cluster<K> cluster : clusters) {
            norms[offset++] = cluster.composite_vector().norm();
        }

        double totalGain = 0.0d;
        for (int loop = 0; loop < NUM_REFINE_LOOP; loop++) {
            // (cluster index, document index) pairs of every document currently assigned.
            List<int[]> items = new ArrayList<>(size());
            for (int c = 0; c < clusters.size(); c++) {
                for (int d = 0; d < clusters.get(c).documents().size(); d++) {
                    items.add(new int[]{c, d});
                }
            }
            Collections.shuffle(items);

            boolean changed = false;
            for (int[] item : items) {
                int sourceIndex = item[0];
                int documentIndex = item[1];
                Cluster<K> source = clusters.get(sourceIndex);
                Document<K> document = source.documents().get(documentIndex);

                // Norm of the source cluster once the document has been taken out.
                double sourceSquared = Math.pow(norms[sourceIndex], 2.0d)
                        + refined_vector_value(source.composite_vector(), document.feature(), -1);
                double sourceNormMoved = sourceSquared > 0.0d ? Math.sqrt(sourceSquared) : 0.0d;

                double bestGain = -1.0d;
                double bestTargetNorm = 0.0d;
                int bestTarget = 0;
                for (int t = 0; t < clusters.size(); t++) {
                    if (t == sourceIndex) {
                        continue;
                    }
                    // Norm of the candidate cluster once the document has been put in.
                    double targetSquared = Math.pow(norms[t], 2.0d)
                            + refined_vector_value(clusters.get(t).composite_vector(), document.feature(), 1);
                    double targetNormMoved = targetSquared > 0.0d ? Math.sqrt(targetSquared) : 0.0d;
                    double gain = ((sourceNormMoved + targetNormMoved) - norms[sourceIndex]) - norms[t];
                    if (bestGain < gain) {
                        bestGain = gain;
                        bestTargetNorm = targetNormMoved;
                        bestTarget = t;
                    }
                }

                if (bestGain > 0.0d) {
                    totalGain += bestGain;
                    clusters.get(bestTarget).add_document(document);
                    clusters.get(sourceIndex).remove_document(documentIndex);
                    norms[sourceIndex] = sourceNormMoved;
                    norms[bestTarget] = bestTargetNorm;
                    changed = true;
                }
            }
            if (!changed) {
                break;
            }
            for (Cluster<K> cluster : clusters) {
                cluster.refresh();
            }
        }
        return totalGain;
    }

    /**
     * Computes how much the squared norm of {@code composite} changes when {@code vec} is
     * added to it ({@code sign = 1}) or removed from it ({@code sign = -1}), i.e.
     * {@code sum(v*v + 2 * sign * c * v)} over the entries {@code v} of {@code vec}, where
     * {@code c} is the matching entry of {@code composite}.
     *
     * @param composite composite vector of a cluster
     * @param vec       feature vector of the document being moved
     * @param sign      {@code 1} for adding the document, {@code -1} for removing it
     * @return the change of the squared norm
     */
    public double refined_vector_value(SparseVector composite, SparseVector vec, int sign) {
        double sum = 0.0d;
        for (Map.Entry<Integer, Double> entry : vec.entrySet()) {
            double value = entry.getValue().doubleValue();
            // The Object cast is kept so the lookup resolves to the same get(...) as before.
            double compositeValue = composite.get((Object) entry.getKey()).doubleValue();
            sum += (value * compositeValue * sign * 2) + Math.pow(value, 2.0d);
        }
        return sum;
    }

    /**
     * Repeated-bisection clustering bounded only by a gain threshold.
     *
     * @param limit_eval minimum sectioned gain a cluster must have to be split further
     * @return the ids of the documents of each cluster
     */
    public List<Set<K>> repeatedBisection(double limit_eval) {
        return repeatedBisection(0, limit_eval);
    }

    /**
     * Repeated-bisection clustering bounded only by a number of clusters.
     *
     * @param nclusters requested number of clusters
     * @return the ids of the documents of each cluster
     */
    public List<Set<K>> repeatedBisection(int nclusters) {
        return repeatedBisection(nclusters, 0.0d);
    }

    /**
     * Repeated-bisection clustering: the cluster at the head of a priority queue is
     * repeatedly replaced by its two halves until a stop condition is met.
     *
     * @param nclusters  requested number of clusters; values {@code <= 0} disable this limit,
     *                   values above the number of documents are reduced to it (with an
     *                   error log line)
     * @param limit_eval when positive, splitting stops once the head cluster's sectioned gain
     *                   falls below this value
     * @return the ids of the documents of each cluster
     */
    public List<Set<K>> repeatedBisection(int nclusters, double limit_eval) {
        if (nclusters > size()) {
            ConsoleLogger.logger.err("传入聚类数目%d大于文档数量%d，已纠正为文档数量\n", Integer.valueOf(nclusters), Integer.valueOf(size()));
            nclusters = size();
        }
        Cluster<K> root = new Cluster<K>();
        List<Cluster<K>> clusters = new ArrayList<>(nclusters > 0 ? nclusters : 16);
        for (Document<K> document : this.documents_.values()) {
            root.add_document(document);
        }

        PriorityQueue<Cluster<K>> queue = new PriorityQueue<>();
        root.section(2);
        refine_clusters(root.sectioned_clusters());
        root.set_sectioned_gain();
        root.composite_vector().clear();
        queue.add(root);

        while (!queue.isEmpty()) {
            if (nclusters > 0 && queue.size() >= nclusters) {
                break;
            }
            Cluster<K> head = queue.peek();
            if (head.sectioned_clusters().size() < 1) {
                break;
            }
            if (limit_eval > 0.0d && head.sectioned_gain() < limit_eval) {
                break;
            }
            queue.poll();
            for (Cluster<K> child : head.sectioned_clusters()) {
                if (child.size() >= 2) {
                    // Pre-compute the child's own bisection so the queue can rank it.
                    child.section(2);
                    refine_clusters(child.sectioned_clusters());
                    child.set_sectioned_gain();
                    if (child.sectioned_gain() < limit_eval) {
                        for (Cluster<K> grandChild : child.sectioned_clusters()) {
                            grandChild.clear();
                        }
                    }
                    child.composite_vector().clear();
                }
                queue.add(child);
            }
        }

        // Each polled cluster is inserted at the front, so the result is in reverse poll order.
        while (!queue.isEmpty()) {
            clusters.add(0, queue.poll());
        }
        return toResult(clusters);
    }

    /**
     * @return the number of registered documents
     */
    public int size() {
        return this.documents_.size();
    }

    /**
     * Builds the term-frequency vector of a tokenised document.
     *
     * @param wordList words of the document
     * @return a vector mapping each word's feature id (see {@link #id(String)}) to its count
     */
    public SparseVector toVector(List<String> wordList) {
        SparseVector vector = new SparseVector();
        for (String word : wordList) {
            Integer featureId = Integer.valueOf(id(word));
            // The Object cast is kept so the lookup resolves to the same get(...) as before.
            Double count = vector.get((Object) featureId);
            double updated = (count == null) ? 1.0d : count.doubleValue() + 1.0d;
            vector.put(featureId, Double.valueOf(updated));
        }
        return vector;
    }
}
