package com.hankcs.hanlp.seg;

import com.hankcs.hanlp.algorithm.Viterbi;
import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CoreDictionaryTransformMatrixDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.seg.NShort.Path.AtomNode;
import com.hankcs.hanlp.seg.common.Graph;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.seg.common.Vertex;
import com.hankcs.hanlp.seg.common.WordNet;
import com.hankcs.hanlp.utility.TextUtility;
import e.b.a.a.a;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;

/* loaded from: classes.dex */
public abstract class WordBasedSegment extends Segment {
    /**
     * Cuts {@code str.substring(i2, i3)} into atomic nodes based on per-character type codes.
     *
     * <p>Type codes are obtained from {@link CharType#get}. The numeric literals used below are
     * inlined constants; their symbolic names are not visible in this file, so they are referred
     * to by value only.
     *
     * <p>Rules applied, in order:
     * <ul>
     *   <li>Characters of type 7, 10, 6 or 17 each become a single-character node.</li>
     *   <li>A character of type 5 or 9 that is not the last character starts a run which absorbs
     *       all directly following characters of the same type; the run becomes one node.</li>
     *   <li>Any other character becomes a single-character node.</li>
     * </ul>
     *
     * @param str the text to segment
     * @param i2  start index (inclusive) into {@code str}
     * @param i3  end index (exclusive) into {@code str}
     * @return the atom nodes in text order
     * @throws RuntimeException if {@code i3 < i2}
     */
    private static List<AtomNode> atomSegment(String str, int i2, int i3) {
        if (i3 < i2) {
            throw new RuntimeException(a.l("start=", i2, " < end=", i3));
        }
        List<AtomNode> atoms = new ArrayList<AtomNode>();
        StringBuilder run = new StringBuilder();
        char[] chars = str.substring(i2, i3).toCharArray();
        int[] types = new int[chars.length];

        // Pass 1: compute a type code per character, with special handling for '.'.
        for (int k = 0; k < chars.length; k++) {
            char c = chars[k];
            types[k] = CharType.get(c);
            boolean dotWithSuccessor = c == '.' && k < chars.length - 1;
            if (dotWithSuccessor && CharType.get(chars[k + 1]) == 9) {
                // A dot directly followed by a type-9 character is treated as type 9 itself,
                // so it joins the surrounding run.
                types[k] = 9;
            } else {
                if (dotWithSuccessor && chars[k + 1] >= '0' && chars[k + 1] <= '9') {
                    types[k] = 5;
                }
                // Type 8 is folded into type 5 so both are grouped by the same run rule.
                if (types[k] == 8) {
                    types[k] = 5;
                }
            }
        }

        // Pass 2: emit nodes.
        int pos = 0;
        while (pos < chars.length) {
            int type = types[pos];
            if (type == 7 || type == 10 || type == 6 || type == 17) {
                atoms.add(new AtomNode(String.valueOf(chars[pos]), type));
                pos++;
            } else if (pos < chars.length - 1 && (type == 5 || type == 9)) {
                run.setLength(0);
                run.append(chars[pos]);
                boolean reachedEnd = true;
                while (pos < chars.length - 1) {
                    pos++;
                    if (types[pos] != type) {
                        reachedEnd = false;
                        break;
                    }
                    run.append(chars[pos]);
                }
                atoms.add(new AtomNode(run.toString(), type));
                // If the run was cut short, pos already points at the first character of a
                // different type; that character still has to be processed, so pos must only
                // be advanced when the run consumed the final character. Advancing
                // unconditionally would silently drop the terminating character.
                if (reachedEnd) {
                    pos++;
                }
            } else {
                atoms.add(new AtomNode(chars[pos], type));
                pos++;
            }
        }
        return atoms;
    }

    /**
     * Confirms the nature {@code Nature.w} on every vertex whose {@code realWord} is one of the
     * dash-like strings {@code "－－"}, {@code "—"} or {@code "-"}.
     *
     * @param list the vertices to inspect; matching vertices are modified in place
     */
    public static void changeDelimiterPOS(List<Vertex> list) {
        Iterator<Vertex> vertices = list.iterator();
        while (vertices.hasNext()) {
            Vertex vertex = vertices.next();
            String word = vertex.realWord;
            boolean dashLike = word.equals("－－") || word.equals("—") || word.equals("-");
            if (dashLike) {
                vertex.confirmNature(Nature.w);
            }
        }
    }

    /**
     * Walks adjacent vertex pairs and, whenever the first of a pair is numeric
     * ({@code TextUtility.isAllNum} or {@code TextUtility.isAllChineseNum}), applies the first
     * matching rule:
     * <ol>
     *   <li>next word is one of 月/日/时/分/秒 or is "月份": both are merged via {@code mergeDate};</li>
     *   <li>next word is "年": merged if {@code TextUtility.isYearTime} accepts the number,
     *       otherwise the number is confirmed as {@code Nature.f5536m};</li>
     *   <li>the number ends with "点": it is confirmed as {@code Nature.t};</li>
     *   <li>its last character is not one of "∶·．／./": it is confirmed as
     *       {@code Nature.f5536m};</li>
     *   <li>otherwise, if longer than one character, the trailing character is split off into a
     *       separate punctuation vertex.</li>
     * </ol>
     * Lists with fewer than two elements are left untouched.
     *
     * @param list the vertices to process; modified in place
     */
    private static void checkDateElements(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        ListIterator<Vertex> cursor = list.listIterator();
        Vertex current = cursor.next();
        while (cursor.hasNext()) {
            Vertex following = cursor.next();
            String word = current.realWord;
            if (TextUtility.isAllNum(word) || TextUtility.isAllChineseNum(word)) {
                String unit = following.realWord;
                if ((unit.length() == 1 && "月日时分秒".contains(unit)) || (unit.length() == 2 && unit.equals("月份"))) {
                    // Rule 1: number followed by a date/time unit.
                    mergeDate(cursor, following, current);
                } else if (unit.equals("年")) {
                    // Rule 2: number followed by the year marker.
                    if (TextUtility.isYearTime(word)) {
                        mergeDate(cursor, following, current);
                    } else {
                        current.confirmNature(Nature.f5536m);
                    }
                } else if (word.endsWith("点")) {
                    // Rule 3.
                    current.confirmNature(Nature.t, true);
                } else {
                    char[] letters = word.toCharArray();
                    char last = letters[letters.length - 1];
                    if ("∶·．／./".indexOf(last) < 0) {
                        // Rule 4.
                        current.confirmNature(Nature.f5536m, true);
                    } else if (word.length() > 1) {
                        // Rule 5: replace the number with its prefix and insert the trailing
                        // separator right after it.
                        Vertex number = Vertex.newNumberInstance(word.substring(0, word.length() - 1));
                        cursor.previous();
                        cursor.previous();
                        cursor.set(number);
                        cursor.next();
                        cursor.add(Vertex.newPunctuationInstance(String.valueOf(last)));
                    }
                }
            }
            current = following;
        }
    }

    /**
     * Converts a vertex list into a term list by delegating to
     * {@code Segment.convert(list, false)}.
     *
     * @param list the vertices to convert
     * @return whatever {@code Segment.convert} returns for {@code list} with the flag set to
     *         {@code false} (the flag's meaning is defined in {@code Segment}, not shown here)
     */
    public static List<Term> convert(List<Vertex> list) {
        return Segment.convert(list, false);
    }

    /**
     * Applies the rule-based post-processing passes to a vertex list, in this fixed order:
     * merging consecutive numbers, re-tagging dash-like delimiters, splitting
     * "number-number" words, and finally handling date/time elements.
     *
     * @param list the vertices to fix; modified in place
     */
    public static void fixResultByRule(List<Vertex> list) {
        mergeContinueNumIntoOne(list);
        changeDelimiterPOS(list);
        splitMiddleSlashFromDigitalWords(list);
        checkDateElements(list);
    }

    /**
     * Builds a graph from a word net by delegating to {@code WordNet.toGraph()}.
     *
     * @param wordNet the word net to convert
     * @return the graph returned by {@code wordNet.toGraph()}
     */
    public static Graph generateBiGraph(WordNet wordNet) {
        return wordNet.toGraph();
    }

    /**
     * Runs {@link #fixResultByRule} on the vertex list and then adds all resulting vertices to
     * the given word net.
     *
     * @param list    the vertices to fix (modified in place) and add
     * @param wordNet the word net receiving the fixed vertices via {@code addAll}
     */
    public static void generateWord(List<Vertex> list, WordNet wordNet) {
        fixResultByRule(list);
        wordNet.addAll(list);
    }

    /**
     * Collapses runs of adjacent numeric vertices into a single number vertex.
     *
     * <p>A vertex counts as numeric when {@code TextUtility.isAllNum} or
     * {@code TextUtility.isAllChineseNum} accepts its {@code realWord}. When two neighbours are
     * both numeric, the first is replaced by a new number vertex holding the concatenated text
     * and the second is removed; the merged vertex then takes part in the next comparison.
     * Lists with fewer than two elements are left untouched.
     *
     * @param list the vertices to process; modified in place
     */
    private static void mergeContinueNumIntoOne(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        ListIterator<Vertex> cursor = list.listIterator();
        Vertex current = cursor.next();
        while (cursor.hasNext()) {
            Vertex following = cursor.next();
            boolean bothNumeric =
                    (TextUtility.isAllNum(current.realWord) || TextUtility.isAllChineseNum(current.realWord))
                            && (TextUtility.isAllNum(following.realWord) || TextUtility.isAllChineseNum(following.realWord));
            if (!bothNumeric) {
                current = following;
                continue;
            }
            current = Vertex.newNumberInstance(current.realWord + following.realWord);
            // Step back over both vertices, overwrite the first with the merged one,
            // then step forward again and drop the second.
            cursor.previous();
            cursor.previous();
            cursor.set(current);
            cursor.next();
            cursor.next();
            cursor.remove();
        }
    }

    /**
     * Replaces two adjacent vertices with a single time vertex whose text is
     * {@code vertex2.realWord + vertex.realWord}.
     *
     * <p>The iterator is expected to have just returned {@code vertex2} and then {@code vertex}
     * from {@code next()}, as is the case at the call sites in this class. On return, the cursor
     * sits directly after the merged vertex.
     *
     * @param listIterator iterator positioned right after {@code vertex}
     * @param vertex       the second vertex of the pair (removed from the list)
     * @param vertex2      the first vertex of the pair (overwritten by the merged vertex)
     */
    private static void mergeDate(ListIterator<Vertex> listIterator, Vertex vertex, Vertex vertex2) {
        String mergedWord = vertex2.realWord + vertex.realWord;
        Vertex merged = Vertex.newTimeInstance(mergedWord);
        // Rewind past both vertices and overwrite the first one.
        listIterator.previous();
        listIterator.previous();
        listIterator.set(merged);
        // Move forward over the merged vertex and the second one, then delete the latter.
        listIterator.next();
        listIterator.next();
        listIterator.remove();
    }

    /**
     * Runs {@code Viterbi.compute} over the vertex list using the core dictionary's
     * transform matrix ({@code CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary}).
     *
     * @param list the vertices passed to {@code Viterbi.compute}
     */
    public static void speechTagging(List<Vertex> list) {
        Viterbi.compute(list, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
    }

    /**
     * Splits "number-number" words into three vertices (number, "-", number).
     *
     * <p>A vertex is split when its nature is {@code Nature.nx}, the vertex after it has nature
     * {@code Nature.q} or {@code Nature.f5537n}, and its {@code realWord} consists of exactly two
     * non-empty parts around a single '-' that {@code TextUtility.isAllNum} accepts.
     * Lists with fewer than two elements are left untouched.
     *
     * @param list the vertices to process; modified in place
     */
    private static void splitMiddleSlashFromDigitalWords(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        ListIterator<Vertex> listIterator = list.listIterator();
        Vertex current = listIterator.next();
        while (listIterator.hasNext()) {
            Vertex following = listIterator.next();
            if (current.getNature() == Nature.nx && (following.hasNature(Nature.q) || following.hasNature(Nature.f5537n))) {
                // A limit of 2 yields at most [left, right]. A limit of 1 never splits at all,
                // which made the length == 2 check below impossible to satisfy.
                String[] parts = current.realWord.split("-", 2);
                // Empty halves (e.g. "-3" or "3-") are rejected up front: they are not numbers
                // and are never handed to isAllNum.
                if (parts.length == 2
                        && !parts[0].isEmpty()
                        && !parts[1].isEmpty()
                        && TextUtility.isAllNum(parts[0])
                        && TextUtility.isAllNum(parts[1])) {
                    Vertex left = current.copy();
                    left.realWord = parts[0];
                    left.confirmNature(Nature.f5536m);
                    // Rewind to the word being split and overwrite it with the left number.
                    listIterator.previous();
                    listIterator.previous();
                    listIterator.set(left);
                    listIterator.next();
                    // Insert "-" and the right number directly after the left number.
                    listIterator.add(Vertex.newPunctuationInstance("-"));
                    listIterator.add(Vertex.newNumberInstance(parts[1]));
                    // The cursor now sits just before `following`; step over it again so the
                    // next loop iteration fetches the vertex after it instead of re-reading it.
                    listIterator.next();
                }
            }
            current = following;
        }
    }

    /**
     * Converts a segmentation path into terms and, for every word longer than two characters,
     * additionally emits the shorter words from the word net that lie inside it.
     *
     * <p>The first element of {@code list} is skipped and {@code list.size() - 2} elements are
     * processed after it. A contained word is emitted when it is not the long word itself, does
     * not extend past the long word's end, and either is at least {@code config.indexMode}
     * characters long or has nature {@code Nature.q} while the long word's term has nature
     * {@code Nature.mq}. Each emitted contained word is also inserted into {@code list}.
     *
     * @param list    the segmentation path; contained words are inserted into it
     * @param wordNet the word net queried per row through {@code descendingIterator}
     * @return the terms for the path words and their contained words, with offsets assigned
     */
    public List<Term> decorateResultForIndexMode(List<Vertex> list, WordNet wordNet) {
        LinkedList<Term> terms = new LinkedList<Term>();
        ListIterator<Vertex> cursor = list.listIterator();
        cursor.next();
        // Row number (1-based) in the word net at which the current word starts.
        int line = 1;
        for (int remaining = list.size() - 2; remaining > 0; remaining--) {
            Vertex vertex = cursor.next();
            Term mainTerm = Segment.convert(vertex);
            terms.add(mainTerm);
            mainTerm.offset = line - 1;
            int wordLength = vertex.realWord.length();
            if (wordLength > 2) {
                int endLine = line + wordLength;
                for (int row = line; row < endLine; row++) {
                    Iterator<Vertex> rowWords = wordNet.descendingIterator(row);
                    while (rowWords.hasNext()) {
                        Vertex candidate = rowWords.next();
                        boolean eligible = (mainTerm.nature == Nature.mq && candidate.hasNature(Nature.q))
                                || candidate.realWord.length() >= this.config.indexMode;
                        if (!eligible) {
                            continue;
                        }
                        // Skip the long word itself and anything reaching past its end.
                        if (candidate == vertex || candidate.realWord.length() + row > endLine) {
                            continue;
                        }
                        cursor.add(candidate);
                        Term subTerm = Segment.convert(candidate);
                        subTerm.offset = row - 1;
                        terms.add(subTerm);
                    }
                }
            }
            line += wordLength;
        }
        return terms;
    }

    /**
     * Populates the word net with candidate vertices for its text.
     *
     * <p>Three steps are performed:
     * <ol>
     *   <li>every hit of the core dictionary trie over the text is added;</li>
     *   <li>if {@code config.forceCustomDictionary} is set, every hit reported by
     *       {@code CustomDictionary.parseText} is added as well;</li>
     *   <li>rows still empty afterwards are filled with the result of
     *       {@code Segment.quickAtomSegment} over the uncovered stretch of text.</li>
     * </ol>
     * Word-net rows are offset by one from text positions: a word starting at text index
     * {@code b} is added at row {@code b + 1}.
     *
     * @param wordNet the word net to fill; its {@code charArray} supplies the text
     */
    public void generateWordNet(final WordNet wordNet) {
        final char[] text = wordNet.charArray;

        // Step 1: core dictionary lookup.
        DoubleArrayTrie<CoreDictionary.Attribute>.Searcher coreSearcher = CoreDictionary.trie.getSearcher(text, 0);
        while (coreSearcher.next()) {
            String word = new String(text, coreSearcher.begin, coreSearcher.length);
            Vertex vertex = new Vertex(word, (CoreDictionary.Attribute) coreSearcher.value, coreSearcher.index);
            wordNet.add(coreSearcher.begin + 1, vertex);
        }

        // Step 2: optional custom dictionary lookup.
        if (this.config.forceCustomDictionary) {
            CustomDictionary.parseText(text, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {
                @Override
                public void hit(int begin, int end, CoreDictionary.Attribute attribute) {
                    String word = new String(text, begin, end - begin);
                    wordNet.add(begin + 1, new Vertex(word, attribute));
                }
            });
        }

        // Step 3: fill the rows that no dictionary word starts at.
        LinkedList<Vertex>[] rows = wordNet.getVertexes();
        for (int row = 1; row < rows.length; ) {
            if (!rows[row].isEmpty()) {
                // Jump past the last word stored in this row.
                row += rows[row].getLast().realWord.length();
                continue;
            }
            // Extend the gap while the row is empty or the character has type code 11
            // (an inlined CharType constant; its symbolic name is not visible in this file).
            int gapEnd = row + 1;
            for (; gapEnd < rows.length - 1; gapEnd++) {
                boolean extendsGap = rows[gapEnd].isEmpty() || CharType.get(text[gapEnd - 1]) == 11;
                if (!extendsGap) {
                    break;
                }
            }
            wordNet.add(row, Segment.quickAtomSegment(text, row - 1, gapEnd - 1));
            row = gapEnd;
        }
    }
}
