package org.apertium.tagger;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apertium.lttoolbox.Compression;
import org.apertium.utils.IOUtils;

/* loaded from: input_file:org/apertium/tagger/HMM.class */
public class HMM {
    private TaggerData td;
    private int eos;
    static final double DBL_MIN = Double.MIN_NORMAL;
    private boolean DEBUG = false;
    private double ZERO = 1.0E-10d;
    private boolean debug = false;
    private boolean show_sf = false;
    private boolean null_flush = false;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apertium/tagger/HMM$IntVector.class */
    public class IntVector {
        ArrayList<Integer> nodes = new ArrayList<>();

        IntVector() {
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public HMM(TaggerData taggerData) {
        this.td = taggerData;
        this.eos = this.td.getTagIndex().get("TAG_SENT").intValue();
    }

    void set_eos(int i) {
        this.eos = i;
    }

    void set_debug(boolean z) {
        this.debug = z;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public void set_show_sf(boolean z) {
        this.show_sf = z;
    }

    void read_ambiguity_classes(InputStream inputStream) throws IOException {
        while (true) {
            int multibyte_read = Compression.multibyte_read(inputStream);
            if (multibyte_read == -1) {
                this.td.setProbabilities(this.td.getTagIndex().size(), this.td.getOutput().size());
                return;
            }
            LinkedHashSet linkedHashSet = new LinkedHashSet();
            while (multibyte_read != 0) {
                linkedHashSet.add(Integer.valueOf(Compression.multibyte_read(inputStream)));
                multibyte_read--;
            }
            if (linkedHashSet.size() != 0) {
                this.td.getOutput().add(linkedHashSet);
            }
        }
    }

    void write_ambiguity_classes(OutputStream outputStream) throws IOException {
        for (int i = 0; i != this.td.getOutput().size(); i++) {
            Set<Integer> set = this.td.getOutput().get(i);
            Compression.multibyte_write(set.size(), outputStream);
            Iterator<Integer> it = set.iterator();
            while (it.hasNext()) {
                Compression.multibyte_write(it.next().intValue(), outputStream);
            }
        }
    }

    void read_probabilities(InputStream inputStream) throws IOException {
        this.td.read(inputStream);
    }

    void write_probabilities(OutputStream outputStream) throws IOException {
        this.td.write(outputStream);
    }

    void init_probabilities_kupiec(Reader reader) throws IOException {
        int n = this.td.getN();
        int m = this.td.getM();
        int i = 0;
        double[] dArr = new double[m];
        double[][] dArr2 = new double[m][m];
        double[] dArr3 = new double[n];
        double[][] dArr4 = new double[n][n];
        Collection output = this.td.getOutput();
        MorphoStream morphoStream = new MorphoStream(reader, true, this.td);
        new TaggerWord();
        int i2 = 0;
        while (i2 < m) {
            dArr[i2] = 1.0d;
            for (int i3 = 0; i3 < m; i3++) {
                dArr2[i2][i3] = 1.0d;
            }
            i2++;
        }
        LinkedHashSet linkedHashSet = new LinkedHashSet();
        linkedHashSet.add(Integer.valueOf(this.eos));
        int i4 = output.get(linkedHashSet);
        int i5 = i2;
        dArr[i5] = dArr[i5] + 1.0d;
        TaggerWord taggerWord = morphoStream.get_next_word();
        while (taggerWord != null) {
            i++;
            if (i % 10000 == 0) {
                System.err.print('.');
                System.err.flush();
            }
            Set<Integer> set = taggerWord.get_tags();
            if (set.size() == 0) {
                set = this.td.getOpenClass();
            } else if (output.has_not(set)) {
                fatal_error((("A new ambiguity class was found. I cannot continue.\nWord '" + taggerWord.get_superficial_form() + "' not found in the dictionary.\n") + "New ambiguity class: " + taggerWord.get_string_tags() + "\n") + "Take a look at the dictionary and at the training corpus. Then, retrain.");
            }
            int i6 = output.get(set);
            int i7 = i4;
            dArr[i7] = dArr[i7] + 1.0d;
            double[] dArr5 = dArr2[i4];
            dArr5[i6] = dArr5[i6] + 1.0d;
            taggerWord = morphoStream.get_next_word();
            i4 = i6;
        }
        for (int i8 = 0; i8 < n; i8++) {
            dArr3[i8] = 0.0d;
            for (int i9 = 0; i9 < m; i9++) {
                if (output.get(i9).contains(Integer.valueOf(i8))) {
                    int i10 = i8;
                    dArr3[i10] = dArr3[i10] + (dArr[i9] / output.get(i9).size());
                }
            }
        }
        for (int i11 = 0; i11 < n; i11++) {
            for (int i12 = 0; i12 < n; i12++) {
                dArr4[i11][i12] = 0.0d;
            }
        }
        for (int i13 = 0; i13 < m; i13++) {
            Set<Integer> set2 = output.get(i13);
            for (int i14 = 0; i14 < m; i14++) {
                Set<Integer> set3 = output.get(i14);
                double size = dArr2[i13][i14] / (set2.size() * set3.size());
                for (Integer num : (Integer[]) set2.toArray(new Integer[set2.size()])) {
                    for (Integer num2 : (Integer[]) set3.toArray(new Integer[set3.size()])) {
                        double[] dArr6 = dArr4[num.intValue()];
                        int intValue = num2.intValue();
                        dArr6[intValue] = dArr6[intValue] + size;
                    }
                }
            }
        }
        double[][] a = this.td.getA();
        for (int i15 = 0; i15 < n; i15++) {
            double d = 0.0d;
            for (int i16 = 0; i16 < n; i16++) {
                d += dArr4[i15][i16];
            }
            for (int i17 = 0; i17 < n; i17++) {
                if (d > 0.0d) {
                    a[i15][i17] = dArr4[i15][i17] / d;
                } else {
                    a[i15][i17] = 0.0d;
                }
            }
        }
        this.td.setA(a);
        double[][] b = this.td.getB();
        for (int i18 = 0; i18 < n; i18++) {
            for (int i19 = 0; i19 < m; i19++) {
                if (output.get(i19).contains(Integer.valueOf(i18))) {
                    if (dArr3[i18] > 0.0d) {
                        b[i18][i19] = (dArr[i19] / output.get(i19).size()) / dArr3[i18];
                    } else {
                        b[i18][i19] = 0.0d;
                    }
                }
            }
        }
        this.td.setB(b);
        System.err.println();
    }

    void init_probabilities_from_tagged_text(Reader reader, Reader reader2) throws IOException {
        int i = 0;
        int n = this.td.getN();
        int m = this.td.getM();
        double[][] dArr = new double[n][n];
        double[][] dArr2 = new double[n][m];
        MorphoStream morphoStream = new MorphoStream(reader, true, this.td);
        MorphoStream morphoStream2 = new MorphoStream(reader2, true, this.td);
        new TaggerWord();
        new TaggerWord();
        Collection output = this.td.getOutput();
        Set<Integer> linkedHashSet = new LinkedHashSet();
        for (int i2 = 0; i2 < n; i2++) {
            for (int i3 = 0; i3 < n; i3++) {
                dArr[i2][i3] = 0.0d;
            }
        }
        for (int i4 = 0; i4 < m; i4++) {
            for (int i5 = 0; i5 < n; i5++) {
                if (output.get(i4).contains(Integer.valueOf(i5))) {
                    dArr2[i5][i4] = 0.0d;
                }
            }
        }
        Integer valueOf = Integer.valueOf(this.eos);
        TaggerWord taggerWord = morphoStream.get_next_word();
        TaggerWord taggerWord2 = morphoStream2.get_next_word();
        while (true) {
            TaggerWord taggerWord3 = taggerWord2;
            if (taggerWord == null) {
                for (int i6 = 0; i6 < n; i6++) {
                    double d = 0.0d;
                    for (int i7 = 0; i7 < n; i7++) {
                        d += dArr[i6][i7] + 1.0d;
                    }
                    for (int i8 = 0; i8 < n; i8++) {
                        this.td.setAElement(i6, i8, (dArr[i6][i8] + 1.0d) / d);
                    }
                }
                for (int i9 = 0; i9 < n; i9++) {
                    int i10 = 0;
                    double d2 = 0.0d;
                    for (int i11 = 0; i11 < m; i11++) {
                        if (output.get(i11).contains(Integer.valueOf(i9))) {
                            i10++;
                            d2 += dArr2[i9][i11];
                        }
                    }
                    for (int i12 = 0; i12 < m; i12++) {
                        if (output.get(i12).contains(Integer.valueOf(i9))) {
                            this.td.setBElement(i9, i12, (dArr2[i9][i12] + (1.0d / i10)) / (d2 + 1.0d));
                        }
                    }
                }
                System.err.println();
                return;
            }
            System.err.print(taggerWord);
            System.err.println(" -- " + taggerWord3);
            if (!taggerWord3.get_superficial_form().equals(taggerWord.get_superficial_form())) {
                System.err.println();
                System.err.println("Tagged text (.tagged) and analyzed text (.untagged) streams are not aligned.");
                System.err.println("Take a look at tagged text (.tagged).");
                System.err.println("Perhaps this is caused by a multiword unit that is not a multiword unit in one of the two files.");
                System.err.println(taggerWord + " -- " + taggerWord3);
                throw new Error();
            }
            i++;
            if (i % 100 == 0) {
                System.err.print(".");
                System.err.flush();
            }
            Integer num = valueOf;
            if (taggerWord3 == null) {
                throw new IOException("word_untagged==NULL");
            }
            if (taggerWord.get_tags().size() == 0) {
                valueOf = -1;
            } else if (taggerWord.get_tags().size() > 1) {
                System.err.println("Error in tagged text. An ambiguous word was found: " + taggerWord.get_superficial_form());
            } else {
                valueOf = taggerWord.get_tags().iterator().next();
            }
            if (valueOf.intValue() >= 0 && num.intValue() >= 0) {
                double[] dArr3 = dArr[num.intValue()];
                int intValue = valueOf.intValue();
                dArr3[intValue] = dArr3[intValue] + 1.0d;
            }
            if (taggerWord3.get_tags().size() == 0) {
                linkedHashSet = this.td.getOpenClass();
            } else if (output.has_not(taggerWord3.get_tags())) {
                fatal_error((("A new ambiguity class was found. I cannot continue.\nWord '" + taggerWord3.get_superficial_form() + "' not found in the dictionary.\n") + "New ambiguity class: " + taggerWord3.get_string_tags() + "\n") + "Take a look at the dictionary, then retrain.");
            } else {
                linkedHashSet = taggerWord3.get_tags();
            }
            int i13 = output.get(linkedHashSet);
            if (valueOf.intValue() >= 0) {
                double[] dArr4 = dArr2[valueOf.intValue()];
                dArr4[i13] = dArr4[i13] + 1.0d;
            }
            taggerWord = morphoStream.get_next_word();
            taggerWord2 = morphoStream2.get_next_word();
        }
    }

    void apply_rules() {
        List<TForbidRule> forbidRules = this.td.getForbidRules();
        List<TEnforceAfterRule> enforceRules = this.td.getEnforceRules();
        int n = this.td.getN();
        for (int i = 0; i < forbidRules.size(); i++) {
            this.td.setAElement(forbidRules.get(i).tagi, forbidRules.get(i).tagj, this.ZERO);
        }
        for (int i2 = 0; i2 < enforceRules.size(); i2++) {
            for (int i3 = 0; i3 < n; i3++) {
                boolean z = false;
                int i4 = 0;
                while (true) {
                    if (i4 >= enforceRules.get(i2).tagsj.size()) {
                        break;
                    }
                    if (enforceRules.get(i2).tagsj.get(i4).intValue() == i3) {
                        z = true;
                        break;
                    }
                    i4++;
                }
                if (!z) {
                    this.td.setAElement(enforceRules.get(i2).tagi, i3, this.ZERO);
                }
            }
        }
        for (int i5 = 0; i5 < n; i5++) {
            double d = 0.0d;
            for (int i6 = 0; i6 < n; i6++) {
                d += this.td.getA()[i5][i6];
            }
            for (int i7 = 0; i7 < n; i7++) {
                if (d > 0.0d) {
                    this.td.setAElement(i5, i7, this.td.getA()[i5][i7] / d);
                } else {
                    this.td.setAElement(i5, i7, 0.0d);
                }
            }
        }
    }

    void read_dictionary(Reader reader) throws IOException {
        int i = 0;
        new TaggerWord();
        new LinkedHashSet();
        Collection output = this.td.getOutput();
        MorphoStream morphoStream = new MorphoStream(reader, true, this.td);
        TaggerWord taggerWord = morphoStream.get_next_word();
        while (true) {
            TaggerWord taggerWord2 = taggerWord;
            if (taggerWord2 == null) {
                break;
            }
            i++;
            if (i % 10000 == 0) {
                System.err.println(".");
                System.err.flush();
            }
            Set<Integer> set = taggerWord2.get_tags();
            if (set.size() > 0) {
                output.get(set);
            }
            taggerWord = morphoStream.get_next_word();
        }
        System.err.println();
        output.get(this.td.getOpenClass());
        int size = this.td.getTagIndex().size();
        for (int i2 = 0; i2 != size; i2++) {
            LinkedHashSet linkedHashSet = new LinkedHashSet();
            linkedHashSet.add(Integer.valueOf(i2));
            output.get(linkedHashSet);
        }
        int size2 = output.size();
        System.err.println(size + " states and " + size2 + " ambiguity classes");
        this.td.setProbabilities(size, size2);
    }

    void filter_ambiguity_classes(Reader reader, Appendable appendable) throws IOException {
        LinkedHashSet linkedHashSet = new LinkedHashSet();
        MorphoStream morphoStream = new MorphoStream(reader, true, this.td);
        TaggerWord taggerWord = morphoStream.get_next_word();
        while (true) {
            TaggerWord taggerWord2 = taggerWord;
            if (taggerWord2 == null) {
                return;
            }
            Set<Integer> set = taggerWord2.get_tags();
            if (set.size() > 0 && !linkedHashSet.contains(set)) {
                linkedHashSet.add(set);
                taggerWord2.outputOriginal(appendable);
            }
            taggerWord = morphoStream.get_next_word();
        }
    }

    void train(Reader reader) throws IOException, UnsupportedOperationException {
        throw new UnsupportedOperationException("HMM training doesn't work, it hasn't been fully ported, yet!");
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public void tagger(Reader reader, Appendable appendable, boolean z) throws IOException {
        Set<Integer> linkedHashSet = new LinkedHashSet();
        new LinkedHashSet();
        int n = this.td.getN();
        double[][] dArr = new double[2][n];
        IntVector[][] intVectorArr = new IntVector[2][n];
        for (int i = 0; i != 2; i++) {
            for (int i2 = 0; i2 != n; i2++) {
                intVectorArr[i][i2] = new IntVector();
            }
        }
        ArrayList arrayList = new ArrayList();
        MorphoStream morphoStream = new MorphoStream(reader, this.debug, this.td);
        morphoStream.setNullFlush(this.null_flush);
        Collection output = this.td.getOutput();
        double d = 0.0d;
        linkedHashSet.add(Integer.valueOf(this.eos));
        dArr[0][this.eos] = 1.0d;
        TaggerWord taggerWord = morphoStream.get_next_word();
        while (true) {
            TaggerWord taggerWord2 = taggerWord;
            if (taggerWord2 == null) {
                break;
            }
            if (this.DEBUG) {
                taggerWord2.print();
            }
            arrayList.add(taggerWord2);
            int size = arrayList.size();
            int i3 = size % 2;
            Set<Integer> set = linkedHashSet;
            linkedHashSet = taggerWord2.get_tags();
            if (linkedHashSet.size() == 0) {
                linkedHashSet = this.td.getOpenClass();
            }
            if (output.has_not(linkedHashSet)) {
                if (this.debug) {
                    System.err.print((("A new ambiguity class was found. \nRetraining the tagger is neccessary to take it into account.\n") + "Word '" + taggerWord2.get_superficial_form() + "'.\n") + "New ambiguity class: " + taggerWord2.get_string_tags() + "\n");
                }
                linkedHashSet = find_similar_ambiguity_class(linkedHashSet);
            }
            int i4 = output.get(linkedHashSet);
            if (this.DEBUG) {
                System.out.println("k: " + i4);
            }
            clear_array_double(dArr[i3]);
            clear_array_vector(intVectorArr[i3]);
            for (Integer num : linkedHashSet) {
                int intValue = num.intValue();
                if (this.DEBUG) {
                    System.out.println("i: " + num);
                }
                for (Integer num2 : set) {
                    int intValue2 = num2.intValue();
                    double d2 = dArr[1 - i3][intValue2] * this.td.getA()[intValue2][intValue] * this.td.getB()[intValue][i4];
                    if (this.DEBUG) {
                        System.out.println("j: " + num2 + " nwpend: " + size + " A[j][i]: " + this.td.getA()[intValue2][intValue] + " B[i][k]: " + this.td.getB()[intValue][i4] + "  x: " + d2);
                    }
                    if (dArr[i3][intValue] <= d2) {
                        if (size > 1 && (i3 != 1 - i3 || intValue != intValue2)) {
                            intVectorArr[i3][intValue].nodes.clear();
                            intVectorArr[i3][intValue].nodes.addAll(intVectorArr[1 - i3][intValue2].nodes);
                        }
                        if (this.DEBUG) {
                            System.out.println("best: " + i3 + " " + intValue);
                        }
                        intVectorArr[i3][intValue].nodes.add(Integer.valueOf(intValue));
                        dArr[i3][intValue] = d2;
                    }
                }
            }
            if (linkedHashSet.size() == 1) {
                Integer next = linkedHashSet.iterator().next();
                double d3 = dArr[i3][next.intValue()];
                if (d3 > 0.0d) {
                    d -= Math.log(d3);
                } else if (this.debug) {
                    System.err.println("Problem with word '" + taggerWord2.get_superficial_form() + "' " + taggerWord2.get_string_tags());
                }
                for (int i5 = 0; i5 < intVectorArr[i3][next.intValue()].nodes.size(); i5++) {
                    if (z) {
                        appendable.append(((TaggerWord) arrayList.get(i5)).get_all_chosen_tag_first(intVectorArr[i3][next.intValue()].nodes.get(i5), this.td.getTagIndex().get("TAG_kEOF").intValue()));
                    } else {
                        int intValue3 = this.td.getTagIndex().get("TAG_kEOF").intValue();
                        int intValue4 = intVectorArr[i3][next.intValue()].nodes.get(i5).intValue();
                        TaggerWord taggerWord3 = (TaggerWord) arrayList.get(i5);
                        taggerWord3.set_show_sf(this.show_sf);
                        appendable.append(taggerWord3.get_lexical_form(intValue4, intValue3));
                    }
                }
                arrayList.clear();
                dArr[0][next.intValue()] = 1.0d;
            }
            if (morphoStream.getEndOfFile()) {
                if (this.null_flush) {
                    appendable.append((char) 0);
                }
                IOUtils.flush(appendable);
                morphoStream.setEndOfFile(false);
            }
            taggerWord = morphoStream.get_next_word();
        }
        if (linkedHashSet.size() <= 1 || !this.debug) {
            return;
        }
        System.err.print("\nError: This message should never appear. If you are reading this ..... this is very bad news.\n");
    }

    void print_A() {
        System.out.println("TRANSITION MATRIX (A)");
        System.out.println("-------------------------------");
        for (int i = 0; i != this.td.getN(); i++) {
            for (int i2 = 0; i2 != this.td.getN(); i2++) {
                System.out.println("A[" + i + "][" + i2 + "] = " + this.td.getA()[i][i2]);
            }
        }
    }

    void print_B() {
        System.out.println("EMISSION MATRIX (B)");
        System.out.println("-------------------------------");
        for (int i = 0; i != this.td.getN(); i++) {
            for (int i2 = 0; i2 != this.td.getM(); i2++) {
                if (this.td.getOutput().get(i2).contains(Integer.valueOf(i))) {
                    System.out.println("B[" + i + "][" + i2 + "] = " + this.td.getB()[i][i2]);
                }
            }
        }
    }

    void print_ambiguity_classes() {
        new LinkedHashSet();
        System.out.println("AMBIGUITY CLASSES");
        System.out.println("-------------------------------");
        for (int i = 0; i != this.td.getM(); i++) {
            Set<Integer> set = this.td.getOutput().get(i);
            System.out.print(i + ": ");
            Iterator<Integer> it = set.iterator();
            while (it.hasNext()) {
                System.out.print(it.next() + " ");
            }
            System.out.println();
        }
    }

    Set<Integer> find_similar_ambiguity_class(Set<Integer> set) {
        int i = -1;
        Set<Integer> openClass = this.td.getOpenClass();
        Collection output = this.td.getOutput();
        for (int i2 = 0; i2 < this.td.getM(); i2++) {
            if (output.get(i2).size() > i && output.get(i2).size() < set.size()) {
                boolean z = false;
                Iterator<Integer> it = output.get(i2).iterator();
                while (true) {
                    if (!it.hasNext()) {
                        break;
                    }
                    if (!set.contains(it.next())) {
                        z = true;
                        break;
                    }
                }
                if (!z) {
                    i = output.get(i2).size();
                    openClass = output.get(i2);
                }
            }
        }
        return openClass;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public void setNullFlush(boolean z) {
        this.null_flush = z;
    }

    private void fatal_error(String str) {
        throw new Error(str);
    }

    void clear_array_double(double[] dArr) {
        for (int i = 0; i < dArr.length; i++) {
            dArr[i] = 0.0d;
        }
    }

    void clear_array_vector(IntVector[] intVectorArr) {
        for (IntVector intVector : intVectorArr) {
            intVector.nodes.clear();
        }
    }
}
