package org.apertium.tagger;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apertium.lttoolbox.Alphabet;
import org.apertium.transfer.ApertiumRE;

/* loaded from: input_file:org/apertium/tagger/MorphoStream.class */
/**
 * Pulls characters from a {@link Reader} and hands them out as {@link TaggerWord}
 * objects, one per call to {@link #get_next_word()}.
 *
 * <p>As far as this class shows, the input syntax is: {@code ^} starts a word (the
 * rest of the word is consumed by {@link #readRestOfWord(int)}), a backslash escapes
 * the following character, and everything found before a {@code ^} is attached to the
 * pending word as an "ignored string". Analyses are classified into coarse tags by
 * {@link #lrlmClassify(String, int)} using the pattern matcher supplied by the
 * {@link TaggerData}.
 *
 * <p>NOTE(review): this file is decompiler output. {@link #readRestOfWord(int)} could
 * not be decompiled and currently always throws, so any input containing {@code ^}
 * fails until that method is restored from the original sources.
 */
public class MorphoStream {
    /** Compile-time switch for verbose tracing on stdout; unrelated to the per-instance {@link #debug} flag. */
    private static final boolean DEBUG = false;

    /** When true, "no coarse tag" warnings are printed to stderr by {@link #lrlmClassify(String, int)}. */
    private final boolean debug;

    /** Alphabet symbol for {@code <ANY_CHAR>}, used as the fallback when stepping on a plain character. */
    private final int ca_any_char;
    /** Alphabet symbol for {@code <ANY_TAG>}, used as the fallback when stepping on a tag. */
    private final int ca_any_tag;

    // The following constants are looked up in the constructor but never read in this class.
    private final int ca_kignorar;
    private final int ca_kbarra;
    private final int ca_kdollar;
    private final int ca_kbegin;
    private final int ca_kmot;
    private final int ca_kmas;
    private final int ca_kunknown;

    /** Tag index of {@code TAG_kEOF}; added to the pending word when the input ends. */
    private final int ca_tag_keof;
    /** Tag index of {@code TAG_kUNDEF}; added when no coarse tag matches a fine tag. */
    private final int ca_tag_kundef;

    /** Words that have been built but not yet returned by {@link #get_next_word()}. */
    private final List<TaggerWord> vwords = new ArrayList<>();
    private final Reader inputReader;
    private final MatchExe me;
    private final TaggerData td;
    private final Alphabet alphabet;
    private final MatchState ms;
    private final Map<String, Integer> tag_index;
    private final ConstantManager constants;

    /** Set once end of input (or a NUL in null-flush mode) has been seen; can be reset by callers. */
    private boolean end_of_file = false;
    /** When true, a NUL character (code 0) is treated like end of input. */
    private boolean null_flush = false;
    /** Set when the first read of a {@link #get_next_word()} call returned -1. */
    private boolean foundEOF = false;

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a stream over {@code reader} using the patterns, constants and tag index
     * of {@code taggerData}.
     *
     * @param reader     source of characters
     * @param z          enables the stderr warnings about fine tags without a coarse tag
     * @param taggerData supplies the pattern list, constants, tag index and rule lists
     * @throws UnsupportedEncodingException declared by the original signature; nothing in
     *         this constructor visibly throws it
     * @throws NullPointerException if the tag index has no {@code TAG_kEOF} or
     *         {@code TAG_kUNDEF} entry
     */
    public MorphoStream(Reader reader, boolean z, TaggerData taggerData) throws UnsupportedEncodingException {
        this.debug = z;
        this.td = taggerData;
        this.alphabet = this.td.getPatternList().getAlphabet();
        this.ca_any_char = this.alphabet.cast("<ANY_CHAR>");
        this.ca_any_tag = this.alphabet.cast("<ANY_TAG>");
        if (DEBUG) {
            System.out.println("ca_any_char = " + this.ca_any_char);
            System.out.println("ca_any_tag = " + this.ca_any_tag);
        }
        this.inputReader = reader;
        this.me = this.td.getPatternList().newMatchExe();
        this.ms = new MatchState(this.me);
        this.constants = this.td.getConstants();
        this.ca_kignorar = this.constants.getConstant("kIGNORAR");
        this.ca_kbarra = this.constants.getConstant("kBARRA");
        this.ca_kdollar = this.constants.getConstant("kDOLLAR");
        this.ca_kbegin = this.constants.getConstant("kBEGIN");
        this.ca_kmot = this.constants.getConstant("kMOT");
        this.ca_kmas = this.constants.getConstant("kMAS");
        this.ca_kunknown = this.constants.getConstant("kUNKNOWN");
        this.tag_index = this.td.getTagIndex();
        this.ca_tag_keof = this.tag_index.get("TAG_kEOF").intValue();
        this.ca_tag_kundef = this.tag_index.get("TAG_kUNDEF").intValue();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the next word of the stream.
     *
     * <p>If words are already queued, the first one is dequeued, the discard rules are
     * applied to it when it is ambiguous, and it is returned. Otherwise one character
     * is read to decide what comes next: text up to the next {@code ^} is collected as
     * the ignored string of a fresh word, the word itself is read by
     * {@link #readRestOfWord(int)}, and the method re-enters itself to hand out the word
     * that was just queued.
     *
     * @return the next word, or {@code null} once end of input has already been reported
     * @throws IOException if reading from the underlying reader fails
     */
    public TaggerWord get_next_word() throws IOException {
        if (DEBUG) {
            System.out.println("MorphoStream.getNextWord -- vwords: " + this.vwords);
        }

        if (!this.vwords.isEmpty()) {
            TaggerWord queued = this.vwords.remove(0);
            if (queued.isAmbiguous()) {
                for (String rule : this.td.getDiscardRules()) {
                    queued.discardOnAmbiguity(rule);
                }
            }
            if (DEBUG) {
                System.out.println("get_next_word " + queued.get_superficial_form());
            }
            return queued;
        }

        // The read happens before the end-of-file test, exactly as in the original.
        int symbol = this.inputReader.read();
        if (this.end_of_file || (symbol == -1 && this.foundEOF)) {
            if (DEBUG) {
                System.out.println("MorphoStream.get_next_word: EOF reached, returning NULL.");
            }
            return null;
        }
        if (symbol == -1) {
            this.foundEOF = true;
        }

        this.vwords.add(new TaggerWord());

        if (isEndOfInput(symbol)) {
            // Nothing but end of input: the fresh word only carries the EOF tag.
            this.end_of_file = true;
            if (DEBUG) {
                System.out.println("End of file add_tag in get_next_word()");
            }
            this.vwords.get(0).add_tag(this.ca_tag_keof, ApertiumRE.EMPTY_STRING, this.td.getPreferRules());
            return get_next_word();
        }
        if (symbol == '^') {
            readRestOfWord(0);
            return get_next_word();
        }

        // Anything else is text between words; collect it until the next '^'.
        StringBuilder ignored = new StringBuilder(ApertiumRE.EMPTY_STRING);
        if (symbol == '\\') {
            ignored.append('\\');
            int escaped = this.inputReader.read();
            if (isEndOfInput(escaped)) {
                // Previously the -1 was cast to a char and appended as U+FFFF.
                return finishAtEndOfInput(ignored.toString());
            }
            ignored.append((char) escaped);
        } else {
            ignored.append((char) symbol);
        }

        while (true) {
            symbol = this.inputReader.read();
            if (isEndOfInput(symbol)) {
                return finishAtEndOfInput(ignored.toString());
            }
            if (symbol == '\\') {
                // An escaped character is copied verbatim, so an escaped '^' does not start a word.
                ignored.append('\\');
                int escaped = this.inputReader.read();
                if (isEndOfInput(escaped)) {
                    return finishAtEndOfInput(ignored.toString());
                }
                ignored.append((char) escaped);
            } else if (symbol == '^') {
                this.vwords.get(0).add_ignored_string(ignored.toString());
                readRestOfWord(0);
                return get_next_word();
            } else {
                ignored.append((char) symbol);
            }
        }
    }

    /** Tells whether {@code symbol} ends the input: -1 always, and code 0 when null-flush is on. */
    private boolean isEndOfInput(int symbol) {
        return symbol == -1 || (this.null_flush && symbol == 0);
    }

    /**
     * Marks end of input, stores {@code pendingIgnored} and the EOF tag on the first
     * queued word, and hands that word out through {@link #get_next_word()}.
     */
    private TaggerWord finishAtEndOfInput(String pendingIgnored) throws IOException {
        this.end_of_file = true;
        this.vwords.get(0).add_ignored_string(pendingIgnored);
        this.vwords.get(0).add_tag(this.ca_tag_keof, ApertiumRE.EMPTY_STRING, this.td.getPreferRules());
        return get_next_word();
    }

    /* JADX WARN: Code restructure failed: missing block: B:28:0x012c, code lost:
    
        r0 = r5.inputReader.read();
     */
    /* JADX WARN: Code restructure failed: missing block: B:29:0x0136, code lost:
    
        if (r0 == (-1)) goto L61;
     */
    /* JADX WARN: Code restructure failed: missing block: B:31:0x013d, code lost:
    
        if (r5.null_flush == false) goto L36;
     */
    /* JADX WARN: Code restructure failed: missing block: B:33:0x0141, code lost:
    
        if (r0 != 0) goto L36;
     */
    /* JADX WARN: Code restructure failed: missing block: B:35:0x0144, code lost:
    
        r5.end_of_file = true;
     */
    /* JADX WARN: Code restructure failed: missing block: B:36:0x014d, code lost:
    
        if (r7.length() <= 0) goto L34;
     */
    /* JADX WARN: Code restructure failed: missing block: B:37:0x0150, code lost:
    
        r5.vwords.get(r6).add_ignored_string(r7);
        java.lang.System.err.println("Warning (internal): kIGNORE was returned while reading a word");
        java.lang.System.err.println("Word being read: " + r5.vwords.get(r6).get_superficial_form());
        java.lang.System.err.println("Debug: " + r7);
     */
    /* JADX WARN: Code restructure failed: missing block: B:38:0x01a6, code lost:
    
        r5.vwords.get(r6).add_tag(r5.ca_tag_keof, org.apertium.transfer.ApertiumRE.EMPTY_STRING, r5.td.getPreferRules());
     */
    /* JADX WARN: Code restructure failed: missing block: B:39:0x01c1, code lost:
    
        return;
     */
    /* JADX WARN: Code restructure failed: missing block: B:41:0x01c5, code lost:
    
        if (r0 != 92) goto L60;
     */
    /* JADX WARN: Code restructure failed: missing block: B:44:0x0201, code lost:
    
        if (r0 != 47) goto L62;
     */
    /* JADX WARN: Code restructure failed: missing block: B:47:0x0215, code lost:
    
        if (r0 != 36) goto L48;
     */
    /* JADX WARN: Code restructure failed: missing block: B:48:0x0229, code lost:
    
        r7 = r7 + ((char) r0);
     */
    /* JADX WARN: Code restructure failed: missing block: B:53:0x021f, code lost:
    
        if (r7.charAt(0) == '*') goto L71;
     */
    /* JADX WARN: Code restructure failed: missing block: B:54:0x0222, code lost:
    
        lrlmClassify(r7, r6);
     */
    /* JADX WARN: Code restructure failed: missing block: B:55:0x0228, code lost:
    
        return;
     */
    /* JADX WARN: Code restructure failed: missing block: B:56:?, code lost:
    
        return;
     */
    /* JADX WARN: Code restructure failed: missing block: B:58:0x0204, code lost:
    
        lrlmClassify(r7, r6);
        r7 = org.apertium.transfer.ApertiumRE.EMPTY_STRING;
        r6 = 0;
     */
    /* JADX WARN: Code restructure failed: missing block: B:61:0x01c8, code lost:
    
        r7 = (r7 + '\\') + ((char) r5.inputReader.read());
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the method that reads the remainder of a word after {@code ^}.
     *
     * <p>The decompiler could not restructure this method, so the body below only
     * throws. The fragments preserved in the comments above (bytecode offset 0x012c
     * onward) appear to describe a read loop that:
     * <ul>
     *   <li>on end of input (or NUL in null-flush mode) sets {@code end_of_file}, stores
     *       any pending text as an ignored string with an internal warning on stderr,
     *       adds the EOF tag and returns;</li>
     *   <li>copies a backslash and the character following it verbatim;</li>
     *   <li>on {@code /} (47) passes the pending text to
     *       {@link #lrlmClassify(String, int)}, clears it and resets the word index to 0;</li>
     *   <li>on {@code $} (36) classifies the pending text unless it starts with
     *       {@code *}, then returns.</li>
     * </ul>
     * The code preceding that offset is not available in this file and must be restored
     * from the original sources before this class can process words.
     *
     * @param r6 index into the queued-word list of the word being read (callers here pass 0)
     * @throws UnsupportedOperationException always, until the body is restored
     */
    void readRestOfWord(int r6) throws java.io.IOException {
        /*
            Method dump skipped, instructions count: 576
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.apertium.tagger.MorphoStream.readRestOfWord(int):void");
    }

    /**
     * Classifies one analysis string into coarse tags and records them on the queued
     * words, starting at index {@code i}.
     *
     * <p>The string is fed to the pattern matcher symbol by symbol: plain characters
     * are lower-cased and stepped with {@code <ANY_CHAR>} as the alternative;
     * {@code <...>} tags are looked up in the alphabet and stepped with
     * {@code <ANY_TAG>} as the alternative (or as the only symbol when the tag is
     * unknown). Each time a {@code +} is reached while the matcher is in an accepting
     * state, that position is remembered. When the matcher runs out of live states, or
     * the string ends without an accepting state, the segment up to the remembered
     * position is tagged, the word is marked as "plus cut", a follow-up word is queued
     * if needed, and matching restarts after the {@code +}. A segment for which
     * nothing was ever accepted receives {@code TAG_kUNDEF}.
     *
     * @param str analysis (lexical form with tags) to classify
     * @param i   index in the queued-word list of the word that receives the first tag
     */
    void lrlmClassify(String str, int i) {
        if (DEBUG) {
            System.out.println("Starting lrlmClassify -- str: >>" + str + "<<");
            System.out.println("MorphoStream.lrlmClassify -- vwords: " + this.vwords);
        }

        int wordIndex = i;
        int floor = 0;      // start of the segment currently being matched
        int lastType = -1;  // coarse tag accepted at the last remembered '+'
        int lastPos = 0;    // index of the character just before that '+'
        this.ms.init(this.me.getInitial());

        final int limit = str.length();
        // Lower-case once; the original recomputed this for every character.
        final String lowered = str.toLowerCase();

        for (int pos = 0; pos < limit; pos++) {
            char current = str.charAt(pos);
            if (current == '<') {
                String tag = ApertiumRE.EMPTY_STRING;
                // Find the matching '>' (skipping escaped characters) and stop there, so that
                // each tag is fed to the matcher on its own. Using '<' rather than '!=' keeps a
                // trailing backslash from pushing the index past the end of the string.
                for (int j = pos + 1; j < limit; j++) {
                    char c = str.charAt(j);
                    if (c == '\\') {
                        j++;
                    } else if (c == '>') {
                        tag = str.substring(pos, j + 1);
                        if (DEBUG) {
                            System.out.println("tag = " + tag);
                        }
                        pos = j;
                        break;
                    }
                }
                int tagSymbol = this.alphabet.cast(tag);
                if (tagSymbol != 0) {
                    this.ms.step(tagSymbol, this.ca_any_tag);
                } else {
                    this.ms.step(this.ca_any_tag);
                }
            } else {
                if (current == '+') {
                    int accepted = this.ms.classifyFinals();
                    if (accepted != -1) {
                        lastPos = pos - 1;
                        lastType = accepted;
                    }
                }
                this.ms.step(lowered.charAt(pos), this.ca_any_char);
            }

            // Either the matcher has no live state left, or the string is exhausted without
            // reaching an accepting state: fall back to the last accepted '+' boundary.
            if (this.ms.size() == 0 || (pos == limit - 1 && this.ms.classifyFinals() == -1)) {
                // NOTE(review): after a plus cut, floor ends up one past lastPos, so this
                // equality seems to hold only for the first segment - confirm against the
                // reference implementation.
                if (lastPos == floor) {
                    String unmatched = str.substring(floor);
                    warnNoCoarseTag(unmatched);
                    this.vwords.get(wordIndex).add_tag(this.ca_tag_kundef, unmatched, this.td.getPreferRules());
                    return;
                }
                if (DEBUG) {
                    System.out.println("MorphoStream.lrlmclassify -- floor: " + floor);
                    System.out.println("MorphoStream.lrlmclassify -- last_pos: " + lastPos);
                }
                // The segment is [floor, lastPos] inclusive in both situations.
                this.vwords.get(wordIndex).add_tag(lastType, str.substring(floor, lastPos + 1), this.td.getPreferRules());

                int boundary = lastPos + 1;
                // Bounds check first so charAt can never be called past the end.
                if (boundary < limit && str.charAt(boundary) == '+') {
                    if (DEBUG) {
                        System.out.println("MorphoStream.lrlmClassify -- plus cut, word added: " + str.substring(floor, boundary));
                    }
                    floor = boundary;
                    lastPos = floor;
                    this.vwords.get(wordIndex).set_plus_cut(true);
                    if (this.vwords.size() <= wordIndex + 1) {
                        this.vwords.add(new TaggerWord(true));
                    }
                    wordIndex++;
                    this.ms.init(this.me.getInitial());
                }
                // Resume at the old floor (the loop increment then moves past it) and advance floor.
                pos = floor++;
                if (DEBUG) {
                    System.out.println("MorphoStream.lrlmClassify -- floor post-increment assignment to i:");
                    System.out.println("-- i: " + pos + ", floor: " + floor);
                }
            }
        }

        int finalType = this.ms.classifyFinals();
        String remainder = str.substring(floor);
        if (finalType == -1) {
            finalType = this.ca_tag_kundef;
            warnNoCoarseTag(remainder);
        }
        TaggerWord target = this.vwords.get(wordIndex);
        if (DEBUG) {
            System.out.println("add_tag called at the end of lrlmClassify.");
            System.out.println("end of lrlmClassify -- floor: " + floor);
            System.out.println("MorphoStream.lrlmClassify before last add_tag -- vwords: " + this.vwords);
        }
        target.add_tag(finalType, remainder, this.td.getPreferRules());
        if (DEBUG) {
            System.out.println("MorphoStream.lrlmClassify after last add_tag -- vwords: " + this.vwords);
        }
    }

    /** Prints the "no coarse tag" warning for {@code fineTag} to stderr when {@link #debug} is on. */
    private void warnNoCoarseTag(String fineTag) {
        if (this.debug) {
            System.err.println("Warning: There is no coarse tag for the fine tag '" + fineTag + "'");
            System.err.println("         This is because of an incomplete tagset definition or a dictionary error");
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Enables or disables null-flush mode, in which a NUL character is treated like
     * end of input.
     *
     * @param z {@code true} to enable null-flush mode
     */
    public void setNullFlush(boolean z) {
        this.null_flush = z;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Reports whether end of input (or a NUL in null-flush mode) has been reached.
     *
     * @return the current end-of-file flag
     */
    public boolean getEndOfFile() {
        return this.end_of_file;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Overrides the end-of-file flag.
     *
     * @param z new value of the flag
     */
    public void setEndOfFile(boolean z) {
        this.end_of_file = z;
    }
}
