/*
 * Decompiled with CFR 0.152.
 */
package LBJ2.nlp;

import LBJ2.nlp.Sentence;
import LBJ2.nlp.Word;
import LBJ2.parse.LineByLine;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SentenceSplitter
extends LineByLine {
    private static final Pattern wordMatcher = Pattern.compile("([^-\\s]-\n\\s*(?=\\S)|\\S)+");
    private static final Pattern lowerCaseLetter = Pattern.compile("[a-z]");
    private static final Pattern capitalsAndDots = Pattern.compile("^([A-Z]\\.)*[A-Z]$");
    protected int currentOffset;
    protected LinkedList sentences;
    protected int index;
    protected String[] input;

    public static void main(String[] stringArray) {
        String string = null;
        try {
            string = stringArray[0];
            if (stringArray.length > 1) {
                throw new Exception();
            }
        }
        catch (Exception exception) {
            System.err.println("usage: java LBJ2.nlp.SentenceSplitter <file name>");
            System.exit(1);
        }
        SentenceSplitter sentenceSplitter = new SentenceSplitter(string);
        Sentence sentence = (Sentence)sentenceSplitter.next();
        while (sentence != null) {
            StringBuffer stringBuffer = new StringBuffer(sentence.text);
            for (int i = 0; i < stringBuffer.length(); ++i) {
                char c = stringBuffer.charAt(i);
                if (c != '\n' && c != '\r' && c != '\f') continue;
                stringBuffer.setCharAt(i, ' ');
            }
            System.out.println(stringBuffer);
            sentence = (Sentence)sentenceSplitter.next();
        }
    }

    public SentenceSplitter(String string) {
        super(string);
        this.sentences = new LinkedList();
    }

    public SentenceSplitter(String[] stringArray) {
        this.input = stringArray;
        this.sentences = new LinkedList();
    }

    protected String readLine() {
        if (this.input != null) {
            if (this.index < this.input.length) {
                return this.input[this.index++];
            }
            return null;
        }
        return super.readLine();
    }

    protected String getParagraph() {
        StringBuffer stringBuffer = new StringBuffer();
        String string = this.readLine();
        while (string != null && string.trim().length() == 0) {
            stringBuffer.append(string);
            stringBuffer.append("\n");
            string = this.readLine();
        }
        while (string != null && string.trim().length() != 0) {
            stringBuffer.append(string);
            stringBuffer.append("\n");
            string = this.readLine();
        }
        if (string != null) {
            stringBuffer.append(string);
            stringBuffer.append("\n");
        }
        return stringBuffer.toString();
    }

    public Object next() {
        if (this.sentences.size() == 0) {
            String string = this.getParagraph();
            if (string.trim().length() != 0) {
                this.process(string);
            }
            this.currentOffset += string.length();
        }
        if (this.sentences.size() == 0) {
            return null;
        }
        return this.sentences.removeFirst();
    }

    public Sentence[] splitAll() {
        String string = this.getParagraph();
        while (string.trim().length() != 0) {
            if (string.trim().length() != 0) {
                this.process(string);
            }
            this.currentOffset += string.length();
            string = this.getParagraph();
        }
        return this.sentences.toArray(new Sentence[this.sentences.size()]);
    }

    protected void process(String string) {
        int n;
        if (string.trim().length() == 0) {
            return;
        }
        Matcher matcher = wordMatcher.matcher(string);
        LinkedList<Word> linkedList = new LinkedList<Word>();
        while (matcher.find()) {
            linkedList.add(new Word(matcher.group(), matcher.start(), matcher.end() - 1));
        }
        Word[] wordArray = linkedList.toArray(new Word[linkedList.size()]);
        int n2 = wordArray[0].start;
        boolean bl = true;
        boolean bl2 = true;
        int n3 = 0;
        char[] cArray = string.toCharArray();
        for (n = 0; n < string.length() && bl2; ++n) {
            if (n3 != 0) {
                n3 = cArray[n] != '>' ? 1 : 0;
                continue;
            }
            if (cArray[n] == '<') {
                n3 = string.indexOf(62, n) != -1 ? 1 : 0;
                continue;
            }
            bl2 = !Character.isUpperCase(cArray[n]);
        }
        for (n3 = 0; n3 < wordArray.length; ++n3) {
            int n4 = wordArray[n3].form.lastIndexOf(46);
            n = wordArray[n3].form.lastIndexOf(63);
            if (n > n4) {
                n4 = n;
            }
            if ((n = wordArray[n3].form.lastIndexOf(33)) > n4) {
                n4 = n;
            }
            if (n4 == -1) continue;
            Word word = n3 + 1 < wordArray.length ? wordArray[n3 + 1] : null;
            Word word2 = n3 + 2 < wordArray.length ? wordArray[n3 + 2] : null;
            int n5 = wordArray[n3].form.length();
            if (bl2) {
                n = wordArray[n3].form.indexOf(46);
            }
            if ((!bl2 || n5 <= 5 || n != -1 && n != n4 || lowerCaseLetter.matcher(wordArray[n3].form.substring(n4)).find()) && !this.boundary(n4, wordArray[n3], word, word2)) continue;
            this.sentences.add(new Sentence(string.substring(n2, wordArray[n3].end + 1), this.currentOffset + n2, this.currentOffset + wordArray[n3].end));
            if (n3 + 1 < wordArray.length) {
                n2 = wordArray[n3 + 1].start;
                continue;
            }
            bl = false;
        }
        if (bl) {
            this.sentences.add(new Sentence(string.substring(n2, wordArray[wordArray.length - 1].end + 1), this.currentOffset + n2, this.currentOffset + wordArray[wordArray.length - 1].end));
        }
    }

    protected boolean boundary(int n, Word word, Word word2, Word word3) {
        char c = word.form.charAt(n);
        Word word4 = new Word(word.form.substring(0, n));
        Word word5 = new Word(word.form.substring(n + 1));
        Word word6 = new Word(word4.form);
        while (word6.form.length() > 0 && "\"'`{[(".indexOf(word6.form.charAt(0)) != -1) {
            word6.form = word6.form.substring(1);
        }
        if ("yahoo!".equalsIgnoreCase(word6.form + c)) {
            return false;
        }
        if (c == '?' || c == '!') {
            return word2 == null || word5.form.length() == 0 && (word2.capitalized || this.startsWithQuote(word2) || word2.form.equals(".") || word3 != null && word3.capitalized && (word2.form.equals("--") || word2.form.equals("-RBR-"))) || this.isClose(word5) && this.hasStartMarker(word2);
        }
        if (word2 == null) {
            return true;
        }
        if (word5.form.length() == 0) {
            if (this.startsWithQuote(word2) || this.startsWithOpenBracket(word2)) {
                return true;
            }
            if (word2.form.equals("-RBR-") && word3 != null && word3.form.equals("--")) {
                return false;
            }
            if (this.isClosingBracket(word2)) {
                return true;
            }
            if (word4.form.length() == 0 && word2.form.equals(".")) {
                return false;
            }
            if (word2.form.equals(".")) {
                return true;
            }
            if (word2.form.equals("--") && word3 != null && word3.capitalized && this.endsWithQuote(word4)) {
                return false;
            }
            if (word2.form.equals("--") && word3 != null && (word3.capitalized || this.startsWithQuote(word3))) {
                return true;
            }
            if (word2.capitalized || Character.isDigit(word2.form.charAt(0))) {
                return this.isTerminal(word6) || (!word6.form.equals("p.m") && !word6.form.equals("a.m") || !this.isTimeZone(word2)) && !this.isHonorific(word6) && !this.startsWithQuote(word4) && (!this.startsWithOpenBracket(word4) || this.endsWithCloseBracket(word4)) && (!capitalsAndDots.matcher(word4.form).find() || this.sentenceBeginner(word2));
            }
        }
        return this.isClose(word5) && this.hasStartMarker(word2) && !this.isHonorific(word6);
    }

    protected boolean sentenceBeginner(Word word) {
        return word.form.equals("The");
    }

    protected boolean startsWithQuote(Word word) {
        if (word.form.length() == 0) {
            return false;
        }
        return word.form.charAt(0) == '\'' || word.form.charAt(0) == '\"' || word.form.charAt(0) == '`';
    }

    protected boolean endsWithQuote(Word word) {
        return word.form.endsWith("'") || word.form.endsWith("''") || word.form.endsWith("'''") || word.form.endsWith("\"") || word.form.endsWith("'\"");
    }

    protected boolean isClose(Word word) {
        return this.isClosingBracket(word) || this.isClosingQuote(word);
    }

    protected boolean isClosingBracket(Word word) {
        return word.form.equals(")") || word.form.equals("}") || word.form.equals("]") || word.form.equals("-RBR-");
    }

    protected boolean isClosingQuote(Word word) {
        return word.form.equals("'") || word.form.equals("''") || word.form.equals("'''") || word.form.equals("\"") || word.form.equals("'\"");
    }

    protected boolean hasStartMarker(Word word) {
        return word.capitalized || this.startsWithOpenQuote(word) || this.startsWithOpenBracket(word);
    }

    protected boolean startsWithOpenQuote(Word word) {
        return word.form.startsWith("`") || word.form.startsWith("``") || word.form.startsWith("```") || word.form.startsWith("\"") || word.form.startsWith("\"`");
    }

    protected boolean startsWithOpenBracket(Word word) {
        return word.form.startsWith("(") || word.form.startsWith("{") || word.form.startsWith("[") || word.form.startsWith("-LBR-");
    }

    protected boolean endsWithCloseBracket(Word word) {
        return word.form.endsWith(")") || word.form.endsWith("}") || word.form.endsWith("]") || word.form.endsWith("-RBR-");
    }

    protected boolean isTimeZone(Word word) {
        return word.form.equals("AST") || word.form.equals("CST") || word.form.equals("EST") || word.form.equals("HST") || word.form.equals("MST") || word.form.equals("PST") || word.form.equals("ADT") || word.form.equals("CDT") || word.form.equals("EDT") || word.form.equals("HDT") || word.form.equals("MDT") || word.form.equals("PDT") || word.form.equals("UTC") || word.form.equals("UTC-11");
    }

    protected boolean isTerminal(Word word) {
        return word.form.equals("Esq") || word.form.equals("Jr") || word.form.equals("Sr") || word.form.equals("M.D") || word.form.equals("Ph.D");
    }

    protected boolean isHonorific(Word word) {
        return word.form.equals("APR") || word.form.equals("AUG") || word.form.equals("Adj") || word.form.equals("Adm") || word.form.equals("Adv") || word.form.equals("Apr") || word.form.equals("Asst") || word.form.equals("Aug") || word.form.equals("Bart") || word.form.equals("Bldg") || word.form.equals("Brig") || word.form.equals("Bros") || word.form.equals("Capt") || word.form.equals("Cmdr") || word.form.equals("Col") || word.form.equals("Comdr") || word.form.equals("Con") || word.form.equals("Cpl") || word.form.equals("DEC") || word.form.equals("DR") || word.form.equals("Dec") || word.form.equals("Dr") || word.form.equals("Ens") || word.form.equals("FEB") || word.form.equals("Feb") || word.form.equals("Gen") || word.form.equals("Gov") || word.form.equals("Hon") || word.form.equals("Hosp") || word.form.equals("Insp") || word.form.equals("JAN") || word.form.equals("JUL") || word.form.equals("JUN") || word.form.equals("Jan") || word.form.equals("Jul") || word.form.equals("Jun") || word.form.equals("Lt") || word.form.equals("MAR") || word.form.equals("MM") || word.form.equals("MR") || word.form.equals("MRS") || word.form.equals("MS") || word.form.equals("MT") || word.form.equals("Maj") || word.form.equals("Mar") || word.form.equals("Messrs") || word.form.equals("Mlle") || word.form.equals("Mme") || word.form.equals("Mr") || word.form.equals("Mrs") || word.form.equals("Ms") || word.form.equals("Msgr") || word.form.equals("Mt") || word.form.equals("NO") || word.form.equals("NOV") || word.form.equals("No") || word.form.equals("Nov") || word.form.equals("OCT") || word.form.equals("Oct") || word.form.equals("Op") || word.form.equals("Ord") || word.form.equals("Pfc") || word.form.equals("Ph") || word.form.equals("Prof") || word.form.equals("Pvt") || word.form.equals("Rep") || word.form.equals("Reps") || word.form.equals("Res") || word.form.equals("Rev") || word.form.equals("Rt") || word.form.equals("SEP") || word.form.equals("SEPT") || word.form.equals("ST") || 
word.form.equals("Sen") || word.form.equals("Sens") || word.form.equals("Sep") || word.form.equals("Sept") || word.form.equals("Sfc") || word.form.equals("Sgt") || word.form.equals("Sr") || word.form.equals("St") || word.form.equals("Supt") || word.form.equals("Surg") || word.form.equals("U.S") || word.form.equals("apr") || word.form.equals("aug") || word.form.equals("dec") || word.form.equals("feb") || word.form.equals("jan") || word.form.equals("jul") || word.form.equals("jun") || word.form.equals("nov") || word.form.equals("oct") || word.form.equals("sep") || word.form.equals("sept") || word.form.equals("v") || word.form.equals("vs");
    }
}

