/*
 * Decompiled with CFR 0.152.
 */
package LBJ2.nlp;

import LBJ2.nlp.Word;
import LBJ2.parse.LinkedChild;
import LBJ2.parse.LinkedVector;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Sentence
extends LinkedChild {
    private static final String[] protocols = new String[]{"telnet", "https", "file", "http", "nntp", "smtp"};
    private static final String[] topLevelDomains = new String[]{"museum", "travel", "aero", "arpa", "coop", "info", "jobs", "name", "biz", "com", "edu", "gov", "int", "mil", "net", "org", "pro", "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt", "bv", "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "er", "es", "et", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kr", "kw", "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "st", "su", "sv", "sy", "sz", "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "um", "us", "uy", "uz", "va", "vc", "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "yu", "za", "zm", "zw"};
    private boolean[] inURL = null;
    public String text = "";

    public Sentence(String string) {
        this.text = string;
    }

    public Sentence(String string, int n, int n2) {
        super(n, n2);
        this.text = string;
    }

    private void myAdd(LinkedList linkedList, int n, String string) {
        linkedList.add(new Integer(n));
    }

    public LinkedVector wordSplit() {
        LinkedList linkedList = new LinkedList();
        Matcher matcher = Pattern.compile("\\s+").matcher(this.text);
        while (matcher.find()) {
            this.myAdd(linkedList, matcher.start() - 1, ")whitespace");
            this.myAdd(linkedList, matcher.end(), "(whitespace");
        }
        if (linkedList.size() > 0 && (Integer)linkedList.getLast() >= this.text.length()) {
            linkedList.removeLast();
        } else {
            this.myAdd(linkedList, this.text.length() - 1, ")$");
        }
        if (linkedList.size() > 1 && (Integer)linkedList.getFirst() == -1) {
            linkedList.removeFirst();
        } else {
            this.myAdd(linkedList, 0, "(^");
        }
        Pattern pattern = Pattern.compile("[^\\s\\d]");
        Pattern pattern2 = Pattern.compile("\\d");
        Pattern pattern3 = Pattern.compile("\\d,\\D");
        Pattern pattern4 = Pattern.compile("\\D,\\d");
        int n = this.text.indexOf(44);
        while (n != -1) {
            if (n > 0 && this.text.charAt(n - 1) != ',' && (pattern.matcher(this.text.substring(n - 1, n)).find() || n + 1 == this.text.length() && pattern2.matcher(this.text.substring(n - 1, n)).find() || n + 1 < this.text.length() && pattern3.matcher(this.text.substring(n - 1, n + 2)).find())) {
                this.myAdd(linkedList, n - 1, ")comma1");
                this.myAdd(linkedList, n, "(comma1");
            }
            if (n + 1 < this.text.length() && (pattern.matcher(this.text.substring(n + 1, n + 2)).find() || n == 0 && pattern2.matcher(this.text.substring(n + 1, n + 2)).find() || n > 0 && pattern4.matcher(this.text.substring(n - 1, n + 2)).find())) {
                this.myAdd(linkedList, n, ")comma2");
                this.myAdd(linkedList, n + 1, "(comma2");
            }
            n = this.text.indexOf(44, n + 1);
        }
        Pattern pattern5 = Pattern.compile("[^\\s,']");
        int n2 = this.text.indexOf(39);
        while (n2 != -1) {
            if (n2 - 1 > 0 && Character.isLetter(this.text.charAt(n2 - 2)) && this.text.charAt(n2 - 1) == 'n' && n2 + 1 < this.text.length() && this.text.charAt(n2 + 1) == 't' && (n2 + 2 == this.text.length() || !Character.isLetter(this.text.charAt(n2 + 2)) && this.text.charAt(n2 + 2) != '\'')) {
                this.myAdd(linkedList, n2 - 2, ")n't");
                this.myAdd(linkedList, n2 - 1, "(n't");
            } else if (n2 > 0 && (pattern5.matcher(this.text.substring(n2 - 1, n2)).find() && n2 + 1 < this.text.length() && this.text.charAt(n2 + 1) == '\'' || this.text.charAt(n2 - 1) == 's' && (n2 + 1 == this.text.length() || !Character.isLetter(this.text.charAt(n2 + 1)) && this.text.charAt(n2 + 1) != '\'') || Character.isLetter(this.text.charAt(n2 - 1)) && (n2 + 1 < this.text.length() && (n2 + 2 == this.text.length() || !Character.isLetter(this.text.charAt(n2 + 2)) && this.text.charAt(n2 + 2) != '\'') && (this.text.charAt(n2 + 1) == 'd' || this.text.charAt(n2 + 1) == 'm' || this.text.charAt(n2 + 1) == 's') || n2 + 2 < this.text.length() && (n2 + 3 == this.text.length() || !Character.isLetter(this.text.charAt(n2 + 3)) && this.text.charAt(n2 + 3) != '\'') && (this.text.substring(n2 + 1, n2 + 3).equals("ll") || this.text.substring(n2 + 1, n2 + 3).equals("re") || this.text.substring(n2 + 1, n2 + 3).equals("ve"))) || this.text.charAt(n2 - 1) == '.' && n2 - 1 > 0 && Character.isLetter(this.text.charAt(n2 - 2)) && n2 + 1 < this.text.length() && (n2 + 2 == this.text.length() || !Character.isLetter(this.text.charAt(n2 + 2)) && this.text.charAt(n2 + 2) != '\'') && this.text.charAt(n2 + 1) == 's')) {
                this.myAdd(linkedList, n2 - 1, ")contraction1");
                this.myAdd(linkedList, n2, "(contraction1");
            }
            if (n2 + 1 < this.text.length() && pattern5.matcher(this.text.substring(n2 + 1, n2 + 2)).find() && (!Character.isLetter(this.text.charAt(n2 + 1)) || n2 > 0 && this.text.charAt(n2 - 1) == '\'')) {
                this.myAdd(linkedList, n2, ")contraction2");
                this.myAdd(linkedList, n2 + 1, "(contraction2");
            }
            n2 = this.text.indexOf(39, n2 + 1);
        }
        Pattern pattern6 = Pattern.compile("[^\\s,':]");
        Pattern pattern7 = Pattern.compile("\\d:\\d");
        int n3 = this.text.indexOf(58);
        while (n3 != -1) {
            if (!(n3 >= 2 && n3 + 2 < this.text.length() && pattern7.matcher(this.text.substring(n3 - 2, n3 + 3)).find() || n3 > 2 && n3 + 2 < this.text.length() && (this.text.substring(n3 - 2, n3 + 3).equals("tp://") || this.text.substring(n3 - 2, n3 + 3).equals("TP://")) || this.partOfURL(n3))) {
                if (n3 >= 1 && pattern6.matcher(this.text.substring(n3 - 1, n3)).find()) {
                    this.myAdd(linkedList, n3 - 1, ")colon1");
                    this.myAdd(linkedList, n3, "(colon1");
                }
                if (n3 + 1 < this.text.length() && pattern6.matcher(this.text.substring(n3 + 1, n3 + 2)).find()) {
                    this.myAdd(linkedList, n3, ")colon2");
                    this.myAdd(linkedList, n3 + 1, "(colon2");
                }
            }
            n3 = this.text.indexOf(58, n3 + 1);
        }
        Pattern pattern8 = Pattern.compile("[^\\s,':/]");
        Pattern pattern9 = Pattern.compile("\\d/\\d");
        int n4 = this.text.indexOf(47);
        while (n4 != -1) {
            if (!(n4 >= 2 && n4 + 2 < this.text.length() && pattern9.matcher(this.text.substring(n4 - 2, n4 + 3)).find() || n4 > 3 && n4 + 1 < this.text.length() && (this.text.substring(n4 - 3, n4 + 2).equals("tp://") || this.text.substring(n4 - 3, n4 + 2).equals("TP://")) || n4 > 4 && (this.text.substring(n4 - 4, n4 + 1).equals("tp://") || this.text.substring(n4 - 4, n4 + 1).equals("TP://")) || this.partOfURL(n4))) {
                if (n4 >= 1 && pattern8.matcher(this.text.substring(n4 - 1, n4)).find()) {
                    this.myAdd(linkedList, n4 - 1, ")slash1");
                    this.myAdd(linkedList, n4, "(slash1");
                }
                if (n4 + 1 < this.text.length() && pattern8.matcher(this.text.substring(n4 + 1, n4 + 2)).find()) {
                    this.myAdd(linkedList, n4, ")slash2");
                    this.myAdd(linkedList, n4 + 1, "(slash2");
                }
            }
            n4 = this.text.indexOf(47, n4 + 1);
        }
        Pattern pattern10 = Pattern.compile("[^\\s,':/-]");
        Pattern pattern11 = Pattern.compile("\\w-\\w");
        Pattern pattern12 = Pattern.compile("-\\.?\\d");
        Pattern pattern13 = Pattern.compile("\\s-\\.?\\d");
        int n5 = this.text.indexOf(45);
        while (n5 != -1) {
            if (!(n5 + 1 < this.text.length() && n5 >= 1 && pattern11.matcher(this.text.substring(n5 - 1, n5 + 2)).find() || n5 + 2 < this.text.length() && (n5 == 0 && pattern12.matcher(this.text.substring(n5, n5 + 3)).find() || n5 > 0 && pattern13.matcher(this.text.substring(n5 - 1, n5 + 3)).find()) || this.partOfURL(n5))) {
                if (n5 >= 1 && pattern10.matcher(this.text.substring(n5 - 1, n5)).find()) {
                    this.myAdd(linkedList, n5 - 1, ")dash1");
                    this.myAdd(linkedList, n5, "(dash1");
                }
                if (n5 + 1 < this.text.length() && pattern10.matcher(this.text.substring(n5 + 1, n5 + 2)).find()) {
                    this.myAdd(linkedList, n5, ")dash2");
                    this.myAdd(linkedList, n5 + 1, "(dash2");
                }
            }
            n5 = this.text.indexOf(45, n5 + 1);
        }
        Pattern pattern14 = Pattern.compile("[^\\s,':/\\$-]");
        Pattern pattern15 = Pattern.compile("\\$\\.?\\d");
        Pattern pattern16 = Pattern.compile("(\\s|-)\\$\\.?\\d");
        int n6 = this.text.indexOf(36);
        while (n6 != -1) {
            if (!(n6 == 0 && n6 + 2 < this.text.length() && pattern15.matcher(this.text.substring(n6, n6 + 3)).find() || n6 > 0 && n6 + 2 < this.text.length() && pattern16.matcher(this.text.substring(n6 - 1, n6 + 3)).find() || this.partOfURL(n6))) {
                if (n6 >= 1 && pattern14.matcher(this.text.substring(n6 - 1, n6)).find()) {
                    this.myAdd(linkedList, n6 - 1, ")dollar1");
                    this.myAdd(linkedList, n6, "(dollar1");
                }
                if (n6 + 1 < this.text.length() && pattern14.matcher(this.text.substring(n6 + 1, n6 + 2)).find()) {
                    this.myAdd(linkedList, n6, ")dollar2");
                    this.myAdd(linkedList, n6 + 1, "(dollar2");
                }
            }
            n6 = this.text.indexOf(36, n6 + 1);
        }
        Pattern pattern17 = Pattern.compile("[^\\s,':/\\$\\.-]\\.\\.\\.");
        Pattern pattern18 = Pattern.compile("\\.\\.\\.[^\\s,':/\\$\\.-]");
        int n7 = this.text.indexOf(46);
        while (n7 != -1) {
            if (n7 > 0 && n7 + 2 < this.text.length() && pattern17.matcher(this.text.substring(n7 - 1, n7 + 3)).find()) {
                this.myAdd(linkedList, n7 - 1, ")ellipsis1");
                this.myAdd(linkedList, n7, "(ellipsis1");
            }
            if (n7 >= 2 && n7 + 1 < this.text.length() && pattern18.matcher(this.text.substring(n7 - 2, n7 + 2)).find()) {
                this.myAdd(linkedList, n7, ")ellipsis2");
                this.myAdd(linkedList, n7 + 1, "(ellipsis2");
            }
            n7 = this.text.indexOf(46, n7 + 1);
        }
        n7 = this.text.lastIndexOf(46);
        if (n7 != -1) {
            boolean bl = true;
            for (int i = n7 + 1; i < this.text.length() && bl; ++i) {
                bl = !Character.isLetterOrDigit(this.text.charAt(i));
            }
            if (bl) {
                if (n7 >= 1 && (this.text.charAt(n7 - 1) != '.' || n7 == 1 || this.text.charAt(n7 - 2) != '.') && pattern14.matcher(this.text.substring(n7 - 1, n7)).find()) {
                    this.myAdd(linkedList, n7 - 1, ")period1");
                    this.myAdd(linkedList, n7, "(period1");
                }
                if (n7 + 1 < this.text.length() && (n7 == 0 || this.text.charAt(n7 - 1) != '.' || n7 == 1 || this.text.charAt(n7 - 2) != '.') && pattern14.matcher(this.text.substring(n7 + 1, n7 + 2)).find()) {
                    this.myAdd(linkedList, n7, ")period2");
                    this.myAdd(linkedList, n7 + 1, "(period2");
                }
            } else {
                n7 = -1;
            }
        }
        Pattern pattern19 = Pattern.compile("[^\\s\\w,'\\.:/\\$-]");
        matcher = pattern19.matcher(this.text);
        while (matcher.find()) {
            if (this.partOfURL(matcher.start()) || matcher.start() + 1 >= this.text.length() || this.text.charAt(matcher.start()) == this.text.charAt(matcher.start() + 1) || matcher.start() + 1 == n7 || !pattern19.matcher(this.text.substring(matcher.start() + 1, matcher.start() + 2)).find()) continue;
            this.myAdd(linkedList, matcher.start(), ")punctuation1");
            this.myAdd(linkedList, matcher.start() + 1, "(punctuation1");
        }
        matcher = Pattern.compile("[^\\s\\w,'\\.:/\\$-]\\w").matcher(this.text);
        while (matcher.find()) {
            if (this.partOfURL(matcher.start())) continue;
            this.myAdd(linkedList, matcher.start(), ")punctuation2");
            this.myAdd(linkedList, matcher.start() + 1, "(punctuation2");
        }
        matcher = Pattern.compile("\\w[^\\s\\w,'\\.:/\\$-]").matcher(this.text);
        while (matcher.find()) {
            if (this.partOfURL(matcher.start())) continue;
            this.myAdd(linkedList, matcher.start(), ")punctuation3");
            this.myAdd(linkedList, matcher.start() + 1, "(punctuation3");
        }
        Integer[] integerArray = linkedList.toArray(new Integer[linkedList.size()]);
        int[] nArray = new int[integerArray.length];
        for (int i = 0; i < nArray.length; ++i) {
            nArray[i] = integerArray[i];
        }
        Arrays.sort(nArray);
        Word word = new Word(this.text.substring(nArray[0], nArray[1] + 1), nArray[0] + this.start, nArray[1] + this.start);
        for (int i = 2; i < nArray.length; i += 2) {
            word.next = new Word(this.text.substring(nArray[i], nArray[i + 1] + 1), word, nArray[i] + this.start, nArray[i + 1] + this.start);
            word = (Word)word.next;
        }
        this.inURL = null;
        return new LinkedVector(word);
    }

    private boolean partOfURL(int n) {
        int n2;
        if (this.inURL != null) {
            return this.inURL[n];
        }
        this.inURL = new boolean[this.text.length()];
        StringBuffer stringBuffer = new StringBuffer();
        stringBuffer.append("(?i)(");
        stringBuffer.append(protocols[0]);
        for (n2 = 1; n2 < protocols.length; ++n2) {
            stringBuffer.append("|");
            stringBuffer.append(protocols[n2]);
        }
        stringBuffer.append(")://\\S+|[a-zA-Z0-9][a-zA-Z0-9-]*\\.(");
        stringBuffer.append(topLevelDomains[0]);
        for (n2 = 0; n2 < topLevelDomains.length; ++n2) {
            stringBuffer.append("|");
            stringBuffer.append(topLevelDomains[n2]);
        }
        stringBuffer.append(")(/\\S+)?");
        Matcher matcher = Pattern.compile(stringBuffer.toString()).matcher(this.text);
        while (matcher.find()) {
            for (int i = matcher.start(); i < matcher.end(); ++i) {
                this.inURL[i] = true;
            }
        }
        return this.inURL[n];
    }

    public String toString() {
        return this.text;
    }
}

