/*
 * Decompiled with CFR 0.152.
 */
package Jet.Lex;

import Jet.Lisp.FeatureSet;
import Jet.Tipster.Annotation;
import Jet.Tipster.Document;
import Jet.Tipster.Span;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Vector;

/**
 * Splits text into tokens.
 *
 * <p>Text is first cut into whitespace-delimited chunks; each chunk is then
 * subdivided at punctuation, with special handling for doubled quote/dash
 * characters, commas between digits, a final period at the end of a document
 * span, and contraction/possessive suffixes such as {@code 's} and
 * {@code n't}.
 *
 * <p>Tokens are either recorded as {@code "token"} annotations on a
 * {@link Document} or, when no document is supplied, collected as strings.
 *
 * <p>The class keeps its working state in static fields ({@link #tokens},
 * {@link #lastToken}), so concurrent calls from several threads interfere
 * with each other.
 */
public class Tokenizer {
    /** Token strings collected by {@link #tokenize(String)}. */
    static Vector<String> tokens;
    /** Text of the most recently built token; used to decide on "forcedCap". */
    static String lastToken;
    /** Two-character suffixes that are split off as separate tokens. */
    static HashSet<String> suffixes2;
    /** Three-character suffixes that are split off as separate tokens. */
    static HashSet<String> suffixes3;

    /**
     * Tokenizes the part of {@code document} covered by {@code span}, adding
     * a {@code "token"} annotation to the document for every token found.
     *
     * @param document the document to annotate
     * @param span     the region of the document to tokenize
     */
    public static void tokenize(Document document, Span span) {
        findTokens(document, document.text(), span.start(), span.end());
    }

    /**
     * Tokenizes a plain string.
     *
     * @param string the text to tokenize
     * @return the tokens in order of occurrence (empty if there are none)
     */
    public static String[] tokenize(String string) {
        tokens = new Vector<String>();
        findTokens(null, string, 0, string.length());
        return tokens.toArray(new String[0]);
    }

    /**
     * Walks {@code text[start, end)} one whitespace-delimited chunk at a time
     * and turns every chunk into one or more tokens.
     *
     * @param document document to annotate, or {@code null} to collect strings
     * @param text     the characters being tokenized
     * @param start    first position to examine
     * @param end      position just past the last character to examine
     */
    private static void findTokens(Document document, String text, int start, int end) {
        boolean firstChunk = true;
        lastToken = "";
        // Leading whitespace and leading <...> tags are skipped; later tags are not.
        int pos = skipWSX(text, start, end);
        while (pos < end) {
            int chunkStart = pos++;
            while (pos < end && !Character.isWhitespace(text.charAt(pos))) {
                pos++;
            }
            String chunk = text.substring(chunkStart, pos);
            // 'pos' ends up at the start of the next chunk, so the chunk's
            // trailing whitespace is attributed to its last token.
            pos = skipWS(text, pos, end);
            boolean lastChunkOfDocument = pos >= end && document != null;
            boolean[] splitBefore = splitIntoTokens(chunk, lastChunkOfDocument);
            buildTokens(document, chunk, splitBefore, chunkStart, pos, firstChunk);
            firstChunk = false;
        }
    }

    /**
     * Decides where a whitespace-free chunk is to be cut into tokens.
     *
     * @param chunk               the chunk to analyze
     * @param lastChunkOfDocument if true, a chunk-final period (optionally
     *                            followed by a closing quote or bracket) is
     *                            split off as its own token
     * @return an array of length {@code chunk.length() + 1}; element
     *         {@code i} is true if a token boundary lies before character
     *         {@code i} (the last element, the end of the chunk, is always
     *         true)
     */
    private static boolean[] splitIntoTokens(String chunk, boolean lastChunkOfDocument) {
        char[] chars = chunk.toCharArray();
        int len = chars.length;
        boolean[] splitBefore = new boolean[len + 1];
        splitBefore[len] = true;

        // Every character other than a letter, digit or period is a token of its own.
        for (int i = 0; i < len; i++) {
            char c = chars[i];
            if (!Character.isLetterOrDigit(c) && c != '.') {
                splitBefore[i] = true;
                splitBefore[i + 1] = true;
            }
        }

        // Keep doubled `` '' -- together as a single token.
        for (int i = 0; i < len - 1; i++) {
            char c = chars[i];
            boolean pairable = c == '`' || c == '\'' || c == '-';
            if (pairable && c == chars[i + 1] && splitBefore[i]) {
                splitBefore[i + 1] = false;
            }
        }

        // Keep "..." together when a token starts at its first period.
        for (int i = 0; i < len - 2; i++) {
            if (chars[i] == '.' && chars[i + 1] == '.' && chars[i + 2] == '.' && splitBefore[i]) {
                splitBefore[i + 1] = false;
                splitBefore[i + 2] = false;
            }
        }

        // Do not split at a comma that sits between two digits (e.g. "1,000").
        // NOTE(review): the bound (i < len - 2) means a comma directly before
        // the chunk's final character, as in "1,2", is still split; this looks
        // like a possible off-by-one -- confirm the intent before changing it.
        for (int i = 1; i < len - 2; i++) {
            if (chars[i] == ',' && Character.isDigit(chars[i - 1]) && Character.isDigit(chars[i + 1])) {
                splitBefore[i] = false;
                splitBefore[i + 1] = false;
            }
        }

        // At the end of a document span, split off a final period, including
        // one that is followed by a closing quote/bracket or by ''.
        if (lastChunkOfDocument && len > 0) {
            if (chars[len - 1] == '.') {
                splitBefore[len - 1] = true;
            } else if (len > 1 && chars[len - 2] == '.' && "\"'}>)".indexOf(chars[len - 1]) >= 0) {
                splitBefore[len - 2] = true;
            } else if (len > 2 && chars[len - 3] == '.' && chars[len - 2] == '\'' && chars[len - 1] == '\'') {
                splitBefore[len - 3] = true;
            }
        }

        // Split off three-character suffixes ('re, n't, ...) that end at a boundary.
        for (int i = 0; i < len - 2; i++) {
            if (splitBefore[i + 3] && suffixes3.contains(chunk.substring(i, i + 3))) {
                splitBefore[i] = true;
                splitBefore[i + 1] = false;
                splitBefore[i + 2] = false;
            }
        }

        // Split off two-character suffixes ('s, 'm, ...) that end at a boundary.
        for (int i = 0; i < len - 1; i++) {
            if (splitBefore[i + 2] && suffixes2.contains(chunk.substring(i, i + 2))) {
                splitBefore[i] = true;
                splitBefore[i + 1] = false;
            }
        }
        return splitBefore;
    }

    /**
     * Records one token for every segment of {@code chunk} delimited by
     * {@code splitBefore}, attaching a case or integer-value feature.
     *
     * @param document    document to annotate, or {@code null} to collect strings
     * @param chunk       the whitespace-free chunk being split
     * @param splitBefore boundaries as returned by {@link #splitIntoTokens}
     * @param chunkStart  offset of the chunk's first character in the text
     * @param chunkEnd    offset at which the next chunk starts (i.e. past the
     *                    whitespace following this chunk); used as the end of
     *                    the chunk's last token
     * @param firstChunk  true if this is the first chunk of the tokenized
     *                    region; a capitalized token in it is "forcedCap"
     */
    private static void buildTokens(Document document, String chunk, boolean[] splitBefore,
            int chunkStart, int chunkEnd, boolean firstChunk) {
        int tokenStart = 0;
        for (int tokenEnd = 1; tokenEnd <= chunk.length(); tokenEnd++) {
            if (!splitBefore[tokenEnd]) {
                continue;
            }
            String token = chunk.substring(tokenStart, tokenEnd);

            FeatureSet features;
            if (Character.isUpperCase(token.charAt(0))) {
                // Capitalization carries no information at the start of the
                // region or right after an opening quote / underscore.
                boolean forced = firstChunk
                        || lastToken.equals("_")
                        || lastToken.equals("\"")
                        || lastToken.equals("``");
                features = new FeatureSet("case", forced ? "forcedCap" : "cap");
            } else {
                int value = integerValue(token);
                features = value >= 0
                        ? new FeatureSet("intvalue", Integer.valueOf(value))
                        : new FeatureSet();
            }

            // The last token of the chunk absorbs the trailing whitespace.
            int spanEnd = tokenEnd == chunk.length() ? chunkEnd : chunkStart + tokenEnd;
            recordToken(document, token, chunkStart + tokenStart, spanEnd, features);
            tokenStart = tokenEnd;
            lastToken = token;
        }
    }

    /**
     * Interprets a token as a non-negative integer, ignoring commas that
     * follow a non-zero prefix.
     *
     * @param token the token text
     * @return the integer value, or -1 if the token contains anything other
     *         than digits and such commas, or if the value does not fit in
     *         an {@code int}
     */
    private static int integerValue(String token) {
        long value = 0;
        for (int i = 0; i < token.length(); i++) {
            char c = token.charAt(i);
            if (Character.isDigit(c)) {
                value = value * 10 + Character.digit(c, 10);
                if (value > Integer.MAX_VALUE) {
                    // Too large for the Integer feature: report "no value"
                    // instead of a silently wrapped number.
                    return -1;
                }
            } else if (c != ',' || value <= 0) {
                return -1;
            }
        }
        return (int) value;
    }

    /**
     * Stores a token: as a string in {@link #tokens} when there is no
     * document, otherwise as a {@code "token"} annotation on the document.
     *
     * @param document   document to annotate, or {@code null}
     * @param token      token text (used only when {@code document} is null)
     * @param start      start offset of the token's span
     * @param end        end offset of the token's span
     * @param featureSet features of the annotation (used only with a document)
     */
    private static void recordToken(Document document, String token, int start, int end, FeatureSet featureSet) {
        if (document == null) {
            tokens.addElement(token);
        } else {
            document.annotate("token", new Span(start, end), featureSet);
        }
    }

    /**
     * Tokenizes the part of {@code document} covered by {@code span} purely
     * on whitespace: every maximal run of non-whitespace characters becomes
     * one {@code "token"} annotation with an empty feature set. Each token's
     * span extends over the whitespace that follows it.
     *
     * @param document the document to annotate
     * @param span     the region of the document to tokenize
     */
    public static void tokenizeOnWS(Document document, Span span) {
        String text = document.text();
        int end = span.end();
        int pos = skipWS(text, span.start(), end);
        while (pos < end) {
            int tokenStart = pos++;
            while (pos < end && !Character.isWhitespace(text.charAt(pos))) {
                pos++;
            }
            int wordEnd = pos;
            pos = skipWS(text, pos, end);
            recordToken(document, text.substring(tokenStart, wordEnd), tokenStart, pos, new FeatureSet());
        }
    }

    /**
     * Skips whitespace in a document.
     *
     * @param document the document to scan
     * @param posn     position to start from
     * @param end      position at which to stop
     * @return the first position in {@code [posn, end)} holding a
     *         non-whitespace character, or {@code end} if there is none
     */
    public static int skipWS(Document document, int posn, int end) {
        int pos = posn;
        while (pos < end && Character.isWhitespace(document.charAt(pos))) {
            pos++;
        }
        return pos;
    }

    /**
     * Skips whitespace in a string.
     *
     * @param text the string to scan
     * @param posn position to start from
     * @param end  position at which to stop
     * @return the first position in {@code [posn, end)} holding a
     *         non-whitespace character, or {@code end} if there is none
     */
    public static int skipWS(String text, int posn, int end) {
        int pos = posn;
        while (pos < end && Character.isWhitespace(text.charAt(pos))) {
            pos++;
        }
        return pos;
    }

    /**
     * Skips whitespace and {@code <...>} tags in a document. A tag that is
     * not closed before {@code end} is skipped up to {@code end}.
     *
     * @param document the document to scan
     * @param posn     position to start from
     * @param end      position at which to stop
     * @return the first position that is neither whitespace nor inside a
     *         tag, or {@code end} if there is none
     */
    public static int skipWSX(Document document, int posn, int end) {
        int pos = posn;
        while (pos < end) {
            char c = document.charAt(pos);
            if (Character.isWhitespace(c)) {
                pos++;
            } else if (c == '<') {
                pos++;
                while (pos < end && document.charAt(pos) != '>') {
                    pos++;
                }
                if (pos < end) {
                    pos++; // step over the closing '>'
                }
            } else {
                break;
            }
        }
        return pos;
    }

    /**
     * Skips whitespace and {@code <...>} tags in a string. A tag that is not
     * closed before {@code end} is skipped up to {@code end}.
     *
     * @param text the string to scan
     * @param posn position to start from
     * @param end  position at which to stop
     * @return the first position that is neither whitespace nor inside a
     *         tag, or {@code end} if there is none
     */
    public static int skipWSX(String text, int posn, int end) {
        int pos = posn;
        while (pos < end) {
            char c = text.charAt(pos);
            if (Character.isWhitespace(c)) {
                pos++;
            } else if (c == '<') {
                pos++;
                while (pos < end && text.charAt(pos) != '>') {
                    pos++;
                }
                if (pos < end) {
                    pos++; // step over the closing '>'
                }
            } else {
                break;
            }
        }
        return pos;
    }

    /**
     * Collects the consecutive token annotations of {@code document} that
     * start within {@code span}, beginning after any leading whitespace and
     * tags. Collection stops at the first position where
     * {@code document.tokenAt} returns null.
     *
     * @param document the annotated document
     * @param span     the region whose tokens are wanted
     * @return the token annotations in document order
     */
    public static Annotation[] gatherTokens(Document document, Span span) {
        int end = span.end();
        ArrayList<Annotation> found = new ArrayList<Annotation>();
        int pos = skipWSX(document, span.start(), end);
        while (pos < end) {
            Annotation token = document.tokenAt(pos);
            if (token == null) {
                break;
            }
            found.add(token);
            pos = token.span().end();
        }
        return found.toArray(new Annotation[found.size()]);
    }

    /**
     * Returns the trimmed text of every token gathered by
     * {@link #gatherTokens(Document, Span)}.
     *
     * @param document the annotated document
     * @param span     the region whose tokens are wanted
     * @return the token strings in document order
     */
    public static String[] gatherTokenStrings(Document document, Span span) {
        Annotation[] found = gatherTokens(document, span);
        String[] strings = new String[found.length];
        for (int i = 0; i < found.length; i++) {
            // Token spans include trailing whitespace, hence the trim.
            strings[i] = document.text(found[i]).trim();
        }
        return strings;
    }

    static {
        suffixes2 = new HashSet<String>();
        suffixes3 = new HashSet<String>();
        suffixes2.add("'s");
        suffixes2.add("'m");
        suffixes2.add("'d");
        suffixes2.add("'S");
        suffixes2.add("'M");
        suffixes2.add("'D");
        suffixes3.add("'re");
        suffixes3.add("'ve");
        suffixes3.add("n't");
        suffixes3.add("'ll");
        suffixes3.add("'RE");
        suffixes3.add("'VE");
        suffixes3.add("N'T");
        suffixes3.add("'LL");
    }
}

