/*
 * Decompiled with CFR 0.152.
 */
package tratz.pos.featgen;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import tratz.featgen.InitException;
import tratz.featgen.fer.ClosedClassFER;
import tratz.featgen.fer.WordNetPosTypesFER;
import tratz.parse.types.Token;
import tratz.pos.featgen.PosFeatureGenerator;
import tratz.types.ChecksumMap;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Generates the string-valued feature set used to predict the part-of-speech
 * tag of one token in an English sentence.
 *
 * <p>Features are built from the surface forms, word-cluster ids and already
 * assigned POS tags in a window around the target token, from the productions
 * of a {@link WordNetPosTypesFER} and a {@link ClosedClassFER}, and from simple
 * orthographic tests (prefixes, suffixes, capitalisation, digits, dashes).
 *
 * <p>The opaque feature-name prefixes (single code points such as
 * {@code \u2eda}) are part of the contract with any model that consumed these
 * features; they must not be changed.
 *
 * <p>Thread-safety: {@link #getFeats(List, int)} performs all of its work,
 * including the lazy creation of the transient regex matchers, while holding
 * this instance's monitor, because {@link Matcher} instances are stateful.
 */
public class DefaultEnglishPosFeatureGenerator
implements PosFeatureGenerator,
Serializable {
    public static final long serialVersionUID = 1L;

    /** Location of the gzip-compressed, tab-separated cluster file read by the constructor. */
    private static final String CLUSTER_FILE = "data/brownClusters200.gz";
    /** Entries whose occurrence count (third column) is below this value are skipped. */
    private static final int MIN_OCCURRENCE = 1;
    /** Maximum number of leading characters of the cluster path used to tell clusters apart. */
    private static final int MAX_PATH_DEPTH = Integer.MAX_VALUE;
    /** Placeholder used for word/POS positions that fall outside the sentence. */
    private static final String PAD = "\u00de";
    /** Placeholder used in features when no preceding verb/modal exists. */
    private static final String NO_VERB = "\u01a5";

    private transient WordNetPosTypesFER mDerived;
    private transient ClosedClassFER mClosedClass;
    /** Maps a (digit-normalised) word form to its cluster id. */
    private ChecksumMap<String> mFeatMap = new ChecksumMap<String>();
    // Reusable matchers. They are transient and therefore null after
    // deserialization; getFeats() rebuilds them on first use.
    private transient Matcher NUM_DASH_NUM = null;
    private transient Matcher HAS_DIGIT = null;
    private transient Matcher NUMBER_COMMA_ETC = null;
    private transient Matcher DIGITS = null;
    private transient Matcher HAVE_VERB = null;

    /**
     * Loads the word-cluster table from {@code data/brownClusters200.gz}
     * (resolved against the working directory) and creates the feature
     * extraction rules.
     *
     * <p>Each line of the file is split on tabs into
     * {@code path, token, occurrences}. Consecutive lines sharing the same path
     * receive the same cluster id; the id is incremented each time the path
     * changes.
     *
     * @throws InitException if the cluster file cannot be opened or read
     * @throws Exception     kept for backward compatibility of the signature
     */
    public DefaultEnglishPosFeatureGenerator() throws Exception {
        FileInputStream fileIn = null;
        BufferedReader reader = null;
        try {
            fileIn = new FileInputStream(CLUSTER_FILE);
            // NOTE(review): the reader uses the platform default charset, as the
            // original did - confirm the encoding of the cluster file.
            reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)));
            // NOTE(review): the id is a byte, so it wraps to negative values once
            // more than 127 distinct paths have been seen. Left untouched because
            // the resulting ids are baked into the emitted feature strings.
            byte clusterId = 0;
            String previousPath = null;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t+");
                String path = columns[0];
                String token = columns[1];
                int occurrences = Integer.parseInt(columns[2]);
                if (occurrences < MIN_OCCURRENCE) {
                    continue;
                }
                String truncatedPath = path.substring(0, Math.min(path.length(), MAX_PATH_DEPTH));
                if (!truncatedPath.equals(previousPath)) {
                    previousPath = truncatedPath;
                    clusterId = (byte)(clusterId + 1);
                }
                this.mFeatMap.put(token, clusterId);
            }
            // A failure to close after a successful read is still reported,
            // exactly as before.
            reader.close();
        }
        catch (IOException ioe) {
            throw new InitException(ioe);
        }
        finally {
            // Error path: make sure the file handle is released even when
            // reading or parsing failed part-way through. Closing twice is harmless.
            if (reader != null) {
                DefaultEnglishPosFeatureGenerator.closeQuietly(reader);
            } else {
                DefaultEnglishPosFeatureGenerator.closeQuietly(fileIn);
            }
        }
        this.mDerived = new WordNetPosTypesFER();
        this.mClosedClass = new ClosedClassFER();
    }

    /** Closes {@code resource} if it is non-null, ignoring any failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException ignored) {
            // Only used on read-only streams during cleanup; nothing useful can
            // be done here and the primary exception must not be masked.
        }
    }

    /** No-op; all set-up happens in the constructor and lazily in {@link #getFeats(List, int)}. */
    @Override
    public void init() {
    }

    /**
     * Returns the POS tag of the token at {@code location}, or the padding
     * placeholder when {@code location} is negative.
     */
    private String getPos(List<Token> tokens, int location) {
        return location >= 0 ? tokens.get(location).getPos() : PAD;
    }

    /**
     * Returns the cluster id of a word form as a string.
     *
     * <p>The padding placeholder maps to {@code "-2"}. Otherwise every ASCII
     * digit is replaced by {@code \u019e} and the result is looked up as-is and
     * then lower-cased; {@code "-1"} is returned when neither form is known.
     * Must only be called after the transient matchers have been created.
     */
    private String getBC(String s) {
        // equals(), not ==: the reference comparison only worked because the
        // padding literal happens to be interned.
        if (PAD.equals(s)) {
            return "-2";
        }
        String numericized = this.DIGITS.reset(s).replaceAll("\u019e");
        if (this.mFeatMap.containsKey(numericized)) {
            int val = this.mFeatMap.get(numericized);
            return Integer.toString(val);
        }
        String lower = numericized.toLowerCase();
        if (this.mFeatMap.containsKey(lower)) {
            int val = this.mFeatMap.get(lower);
            return Integer.toString(val);
        }
        return "-1";
    }

    /** Returns the surface text of the token at {@code index}. */
    private String getForm(List<Token> tokens, int index) {
        return tokens.get(index).getText();
    }

    /**
     * (Re)creates the transient extraction rules and regex matchers when they
     * are missing, e.g. on first use or after deserialization. The caller must
     * hold this instance's monitor.
     */
    private void ensureTransientState() {
        if (this.NUM_DASH_NUM != null) {
            return;
        }
        this.mDerived = new WordNetPosTypesFER();
        this.mClosedClass = new ClosedClassFER();
        this.HAS_DIGIT = Pattern.compile(".*[0-9].*").matcher("");
        this.NUMBER_COMMA_ETC = Pattern.compile("[0-9,\\.:]+").matcher("");
        this.DIGITS = Pattern.compile("[0-9]").matcher("");
        this.HAVE_VERB = Pattern.compile("having|have|had|has|'ve|'d").matcher("");
        // Assigned last: this field doubles as the "initialised" flag.
        this.NUM_DASH_NUM = Pattern.compile("[0-9]+\\-[0-9]+").matcher("");
    }

    /**
     * Builds the feature set for the token at position {@code index}.
     *
     * <p>POS tags are read from the tokens to the left of {@code index}; the
     * tokens to the right contribute only their text.
     *
     * @param tokens the sentence; must contain position {@code index}
     * @param index  zero-based position of the token to describe
     * @return a new mutable set of feature strings
     */
    @Override
    public Set<String> getFeats(List<Token> tokens, int index) {
        HashSet<String> f = new HashSet<String>();
        synchronized (this) {
            // Done under the lock so that no thread can observe a partially
            // initialised set of matchers.
            this.ensureTransientState();

            int numTokens = tokens.size();

            // Surface forms in a window of four tokens on either side.
            String wl4 = index > 3 ? this.getForm(tokens, index - 4) : PAD;
            String wl3 = index > 2 ? this.getForm(tokens, index - 3) : PAD;
            String wl2 = index > 1 ? this.getForm(tokens, index - 2) : PAD;
            String wl1 = index > 0 ? this.getForm(tokens, index - 1) : PAD;
            String w0 = this.getForm(tokens, index);
            String wr1 = index < numTokens - 1 ? this.getForm(tokens, index + 1) : PAD;
            String wr2 = index < numTokens - 2 ? this.getForm(tokens, index + 2) : PAD;
            String wr3 = index < numTokens - 3 ? this.getForm(tokens, index + 3) : PAD;
            String wr4 = index < numTokens - 4 ? this.getForm(tokens, index + 4) : PAD;

            // Lower-case whichever window slot holds the first token of the sentence.
            if (index == 0) {
                w0 = w0.toLowerCase();
            }
            if (index == 1) {
                wl1 = wl1.toLowerCase();
            }
            if (index == 2) {
                wl2 = wl2.toLowerCase();
            }
            if (index == 3) {
                wl3 = wl3.toLowerCase();
            }
            if (index == 4) {
                wl4 = wl4.toLowerCase();
            }

            boolean es0 = w0.endsWith("s") || w0.endsWith("S");

            // Cluster ids for the same window.
            String bcl4 = this.getBC(wl4);
            String bcl3 = this.getBC(wl3);
            String bcl2 = this.getBC(wl2);
            String bcl1 = this.getBC(wl1);
            String bc0 = this.getBC(w0);
            String bcr1 = this.getBC(wr1);
            String bcr2 = this.getBC(wr2);
            String bcr3 = this.getBC(wr3);
            String bcr4 = this.getBC(wr4);

            // POS tags of the four tokens to the left.
            String pl4 = index > 3 ? this.getPos(tokens, index - 4) : PAD;
            String pl3 = index > 2 ? this.getPos(tokens, index - 3) : PAD;
            String pl2 = index > 1 ? this.getPos(tokens, index - 2) : PAD;
            String pl1 = index > 0 ? this.getPos(tokens, index - 1) : PAD;

            // Nearest verb/modal to the left (pv) and, among the tokens between
            // it and the target, the left-most one tagged IN (preIN).
            Token pv = null;
            Token preIN = null;
            for (int i = index - 1; i >= 0; --i) {
                Token t = tokens.get(i);
                if (t.getPos().startsWith("VB") || t.getPos().equals("MD")) {
                    pv = t;
                    break;
                }
                if (t.getPos().equals("IN")) {
                    preIN = t;
                }
            }

            // When followed by punctuation: nearest IN/W* token to the left,
            // giving up at the first verb.
            if (wr1.equals(".") || wr1.equals(",") || wr1.equals(";") || wr1.equals(":")) {
                Token whOrIn = null;
                for (int i = index - 1; i >= 0; --i) {
                    Token t = tokens.get(i);
                    String pos = t.getPos();
                    if (pos.startsWith("VB")) {
                        break;
                    }
                    if (pos.equals("IN") || pos.startsWith("W")) {
                        whOrIn = t;
                        break;
                    }
                }
                if (whOrIn != null) {
                    f.add("preWhOrIn");
                    f.add("\u2eba\u2eba" + whOrIn.getText().toLowerCase());
                    f.add("\u2eba\u2eba" + whOrIn.getText().toLowerCase() + "\u2eba" + wr1);
                }
            }
            f.add((preIN == null ? "null" : preIN.getText().toLowerCase()) + "\u2ee0" + bc0);

            // If the token two to the left is a comma, look further left for
            // another comma and remember the tokens on either side of it.
            Token preTwoCommas = null;
            Token postTwoCommas = null;
            if (index > 1 && tokens.get(index - 2).getText().equals(",")) {
                for (int i = index - 3; i >= 1; --i) {
                    Token t = tokens.get(i);
                    if (!t.getText().equals(",")) {
                        continue;
                    }
                    preTwoCommas = tokens.get(i - 1);
                    postTwoCommas = tokens.get(i + 1);
                    break;
                }
            }
            if (preTwoCommas != null) {
                f.add(this.getBC(preTwoCommas.getText()) + "\u2f17" + bc0);
                f.add(postTwoCommas.getText().startsWith("wh") + "\u2f15" + bc0);
            }

            // Features built from the previous verb/modal.
            boolean pnull = pv == null;
            String pvText = pnull ? NO_VERB : pv.getText().toLowerCase();
            String pvPos = pnull ? NO_VERB : pv.getPos();
            f.add(pvText + "\u2f14" + bc0);
            f.add(pvText + "\u2f13" + w0);
            f.add(pvPos + "\u2f12" + bc0);
            f.add(pvPos + "\u2f11" + bc0 + "+" + bcr1);
            f.add("\u2f10" + (pnull ? NO_VERB : this.HAVE_VERB.reset(pvText).matches() + "+" + bc0));
            f.add(pvPos + "\u2f0f" + pl1 + "+" + bc0);

            // Verb/modal preceding pv.
            // NOTE(review): the start offset assumes Token.getIndex() is 1-based - confirm.
            Token prevVerb2 = null;
            if (pv != null) {
                for (int i = pv.getIndex() - 2; i >= 0; --i) {
                    Token t = tokens.get(i);
                    if (t.getPos().startsWith("VB") || t.getPos().equals("MD")) {
                        prevVerb2 = t;
                        break;
                    }
                }
                f.add("\u2f0c" + (prevVerb2 == null ? "null" : prevVerb2.getPos()) + "+" + bc0);
            }
            if (pv != null && (pl1.equals("CC") || pl1.equals(","))) {
                f.add(pvPos + "\u2f1c" + bc0);
            }

            // Conjunctions with the "ends in s" flag.
            f.add("\u2eda" + bc0 + es0 + bcr1);
            f.add("\u2edb" + bc0 + es0 + pl1);
            f.add("\u2edc" + bc0 + es0 + wl1);
            f.add("\u2edd" + bc0 + es0 + wr1);
            f.add("\u2ee1" + es0 + bcr1);
            f.add("\u2ee3" + pl1 + es0 + bcr1);
            f.add(es0 + "\u2ee2" + wr1);
            f.add(wl1 + "\u2ee4" + es0 + "+" + wr1);
            if (es0) {
                // Nearest determiner to the left, reachable only through tokens
                // whose tag starts with N, J, R or C.
                Token pdt = null;
                for (int i = index - 1; i >= 0; --i) {
                    Token t = tokens.get(i);
                    String pos = t.getPos();
                    if (pos.equals("DT")) {
                        pdt = t;
                        break;
                    }
                    if (!pos.startsWith("N") && !pos.startsWith("J") && !pos.startsWith("R") && !pos.startsWith("C")) {
                        break;
                    }
                }
                String pdtText = pdt == null ? "n" : pdt.getText().toLowerCase();
                f.add(pdtText + "\u2eda");
                if (pdtText.equals("a") || pdtText.equals("an") || pdtText.equals("this") || pdtText.equals("that")) {
                    f.add("singpdt\u2eda");
                }
                f.add("\u2ee2" + pvPos);
                f.add("\u2ee2\u2ee2" + (prevVerb2 == null ? "null" : prevVerb2.getPos()));
                f.add("\u2ee2\u2ee2\u2ee2" + pl1);
                if (preTwoCommas != null) {
                    f.add(this.getBC(preTwoCommas.getText()) + "\u2f17\u2ee2" + bc0);
                    f.add(postTwoCommas.getText().startsWith("wh") + "\u2f15\u2ee2" + bc0);
                }
            }

            // Mixed word / POS / cluster n-grams.
            f.add(wl1 + "\u2eec" + bc0);
            f.add(pl1 + "\u2eed" + bc0 + "+" + bcr1);
            f.add(pl2 + "\u2eee" + bc0 + "+" + bcr1);
            f.add(pl3 + "\u2eef" + bc0 + "+" + bcr1);
            f.add(pl2 + "\u2ef0" + pl1 + "+" + bc0);
            f.add(wl2 + "\u2ef1" + pl1);
            f.add(wl2 + "\u2ef2" + pl1 + "+" + bc0);
            f.add(w0 + "\u2ef3" + bcr1);
            f.add(wl2 + "\u2f08" + bcl1 + "+" + bc0);
            f.add(wl1 + "\u2f09" + bc0 + "+" + wr1);
            f.add(wl1 + "\u2f0a" + bc0 + "+" + bcr1);
            f.add(wl2 + "\u2f19" + wl1 + "+" + bc0);
            f.add(wl2 + "\u2f1a" + wl1 + "+" + bc0 + "+" + bcr1);
            f.add(wl2 + "\u2f1b" + pl1 + "+" + bc0 + "+" + bcr1);

            // A modal within the last four tags and no verb among them.
            if ((pl1.equals("MD") || pl2.equals("MD") || pl3.equals("MD") || pl4.equals("MD")) && !pl1.startsWith("VB") && !pl2.startsWith("VB") && !pl3.startsWith("VB") && !pl4.startsWith("VB")) {
                f.add("\u2eba" + bc0);
                f.add("\u2ebat");
            }

            // Productions of both extraction rules for the nearby words.
            Set<String> wl1ders = this.mDerived.getProductions(wl1, "", new HashSet<String>());
            Set<String> w0ders = this.mDerived.getProductions(w0, "", new HashSet<String>());
            Set<String> wr1ders = this.mDerived.getProductions(wr1, "", new HashSet<String>());
            Set<String> wr2ders = this.mDerived.getProductions(wr2, "", new HashSet<String>());
            Set<String> wr3ders = this.mDerived.getProductions(wr3, "", new HashSet<String>());
            this.mClosedClass.getProductions(wl1, "", wl1ders);
            this.mClosedClass.getProductions(w0, "", w0ders);
            this.mClosedClass.getProductions(wr1, "", wr1ders);
            this.mClosedClass.getProductions(wr2, "", wr2ders);
            this.mClosedClass.getProductions(wr3, "", wr3ders);

            // Every unordered pair of the target word's productions; sorting
            // makes the pair strings independent of set iteration order.
            ArrayList<String> dev = new ArrayList<String>(w0ders);
            Collections.sort(dev);
            int devSize = dev.size();
            for (int i = 0; i < devSize; ++i) {
                String s1 = dev.get(i);
                for (int j = i + 1; j < devSize; ++j) {
                    f.add("\u2eb6" + s1 + "," + dev.get(j));
                }
            }
            for (String wl1d : wl1ders) {
                f.add("\u2eb7" + wl1d);
            }
            for (String w0d : w0ders) {
                f.add("\u2eb6" + w0d);
            }
            for (String wr1d : wr1ders) {
                f.add("\u2eb5" + wr1d);
            }
            for (String wr2d : wr2ders) {
                f.add("\u2eb4" + wr2d);
            }
            for (String wr3d : wr3ders) {
                f.add("\u2eb4\u2eb5" + wr3d);
            }

            // Proper prefixes of up to four characters, and suffixes of length
            // 0..9 (the empty suffix is included).
            int w0length = w0.length();
            int prefixDepth = Math.min(5, w0length);
            for (int i = 1; i < prefixDepth; ++i) {
                f.add("\u2ead" + w0.substring(0, i));
            }
            int suffixDepth = Math.min(9, w0length);
            for (int i = suffixDepth; i >= 0; --i) {
                f.add("\u2eac" + w0.substring(w0length - i, w0length));
            }
            // Emits the bare string "true" or "false".
            f.add(Boolean.toString(w0.endsWith("ed") || w0.endsWith("n")));

            // Orthographic tests.
            char firstChar = w0.length() > 0 ? w0.charAt(0) : ' ';
            boolean isUppercase = Character.isUpperCase(firstChar);
            f.add("\u2eab" + isUppercase);
            f.add("\u2eaa" + this.allLowerCase(w0));
            f.add("\u2ea9" + this.allUpperCase(w0));
            int dashIndex = w0.lastIndexOf("-");
            f.add("\u2ea6" + (dashIndex > -1));
            f.add("\u2ea2" + w0.contains("."));
            f.add("\u2ea8" + this.NUM_DASH_NUM.reset(w0).matches());
            f.add("\u2ea1" + this.HAS_DIGIT.reset(w0).matches());
            f.add("\u2ea0" + (isUppercase && es0));
            f.add("\u2e9f" + this.NUMBER_COMMA_ETC.reset(wr1).matches());
            f.add("\u2e9e" + this.NUMBER_COMMA_ETC.reset(wr2).matches());
            f.add("\u2e9d" + Character.isUpperCase(wl1.length() > 0 ? wl1.charAt(0) : 'n'));
            if (dashIndex > -1) {
                f.add("\u2e9c" + w0.substring(0, dashIndex));
                if (dashIndex < w0.length() - 1) {
                    String dashString = w0.substring(dashIndex + 1);
                    String endDashBC = this.getBC(dashString);
                    f.add(endDashBC + "\u2e9b" + bc0);
                    f.add(endDashBC + "\u2e9b" + bc0 + "+" + bcr1);
                }
                int firstDash = w0.indexOf('-');
                if (firstDash > -1 && firstDash < w0.length() - 1) {
                    // NOTE(review): this cuts at the LAST dash although the guard
                    // tests the first one; possibly firstDash was intended. Left
                    // as-is because changing it alters the emitted features.
                    String preDash = w0.substring(0, dashIndex);
                    f.add("\u2f25" + preDash);
                }
            }

            // Sentence-final punctuation.
            Token lastToken = tokens.get(numTokens - 1);
            if (lastToken.getText().equals(".")) {
                f.add("l.");
            }
            if (lastToken.getText().equals("!")) {
                f.add("l!");
            }
            if (lastToken.getText().equals("?")) {
                f.add("l?");
            }

            // Plain POS features.
            f.add("\u2ebc" + pl4);
            f.add("\u2ebb" + pl3);
            f.add("\u2ebd" + pl2);
            f.add("\u2ec1" + pl1);
            f.add("\u2f1d" + pl1);
            f.add("\u2f1d" + pl2);
            f.add("\u2eae" + pl2 + "|" + pl1);

            // Plain word features.
            f.add("\u2ec4" + wl3);
            f.add("\u2ec5" + wl2);
            f.add("\u2ec6" + wl1);
            f.add("\u2ec7" + w0);
            f.add("\u2ec9" + wr3);
            f.add("\u2eca" + wr2);
            f.add("\u2ecb" + wr1);

            // Plain cluster features, positional and "bag" variants.
            f.add("\u2ecf" + bcl4);
            f.add("\u2ed3" + bcl3);
            f.add("\u2ed4" + bcl2);
            f.add("\u2ed9" + bcl1);
            f.add("\u2ed1" + bc0);
            f.add("\u2ed2" + bcr4);
            f.add("\u2ed8" + bcr3);
            f.add("\u2ed5" + bcr2);
            f.add("\u2ed7" + bcr1);
            f.add("\u2ee9" + bcr1);
            f.add("\u2ee9" + bcr2);
            f.add("\u2eea" + bcl2);
            f.add("\u2eea" + bcl1);
            f.add("\u2ecc" + bcr1);
            f.add("\u2ecc" + bcr2);
            f.add("\u2ecc" + bcr3);
            f.add("\u2ecd" + bcl1);
            f.add("\u2ecd" + bcl2);
            f.add("\u2ecd" + bcl3);

            // Word and cluster bigrams.
            f.add(wl2 + "\u2e8e" + wl1);
            f.add(wl1 + "\u2e8f" + w0);
            f.add(w0 + "\u2e91" + wr1);
            f.add(wr1 + "\u2e92" + wr2);
            f.add(wl1 + "\u2e90" + wr1);
            f.add(bcl2 + "\u2ee5" + bcl1);
            f.add(bcl1 + "\u2ee6" + bc0);
            f.add(bc0 + "\u2ee7" + bcr1);
            f.add(bcr1 + "\u2ee8" + bcr2);
            f.add(bcl1 + "\u2eeb" + bcr1);

            // Word and cluster trigrams.
            f.add(wl2 + "\u2e93" + wl1 + "|" + w0);
            f.add(wl1 + "\u2e95" + w0 + "|" + wr1);
            f.add(w0 + "\u2e99" + wr1 + "|" + wr2);
            f.add(wl2 + "\u2e94" + wl1 + "|" + wr1);
            f.add(wl1 + "\u2e98" + wr1 + "|" + wr2);
            f.add(bcl2 + "\u2f1f" + bcl1 + "+" + bc0);
            f.add(bcl1 + "\u2f21" + bc0 + "+" + bcr1);
            f.add(bc0 + "\u2f22" + bcr1 + "+" + bcr2);
            f.add(bcl2 + "\u2f23" + bcl1 + "+" + bcr1);
            f.add(bcl1 + "\u2f24" + bcr1 + "+" + bcr2);
        }
        return f;
    }

    /**
     * Returns {@code true} when every character of {@code s} is upper-case.
     * The empty string yields {@code true}.
     */
    private boolean allUpperCase(String s) {
        int len = s.length();
        for (int i = 0; i < len; ++i) {
            if (!Character.isUpperCase(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns {@code true} when every character of {@code s} is lower-case.
     * The empty string yields {@code true}.
     */
    private boolean allLowerCase(String s) {
        int len = s.length();
        for (int i = 0; i < len; ++i) {
            if (!Character.isLowerCase(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}

