/*
 * Decompiled with CFR 0.152.
 */
package AceJet;

import AceJet.Ace;
import AceJet.AceDocument;
import AceJet.EDTtypeData;
import AceJet.LearnRelations;
import AceJet.Mention;
import AceJet.NameSubtyper;
import AceJet.PerfectAce;
import Jet.Chunk.Chunker;
import Jet.Control;
import Jet.JetTest;
import Jet.Lex.EnglishLex;
import Jet.Parser.SynFun;
import Jet.Pat.Pat;
import Jet.Refres.Resolve;
import Jet.Tipster.Annotation;
import Jet.Tipster.Document;
import Jet.Tipster.ExternalDocument;
import Jet.Tipster.Span;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Vector;

public class EDTtype {
    /** Document currently being processed; assigned for each entry in trainFromFileList. */
    static ExternalDocument doc;
    /** Root directory under which document lists, source texts and key files are looked up. */
    static final String ACEdir = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/";
    /** Output file for the type dictionary written by main (via writeTypeDict). */
    static final String typeDictFile = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/new EDT type dict.txt";
    /** Output file for the generic dictionary written by main (via writeGenericDict). */
    static final String genericFile = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/new generic dict.txt";
    /** Constant flag; not read by any code visible in this class (Ace.monocase is a separate field). */
    static final boolean monocase = true;
    /** "token" annotations of {@link #doc}, refreshed by collectTokens. */
    static Vector tokens;
    /** Maps a token's start offset (Integer) to its index (Integer) in {@link #tokens}. */
    static HashMap tokenStartMap;
    /** Stream for the type dictionary; opened in main and closed by writeTypeDict. */
    static PrintStream writer;
    /** Stream for the generic dictionary; opened in main and closed by writeGenericDict. */
    static PrintStream gwriter;
    /** Maps a head word (lower-cased by processMentions) to its EDTtypeData statistics. */
    static TreeMap typeDataMap;
    /** Head words accepted as generic by readGenericDict; null until that method is called. */
    static TreeSet genericHeads;
    /** Number of mentions used for training so far (capped inside processMentions). */
    static int trainingMentions;
    /** Count of evaluation mentions whose predicted bare type matched the key. */
    static int correct;
    /** Count of evaluation mentions whose predicted bare type differed from the key. */
    static int incorrect;
    /** Count of getTypeSubtype calls that fell through every rule and returned "OTHER". */
    static int unknown;
    /** Heads for which getTypeSubtype takes the type from the mention's "of" feature instead. */
    private static final String[] partitives;
    /** List of government titles; not referenced by any code visible in this class. */
    private static final String[] governmentTitles;
    /** Hand-coded head word to type[:subtype] overrides consulted by handCodedEDTtype. */
    static HashMap specifiedEDTtype;

    /**
     * Trains the EDT type and generic dictionaries from the ACE training
     * document lists and writes both dictionaries to disk.
     *
     * <p>The pre-2004 lists are processed with {@code AceDocument.ace2004}
     * cleared, the 2004 lists with it set. Summary counters are printed at
     * the end.
     *
     * @param stringArray command-line arguments (ignored)
     * @throws IOException if an output file cannot be opened or a document
     *         list cannot be read
     */
    public static void main(String[] stringArray) throws IOException {
        System.out.println("Starting ACE EDT Type / Generic Training ...");
        JetTest.initializeFromConfig("props/train EDT.properties");
        Chunker.loadModel();
        Pat.trace = false;
        writer = new PrintStream(new FileOutputStream(typeDictFile));
        gwriter = new PrintStream(new FileOutputStream(genericFile));

        // Document lists, relative to ACEdir, grouped by annotation format.
        String[] pre2004Lists = {"training all.txt", "feb02 all.txt", "sep02 all.txt", "aug03 all.txt"};
        String[] ace2004Lists = {"training04 nwire 21andup.txt", "training04 bnews 21andup.txt", "training04 chinese.txt"};

        AceDocument.ace2004 = false;
        for (int i = 0; i < pre2004Lists.length; ++i) {
            EDTtype.trainFromFileList(ACEdir + pre2004Lists[i]);
        }
        AceDocument.ace2004 = true;
        for (int i = 0; i < ace2004Lists.length; ++i) {
            EDTtype.trainFromFileList(ACEdir + ace2004Lists[i]);
        }

        // Both write methods close the stream they are given.
        EDTtype.writeTypeDict(writer);
        EDTtype.writeGenericDict(gwriter);
        EDTtypeData.reportSubtypeTotals();

        System.out.println(trainingMentions + " training mentions");
        System.out.println(correct + " correct predictions, " + incorrect + " incorrect");
        System.out.println(unknown + " unknown");
    }

    /**
     * Processes every document named in a list file: runs the Jet pipeline on
     * the source text, loads the matching ACE key file, and feeds the
     * document's mentions to {@link #processMentions}.
     *
     * <p>Each non-blank line of the list is a document name relative to
     * {@code ACEdir}, without extension. Names starting with "aug03" or
     * "training04" use the ".apf.xml" key file; all others use
     * ".sgm.tmx.rdc.xml".
     *
     * @param string path of the document list file
     * @throws IOException if the list file cannot be opened or read
     */
    static void trainFromFileList(String string) throws IOException {
        BufferedReader listReader = new BufferedReader(new FileReader(string));
        try {
            int docCount = 0;
            String docName;
            while ((docName = listReader.readLine()) != null) {
                // A blank line (e.g. at the end of the list) names no document.
                if (docName.trim().length() == 0) {
                    continue;
                }
                System.out.println("\nProcessing document " + ++docCount + ": " + docName);
                String textFile = ACEdir + docName + ".sgm";
                doc = new ExternalDocument("sgml", textFile);
                doc.setAllTags(true);
                doc.open();
                Ace.monocase = Ace.allLowerCase(doc);
                Control.processDocument(doc, null, false, docCount);
                EDTtype.collectTokens();
                String keySuffix = docName.startsWith("aug03") || docName.startsWith("training04") ? ".apf.xml" : ".sgm.tmx.rdc.xml";
                String keyFile = ACEdir + docName + keySuffix;
                AceDocument aceDocument = new AceDocument(textFile, keyFile);
                LearnRelations.findEntityMentions(aceDocument);
                EDTtype.processMentions(doc);
            }
        } finally {
            // Release the list file even if processing a document fails.
            listReader.close();
        }
    }

    /**
     * Caches the "token" annotations of the current document in
     * {@code tokens} and rebuilds {@code tokenStartMap}, which maps each
     * token's start offset to its index in {@code tokens}.
     */
    static void collectTokens() {
        tokens = doc.annotationsOfType("token");
        tokenStartMap = new HashMap();
        // NOTE(review): annotationsOfType presumably yields null for a document
        // with no tokens -- confirm; in that case the map is simply left empty.
        if (tokens == null) {
            return;
        }
        for (int index = 0; index < tokens.size(); ++index) {
            Annotation token = (Annotation) tokens.get(index);
            tokenStartMap.put(Integer.valueOf(token.start()), Integer.valueOf(index));
        }
    }

    /**
     * Walks all mentions of a document and either trains on them or scores
     * the current model against them.
     *
     * <p>Mentions whose head category is "pro", "det" or "name" are skipped.
     * While fewer than 200000 mentions have been seen, each mention updates
     * the type (and, when a key mention exists, generic) counts of its
     * lower-cased head word. After that, the predicted bare type is compared
     * with the key type, updating {@code correct} / {@code incorrect}.
     *
     * @param externalDocument the processed document whose mentions are read
     */
    static void processMentions(ExternalDocument externalDocument) {
        Vector mentions = Resolve.gatherMentions(externalDocument, new Span(0, externalDocument.length()));
        for (int idx = 0; idx < mentions.size(); ++idx) {
            Annotation mentionAnn = (Annotation) mentions.get(idx);
            Annotation headC = Resolve.getHeadC(mentionAnn);
            String cat = (String) headC.get("cat");
            if (cat.equals("pro") || cat.equals("det") || cat.equals("name")) {
                continue;
            }
            String headWord = Resolve.normalizeName(externalDocument.text(headC)).toLowerCase();

            // Key mention (if any) starting at the same offset as the head.
            Mention keyMention = (Mention) LearnRelations.mentionStartMap.get(Integer.valueOf(headC.start()));
            String keyType = keyMention == null ? "OTHER" : keyMention.type;
            String keySubtype = keyMention == null ? "" : keyMention.subtype;
            if (!AceDocument.ace2004) {
                keySubtype = "*";
            }

            if (trainingMentions < 200000) {
                // Training phase: accumulate counts for this head word.
                ++trainingMentions;
                EDTtypeData data = (EDTtypeData) typeDataMap.get(headWord);
                if (data == null) {
                    data = new EDTtypeData(headWord);
                    typeDataMap.put(headWord, data);
                }
                data.incrementTypeCount(keyType, keySubtype, 1);
                if (keyMention != null) {
                    data.incrementGenericCount(keyMention.generic);
                }
            } else {
                // Evaluation phase: compare the predicted bare type with the key.
                String predicted = EDTtype.bareType(EDTtype.getTypeSubtype(externalDocument, null, mentionAnn));
                if (predicted.equals(keyType)) {
                    ++correct;
                } else {
                    ++incorrect;
                    System.out.print("Word: " + headWord);
                    System.out.println(" predict " + predicted + ", should be " + keyType);
                }
            }
        }
    }

    /**
     * Writes every entry of {@code typeDataMap}, in key order, to the given
     * stream and then closes the stream.
     *
     * @param printStream destination for the type dictionary; closed on return
     */
    static void writeTypeDict(PrintStream printStream) {
        for (Object entry : typeDataMap.values()) {
            EDTtypeData data = (EDTtypeData) entry;
            data.write(printStream);
        }
        printStream.close();
    }

    /**
     * Loads the type dictionary named by the "Ace.EDTtype.fileName" entry of
     * the Jet configuration, or prints a message when no such entry exists.
     */
    public static void readTypeDict() {
        String fileName = JetTest.getConfigFile("Ace.EDTtype.fileName");
        if (fileName == null) {
            System.out.println("EDTtype.readTypeDict:  no file name specified in config file");
            return;
        }
        EDTtype.readTypeDict(fileName);
    }

    /**
     * Replaces {@code typeDataMap} with the entries read from a type
     * dictionary file, one entry per line. Lines that
     * {@code EDTtypeData.readLine} rejects (returns null for) are ignored.
     *
     * <p>I/O failures are reported on standard output rather than thrown;
     * entries read before the failure remain in the map.
     *
     * @param string path of the type dictionary file
     */
    public static void readTypeDict(String string) {
        System.out.println("Loading type dictionary " + string);
        typeDataMap = new TreeMap();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(string));
            String line;
            while ((line = reader.readLine()) != null) {
                EDTtypeData entry = EDTtypeData.readLine(line);
                if (entry == null) {
                    continue;
                }
                typeDataMap.put(entry.word, entry);
            }
            System.out.println("Type dictionary loaded.");
        }
        catch (IOException iOException) {
            System.out.print("Unable to load dictionary due to exception: ");
            System.out.println(iOException);
        }
        finally {
            // Always release the file handle, on success and on failure alike.
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only file fails.
                }
            }
        }
    }

    /**
     * Writes one line "word | genericCount nonGenericCount" for every entry
     * of {@code typeDataMap} that has at least one positive count, then
     * closes the stream.
     *
     * @param printStream destination for the generic dictionary; closed on return
     */
    static void writeGenericDict(PrintStream printStream) {
        for (Object entry : typeDataMap.values()) {
            EDTtypeData data = (EDTtypeData) entry;
            boolean hasCounts = data.genericCount > 0 || data.nonGenericCount > 0;
            if (hasCounts) {
                printStream.println(data.word + " | " + data.genericCount + " " + data.nonGenericCount);
            }
        }
        printStream.close();
    }

    /**
     * Replaces {@code genericHeads} with the head words of a generic
     * dictionary file that are predominantly generic.
     *
     * <p>Each line has the form "word | genericCount nonGenericCount". A word
     * is kept when its generic count exceeds its non-generic count and the two
     * counts sum to more than 2. The first malformed line (no separator, or
     * missing / non-numeric counts) is reported and stops loading; words
     * accepted before it are retained. I/O failures are reported on standard
     * output rather than thrown.
     *
     * @param string path of the generic dictionary file
     */
    public static void readGenericDict(String string) {
        System.out.println("Loading generic dictionary.");
        genericHeads = new TreeSet();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(string));
            String line;
            while ((line = reader.readLine()) != null) {
                int bar = line.indexOf('|');
                if (bar <= 1) {
                    System.out.println("** error in generic dict: " + line);
                    return;
                }
                // The word is followed by a single blank before the separator.
                String word = line.substring(0, bar - 1);
                // The tokenizer skips the blank after '|', so starting right
                // behind the separator is safe even when nothing follows it.
                StringTokenizer counts = new StringTokenizer(line.substring(bar + 1));
                if (counts.countTokens() < 2) {
                    System.out.println("** error in generic dict: " + line);
                    return;
                }
                int genericCount;
                int nonGenericCount;
                try {
                    genericCount = Integer.parseInt(counts.nextToken());
                    nonGenericCount = Integer.parseInt(counts.nextToken());
                }
                catch (NumberFormatException badCount) {
                    System.out.println("** error in generic dict: " + line);
                    return;
                }
                if (genericCount > nonGenericCount && genericCount + nonGenericCount > 2) {
                    genericHeads.add(word);
                }
            }
            System.out.println("Generic dictionary loaded.");
        }
        catch (IOException iOException) {
            System.out.print("Unable to load dictionary due to exception: ");
            System.out.println(iOException);
        }
        finally {
            // Runs on the early error returns as well, so the file never leaks.
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only file fails.
                }
            }
        }
    }

    /**
     * Reports whether the head of a mention is listed in the generic
     * dictionary loaded by {@link #readGenericDict}.
     *
     * <p>The head text is lower-cased before the lookup because the
     * dictionary is written from head words that processMentions lower-cases
     * during training; a case-sensitive lookup could never match a
     * capitalised head.
     *
     * @param externalDocument document containing the mention
     * @param annotation the mention whose head is tested
     * @return true if the normalised, lower-cased head word is a known
     *         generic head; false otherwise, including when no generic
     *         dictionary has been loaded
     */
    public static boolean hasGenericHead(ExternalDocument externalDocument, Annotation annotation) {
        if (genericHeads == null) {
            return false;
        }
        Annotation headC = Resolve.getHeadC(annotation);
        String headWord = Resolve.normalizeName(externalDocument.text(headC).trim());
        return genericHeads.contains(headWord.toLowerCase());
    }

    /**
     * Determines the EDT type, optionally followed by ":subtype", of a
     * mention. The rules are tried in this order:
     * <ol>
     *   <li>key information from PerfectAce, when {@code Ace.perfectMentions} is set;</li>
     *   <li>the gazetteer (nationals / nationalities);</li>
     *   <li>for names, the upper-cased head with a subtype from NameSubtyper
     *       ("OTHER" when the head is absent or "otherName");</li>
     *   <li>for partitive or quantifier heads, the type of the "of" object
     *       ("part" / "portion" of a GPE becomes LOCATION:Region-Subnational);</li>
     *   <li>"OTHER" for pronoun, determiner and quantifier heads;</li>
     *   <li>the hand-coded table, then the trained dictionary (looked up by
     *       head text, by head, and by the form returned from
     *       EnglishLex.nounPlural);</li>
     *   <li>"PERSON" when {@code Ace.preferRelations} is set and the mention,
     *       or the first argument, is marked human;</li>
     *   <li>otherwise "OTHER", counting the mention in {@code unknown}.</li>
     * </ol>
     *
     * @param externalDocument document containing the mention
     * @param annotation optional annotation whose "human" feature is consulted
     *        by the PERSON fallback; may be null
     * @param annotation2 the mention to classify
     * @return the type, possibly with a ":subtype" suffix; never null
     */
    public static String getTypeSubtype(ExternalDocument externalDocument, Annotation annotation, Annotation annotation2) {
        // Lower-case only when a head exists; the name rule below already
        // anticipates a missing head.
        String rawHead = SynFun.getHead(externalDocument, annotation2);
        String paHead = rawHead == null ? null : rawHead.toLowerCase();
        String det = SynFun.getDet(annotation2);
        boolean isHuman = SynFun.getHuman(annotation2);
        Annotation headC = Resolve.getHeadC(annotation2);

        if (Ace.perfectMentions) {
            String keyType = PerfectAce.getTypeSubtype(headC);
            if (keyType != null && !keyType.equals("")) {
                return keyType;
            }
            System.out.println("*** no type info for " + externalDocument.text(headC));
        }

        String gazetteerType = EDTtype.getGazetteerTypeSubtype(externalDocument, annotation2);
        if (gazetteerType != null) {
            return gazetteerType;
        }

        String headWord = Resolve.normalizeName(externalDocument.text(headC).trim());
        String name = SynFun.getName(externalDocument, annotation2);
        String cat = (String) headC.get("cat");

        if (name != null) {
            if (paHead != null && !paHead.equalsIgnoreCase("otherName")) {
                String nameType = paHead.toUpperCase();
                String nameSubtype = NameSubtyper.classify(name, nameType);
                return EDTtype.typeAndSubtype(nameType, nameSubtype);
            }
            return "OTHER";
        }

        // Compare feature values with equals(): identity only works while
        // every producer interns its strings.
        if (EDTtype.in(paHead, partitives) || "q".equals(cat)) {
            // Descend through the chain of heads until one carries an "of" feature.
            Annotation ofHolder = annotation2;
            while (ofHolder != null && ofHolder.get("of") == null) {
                ofHolder = (Annotation) ofHolder.get("headC");
            }
            if (ofHolder != null) {
                Annotation ofObject = (Annotation) ofHolder.get("of");
                System.out.println("Using computed type for " + paHead);
                String computed = EDTtype.getTypeSubtype(externalDocument, null, ofObject);
                if (EDTtype.bareType(computed).equals("GPE") && ("part".equals(paHead) || "portion".equals(paHead))) {
                    computed = "LOCATION:Region-Subnational";
                }
                return computed;
            }
        }

        if ("pro".equals(cat) || "det".equals(cat) || "q".equals(cat)) {
            return "OTHER";
        }

        String handCoded = EDTtype.handCodedEDTtype(det, headWord);
        if (handCoded != null) {
            return handCoded;
        }

        String learned = EDTtype.lookUpEDTtype(headWord.toLowerCase());
        if (learned != null) {
            return learned.intern();
        }
        learned = EDTtype.lookUpEDTtype(paHead);
        if (learned != null) {
            return learned.intern();
        }
        if (paHead != null) {
            String[] inflected = EnglishLex.nounPlural(new String[]{paHead});
            if (inflected != null && inflected.length > 0) {
                learned = EDTtype.lookUpEDTtype(inflected[0]);
                if (learned != null) {
                    return learned.intern();
                }
            }
        }

        if (Ace.preferRelations && (isHuman || annotation != null && "t".equals(annotation.get("human")))) {
            return "PERSON";
        }
        ++unknown;
        return "OTHER";
    }

    /**
     * Types a mention from the gazetteer alone.
     *
     * @param document document containing the mention
     * @param annotation the mention to classify
     * @return "PERSON" when the mention has category "np" and its head tokens
     *         are a national or nationals; "GPE:Nation" when the mention has
     *         any other category and its head tokens are a nationality; null
     *         when the gazetteer gives no answer
     */
    private static String getGazetteerTypeSubtype(Document document, Annotation annotation) {
        String[] headTokens = Resolve.getHeadTokens(document, annotation);
        // equals() rather than ==, so the test does not rely on the feature
        // value being an interned string.
        if ("np".equals(annotation.get("cat"))) {
            if (Ace.gazetteer.isNational(headTokens) || Ace.gazetteer.isNationals(headTokens)) {
                return "PERSON";
            }
        } else if (Ace.gazetteer.isNationality(headTokens)) {
            return "GPE:Nation";
        }
        return null;
    }

    /**
     * Looks a head word up in the hand-coded type table.
     *
     * @param string the determiner of the mention, or null when it has none
     * @param string2 the head word, used as the table key as given
     * @return null when the word has no hand-coded entry; otherwise "OTHER"
     *         when the determiner is null, or the table's type when it is not
     */
    static String handCodedEDTtype(String string, String string2) {
        String codedType = (String) specifiedEDTtype.get(string2);
        if (codedType == null) {
            return null;
        }
        return string == null ? "OTHER" : codedType;
    }

    /**
     * Returns the best type (and subtype) recorded for a word in the trained
     * type dictionary.
     *
     * @param string the word to look up, in any case; may be null
     * @return the result of {@code getBestTypeSubtype()} for the word's
     *         entry, or null when the word is null or has no entry
     */
    static String lookUpEDTtype(String string) {
        EDTtypeData entry = string == null ? null : (EDTtypeData) typeDataMap.get(string.toLowerCase());
        return entry == null ? null : entry.getBestTypeSubtype();
    }

    /**
     * Tests whether an array contains an element equal to the given object.
     * Null array elements never match, so a null object is never found.
     *
     * @param object the value searched for
     * @param objectArray the array to search
     * @return true if some non-null element {@code equals} the object
     */
    private static boolean in(Object object, Object[] objectArray) {
        for (Object candidate : objectArray) {
            if (candidate != null && candidate.equals(object)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Strips the subtype from a "TYPE:subtype" string.
     *
     * @param string a type, optionally followed by ':' and a subtype
     * @return the text before the first ':' when that ':' is not the first
     *         character; otherwise the argument unchanged
     */
    static String bareType(String string) {
        int colon = string.indexOf(':');
        return colon > 0 ? string.substring(0, colon) : string;
    }

    /**
     * Extracts the subtype from a "TYPE:subtype" string.
     *
     * @param string a type, optionally followed by ':' and a subtype
     * @return the text after the first ':' when that ':' is not the first
     *         character; otherwise the empty string
     */
    static String subtype(String string) {
        int colon = string.indexOf(':');
        return colon > 0 ? string.substring(colon + 1) : "";
    }

    /**
     * Joins a type and a subtype into the "TYPE:subtype" form.
     *
     * @param string the type; it is upper-cased in the result
     * @param string2 the subtype, appended as given
     * @return the upper-cased type, a ':' and the subtype
     */
    static String typeAndSubtype(String string, String string2) {
        String upperType = string.toUpperCase();
        StringBuilder joined = new StringBuilder(upperType);
        joined.append(":").append(string2);
        return joined.toString();
    }

    static {
        typeDataMap = new TreeMap();
        trainingMentions = 0;
        correct = 0;
        incorrect = 0;
        unknown = 0;
        partitives = new String[]{"group", "part", "member", "portion", "center", "bunch", "couple", "remainder", "rest", "lot", "percent", "%", "dozen", "hundred", "thousand", "some", "either", "neither", "any", "each", "all", "both", "none", "most", "many", "afew", "one", "q"};
        governmentTitles = new String[]{"Vice-President", "Vice-Premier", "Prime-Minister", "Foreign-Minister", "Foreign-Secretary", "Secretary-of-State", "Attorney-General", "Justice-Minister", "Secretary-General"};
        specifiedEDTtype = new HashMap();
        specifiedEDTtype.put("force", "ORGANIZATION:Other");
        specifiedEDTtype.put("board", "ORGANIZATION:Commercial");
        specifiedEDTtype.put("prison", "FACILITY:Building");
        specifiedEDTtype.put("room", "FACILITY:Subarea-Building");
        specifiedEDTtype.put("home", "FACILITY:Building");
        specifiedEDTtype.put("state", "GPE:State-or-Province");
        specifiedEDTtype.put("land", "LOCATION:Region-National");
        specifiedEDTtype.put("minister", "PERSON");
    }
}

