/*
 * Decompiled with CFR 0.152.
 */
package AceJet;

import AceJet.Ace;
import AceJet.Gazetteer;
import Jet.Refres.Resolve;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.TreeMap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

public class APFNameAnalyzer {
    // Character set name passed to InputStreamReader when readDocument() loads a source text.
    static String encoding = "ISO-8859-1";
    // Re-created (empty) on every findNames() call; not otherwise read in this file.
    static HashMap startTag;
    // Re-created (empty) on every findNames() call; not otherwise read in this file.
    static HashSet endTag;
    // XML parser created in main() and used by analyzeDocument() to parse each key file.
    static DocumentBuilder builder;
    // Directory prefix prepended to every document name read from fileList.
    static final String ACEdir = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/";
    // Text file read line by line in main(); each line is a document name relative to ACEdir.
    static final String fileList = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training nwire.txt";
    // --- Tallies incremented by analyzeNames() and printed by report(). ---
    // Name is exactly equal to an earlier name of the same entity.
    static int identityCount;
    // Name equals an earlier name when case is ignored.
    static int equalsIgnoreCaseCount;
    // Person: single-token name equals the last token of an earlier name.
    static int lastNameCount;
    // Person: two-token name matches the last two tokens of an earlier (longer) name.
    static int lastTwoNameCount;
    // Person: single-token name equals the first token of an earlier name.
    static int firstNameCount;
    // Person: Resolve.matchFullName() accepted the pair (result >= 0).
    static int personSubseqCount;
    // ORG/GPE: single-token name is an acronym of an earlier name (isAcronym).
    static int acronymCount;
    // ORG/GPE: earlier single-token name is an acronym of this name.
    static int reverseAcronymCount;
    // ORG/GPE: single-token name is a dotted abbreviation of an earlier name (isAbbreviation).
    static int abbreviationCount;
    // ORG/GPE: earlier single-token name vs. this name, as judged by Resolve.isAbbreviation().
    static int reverseAbbreviationCount;
    // GPE: gazetteer's capitalToCountry() result for this name equals an earlier name's tokens.
    static int capitalCount;
    // Non-person: Resolve.matchFullName() accepted the pair (result >= 0).
    static int subseqCount;
    // Name had at least one earlier name in its entity but matched none of the rules above.
    static int leftovers;
    // Filled by computeOffsets(): indexed by position in the full text (markup included),
    // gives that position minus the number of markup characters seen so far.
    static int[] ACEoffsetMap;
    // Filled by computeOffsets(): indexed by markup-free offset, gives the position in the
    // full text; findNames() uses it to translate the key file's start/end offsets.
    static int[] JEToffsetMap;
    // Short-to-long entity type names, populated in the static initializer;
    // not read anywhere in this file.
    static HashMap standardType;
    // When true, isAcronym()/isAbbreviation() print a line for every successful match.
    static boolean trace;

    /**
     * Runs the name-alias analysis over every document named in {@link #fileList}
     * and prints the accumulated counts.
     *
     * <p>Loads the gazetteer, creates the shared non-validating XML parser, then for
     * each non-blank line of the list file analyses the source text
     * ({@code ACEdir + name + ".sgm"}) against its key file. The list reader is
     * always closed, even when a document fails to parse.
     *
     * @param stringArray command-line arguments (ignored)
     * @throws Exception if the gazetteer, the list file or any document cannot be
     *                   read or parsed
     */
    public static void main(String[] stringArray) throws Exception {
        Resolve.trace = false;
        Ace.gazetteer = new Gazetteer();
        Ace.gazetteer.load("C:/Documents and Settings/Ralph Grishman/My Documents/ACE/loc.dict");
        DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
        documentBuilderFactory.setValidating(false);
        builder = documentBuilderFactory.newDocumentBuilder();

        // The key-file suffix depends only on the list file's name, so decide it once.
        // NOTE(review): the test looks at fileList, not at the individual document name;
        // with the current constant it never contains "03" - confirm this is intended.
        boolean apfKeys = fileList.indexOf("03") > 0;
        String keySuffix = apfKeys ? ".apf.xml" : ".sgm.tmx.rdc.xml";

        BufferedReader bufferedReader = new BufferedReader(new FileReader(fileList));
        try {
            int n = 0;
            String docName;
            while ((docName = bufferedReader.readLine()) != null) {
                // A blank (e.g. trailing) line would otherwise yield a bogus path and abort the run.
                if (docName.trim().length() == 0) {
                    continue;
                }
                System.out.println("\nProcessing document " + ++n + ": " + docName);
                String textFile = ACEdir + docName + ".sgm";
                String keyFile = ACEdir + docName + keySuffix;
                APFNameAnalyzer.analyzeDocument(textFile, keyFile);
            }
        } finally {
            // Release the file handle whether or not analysis succeeded.
            bufferedReader.close();
        }
        APFNameAnalyzer.report();
    }

    /**
     * Analyses one document: parses its key file, loads its source text, builds the
     * offset maps for that text and then tallies the names found in the key.
     *
     * @param string  path of the source text file
     * @param string2 location of the XML key file, handed to the shared parser
     * @throws SAXException if the key file is not well-formed XML
     * @throws IOException  if either file cannot be read
     */
    private static void analyzeDocument(String string, String string2) throws SAXException, IOException {
        // The key is parsed first so that a malformed key fails before the text is read.
        final Document key = builder.parse(string2);
        final StringBuffer text = readDocument(string);
        computeOffsets(text);
        findNames(key, text);
    }

    /**
     * Walks every {@code entity} element of the key document and feeds its names,
     * in order of their start position in the text, to {@link #analyzeNames}.
     *
     * <p>For each {@code name} element the {@code start} and {@code end} offsets are
     * translated through {@link #JEToffsetMap} (so {@link #computeOffsets} must have
     * been run on the same text) and the inclusive span is cut out of the text.
     * Names are keyed by translated start offset, so two names of one entity that
     * start at the same position collapse to the later one.
     *
     * <p>Also resets {@link #startTag} and {@link #endTag} to empty collections.
     *
     * @param document     parsed key file
     * @param stringBuffer full source text, markup included
     */
    static void findNames(Document document, StringBuffer stringBuffer) {
        startTag = new HashMap();
        endTag = new HashSet();
        NodeList entities = document.getElementsByTagName("entity");
        for (int i = 0; i < entities.getLength(); ++i) {
            Element entity = (Element)entities.item(i);
            String entityType = APFNameAnalyzer.getElementText(entity, "entity_type");
            // Names already seen for this entity; analyzeNames() appends to it.
            ArrayList<String> priorNames = new ArrayList<String>();
            NodeList names = entity.getElementsByTagName("name");
            // Sorted by start offset so names are analysed in text order.
            TreeMap<Integer, String> namesByStart = new TreeMap<Integer, String>();
            for (int j = 0; j < names.getLength(); ++j) {
                Element name = (Element)names.item(j);
                int keyStart = Integer.parseInt(APFNameAnalyzer.getElementText(name, "start"));
                int textStart = JEToffsetMap[keyStart];
                int keyEnd = Integer.parseInt(APFNameAnalyzer.getElementText(name, "end"));
                int textEnd = JEToffsetMap[keyEnd];
                // The key's end offset is inclusive, hence the + 1.
                String nameText = stringBuffer.substring(textStart, textEnd + 1);
                namesByStart.put(Integer.valueOf(textStart), nameText);
            }
            for (String nameText : namesByStart.values()) {
                APFNameAnalyzer.analyzeNames(priorNames, nameText, entityType);
            }
        }
    }

    /**
     * Returns the node value of the first child of the first descendant of
     * {@code element} whose tag name is {@code string}.
     *
     * <p>No absence checks are made: a missing descendant or a descendant without
     * children results in a {@link NullPointerException}.
     *
     * @param element element to search beneath
     * @param string  tag name to look for
     * @return the first child's node value
     */
    private static String getElementText(Element element, String string) {
        final Element firstMatch = (Element) element.getElementsByTagName(string).item(0);
        return firstMatch.getFirstChild().getNodeValue();
    }

    /**
     * Reads a whole text file, decoded with {@link #encoding}, into a buffer.
     *
     * <p>The file is read line by line and every line is terminated with a single
     * {@code '\n'} in the result, whatever line terminator the file itself uses.
     * The underlying stream is always closed before returning or throwing.
     *
     * @param string path of the file to read
     * @return the file's text with each line followed by {@code '\n'}
     * @throws IOException if the file cannot be opened or read, or the encoding is
     *                     not supported
     */
    static StringBuffer readDocument(String string) throws IOException {
        InputStream in = new FileInputStream(new File(string));
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding));
            StringBuffer text = new StringBuffer();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
            return text;
        } finally {
            // Closing the stream releases the file handle even if decoding setup or reading failed.
            in.close();
        }
    }

    /**
     * Rebuilds {@link #ACEoffsetMap} and {@link #JEToffsetMap} for the given text.
     *
     * <p>A character counts as markup from a {@code '<'} up to and including the
     * next {@code '>'}. With {@code m} the number of markup characters seen so far:
     * <ul>
     *   <li>{@code ACEoffsetMap[pos]} is {@code pos - m}, where {@code m} already
     *       includes the character at {@code pos} if it is markup;</li>
     *   <li>{@code JEToffsetMap[pos - m]} is set to {@code pos} using {@code m}
     *       <em>before</em> the current character is counted, so consecutive markup
     *       characters keep overwriting one slot until the next plain character
     *       claims it.</li>
     * </ul>
     * Both arrays have the length of the text; {@code JEToffsetMap} slots beyond the
     * number of plain characters are not meaningful.
     *
     * @param stringBuffer full source text, markup included
     */
    static void computeOffsets(StringBuffer stringBuffer) {
        final int length = stringBuffer.length();
        ACEoffsetMap = new int[length];
        JEToffsetMap = new int[length];

        boolean insideMarkup = false;
        int markupChars = 0;
        for (int pos = 0; pos < length; pos++) {
            final char ch = stringBuffer.charAt(pos);
            if (ch == '<') {
                insideMarkup = true;
            }
            // Uses the count from before this character is (possibly) added to it.
            JEToffsetMap[pos - markupChars] = pos;
            if (insideMarkup) {
                markupChars++;
            }
            ACEoffsetMap[pos] = pos - markupChars;
            if (ch == '>') {
                // The closing bracket itself has already been counted as markup.
                insideMarkup = false;
            }
        }
    }

    /**
     * Classifies how a name relates to the names previously seen for the same entity
     * and increments exactly one of the class counters (or none, for an entity's
     * first name).
     *
     * <p>The name is tokenised, normalised with {@code Resolve.normalizeGazName} and
     * re-joined, then appended to {@code arrayList}. It is first compared for exact
     * equality against every earlier name (oldest first). Failing that, earlier
     * names are visited newest first and the first rule that fires wins:
     * case-insensitive equality; person last name / last two names / first name /
     * {@code Resolve.matchFullName}; ORG-or-GPE acronym and abbreviation in either
     * direction; GPE capital-to-country; non-person {@code Resolve.matchFullName}.
     * If nothing fires and there was at least one earlier name, the pair is printed
     * and counted in {@link #leftovers}.
     *
     * <p>Type tests are null-safe: a {@code null} type matches no type-specific rule.
     *
     * @param arrayList normalised names already seen for this entity; the new name
     *                  is appended (elements are expected to be {@code String}s)
     * @param string    raw text of the new name
     * @param string2   entity type; "PER"/"PERSON", "ORG"/"ORGANIZATION" and "GPE"
     *                  are the values the rules distinguish
     */
    static void analyzeNames(ArrayList arrayList, String string, String string2) {
        String[] nameTokens = Gazetteer.splitAtWS(string);
        nameTokens = Resolve.normalizeGazName(nameTokens, false, false);
        string = Resolve.concat(nameTokens);
        arrayList.add(string);
        // NOTE(review): kept as Object[] as in the decompiled source; narrow to String[]
        // if capitalToCountry() is declared to return String[].
        Object[] country = Ace.gazetteer.capitalToCountry(nameTokens);

        // Number of names that preceded the one just appended.
        final int priorCount = arrayList.size() - 1;

        // Pass 1: exact repetition of any earlier name.
        for (int i = 0; i < priorCount; ++i) {
            String prior = (String)arrayList.get(i);
            if (string.equals(prior)) {
                ++identityCount;
                return;
            }
        }

        final boolean isPerson = "PER".equals(string2) || "PERSON".equals(string2);
        final boolean isGpe = "GPE".equals(string2);
        final boolean isOrgOrGpe = "ORG".equals(string2) || "ORGANIZATION".equals(string2) || isGpe;

        // Pass 2: looser rules, most recent earlier name first.
        for (int i = priorCount - 1; i >= 0; --i) {
            String prior = (String)arrayList.get(i);
            // splitAtWS returns String[]; typing it as such lets the token comparisons compile
            // without casts.
            String[] priorTokens = Gazetteer.splitAtWS(prior);
            if (string.equalsIgnoreCase(prior)) {
                ++equalsIgnoreCaseCount;
                return;
            }
            if (isPerson && nameTokens.length == 1 && string.equals(priorTokens[priorTokens.length - 1])) {
                ++lastNameCount;
                return;
            }
            if (isPerson && nameTokens.length == 2 && priorTokens.length > 2
                    && nameTokens[0].equalsIgnoreCase(priorTokens[priorTokens.length - 2])
                    && nameTokens[1].equals(priorTokens[priorTokens.length - 1])) {
                ++lastTwoNameCount;
                return;
            }
            if (isPerson && nameTokens.length == 1 && string.equals(priorTokens[0])) {
                ++firstNameCount;
                return;
            }
            if (isPerson && Resolve.matchFullName(nameTokens, "", priorTokens, "") >= 0) {
                ++personSubseqCount;
                return;
            }
            if (isOrgOrGpe && nameTokens.length == 1 && APFNameAnalyzer.isAcronym(priorTokens, string)) {
                ++acronymCount;
                return;
            }
            if (isOrgOrGpe && priorTokens.length == 1 && APFNameAnalyzer.isAcronym(nameTokens, prior)) {
                ++reverseAcronymCount;
                return;
            }
            if (isOrgOrGpe && nameTokens.length == 1 && APFNameAnalyzer.isAbbreviation(priorTokens, string)) {
                ++abbreviationCount;
                return;
            }
            // NOTE(review): the reverse direction uses Resolve.isAbbreviation(...) == 0 rather than
            // this class's boolean isAbbreviation(); preserved as found - confirm it is intended.
            if (isOrgOrGpe && priorTokens.length == 1 && Resolve.isAbbreviation(nameTokens, prior) == 0) {
                ++reverseAbbreviationCount;
                return;
            }
            if (isGpe && country != null && Resolve.equalArray(country, priorTokens)) {
                ++capitalCount;
                return;
            }
            if (!isPerson && Resolve.matchFullName(nameTokens, "", priorTokens, "") >= 0) {
                ++subseqCount;
                return;
            }
        }

        // No rule fired: report it, unless this was the entity's first name.
        if (arrayList.size() > 1) {
            System.out.println(string + " is alias of " + arrayList.get(0));
            ++leftovers;
        }
    }

    /**
     * Prints the heading "Coreference counts:" followed by one labelled line per
     * counter, in a fixed order, to standard output.
     */
    static void report() {
        // Labels and values are kept in parallel arrays, in print order.
        final String[] labels = {
            "  identity:              ",
            "  identityIgnoringCase:  ",
            "  last name:             ",
            "  last two names:        ",
            "  first name:            ",
            "  other subseq (person): ",
            "  acronym:               ",
            "  name follows acronym:  ",
            "  abbreviation:          ",
            "  name follows abbrev.:  ",
            "  capital of country     ",
            "  subseq. (not person):  ",
            "  other:                 ",
        };
        final int[] values = {
            identityCount,
            equalsIgnoreCaseCount,
            lastNameCount,
            lastTwoNameCount,
            firstNameCount,
            personSubseqCount,
            acronymCount,
            reverseAcronymCount,
            abbreviationCount,
            reverseAbbreviationCount,
            capitalCount,
            subseqCount,
            leftovers,
        };

        System.out.println("Coreference counts:");
        for (int row = 0; row < labels.length; row++) {
            System.out.println(labels[row] + values[row]);
        }
    }

    /**
     * Tests whether {@code string} can be read as an acronym of the words in
     * {@code stringArray}.
     *
     * <p>Both the word list and the candidate must have length of at least 2. The
     * words "the", "of", "for" and "and" (any case) are skipped; every other word's
     * first character must equal, case-sensitively, the next unused character of
     * the candidate. When {@link #trace} is set, a successful match is printed.
     *
     * <p>NOTE(review): the candidate is not required to be fully consumed, so
     * trailing characters with no corresponding word do not cause rejection -
     * confirm this is intended.
     *
     * @param stringArray words of the full name
     * @param string      candidate acronym
     * @return {@code true} if every non-skipped word is matched by the candidate
     */
    public static boolean isAcronym(String[] stringArray, String string) {
        if (stringArray.length < 2 || string.length() < 2) {
            return false;
        }
        int used = 0;
        for (String word : stringArray) {
            boolean skipped = word.equalsIgnoreCase("the")
                    || word.equalsIgnoreCase("of")
                    || word.equalsIgnoreCase("for")
                    || word.equalsIgnoreCase("and");
            if (skipped) {
                continue;
            }
            // Reject when the candidate is exhausted or the initial does not match.
            if (used >= string.length() || word.charAt(0) != string.charAt(used)) {
                return false;
            }
            used++;
        }
        if (trace) {
            System.out.println("Refres: recognizing " + string + " as acronym of " + Resolve.concat(stringArray));
        }
        return true;
    }

    /**
     * Tests whether {@code string} can be read as a dotted abbreviation (letter,
     * period, letter, period, ...) of the words in {@code stringArray}.
     *
     * <p>The word list must have at least 2 entries and the candidate must have an
     * even length of at least 4. The words "the", "of", "for" and "and" (any case)
     * are skipped; every other word's first character must equal, case-sensitively,
     * the next unused character of the candidate, which in turn must be followed by
     * {@code '.'}. When {@link #trace} is set, a successful match is printed.
     *
     * <p>NOTE(review): the candidate is not required to be fully consumed, so
     * trailing letter/period pairs with no corresponding word do not cause
     * rejection - confirm this is intended.
     *
     * @param stringArray words of the full name
     * @param string      candidate abbreviation
     * @return {@code true} if every non-skipped word is matched by the candidate
     */
    public static boolean isAbbreviation(String[] stringArray, String string) {
        final int candidateLength = string.length();
        if (stringArray.length < 2 || candidateLength < 4 || candidateLength % 2 == 1) {
            return false;
        }
        int used = 0;
        for (String word : stringArray) {
            boolean skipped = word.equalsIgnoreCase("the")
                    || word.equalsIgnoreCase("of")
                    || word.equalsIgnoreCase("for")
                    || word.equalsIgnoreCase("and");
            if (skipped) {
                continue;
            }
            // Need room for a letter plus its period, a matching initial, and the period itself.
            if (used >= candidateLength - 1
                    || word.charAt(0) != string.charAt(used)
                    || string.charAt(used + 1) != '.') {
                return false;
            }
            used += 2;
        }
        if (trace) {
            System.out.println("Refres: recognizing " + string + " as abbreviation of " + Resolve.concat(stringArray));
        }
        return true;
    }

    static {
        identityCount = 0;
        equalsIgnoreCaseCount = 0;
        lastNameCount = 0;
        lastTwoNameCount = 0;
        firstNameCount = 0;
        personSubseqCount = 0;
        acronymCount = 0;
        reverseAcronymCount = 0;
        abbreviationCount = 0;
        reverseAbbreviationCount = 0;
        capitalCount = 0;
        subseqCount = 0;
        leftovers = 0;
        ACEoffsetMap = null;
        JEToffsetMap = null;
        standardType = new HashMap();
        standardType.put("GSP", "GPE");
        standardType.put("PER", "PERSON");
        standardType.put("ORG", "ORGANIZATION");
        standardType.put("LOC", "LOCATION");
        standardType.put("FAC", "FACILITY");
        trace = false;
    }
}

