/*
 * Decompiled with CFR 0.152.
 */
package Jet.HMM;

import AceJet.Ace;
import Jet.HMM.BigramHMMemitter;
import Jet.HMM.HMM;
import Jet.HMM.HMMannotator;
import Jet.HMM.HMMemitter;
import Jet.HMM.HMMstate;
import Jet.HMM.WordFeatureHMMemitter;
import Jet.JetTest;
import Jet.Lex.Tokenizer;
import Jet.Lisp.FeatureSet;
import Jet.Scorer.NEScorer;
import Jet.Scorer.NameTagger;
import Jet.Tipster.Annotation;
import Jet.Tipster.AnnotationColor;
import Jet.Tipster.Document;
import Jet.Tipster.DocumentCollection;
import Jet.Tipster.ExternalDocument;
import Jet.Tipster.Span;
import Jet.Zoner.SentenceSplitter;
import Jet.Zoner.SpecialZoner;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Vector;

public class HMMNameTagger
implements NameTagger {
    /** The HMM used for name tagging; created by the constructor from the emitter class. */
    public HMM nameHMM;
    /** Annotator built around {@link #nameHMM}; the constructor calls setBItag(false) and setAnnotateEachToken(false) on it. */
    public HMMannotator annotator;
    /** Not assigned or read by any method visible in this class. */
    String[][] tagTable;
    /** Element [3] of every row of the annotator's tag table; filled in by readTagTable and used by buildNameHMM to name states. */
    String[] NEtypeTable;
    /** Set to {"ENAMEX", "VALUE", "TIMEX"} by readTagTable; not read elsewhere in this class. */
    String[] tagsToRead;
    /** Same array as {@link #NEtypeTable}; handed to nameHMM.setTagsToCache by readTagTable. */
    String[] tagsToCache;
    /** Tags passed to NEScorer.scoreCollection; static, and reset to {"ENAMEX"} each time any instance reads a tag table. */
    static String[] tagsToScore;
    /** Emitter class given to the constructor; passed to every HMMstate created in buildNameHMM. */
    Class emitterClass;
    /** Hard-coded, machine-specific base directory. */
    static final String home = "C:/Documents and Settings/Ralph Grishman/My Documents/";
    /** Hard-coded, machine-specific ACE 05 directory; passed to the AnnotationColor constructor in main. */
    static final String ACEdir = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE 05/V4/";
    /** When true, taggers are built with BigramHMMemitter instead of WordFeatureHMMemitter; main sets it from its third argument. */
    static boolean useAceBigrams;

    public HMMNameTagger(Class clazz) {
        if (!HMMemitter.class.isAssignableFrom(clazz)) {
            System.out.println("HMMNameTagger constructor invoked with invalid class " + clazz);
            return;
        }
        this.emitterClass = clazz;
        this.nameHMM = new HMM(clazz);
        this.annotator = new HMMannotator(this.nameHMM);
        this.annotator.setBItag(false);
        this.annotator.setAnnotateEachToken(false);
    }

    /**
     * Reads the tag table from the named file (opened with {@link FileReader}, i.e. its
     * default character set) and delegates to {@link #readTagTable(BufferedReader)}.
     * <p>
     * An {@link IOException} is not propagated: its stack trace is printed and the method
     * returns, in which case the tag-derived fields may not have been set.
     *
     * @param string path of the tag table file
     */
    private void readTagTable(String string) {
        try {
            BufferedReader tagReader = new BufferedReader(new FileReader(string));
            try {
                this.readTagTable(tagReader);
            } finally {
                // Release the file handle whether or not parsing succeeded.
                tagReader.close();
            }
        }
        catch (IOException iOException) {
            iOException.printStackTrace();
        }
    }

    /**
     * Has the annotator read its tag table from {@code bufferedReader}, then derives this
     * tagger's tag arrays from it.
     * <p>
     * {@code NEtypeTable} (and {@code tagsToCache}, which is the same array) receives element
     * [3] of each tag-table row and is passed to {@code nameHMM.setTagsToCache}. The static
     * {@code tagsToScore} is reset to {"ENAMEX"} and {@code tagsToRead} to
     * {"ENAMEX", "VALUE", "TIMEX"}.
     *
     * @param bufferedReader source of the tag table; not closed by this method
     */
    private void readTagTable(BufferedReader bufferedReader) {
        this.annotator.readTagTable(bufferedReader);
        String[][] table = this.annotator.getTagTable();
        String[] neTypes = new String[table.length];
        for (int row = 0; row < table.length; ++row) {
            neTypes[row] = table[row][3];
        }
        this.NEtypeTable = neTypes;
        this.tagsToCache = neTypes;
        this.nameHMM.setTagsToCache(this.tagsToCache);
        tagsToScore = new String[]{"ENAMEX"};
        this.tagsToRead = new String[]{"ENAMEX", "VALUE", "TIMEX"};
    }

    /**
     * Writes the annotator's tag table to {@code bufferedWriter} by delegating to
     * {@code annotator.writeTagTable}.
     *
     * @param bufferedWriter destination for the tag table
     */
    private void writeTagTable(BufferedWriter bufferedWriter) {
        this.annotator.writeTagTable(bufferedWriter);
    }

    /**
     * Reads the tag table from {@code string} and builds the state graph of the name HMM.
     * <p>
     * States created: "start", "other", "end", and for every type X in {@code NEtypeTable}
     * the six states "pre-X", "i-X", "b-X", "m-X", "e-X" and "post-X". "pre-X" and
     * "post-X" carry the tag "other"; the remaining per-type states carry the tag X.
     * (NOTE(review): "i-X" has no arcs to "m-X"/"e-X", so it presumably models a
     * one-token name while b/m/e model a multi-token one - confirm.)
     * Finally the HMM resolves arc names and is reset for training.
     *
     * @param string path of the tag table file
     */
    public void buildNameHMM(String string) {
        this.readTagTable(string);
        Class emitter = this.emitterClass;
        String[] types = this.NEtypeTable;

        // "start": may go anywhere a sentence can begin.
        HMMstate startState = new HMMstate("start", "", emitter);
        this.nameHMM.addState(startState);
        startState.addArc("other");
        startState.addArc("end");
        for (String type : types) {
            startState.addArc("pre-" + type);
            startState.addArc("i-" + type);
            startState.addArc("b-" + type);
        }

        // "other": only reaches a name through that name's "pre-" state.
        HMMstate otherState = new HMMstate("other", "other", emitter);
        this.nameHMM.addState(otherState);
        otherState.addArc("other");
        otherState.addArc("end");
        for (String type : types) {
            otherState.addArc("pre-" + type);
        }

        // "end": no outgoing arcs.
        HMMstate endState = new HMMstate("end", "", emitter);
        this.nameHMM.addState(endState);

        for (int t = 0; t < types.length; ++t) {
            String type = types[t];
            String singleName = "i-" + type;
            String beginName = "b-" + type;
            String middleName = "m-" + type;
            String lastName = "e-" + type;
            String postName = "post-" + type;

            HMMstate preState = new HMMstate("pre-" + type, "other", emitter);
            this.nameHMM.addState(preState);
            preState.addArc(singleName);
            preState.addArc(beginName);

            HMMstate singleState = new HMMstate(singleName, type, emitter);
            this.nameHMM.addState(singleState);

            HMMstate beginState = new HMMstate(beginName, type, emitter);
            this.nameHMM.addState(beginState);
            beginState.addArc(middleName);
            beginState.addArc(lastName);

            HMMstate middleState = new HMMstate(middleName, type, emitter);
            this.nameHMM.addState(middleState);
            middleState.addArc(middleName);
            middleState.addArc(lastName);

            HMMstate lastState = new HMMstate(lastName, type, emitter);
            this.nameHMM.addState(lastState);

            HMMstate postState = new HMMstate(postName, "other", emitter);
            this.nameHMM.addState(postState);

            // Exits from a finished name ("i-" or "e-") and from "post-".
            for (int u = 0; u < types.length; ++u) {
                String otherType = types[u];
                String preOther = "pre-" + otherType;
                singleState.addArc(preOther);
                lastState.addArc(preOther);
                postState.addArc(preOther);
                if (u != t) {
                    // A name may be followed directly by a name of a different type only.
                    singleState.addArc("i-" + otherType);
                    singleState.addArc("b-" + otherType);
                    lastState.addArc("i-" + otherType);
                    lastState.addArc("b-" + otherType);
                }
            }
            singleState.addArc(postName);
            singleState.addArc("end");
            lastState.addArc(postName);
            lastState.addArc("end");
            postState.addArc("other");
            postState.addArc("end");
        }

        this.nameHMM.resolveNames();
        this.nameHMM.resetForTraining();
    }

    /**
     * Trains the HMM on every document of a document collection, then computes the HMM's
     * probabilities.
     *
     * @param string path of the collection file, passed to the DocumentCollection constructor
     * @throws IOException declared by this method; propagated from training an individual document
     */
    public void train(String string) throws IOException {
        DocumentCollection collection = new DocumentCollection(string);
        collection.open();
        int position = 0;
        while (position < collection.size()) {
            ExternalDocument trainingDoc = collection.get(position);
            ++position;
            // Progress message uses 1-based numbering.
            System.out.println("\nTraining from document " + position + ": " + trainingDoc.fileName());
            this.train(trainingDoc);
        }
        this.nameHMM.computeProbabilities();
    }

    /**
     * Trains the HMM on the documents named in a file list, then computes the HMM's
     * probabilities.
     * <p>
     * Each line of {@code string2} is appended to {@code string} to form the path of an
     * "sgml" {@link ExternalDocument}. The list file is read with {@link FileReader}
     * (its default character set) and is closed before this method returns, including
     * when training a document throws.
     *
     * @param string  directory prefix; concatenated as-is, so it should end with a separator
     * @param string2 path of the file listing one document name per line
     * @throws IOException if the list cannot be read or a document fails to train
     */
    public void train(String string, String string2) throws IOException {
        BufferedReader fileList = new BufferedReader(new FileReader(string2));
        try {
            int docCount = 0;
            String docName;
            while ((docName = fileList.readLine()) != null) {
                System.out.println("\nTraining from document " + ++docCount + ": " + docName);
                ExternalDocument trainingDoc = new ExternalDocument("sgml", string + docName);
                this.train(trainingDoc);
            }
        } finally {
            // Release the list file's handle even if a document fails.
            fileList.close();
        }
        this.nameHMM.computeProbabilities();
    }

    /**
     * Trains the HMM on one document.
     * <p>
     * The document is opened, its TEXT zones are sentence-split, and every sentence is
     * tokenized and handed to {@code annotator.trainOnSpan}. {@code Ace.monocase} is set
     * as a side effect: per TEXT zone from the whole document, then per sentence from the
     * sentence span (all-lower-case or title-case). The document's annotations are cleared
     * only when sentences were found; a document with no TEXT annotations is reported on
     * standard output and skipped.
     *
     * @param externalDocument the training document
     * @throws IOException declared by this method; the visible code does not show which call raises it
     */
    public void train(ExternalDocument externalDocument) throws IOException {
        externalDocument.setAllTags(true);
        externalDocument.open();
        externalDocument.stretchAll();
        externalDocument.annotateWithTag("TEXT");
        SpecialZoner.findSpecialZones(externalDocument);
        this.nameHMM.newDocument();

        Vector<Annotation> textZones = externalDocument.annotationsOfType("TEXT");
        if (textZones == null) {
            System.out.println("No <TEXT> in " + externalDocument.fileName() + ", skipped.");
            return;
        }
        for (Annotation textZone : textZones) {
            Span zoneSpan = textZone.span();
            Ace.monocase = Ace.allLowerCase(externalDocument);
            System.out.println(">>> Monocase is " + Ace.monocase);
            SentenceSplitter.split(externalDocument, zoneSpan);
        }

        Vector<Annotation> sentences = externalDocument.annotationsOfType("sentence");
        if (sentences != null) {
            for (Annotation sentence : sentences) {
                Span sentenceSpan = sentence.span();
                if (Ace.allLowerCase(externalDocument, sentenceSpan)) {
                    Ace.monocase = true;
                } else {
                    Ace.monocase = Ace.titleCase(externalDocument, sentenceSpan);
                }
                Tokenizer.tokenize(externalDocument, sentenceSpan);
                this.annotator.trainOnSpan(externalDocument, sentenceSpan);
            }
            externalDocument.clearAnnotations();
        }
    }

    /**
     * Writes the tagger to a file: the tag table, a line containing "endtags", then the HMM.
     * <p>
     * The output is encoded with {@code JetTest.encoding}. The writer is closed (and thereby
     * flushed) by this method after a successful write, and the underlying file stream is
     * closed in all cases, so the file handle is released even if writing fails.
     *
     * @param string path of the model file to create
     * @throws IOException if the file cannot be opened or written
     */
    public void store(String string) throws IOException {
        OutputStream fileOut = new FileOutputStream(string);
        try {
            BufferedWriter modelWriter = new BufferedWriter(new OutputStreamWriter(fileOut, JetTest.encoding));
            this.writeTagTable(modelWriter);
            modelWriter.write("endtags");
            modelWriter.newLine();
            this.nameHMM.store(new PrintWriter(modelWriter));
            // close() flushes buffered output; it has no effect if nameHMM.store already
            // closed the writer, so this is safe either way.
            modelWriter.close();
        } finally {
            // Closing an already-closed stream has no effect; this covers the failure paths.
            fileOut.close();
        }
    }

    /**
     * Loads a tagger from a file: first the tag table, then the HMM, both read from the same
     * reader using {@code JetTest.encoding}.
     * <p>
     * The file is closed before this method returns, whether or not loading succeeds.
     * NOTE(review): this assumes {@code nameHMM.load} finishes reading before it returns and
     * does not keep the reader for later use - confirm against HMM.load.
     *
     * @param string path of the model file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void load(String string) throws IOException {
        InputStream fileIn = new FileInputStream(string);
        try {
            BufferedReader modelReader = new BufferedReader(new InputStreamReader(fileIn, JetTest.encoding));
            this.readTagTable(modelReader);
            this.nameHMM.load(modelReader);
        } finally {
            // Closing an already-closed stream has no effect, so this is safe even if
            // nameHMM.load closed the reader itself.
            fileIn.close();
        }
    }

    /**
     * Tags the names in a whole document.
     * <p>
     * The document's TEXT zones are sentence-split, and each sentence is tokenized and
     * tagged with {@link #tag(Document, Span)}. {@code Ace.monocase} is set as a side
     * effect: per TEXT zone from the whole document, then per sentence from the sentence
     * span (all-lower-case or title-case).
     * <p>
     * If the document yields no TEXT annotations or no sentence annotations
     * ({@code annotationsOfType} returns {@code null}), the method returns without tagging
     * instead of failing with a {@link NullPointerException}.
     *
     * @param document the document to tag; annotations are added to it
     */
    public void tagDocument(Document document) {
        this.nameHMM.newDocument();
        document.annotateWithTag("TEXT");
        SpecialZoner.findSpecialZones(document);

        Vector<Annotation> textZones = document.annotationsOfType("TEXT");
        if (textZones == null) {
            // Nothing to split or tag.
            return;
        }
        for (Annotation textZone : textZones) {
            Span zoneSpan = textZone.span();
            Ace.monocase = Ace.allLowerCase(document);
            System.out.println(">>> Monocase is " + Ace.monocase);
            SentenceSplitter.split(document, zoneSpan);
        }

        Vector<Annotation> sentences = document.annotationsOfType("sentence");
        if (sentences == null) {
            // The splitter produced no sentences.
            return;
        }
        for (Annotation sentence : sentences) {
            Span sentenceSpan = sentence.span();
            Ace.monocase = Ace.allLowerCase(document, sentenceSpan) || Ace.titleCase(document, sentenceSpan);
            Tokenizer.tokenize(document, sentenceSpan);
            this.tag(document, sentenceSpan);
        }
    }

    /**
     * Signals the start of a new document by delegating to {@code nameHMM.newDocument()}.
     */
    public void newDocument() {
        this.nameHMM.newDocument();
    }

    /**
     * Tags the names in one span of a document.
     * <p>
     * A span that lies in a POSTER or SPEAKER zone is handled by
     * {@link #tagPersonZone(Document, Span, HMMannotator)}; any other span is annotated
     * directly by the HMM annotator.
     *
     * @param document the document containing the span
     * @param span     the span to tag
     */
    public void tag(Document document, Span span) {
        boolean inPersonZone = HMMNameTagger.inZone(document, span, "POSTER")
                || HMMNameTagger.inZone(document, span, "SPEAKER");
        if (!inPersonZone) {
            this.annotator.annotateSpan(document, span);
            return;
        }
        HMMNameTagger.tagPersonZone(document, span, this.annotator);
    }

    /**
     * Reports whether a span starts inside an annotation of the given type.
     * <p>
     * Leading whitespace of the span is skipped first; the resulting position (which equals
     * the span's end when the span is all whitespace) is then tested against every
     * annotation of type {@code string}, start inclusive and end exclusive.
     *
     * @param document the document containing the span
     * @param span     the span to test
     * @param string   the annotation type naming the zone
     * @return {@code true} if some annotation of that type covers the position;
     *         {@code false} otherwise, including when the document has no such annotations
     */
    public static boolean inZone(Document document, Span span, String string) {
        String text = document.text();
        int limit = span.end();
        int firstInk = span.start();
        while (firstInk < limit && Character.isWhitespace(text.charAt(firstInk))) {
            ++firstInk;
        }
        Vector<Annotation> zones = document.annotationsOfType(string);
        if (zones == null) {
            return false;
        }
        for (int k = 0; k < zones.size(); ++k) {
            Span zoneSpan = zones.get(k).span();
            if (zoneSpan.start() <= firstInk && firstInk < zoneSpan.end()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tags a span that is known to lie in a person zone.
     * <p>
     * After skipping leading whitespace, the text up to the first comma (or the whole rest
     * of the span when there is no comma) is annotated as ENAMEX with TYPE=PERSON. When a
     * comma is present, the remainder of the span, starting at the comma, is annotated by
     * {@code hMMannotator}; an empty stretch before the comma is not annotated.
     *
     * @param document     the document containing the span
     * @param span         the span to tag
     * @param hMMannotator annotator applied to the text from the comma onward
     */
    public static void tagPersonZone(Document document, Span span, HMMannotator hMMannotator) {
        String text = document.text();
        int limit = span.end();
        int nameStart = span.start();
        while (nameStart < limit && Character.isWhitespace(text.charAt(nameStart))) {
            ++nameStart;
        }
        int commaPos = nameStart;
        while (commaPos < limit && text.charAt(commaPos) != ',') {
            ++commaPos;
        }
        if (commaPos < limit) {
            Span beforeComma = new Span(nameStart, commaPos);
            Span fromComma = new Span(commaPos, limit);
            if (nameStart < commaPos) {
                document.annotate("ENAMEX", beforeComma, new FeatureSet("TYPE", "PERSON"));
            }
            if (commaPos < limit) {
                hMMannotator.annotateSpan(document, fromComma);
            }
        } else {
            // No comma: the whole (trimmed) span is the person name.
            Span wholeName = new Span(nameStart, limit);
            document.annotate("ENAMEX", wholeName, new FeatureSet("TYPE", "PERSON"));
        }
    }

    /**
     * Command-line entry point: builds an HMM from a state (tag) file, trains it on one or
     * more corpora and stores the resulting model.
     * <p>
     * Arguments: {@code state-file model-file uni/bigram directory1 filelist1
     * [directory2 filelist2] ...}. The third argument selects BigramHMMemitter when it is
     * exactly "bigram", otherwise WordFeatureHMMemitter. A "/" is appended to a directory
     * that does not already end with one. With too few arguments, or an even number of them,
     * usage is printed to standard error and the JVM exits with status 1.
     *
     * @param stringArray the command-line arguments
     * @throws IOException if training or storing the model fails
     */
    public static void main(String[] stringArray) throws IOException {
        int argCount = stringArray.length;
        boolean argsOk = argCount >= 5 && argCount % 2 != 0;
        if (!argsOk) {
            System.err.println("HMMNameTagger requires 3 + 2n arguments for n training corpora:");
            System.err.println("  state-file model-file uni/bigram directory1 filelist1 [directory2 filelist2] ...");
            System.exit(1);
        }
        new AnnotationColor(ACEdir);
        String stateFile = stringArray[0];
        String modelFile = stringArray[1];
        useAceBigrams = stringArray[2].equals("bigram");
        Class emitter;
        if (useAceBigrams) {
            emitter = BigramHMMemitter.class;
        } else {
            emitter = WordFeatureHMMemitter.class;
        }
        HMMNameTagger tagger = new HMMNameTagger(emitter);
        tagger.buildNameHMM(stateFile);
        // Remaining arguments come in (directory, file list) pairs.
        int pair = 3;
        while (pair < stringArray.length) {
            String directory = stringArray[pair];
            if (!directory.endsWith("/")) {
                directory = directory + "/";
            }
            String fileList = stringArray[pair + 1];
            tagger.train(directory, fileList);
            pair += 2;
        }
        tagger.store(modelFile);
    }

    /**
     * Builds an HMM from acedata/ACEnameTags.txt, trains it on four hard-coded ACE
     * collections, stores the model (bigram or non-bigram file name depending on
     * {@code useAceBigrams}) and scores it with {@code aceTest}.
     * <p>
     * The "ACE aug03" collection path is declared but not trained on.
     *
     * @throws IOException if training or storing fails
     */
    static void aceTrainTest() throws IOException {
        Class emitter;
        if (useAceBigrams) {
            emitter = BigramHMMemitter.class;
        } else {
            emitter = WordFeatureHMMemitter.class;
        }
        HMMNameTagger tagger = new HMMNameTagger(emitter);
        tagger.buildNameHMM("acedata/ACEnameTags.txt");
        String bbnCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/ACE BBN Collection.txt";
        String trainingCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/ACE training Collection.txt";
        String aug03Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/ACE aug03 Collection.txt";
        String nwire04Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 nwire 21andup ne.txt";
        String bnews04Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 bnews 21andup ne.txt";
        String[] trainingOrder = {bbnCollection, trainingCollection, nwire04Collection, bnews04Collection};
        for (String collection : trainingOrder) {
            tagger.train(collection);
        }
        String modelFile = useAceBigrams ? "acedata/ACEname04bigramHMM.txt" : "acedata/ACEname04HMM.txt";
        tagger.store(modelFile);
        HMMNameTagger.aceTest(tagger);
    }

    /**
     * Loads the stored ACE 04 model (bigram or non-bigram file depending on
     * {@code useAceBigrams}) and scores it with {@code aceTest}.
     *
     * @throws IOException if the model cannot be loaded
     */
    private static void aceLoadTest() throws IOException {
        Class emitter;
        if (useAceBigrams) {
            emitter = BigramHMMemitter.class;
        } else {
            emitter = WordFeatureHMMemitter.class;
        }
        HMMNameTagger tagger = new HMMNameTagger(emitter);
        String modelFile = useAceBigrams ? "acedata/ACEname04bigramHMM.txt" : "acedata/ACEname04HMM.txt";
        tagger.load(modelFile);
        HMMNameTagger.aceTest(tagger);
    }

    /**
     * Scores a tagger on the hard-coded ACE "training04 nwire 20" test and key collections
     * using {@code tagsToScore}. Sets {@code BigramHMMemitter.useBigrams} to false first.
     *
     * @param hMMNameTagger the tagger to score
     * @throws IOException declared by this method; presumably raised by the scorer
     */
    private static void aceTest(HMMNameTagger hMMNameTagger) throws IOException {
        BigramHMMemitter.useBigrams = false;
        String testCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 nwire 20 sgm.txt";
        String keyCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 nwire 20 ne.txt";
        NEScorer.scoreCollection((NameTagger)hMMNameTagger, testCollection, keyCollection, tagsToScore);
    }

    /**
     * Builds an HMM from acedata/ACE05nameTags.txt, trains it on hard-coded ACE 05 and
     * ACE 04 collections, stores the model (bigram or non-bigram file name depending on
     * {@code useAceBigrams}) and scores it with {@code ace05Test}.
     * <p>
     * NOTE(review): the ACE 05 "tailNE" collection is trained on twice; presumably this is
     * deliberate weighting - confirm.
     *
     * @throws IOException if training or storing fails
     */
    static void ace05TrainTest() throws IOException {
        Class emitter;
        if (useAceBigrams) {
            emitter = BigramHMMemitter.class;
        } else {
            emitter = WordFeatureHMMemitter.class;
        }
        HMMNameTagger tagger = new HMMNameTagger(emitter);
        tagger.buildNameHMM("acedata/ACE05nameTags.txt");
        String ace05Tail = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE 05/V4/NE/tailNE.txt";
        String nwire04Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 nwire 21andup ne.txt";
        String bnews04Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 bnews 21andup ne.txt";
        String[] trainingOrder = {ace05Tail, ace05Tail, nwire04Collection, bnews04Collection};
        for (String collection : trainingOrder) {
            tagger.train(collection);
        }
        String modelFile = useAceBigrams ? "acedata/ACEname05bigramHMM.txt" : "acedata/ACEname05HMM.txt";
        tagger.store(modelFile);
        HMMNameTagger.ace05Test(tagger);
    }

    /**
     * Loads the stored ACE 05 model (bigram or non-bigram file depending on
     * {@code useAceBigrams}) and scores it with {@code ace05Test}.
     *
     * @throws IOException if the model cannot be loaded
     */
    private static void ace05LoadTest() throws IOException {
        Class emitter;
        if (useAceBigrams) {
            emitter = BigramHMMemitter.class;
        } else {
            emitter = WordFeatureHMMemitter.class;
        }
        HMMNameTagger tagger = new HMMNameTagger(emitter);
        String modelFile = useAceBigrams ? "acedata/ACEname05bigramHMM.txt" : "acedata/ACEname05HMM.txt";
        tagger.load(modelFile);
        HMMNameTagger.ace05Test(tagger);
    }

    /**
     * Scores a tagger on the hard-coded ACE 05 "head" test and key collections using
     * {@code tagsToScore}. Sets {@code BigramHMMemitter.useBigrams} to false first.
     *
     * @param hMMNameTagger the tagger to score
     * @throws IOException declared by this method; presumably raised by the scorer
     */
    private static void ace05Test(HMMNameTagger hMMNameTagger) throws IOException {
        BigramHMMemitter.useBigrams = false;
        String testCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE 05/V4/NE/headSgm.txt";
        String keyCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE 05/V4/NE/headNE.txt";
        NEScorer.scoreCollection((NameTagger)hMMNameTagger, testCollection, keyCollection, tagsToScore);
    }

    /**
     * Builds a WordFeatureHMMemitter-based HMM from data/MUCnameTags.txt, trains it on the
     * hard-coded MUC training collection, stores it to data/MUCnameHMM.txt and scores it
     * with {@code mucTest}.
     *
     * @throws IOException if training or storing fails
     */
    private static void mucTrainTest() throws IOException {
        HMMNameTagger tagger = new HMMNameTagger(WordFeatureHMMemitter.class);
        tagger.buildNameHMM("data/MUCnameTags.txt");
        String trainingCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/NE train Collection.txt";
        tagger.train(trainingCollection);
        tagger.store("data/MUCnameHMM.txt");
        HMMNameTagger.mucTest(tagger);
    }

    /**
     * Builds the MUC state graph from data/MUCnameTags.txt, loads the stored model from
     * data/MUCnameHMM.txt into the same tagger and scores it with {@code mucTest}.
     *
     * @throws IOException if the model cannot be loaded
     */
    private static void mucLoadTest() throws IOException {
        HMMNameTagger tagger = new HMMNameTagger(WordFeatureHMMemitter.class);
        String tagFile = "data/MUCnameTags.txt";
        tagger.buildNameHMM(tagFile);
        String modelFile = "data/MUCnameHMM.txt";
        tagger.load(modelFile);
        HMMNameTagger.mucTest(tagger);
    }

    /**
     * Scores a tagger on the hard-coded MUC test and key collections using
     * {@code tagsToScore}.
     *
     * @param hMMNameTagger the tagger to score
     * @throws IOException declared by this method; presumably raised by the scorer
     */
    private static void mucTest(HMMNameTagger hMMNameTagger) throws IOException {
        String testCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/NE test Collection.txt";
        String keyCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/NE key Collection.txt";
        NEScorer.scoreCollection((NameTagger)hMMNameTagger, testCollection, keyCollection, tagsToScore);
    }

    /**
     * Builds an HMM from acedata/ACE05nameTags.txt, trains it on a series of hard-coded
     * collections, stores the model (bigram or non-bigram file name depending on
     * {@code useAceBigrams}) and scores it with {@code ace05Test}.
     * <p>
     * The "ACE aug03" and "AFPfilelist" paths are declared but not trained on.
     * NOTE(review): the ACE 05 "tailNE" collection is trained on twice; presumably this is
     * deliberate weighting - confirm.
     *
     * @throws IOException if training or storing fails
     */
    static void galeTrainTest() throws IOException {
        Class emitter;
        if (useAceBigrams) {
            emitter = BigramHMMemitter.class;
        } else {
            emitter = WordFeatureHMMemitter.class;
        }
        HMMNameTagger tagger = new HMMNameTagger(emitter);
        tagger.buildNameHMM("acedata/ACE05nameTags.txt");
        String bbnCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/ACE BBN Collection.txt";
        String trainingCollection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/ACE training Collection.txt";
        String aug03Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/NE/ACE aug03 Collection.txt";
        String nwire04Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 nwire 21andup ne.txt";
        String bnews04Collection = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/training04 bnews 21andup ne.txt";
        String ace05Tail = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE 05/V4/NE/tailNE.txt";
        String nytFileList = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE 05/names/NYTfilelist.txt";
        String afpFileList = "C:/Documents and Settings/Ralph Grishman/My Documents/ACE 05/names/AFPfilelist.txt";
        String ace07FileList = "C:/Documents and Settings/Ralph Grishman/My Documents/Ace 07/ET/NE/filelist.sgm";
        String[] trainingOrder = {
            bbnCollection,
            trainingCollection,
            nwire04Collection,
            bnews04Collection,
            ace05Tail,
            ace05Tail,
            nytFileList,
            ace07FileList
        };
        for (String collection : trainingOrder) {
            tagger.train(collection);
        }
        String modelFile = useAceBigrams ? "acedata/AceNameBigram07HMM.txt" : "acedata/AceName07HMM.txt";
        tagger.store(modelFile);
        HMMNameTagger.ace05Test(tagger);
    }

    /**
     * Loads a stored model (bigram or non-bigram file depending on {@code useAceBigrams})
     * and scores it with {@code ace05Test}.
     * <p>
     * NOTE(review): this loads the "ACEname06..." files, whereas galeTrainTest stores
     * "AceName07..." / "AceNameBigram07..." files - confirm which model is intended.
     *
     * @throws IOException if the model cannot be loaded
     */
    static void galeLoadTest() throws IOException {
        Class emitter;
        if (useAceBigrams) {
            emitter = BigramHMMemitter.class;
        } else {
            emitter = WordFeatureHMMemitter.class;
        }
        HMMNameTagger tagger = new HMMNameTagger(emitter);
        String modelFile = useAceBigrams ? "acedata/ACEname06bigramHMM.txt" : "acedata/ACEname06HMM.txt";
        tagger.load(modelFile);
        HMMNameTagger.ace05Test(tagger);
    }

    // Default to the non-bigram (WordFeatureHMMemitter) configuration; main() overwrites
    // this flag from its third command-line argument.
    static {
        useAceBigrams = false;
    }
}

