/*
 * Decompiled with CFR 0.152.
 */
package Jet.HMM;

import Jet.HMM.HMMNameTagger;
import Jet.HMM.InteractiveAnnotator;
import Jet.HMM.SentenceWithMargin;
import Jet.HMM.WordFeatureHMMemitter;
import Jet.Lex.Tokenizer;
import Jet.Scorer.SGMLScorer;
import Jet.Tipster.Annotation;
import Jet.Tipster.AnnotationColor;
import Jet.Tipster.Document;
import Jet.Tipster.DocumentCollection;
import Jet.Tipster.ExternalDocument;
import Jet.Tipster.Span;
import Jet.Zoner.SentenceSplitter;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.Vector;

public class ActiveLearner {
    static HMMNameTagger nt;
    static String[] tagsToRead;
    static final int initialTrainingSetSize = 50;
    static final int testSetSize = 50;
    static final boolean activeTraining = true;
    static final boolean simulatedTraining = false;
    static final boolean multithread = true;
    static final int sentencesPerSweep = 5;
    static ArrayList sentencesWithSmallestMargin;
    static ArrayList sentencesToAnnotate;
    static ArrayList documentsBeingAnnotated;
    static InteractiveAnnotator annotationThread;
    static ArrayList poolSentences;
    public static volatile boolean keepLearning;
    static int sentencesInPool;
    static DocumentCollection col;
    static PrintWriter logFile;

    public static void main(String[] stringArray) throws IOException {
        String string = "C:/Documents and Settings/Ralph Grishman/My Documents/";
        String string2 = string + "active.log";
        logFile = new PrintWriter(new BufferedWriter(new FileWriter(string2)));
        new AnnotationColor(string + "HMM");
        col = new DocumentCollection(string + "HMM/NE/ACE training Collection.txt");
        ActiveLearner.initialize();
        for (int i = 0; i <= 500; i += 5) {
            ActiveLearner.learn();
            if (!keepLearning) break;
        }
        logFile.close();
    }

    static void initialize() {
        int n;
        Object object;
        Vector<Annotation> vector;
        Serializable serializable;
        int n2;
        col.open();
        for (n2 = 0; n2 < col.size(); ++n2) {
            ExternalDocument externalDocument = col.get(n2);
            System.out.println("Reading " + externalDocument.fileName());
            externalDocument.setAllTags(true);
            externalDocument.open();
            serializable = externalDocument.annotationsOfType("TEXT");
            vector = ((Vector)serializable).iterator();
            while (vector.hasNext()) {
                Iterator<Annotation> iterator = (Annotation)vector.next();
                Span object2 = ((Annotation)((Object)iterator)).span();
                SentenceSplitter.split(externalDocument, object2);
                object = externalDocument.annotationsOfType("sentence");
                if (object == null) continue;
                Iterator<Annotation> iterator2 = ((Vector)object).iterator();
                while (iterator2.hasNext()) {
                    Annotation annotation = iterator2.next();
                    Span span = annotation.span();
                    Tokenizer.tokenize(externalDocument, span);
                }
            }
        }
        n2 = 0;
        for (n = 0; n < 50; ++n) {
            serializable = col.get(n);
            vector = ((Document)serializable).annotationsOfType("sentence");
            if (vector == null) continue;
            for (Annotation annotation : vector) {
                annotation.put("training", "true");
                ++n2;
            }
        }
        System.out.println(n2 + " sentences in initial training set");
        for (n = 0; n < col.size(); ++n) {
            serializable = col.get(n);
            vector = ((Document)serializable).annotationsOfType("ENAMEX");
            if (vector == null) continue;
            for (Annotation annotation : vector) {
                ((Document)serializable).annotate("TRUENAMEX", annotation.span(), annotation.attributes());
                if (n < 50) continue;
                ((Document)serializable).removeAnnotation(annotation);
            }
        }
        nt = new HMMNameTagger(WordFeatureHMMemitter.class);
        nt.buildNameHMM("data/ACEnameTags.txt");
        ActiveLearner.nt.nameHMM.recordMargin();
        for (n = 0; n < col.size(); ++n) {
            serializable = col.get(n);
            ActiveLearner.nt.nameHMM.newDocument();
            vector = ((Document)serializable).annotationsOfType("sentence");
            if (vector == null) continue;
            for (Annotation annotation : vector) {
                if (annotation.get("training") == null) continue;
                object = annotation.span();
                ActiveLearner.nt.annotator.trainOnSpan((Document)serializable, (Span)object);
            }
        }
        ActiveLearner.nt.nameHMM.computeProbabilities();
    }

    static void learn() {
        int n;
        Comparable comparable;
        Object object;
        int n2;
        int n3 = 0;
        sentencesInPool = 0;
        sentencesWithSmallestMargin = new ArrayList(5);
        double d = 0.0;
        for (n2 = 0; n2 < col.size() && keepLearning; ++n2) {
            ExternalDocument externalDocument = col.get(n2);
            if (documentsBeingAnnotated.contains(externalDocument)) continue;
            ActiveLearner.nt.nameHMM.newDocument();
            Vector<Annotation> vector = externalDocument.annotationsOfType("sentence");
            if (vector == null) continue;
            for (Annotation annotation : vector) {
                if (annotation.get("training") != null) continue;
                object = annotation.span();
                ActiveLearner.nt.annotator.annotateSpan(externalDocument, (Span)object);
                double d2 = ActiveLearner.nt.nameHMM.getMargin();
                if (sentencesWithSmallestMargin.size() < 5) {
                    sentencesWithSmallestMargin.add(new SentenceWithMargin(externalDocument, annotation, d2));
                    if (d < d2) {
                        d = d2;
                    }
                } else if (d2 < d) {
                    SentenceWithMargin object2 = (SentenceWithMargin)Collections.max(sentencesWithSmallestMargin);
                    sentencesWithSmallestMargin.remove(object2);
                    sentencesWithSmallestMargin.add(new SentenceWithMargin(externalDocument, annotation, d2));
                    comparable = (SentenceWithMargin)Collections.max(sentencesWithSmallestMargin);
                    d = comparable.margin;
                }
                ++sentencesInPool;
            }
        }
        n2 = 0;
        int n4 = 0;
        int n5 = 0;
        int n6 = 0;
        for (n = col.size() - 50; n < col.size(); ++n) {
            object = col.get(n);
            SGMLScorer sGMLScorer = new SGMLScorer((Document)object, (Document)object);
            sGMLScorer.match("TRUENAMEX", "ENAMEX");
            n2 += sGMLScorer.totalTagsInDoc1;
            n4 += sGMLScorer.totalTagsInDoc2;
            n5 += sGMLScorer.totalMatchingTags;
            n6 += sGMLScorer.totalMatchingAttrs;
        }
        System.out.println("Overall Type Recall:          " + (float)n5 / (float)n4);
        System.out.println("Overall Type Precision:       " + (float)n5 / (float)n2);
        System.out.println("Overall Attribute Recall:     " + (float)n6 / (float)n4);
        System.out.println("Overall Attribute Precision:  " + (float)n6 / (float)n2);
        if (logFile != null) {
            logFile.println(n3 + ", " + (float)n6 / (float)n4 + ", " + (float)n6 / (float)n2);
        }
        for (n = 0; n < col.size(); ++n) {
            object = col.get(n);
            Vector<Annotation> vector = ((Document)object).annotationsOfType("sentence");
            if (vector == null) continue;
            for (Annotation annotation : vector) {
                if (annotation.get("training") != null) continue;
                comparable = annotation.span();
                ActiveLearner.eraseAnnotationsInside((Document)object, "ENAMEX", (Span)comparable);
            }
        }
        if (annotationThread != null) {
            try {
                if (annotationThread.isAlive()) {
                    System.out.println("Waiting for annotation thread.");
                }
                annotationThread.join();
                System.out.println("Annotation thread finished.");
            }
            catch (InterruptedException interruptedException) {
                System.out.println(interruptedException);
            }
            for (n = 0; n < sentencesToAnnotate.size(); ++n) {
                object = (SentenceWithMargin)sentencesToAnnotate.get(n);
                ActiveLearner.nt.annotator.trainOnSpan(((SentenceWithMargin)object).document, ((SentenceWithMargin)object).sentence.span());
                ++n3;
            }
            ActiveLearner.nt.nameHMM.computeProbabilities();
        }
        if (!keepLearning) {
            return;
        }
        sentencesToAnnotate = new ArrayList(sentencesWithSmallestMargin);
        documentsBeingAnnotated = new ArrayList();
        for (n = 0; n < sentencesToAnnotate.size(); ++n) {
            object = (SentenceWithMargin)sentencesToAnnotate.get(n);
            documentsBeingAnnotated.add(((SentenceWithMargin)object).document);
        }
        annotationThread = new InteractiveAnnotator(sentencesToAnnotate);
        annotationThread.setPriority(6);
        annotationThread.start();
        System.out.println("*** initiated annotation Thread ***");
    }

    private static void eraseAnnotationsInside(Document document, String string, Span span) {
        Vector vector = document.annotationsOfType(string);
        if (vector == null) {
            return;
        }
        vector = (Vector)vector.clone();
        for (int i = 0; i < vector.size(); ++i) {
            Annotation annotation = (Annotation)vector.get(i);
            if (!annotation.span().within(span)) continue;
            document.removeAnnotation(annotation);
        }
    }

    private static void addToTraining(Document document, Annotation annotation) {
        Span span = annotation.span();
        System.out.println("Now annotating:");
        System.out.println(document.text(annotation));
        int n = span.start();
        int n2 = span.end();
        for (int i = n; i < n2; ++i) {
            Vector<Annotation> vector = document.annotationsAt(i, "TRUENAMEX");
            if (vector == null) continue;
            for (Annotation annotation2 : vector) {
                document.annotate("ENAMEX", annotation2.span(), annotation2.attributes());
            }
        }
        annotation.put("training", "true");
        ActiveLearner.nt.annotator.trainOnSpan(document, span);
    }

    static {
        tagsToRead = new String[]{"ENAMEX", "TIMEX", "NUMEX"};
        documentsBeingAnnotated = new ArrayList();
        annotationThread = null;
        keepLearning = true;
        sentencesInPool = 0;
        logFile = null;
    }
}

