/*
 * Decompiled with CFR 0.152.
 */
package Jet.NE;

import Jet.Lex.EnglishLex;
import Jet.Lex.Tokenizer;
import Jet.Lisp.FeatureSet;
import Jet.NE.AlphaFeature;
import Jet.NE.Dictionary;
import Jet.NE.DocumentToSentenceIterator;
import Jet.NE.Evaluator;
import Jet.NE.FirstWordFeature;
import Jet.NE.LexiconCategoryFeature;
import Jet.NE.NamedEntityInDictionaryFeature;
import Jet.NE.NonAlphaFeature;
import Jet.NE.NumericalFeatures;
import Jet.NE.PatternFeature;
import Jet.NE.RegexpMatchFeature;
import Jet.NE.SentenceToTokenSequencePipe;
import Jet.NE.SummarizedPatternFeature;
import Jet.NE.TokenLowerText;
import Jet.NE.TrieDictionary;
import Jet.Tipster.Annotation;
import Jet.Tipster.Document;
import Jet.Tipster.DocumentCollection;
import Jet.Tipster.ExternalDocument;
import Jet.Tipster.Span;
import Jet.Util.IOUtils;
import Jet.Zoner.SentenceSplitter;
import Jet.Zoner.SpecialZoner;
import edu.umass.cs.mallet.base.fst.CRF3;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.TokenSequence2FeatureVectorSequence;
import edu.umass.cs.mallet.base.pipe.tsf.TokenText;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.Sequence;
import edu.umass.cs.mallet.base.types.TokenSequence;
import edu.umass.cs.mallet.base.util.PropertyList;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Named-entity tagger backed by a MALLET {@link CRF3} model.
 *
 * <p>An instance owns a list of feature pipes (filled by {@link #addFeatures()}), an optional
 * {@link PropertyList} that is attached to every instance sent through the pipe, and the CRF
 * model itself, which is produced by {@link #train(Collection)} or restored by
 * {@link #readModel(File)}. {@link #annotate(Document, Span)} decodes a span and records each
 * recognised name as an {@code ENAMEX} annotation carrying a {@code TYPE} feature.
 *
 * <p>The class also provides a small command-line driver, see {@link #main(String[])}.
 */
public class CRFNameTagger {
    private static final String PUNCTUATIONS = "[,\\.;:?!()]";
    private static final String QUOTES = "[\"`']";
    private static final String OPEN_PAREN = "[\\[({]";
    private static final String CLOSE_PAREN = "[\\])}]";

    /** Prefix of a tag that opens a name. */
    private static final String BEGIN_PREFIX = "B-";
    /** Prefix of a tag that continues the name opened by the matching {@code B-} tag. */
    private static final String INSIDE_PREFIX = "I-";

    /** The trained or loaded model; {@code null} until train() or readModel() has run. */
    private CRF3 crf;
    /** Feature pipes, in the order in which they are chained by {@link #createFeaturePipe()}. */
    private final List<Pipe> features = new ArrayList<Pipe>();
    /** Properties attached to every instance before it is piped; may be {@code null}. */
    private PropertyList properties;

    /**
     * Creates a tagger without a model and registers the features supplied by
     * {@link #addFeatures()}.
     *
     * <p>NOTE: {@code addFeatures()} is overridable and is invoked from this constructor, so an
     * override runs before the subclass' own field initialisers and constructor body.
     */
    public CRFNameTagger() {
        this.addFeatures();
    }

    /**
     * Trains a new CRF on the given documents and installs it as this tagger's model.
     *
     * <p>Instances are taken from the {@code TEXT} annotations of each document through a
     * {@link DocumentToSentenceIterator}; each one receives this tagger's property list and is
     * piped before being added to the training set.
     *
     * @param collection the training documents
     */
    public void train(Collection<Document> collection) {
        Pipe pipe = this.createPipe();
        CRF3 model = new CRF3(pipe, null);
        InstanceList trainingSet = new InstanceList(pipe);
        for (Document document : collection) {
            // The third argument is the current instance count plus one; presumably the number
            // of the first sentence of this document -- confirm against DocumentToSentenceIterator.
            DocumentToSentenceIterator sentences =
                    new DocumentToSentenceIterator(document, "TEXT", trainingSet.size() + 1);
            while (sentences.hasNext()) {
                Instance instance = sentences.nextInstance();
                instance.setPropertyList(this.properties);
                trainingSet.add(instance.getPipedCopy(pipe));
            }
        }
        model.addStatesForLabelsConnectedAsIn(trainingSet);
        model.train(trainingSet);
        this.crf = model;
    }

    /**
     * Tags the tokens inside {@code span} and adds one {@code ENAMEX} annotation per name found.
     *
     * <p>A name starts at a token tagged {@code B-X} and extends over the directly following
     * tokens tagged {@code I-X}; the annotation covers the start of the first token to the end
     * of the last one and its {@code TYPE} feature is {@code X}. Tokens whose tag does not start
     * with {@code B-} and that do not continue an open name are skipped.
     *
     * @param document the document to annotate; must already carry {@code token} annotations
     * @param span the span (a sentence in all callers of this class) to decode
     * @throws IllegalStateException if no model has been trained or loaded
     */
    public void annotate(Document document, Span span) {
        CRF3 model = this.requireModel();
        Pipe pipe = model.getInputPipe();
        Instance instance = new Instance(span, null, "sentence", document);
        instance.setPropertyList(this.properties);
        instance = pipe.pipe(instance);
        Sequence input = (Sequence) instance.getData();
        Sequence tags = model.viterbiPath(input).output();
        Vector<Annotation> tokens = document.annotationsOfType("token", span);
        if (tokens == null) {
            // Nothing to label. getId() already treats a null result from annotationsOfType as
            // "no such annotation", so the same convention is honoured here.
            return;
        }
        assert (tokens.size() == input.size());
        assert (tokens.size() == tags.size());
        int index = 0;
        while (index < tokens.size()) {
            String tag = (String) tags.get(index);
            if (!tag.startsWith(BEGIN_PREFIX)) {
                ++index;
                continue;
            }
            String type = tag.substring(BEGIN_PREFIX.length());
            String continuation = INSIDE_PREFIX + type;
            int start = tokens.get(index).start();
            ++index;
            while (index < tokens.size() && tags.get(index).equals(continuation)) {
                ++index;
            }
            // index now points one past the last token of the name.
            int end = tokens.get(index - 1).end();
            FeatureSet featureSet = new FeatureSet();
            featureSet.put("TYPE", type);
            document.annotate("ENAMEX", new Span(start, end), featureSet);
        }
    }

    /**
     * Adds a property to the list that is attached to every instance before it is piped.
     *
     * @param string the property name (the command-line driver uses {@code "dictionary"})
     * @param object the property value
     */
    public void setProperty(String string, Object object) {
        this.properties = PropertyList.add(string, object, this.properties);
    }

    /**
     * Builds the complete input pipe: sentence to token sequence, then the feature pipes, then
     * conversion to a feature-vector sequence.
     *
     * @return the serial pipe used for training
     */
    protected Pipe createPipe() {
        Pipe[] stages = new Pipe[]{
            new SentenceToTokenSequencePipe(),
            this.createFeaturePipe(),
            new TokenSequence2FeatureVectorSequence()
        };
        return new SerialPipes(stages);
    }

    /**
     * Chains all registered feature pipes in registration order.
     *
     * @return a serial pipe over the registered features
     */
    protected Pipe createFeaturePipe() {
        Pipe[] stages = this.features.toArray(new Pipe[this.features.size()]);
        return new SerialPipes(stages);
    }

    /**
     * Registers the default feature set. Called from the constructor; subclasses may override
     * it to install a different set.
     */
    protected void addFeatures() {
        this.addFeature(new FirstWordFeature("FIRST_WORD"));
        this.addFeature(new NumericalFeatures("NUMERICAL"));
        this.addFeature(new RegexpMatchFeature("INITCAP", "\\p{Lu}.*"));
        this.addFeature(new RegexpMatchFeature("CAPITALIZED", "\\p{Lu}\\p{Ll}*"));
        this.addFeature(new RegexpMatchFeature("ALLCAPS", "\\p{Lu}+"));
        this.addFeature(new RegexpMatchFeature("ALLDIGITS", "[0-9]+"));
        this.addFeature(new RegexpMatchFeature("TWO_DIGITS", "[0-9]{2}"));
        this.addFeature(new RegexpMatchFeature("FOUR_DIGITS", "[0-9]{4}"));
        this.addFeature(new RegexpMatchFeature("MORETHANFOURDIGITS", "[0-9]{5,}"));
        this.addFeature(new RegexpMatchFeature("ROMAN_NUMBER", "[IXV]+"));
        this.addFeature(new RegexpMatchFeature("CAPITALANDDIGIT", "[A-Z0-9]+"));
        this.addFeature(new RegexpMatchFeature("YEAR_DECADE", "(?:[0-9]{2})?[0-9]{2}s"));
        this.addFeature(new RegexpMatchFeature("MIXEDCAPS", "\\p{Lu}\\p{Ll}+\\p{Lu}.*"));
        this.addFeature(new RegexpMatchFeature("MULTIDOT", "\\.\\.+"));
        this.addFeature(new RegexpMatchFeature("ENDSINDOT", "[^\\.].*\\."));
        this.addFeature(new RegexpMatchFeature("CONTAINSDASH", "\\w+-\\w*"));
        this.addFeature(new RegexpMatchFeature("ACRONYM", "\\p{Lu}[\\p{Lu}\\.]\\.[\\p{Lu}\\.]"));
        this.addFeature(new RegexpMatchFeature("CAP_OTHER_PERIOD", "[A-Z].+\\."));
        this.addFeature(new RegexpMatchFeature("CAP_PERIOD", "[A-Z]\\."));
        this.addFeature(new RegexpMatchFeature("SINGLECHAR", "."));
        this.addFeature(new RegexpMatchFeature("CAPLETTER", "[A-Z]"));
        this.addFeature(new RegexpMatchFeature("PUNCTUATION", PUNCTUATIONS));
        this.addFeature(new RegexpMatchFeature("QUOTE", QUOTES));
        this.addFeature(new AlphaFeature("a="));
        this.addFeature(new NonAlphaFeature("A="));
        this.addFeature(new PatternFeature("p="));
        this.addFeature(new SummarizedPatternFeature("P="));
        this.addFeature(new TokenText("W="));
        this.addFeature(new TokenLowerText("w="));
        this.addFeature(new RegexpMatchFeature("CURRENCY", "\\p{Sc}"));
        this.addFeature(new LexiconCategoryFeature("CAT="));
        this.addFeature(new RegexpMatchFeature("OPEN_PAREN", OPEN_PAREN));
        this.addFeature(new RegexpMatchFeature("CLOSE_PAREN", CLOSE_PAREN));
        this.addFeature(new NamedEntityInDictionaryFeature("NE="));
    }

    /**
     * Appends a feature pipe; it takes effect in pipes created afterwards by
     * {@link #createFeaturePipe()}.
     *
     * @param pipe the feature pipe to register
     */
    public void addFeature(Pipe pipe) {
        this.features.add(pipe);
    }

    /**
     * Serializes the model to the given stream. The stream is flushed but left open; closing it
     * is the caller's responsibility.
     *
     * @param outputStream destination of the serialized model
     * @throws IOException if writing fails
     * @throws IllegalStateException if no model has been trained or loaded
     */
    public void writeModel(OutputStream outputStream) throws IOException {
        CRF3 model = this.requireModel();
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(outputStream);
        objectOutputStream.writeObject(model);
        // The object stream is not closed (that would close the caller's stream), so flush it
        // explicitly to make sure nothing stays behind in its internal buffer.
        objectOutputStream.flush();
    }

    /**
     * Serializes the model to a file, replacing any existing content.
     *
     * @param file destination file
     * @throws IOException if the file cannot be opened or written
     * @throws IllegalStateException if no model has been trained or loaded
     */
    public void writeModel(File file) throws IOException {
        // Check first so that a missing model does not leave an empty file behind.
        this.requireModel();
        FileOutputStream fileOutputStream = new FileOutputStream(file);
        try {
            this.writeModel(fileOutputStream);
        }
        finally {
            IOUtils.closeQuietly(fileOutputStream);
        }
    }

    /**
     * Restores the model from a stream previously filled by {@link #writeModel(OutputStream)}.
     * The stream is left open.
     *
     * <p>SECURITY NOTE: this uses Java native deserialization, which can execute code embedded
     * in the data. Only read model files that come from a trusted source.
     *
     * @param inputStream source of the serialized model
     * @throws IOException if reading fails
     * @throws ClassNotFoundException if a serialized class is not on the class path
     */
    public void readModel(InputStream inputStream) throws IOException, ClassNotFoundException {
        ObjectInputStream objectInputStream = new ObjectInputStream(inputStream);
        this.crf = (CRF3) objectInputStream.readObject();
    }

    /**
     * Restores the model from a file written by {@link #writeModel(File)}. See the security
     * note on {@link #readModel(InputStream)}.
     *
     * @param file the model file
     * @throws IOException if the file cannot be opened or read
     * @throws ClassNotFoundException if a serialized class is not on the class path
     */
    public void readModel(File file) throws IOException, ClassNotFoundException {
        FileInputStream fileInputStream = new FileInputStream(file);
        try {
            this.readModel(fileInputStream);
        }
        finally {
            IOUtils.closeQuietly(fileInputStream);
        }
    }

    /**
     * Command-line entry point.
     *
     * <pre>
     *   train modelFilename targetDirectory
     *   test  modelFilename targetDirectory outputDirectory
     * </pre>
     *
     * Any other argument shape prints the usage text and exits with status 1.
     *
     * @param stringArray the command-line arguments
     * @throws Exception if training or testing fails
     */
    public static void main(String[] stringArray) throws Exception {
        if (stringArray.length < 3) {
            CRFNameTagger.usage();
        }
        String command = stringArray[0];
        File modelFile = new File(stringArray[1]);
        File collectionPath = new File(stringArray[2]);
        EnglishLex.readLexicon("data", "Jet4.dict");
        if (command.equals("train")) {
            if (stringArray.length != 3) {
                CRFNameTagger.usage();
            }
            CRFNameTagger.train(modelFile, collectionPath);
        } else if (command.equals("test")) {
            if (stringArray.length != 4) {
                CRFNameTagger.usage();
            }
            File outputDirectory = new File(stringArray[3]);
            CRFNameTagger.test(modelFile, collectionPath, outputDirectory);
        } else {
            CRFNameTagger.usage();
        }
    }

    /** Prints the command-line synopsis to standard error and terminates the JVM with status 1. */
    private static void usage() {
        System.err.printf("usage: java %s train|test args", CRFNameTagger.class.getName());
        System.err.println();
        System.err.println();
        System.err.println("train parameters: ");
        System.err.println("    modelFilename targetDirectory");
        System.err.println();
        System.err.println("test parameters: ");
        System.err.println("    modelFilename targetDirectory outputDirectory");
        System.exit(1);
    }

    /**
     * Trains a tagger on a document collection and writes the resulting model.
     *
     * @param modelFile where the trained model is written
     * @param collectionPath path handed to {@link DocumentCollection}
     */
    private static void train(File modelFile, File collectionPath)
            throws IOException, ParserConfigurationException, SAXException {
        Collection<Document> documents = CRFNameTagger.loadDocumentCollection(collectionPath);
        CRFNameTagger.prepareDocuments(documents);
        Dictionary dictionary = CRFNameTagger.loadDictionary();
        CRFNameTagger tagger = new CRFNameTagger();
        tagger.setProperty("dictionary", dictionary);
        tagger.train(documents);
        tagger.writeModel(modelFile);
    }

    /**
     * Runs a stored model over a document collection. In order, this
     * <ol>
     *   <li>writes {@code tokens.txt} into the output directory, one line per token with the
     *       token text, the instance's target label and the label produced by the model;</li>
     *   <li>tags a copy of every document and prints precision and recall per type and in
     *       total to standard output;</li>
     *   <li>replaces the {@code ENAMEX} annotations of every document by the model's output and
     *       writes each document as {@code <id>.sgm} into the output directory.</li>
     * </ol>
     * The output directory must already exist.
     *
     * @param file the model file to load
     * @param file2 path of the document collection
     * @param file3 the output directory
     * @throws IOException if the collection, the model or {@code tokens.txt} cannot be accessed
     * @throws ClassNotFoundException if the model cannot be deserialized
     * @throws RuntimeException if writing one of the SGML files fails
     */
    public static void test(File file, File file2, File file3)
            throws IOException, ParserConfigurationException, SAXException, ClassNotFoundException {
        Collection<Document> documents = CRFNameTagger.loadDocumentCollection(file2);
        CRFNameTagger.prepareDocuments(documents);
        Dictionary dictionary = CRFNameTagger.loadDictionary();
        CRFNameTagger tagger = new CRFNameTagger();
        tagger.setProperty("dictionary", dictionary);
        tagger.readModel(file);

        CRFNameTagger.writeTokenTable(tagger, documents, new File(file3, "tokens.txt"));
        CRFNameTagger.printEvaluation(tagger, documents);
        CRFNameTagger.writeTaggedDocuments(tagger, documents, file3);
    }

    /** Pipes every sentence of every document and writes token / target / predicted rows. */
    private static void writeTokenTable(CRFNameTagger tagger, Collection<Document> documents,
            File tableFile) throws IOException {
        CRF3 model = tagger.requireModel();
        Pipe pipe = model.getInputPipe();
        InstanceList instances = new InstanceList(pipe);
        for (Document document : documents) {
            for (Annotation sentence : CRFNameTagger.annotationsOrEmpty(document, "sentence")) {
                Instance instance = new Instance(sentence.span(), null, "sentence", document);
                instance.setPropertyList(tagger.properties);
                instances.add(instance.getPipedCopy(pipe));
            }
        }

        PrintStream out = null;
        try {
            out = new PrintStream(tableFile);
            for (int i = 0; i < instances.size(); ++i) {
                Instance instance = instances.getInstance(i);
                TokenSequence tokens = (TokenSequence) instance.getSource();
                Sequence expected = (Sequence) instance.getTarget();
                Sequence input = (Sequence) instance.getData();
                Sequence predicted = model.transduce(input);
                assert (tokens.size() == predicted.size());
                assert (tokens.size() == expected.size());
                for (int j = 0; j < tokens.size(); ++j) {
                    out.printf("%-20s %15s %15s",
                            tokens.getToken(j).getText(), expected.get(j), predicted.get(j));
                    out.println();
                }
                // A blank line separates sentences.
                out.println();
            }
            // PrintStream never throws on write failure; ask it explicitly.
            if (out.checkError()) {
                throw new IOException("error while writing " + tableFile.getPath());
            }
        }
        finally {
            IOUtils.closeQuietly(out);
        }
    }

    /** Tags a copy of each document, scores it against the original and prints the results. */
    private static void printEvaluation(CRFNameTagger tagger, Collection<Document> documents) {
        Evaluator evaluator = new Evaluator();
        for (Document reference : documents) {
            // Work on a copy so that the reference keeps its original ENAMEX annotations.
            Document candidate = new Document(reference);
            candidate.removeAnnotationsOfType("ENAMEX");
            for (Annotation sentence : CRFNameTagger.annotationsOrEmpty(candidate, "sentence")) {
                tagger.annotate(candidate, sentence.span());
            }
            evaluator.evaluate(candidate, reference);
        }

        System.out.printf("%-15s\t%10s\t%10s", "type", "precision", "recall");
        System.out.println();
        for (String type : evaluator.getTypes()) {
            double precision = evaluator.getPrecision(type);
            double recall = evaluator.getRecall(type);
            System.out.printf("%-15s\t%10.2f\t%10.2f", type, precision, recall);
            System.out.println();
        }
        System.out.printf("%-15s\t%10.2f\t%10.2f", "TOTAL", evaluator.getPrecision(), evaluator.getRecall());
        System.out.println();
    }

    /** Replaces the ENAMEX annotations by the model's output and writes one SGML file per document. */
    private static void writeTaggedDocuments(CRFNameTagger tagger, Collection<Document> documents,
            File outputDirectory) {
        for (Document document : documents) {
            document.removeAnnotationsOfType("ENAMEX");
        }
        int position = 0;
        for (Document document : documents) {
            ++position;
            for (Annotation sentence : CRFNameTagger.annotationsOrEmpty(document, "sentence")) {
                tagger.annotate(document, sentence.span());
            }
            String id = CRFNameTagger.getId(document);
            if (id == null) {
                // Without this fallback every document lacking DOCNO/DOCID would be written to
                // the same "null.sgm" and overwrite its predecessor.
                id = "document" + position;
            }
            document.removeAnnotationsOfType("token");
            document.setSGMLwrapMargin(0);
            File sgmlFile = new File(outputDirectory, id + ".sgm");
            BufferedWriter writer = null;
            try {
                writer = new BufferedWriter(new FileWriter(sgmlFile));
                writer.append(document.writeSGML(null));
                // Flush here so that a failed write is reported instead of being swallowed by
                // closeQuietly below.
                writer.flush();
            }
            catch (Exception exception) {
                throw new RuntimeException(exception);
            }
            finally {
                IOUtils.closeQuietly(writer);
            }
        }
    }

    /**
     * Returns the normalized text of the first {@code DOCNO} annotation, or else of the first
     * {@code DOCID} annotation, or {@code null} if the document has neither.
     */
    private static String getId(Document document) {
        Vector<Annotation> candidates = document.annotationsOfType("DOCNO");
        if (candidates != null && candidates.size() != 0) {
            return document.normalizedText(candidates.get(0));
        }
        candidates = document.annotationsOfType("DOCID");
        if (candidates != null && candidates.size() != 0) {
            return document.normalizedText(candidates.get(0));
        }
        return null;
    }

    /**
     * Opens the collection and every document in it (with all tags enabled).
     *
     * @param collectionPath path handed to {@link DocumentCollection}
     * @return the opened documents, in collection order
     * @throws IOException if the collection or one of its documents cannot be opened
     */
    private static Collection<Document> loadDocumentCollection(File collectionPath) throws IOException {
        DocumentCollection documentCollection = new DocumentCollection(collectionPath.getPath());
        if (!documentCollection.open()) {
            throw new IOException("cannot open document collection: " + collectionPath.getPath());
        }
        List<Document> documents = new ArrayList<Document>();
        for (int i = 0; i < documentCollection.size(); ++i) {
            ExternalDocument externalDocument = documentCollection.get(i);
            externalDocument.setAllTags(true);
            if (!externalDocument.open()) {
                throw new IOException("cannot open document #" + i + " of collection "
                        + collectionPath.getPath());
            }
            documents.add(externalDocument);
        }
        return documents;
    }

    /**
     * Prepares documents for the tagger: finds special zones, splits every {@code TEXT} zone
     * into sentences, tokenizes every sentence, and finally drops the {@code textBreak} and
     * {@code dateline} annotations.
     */
    private static void prepareDocuments(Collection<Document> documents) {
        for (Document document : documents) {
            SpecialZoner.findSpecialZones(document);
            for (Annotation text : CRFNameTagger.annotationsOrEmpty(document, "TEXT")) {
                SentenceSplitter.split(document, text.span());
            }
            // Looked up only now: the sentence annotations are created by the loop above.
            for (Annotation sentence : CRFNameTagger.annotationsOrEmpty(document, "sentence")) {
                Tokenizer.tokenize(document, sentence.span());
            }
            document.removeAnnotationsOfType("textBreak");
            document.removeAnnotationsOfType("dateline");
        }
    }

    /** Opens the name dictionary from its fixed location under {@code data/}. */
    private static Dictionary loadDictionary() throws IOException {
        return new TrieDictionary("data/wsj.ned.da", "data/wsj.ned.cdb");
    }

    /**
     * Returns the annotations of the given type, substituting an empty vector when the document
     * reports {@code null} (the case {@link #getId(Document)} also guards against).
     */
    private static Vector<Annotation> annotationsOrEmpty(Document document, String type) {
        Vector<Annotation> annotations = document.annotationsOfType(type);
        return annotations != null ? annotations : new Vector<Annotation>();
    }

    /** Returns the current model or fails with a descriptive error when none is available. */
    private CRF3 requireModel() {
        if (this.crf == null) {
            throw new IllegalStateException(
                    "no CRF model available: call train() or readModel() first");
        }
        return this.crf;
    }
}

