package AceJet;

//Author:       Ralph Grishman
//Date:         July 10, 2004

import java.util.*;
import java.io.*;
import opennlp.maxent.*;
import opennlp.maxent.io.*;

import Jet.Tipster.*;

/**
 *  Methods to determine the ACE subtype of a name.
 *  <p>
 *  Running <CODE>main</CODE> writes one training event per annotated name
 *  to <CODE>featureFile</CODE>, trains a maximum-entropy (GIS) model from
 *  those events, and stores the model in <CODE>modelFile</CODE>.  At run
 *  time, <CODE>classify</CODE> loads that model on first use and returns
 *  the most likely subtype for a name.
 */

public class NameSubtyper {
	
	// NOTE: all paths are hard-coded to one particular machine layout.
	static final String home =
	    "C:/Documents and Settings/Ralph Grishman/My Documents/";
	static final String ACEdir = home + "ACE/";
	// first of the document lists processed by main
	static final String fileList =
		ACEdir + "training04 nwire 21andup.txt";
	// stream to which training events are written (open only while main runs)
	static PrintStream writer;
	static final String featureFile = home + "jet temp/ACEnameSubtypeFeatures.txt";
	static final String modelFile = home + "jet temp/ACEnameSubtypeModel.txt";
	// the trained or loaded model;  null until createModel or load succeeds
	static GISModel model;
	
	/**
	 *  train a subtype model from the ACE 2004 training data:  write the
	 *  training events for three document lists to <CODE>featureFile</CODE>,
	 *  train a model from that file, and store it in <CODE>modelFile</CODE>.
	 *
	 *  @throws IOException  if the feature file cannot be created or a
	 *                       document list cannot be read
	 */
	 	
	public static void main (String[] args) throws IOException {
		AceDocument.ace2004 = true;
		writer = new PrintStream (new FileOutputStream (featureFile));
		try {
			processFileList (fileList);
			processFileList (ACEdir + "training04 bnews.txt");
			processFileList (ACEdir + "training04 chinese.txt");
		} finally {
			// release the feature file before createModel re-opens it for reading
			writer.close();
		}
		createModel ();
		store (modelFile);
		System.out.println ("Finished.");
	}
	
	/**
	 *  process every document named in file <CODE>fileList</CODE> (one
	 *  document name per line, relative to <CODE>ACEdir</CODE> and without
	 *  extension), writing its training events to <CODE>writer</CODE>.
	 *  Blank lines in the list are ignored.
	 *
	 *  @throws IOException  if the list cannot be opened or read
	 */
	
	private static void processFileList (String fileList) throws IOException {
		// open list of files
		BufferedReader reader = new BufferedReader (new FileReader(fileList));
		try {
			int docCount = 0;
			String currentDoc;
			while ((currentDoc = reader.readLine()) != null) {
				// a blank line would otherwise be taken for a document named ""
				if (currentDoc.trim().length() == 0)
					continue;
				// process file 'currentDoc'
				docCount++;
				System.out.println ("\nProcessing document " + docCount + ": " + currentDoc);
				String textFileName = ACEdir + currentDoc + ".sgm";
				String APFfileName = ACEdir + currentDoc + ".apf.xml";
				analyzeDocument (textFileName, APFfileName);
			}
		} finally {
			reader.close();
		}
	}
	
	/**
	 *  write one training event to <CODE>writer</CODE> for each name of each
	 *  entity in the document:  the features of the name (see
	 *  <CODE>NEfeatures</CODE>), separated by blanks, followed by the
	 *  subtype of the entity.  Weapon and vehicle names are only reported
	 *  on standard output, and names of type PERSON are skipped;  this
	 *  matches <CODE>classify</CODE>, which does not consult the model for
	 *  these three types.
	 */
	
	private static void analyzeDocument (String textFileName, String APFfileName) {
		AceDocument aceDoc = new AceDocument (textFileName, APFfileName);
		
		ArrayList entities = aceDoc.entities;
		for (int ientity=0; ientity<entities.size(); ientity++) {
			AceEntity entity = (AceEntity) entities.get(ientity);
			String type = entity.type;
			String subtype = entity.subtype;
			ArrayList names = entity.names;
			for (int iname=0; iname<names.size(); iname++) {
				AceEntityName name = (AceEntityName) names.get(iname);
				String text = name.text;
				// the three branches are mutually exclusive:  without the
				// 'else', weapon names would also be written as training events
				if (type.equals("WEA")) {
					System.out.println ("Found weapon name " + text);
				} else if (type.equals("VEH")) {
					System.out.println ("Found vehicle name " + text);
				} else if (!type.equals("PERSON")) {     // no subtypes for PER
					String[] tokens = text.split("\\s");
					String[] features = NEfeatures (tokens, type);
					for (int ifeat=0; ifeat<features.length; ifeat++) {
						writer.print (features[ifeat] + " ");
					}
					writer.println (subtype);
				}
			}
		}
	}
	
	/**
	 *  return the features for a name consisting of <CODE>tokens</CODE>,
	 *  of EDT type <CODE>type</CODE>.  The first feature is the type
	 *  truncated to (at most) its first three characters;  it is followed
	 *  by one feature of the form <I>type</I>=<I>token</I> for each token,
	 *  with the token in lower case.
	 */
	
	private static String[] NEfeatures (String[] tokens, String type) {
		String[] features = new String[tokens.length+1];
		// guard against types shorter than three characters, for which
		// substring(0,3) would throw StringIndexOutOfBoundsException
		if (type.length() > 3)
			type = type.substring(0,3);
		features[0] = type;
		for (int i=0; i<tokens.length; i++) {
			features[i+1] = type + "=" + tokens[i].toLowerCase();
		}
		return features;
	}
	
	/**
	 *  train <CODE>model</CODE> from the events in <CODE>featureFile</CODE>.
	 *  If training fails, the exception is reported on standard output and
	 *  <CODE>model</CODE> is left unchanged.
	 */
	
	public static void createModel () {
		FileReader datafr = null;
		try {
			datafr = new FileReader(new File(featureFile));
			EventStream es =
				new BasicEventStream(new PlainTextByLineDataStream(datafr));
			GIS.SMOOTHING = false;
			GIS.SMOOTHING_OBSERVATION = 0.1;
			// arguments after the event stream:  iterations and feature cutoff
			// (per the opennlp maxent GIS.trainModel signature)
			model = GIS.trainModel(es, 100, 2);
		} catch (Exception e) {
			System.out.print("Unable to create model due to exception: ");
			System.out.println(e);
		} finally {
			if (datafr != null) {
				try {
					datafr.close();
				} catch (IOException ignored) {
					// the file was only read;  a failure to close loses nothing
				}
			}
		}
	}
	
	/**
	 *  write <CODE>model</CODE> to file <CODE>modelFileName</CODE>.  If no
	 *  model is available (for example, because training failed) or the
	 *  file cannot be written, a message is printed and nothing is stored.
	 */
	
	public static void store (String modelFileName) {
		if (model == null) {
			System.out.println ("MaxEntNE.saveModel: unable to save model");
			System.out.println ("no model has been created or loaded");
			return;
		}
		try {
			File outputFile = new File(modelFileName);
			// named modelWriter to avoid shadowing the static feature 'writer'
			GISModelWriter modelWriter = new SuffixSensitiveGISModelWriter(model, outputFile);
			modelWriter.persist();
		} catch (IOException e) {
			System.out.println ("MaxEntNE.saveModel: unable to save model");
			System.out.println (e);
		}
	}
	
	/**
	 *  load <CODE>model</CODE> from file <CODE>modelFileName</CODE>.  If the
	 *  model cannot be loaded, the stack trace is printed and the program
	 *  exits with a non-zero status.
	 */
	
	public static void load (String modelFileName) {
		try {
		    model = new SuffixSensitiveGISModelReader(new File(modelFileName)).getModel();
		    System.out.println ("GIS model loaded.");
		} catch (Exception e) {
		    e.printStackTrace();
		    // non-zero status:  failing to load the model is an error
		    System.exit(1);
		}
	}

	/**
	 *  return the most likely subtype of name <CODE>name</CODE>, of EDT type
	 *  <CODE>type</CODE>.  Returns the empty string for type PERSON and
	 *  "Other" for types VEH and WEA without consulting the model;  for all
	 *  other types, the model is loaded from <CODE>modelFile</CODE> on first
	 *  use and its best outcome is returned.
	 */
	 	
	public static String classify (String name, String type) {
		if (type.equals("PERSON"))
			return "";
		if (type.equals("VEH") || type.equals("WEA"))
			return "Other";
		if (model == null)
			load (modelFile);
		String[] tokens = name.split("\\s");
		String[] features = NEfeatures (tokens, type);
		String subtype = model.getBestOutcome(model.eval(features));
		return subtype;
	}
}