package AceJet;

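/**
 *   Ensemble EDT (entity) typer:  extends EDTtype so that mentions which the
 *   base typer labels "OTHER" are typed from the WordNet sense of their head.
 *   Main program types the mentions of the documents listed in 'fileList'
 *   and reports how many predictions agree with the APF key files.
 */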
 
import java.util.*;
import java.io.*;

import Jet.*;
import Jet.Lisp.FeatureSet;
import Jet.Pat.Pat;
import Jet.Tipster.*;
import Jet.Parser.SynFun;
import Jet.Refres.Resolve;
import Jet.Lex.EnglishLex;
import Jet.Chunk.Chunker;
 import Jet.Sense.*;
 
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.Synset;

import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.parsers.*;
 
public class EDTtypeEnsemble extends EDTtype {
	// counters reported by main as "decided by MCT" / "decided by WSD".
	// NOTE(review): only the commented-out 'previous version' code in this
	// file increments them;  the active code never does, so main prints 0.
	static int mctDecided = 0, wsdDecided = 0;

	// stream which receives a copy of the per-document progress line
	// written by main (defaults to standard output)
	public static PrintStream out = System.out;
	// file listing the documents to evaluate, one per line;  main appends
	// ".sgm" (text) and an APF suffix (key) to each line to form file names
	static final String fileList = ACEdir + "acedata2/nwiresplit_ACEsep02dev/heldbacknwire.txt";
	// the line of 'fileList' currently being processed by main
	static String currentDoc;
	// set by main when its option string contains 'b'.
	// NOTE(review): in the active code this only selects the start-up
	// message;  the code which consulted it is commented out.
	static boolean wsdOnlyAsBackup = false;
	
 	/**
	 *  main creates EDT response files for the documents listed in fileList and
	 *  generates an accuracy score for these responses.
	 *  <p>
	 *  For every (non-blank) line of <CODE>fileList</CODE>, the document
	 *  <I>line</I>.sgm is read and run through <CODE>Control.processDocument</CODE>,
	 *  the matching APF key file is loaded, and <CODE>processMentions</CODE>
	 *  compares the predicted type of each mention with the key.  The totals
	 *  are printed to standard output once all documents are processed.
	 *
	 *  @param  args  optional;  if args[0] contains the letter 'b',
	 *                <CODE>wsdOnlyAsBackup</CODE> is set (in the code visible
	 *                here this only changes the start-up message)
	 *  @throws IOException  if the file list or a document cannot be read
	 */

	public static void main (String[] args)
	    throws IOException {
		String options = "";
		if (args.length > 0) options = args[0];
		wsdOnlyAsBackup = options.indexOf('b') >= 0;
		if (wsdOnlyAsBackup)
			System.out.println("Using WSD only for words not found in type dictionary.");
		else
			System.out.println("Choosing WSD or MCT based on confidence values.");

		// if (options.indexOf('n') >= 0) {
		// 	switchTo2004Format();
		// }

		// load Jet resources
		JetTest.initializeFromConfig("props/ME ace.properties");
		Ace.gazetteer = new Gazetteer();
		Ace.gazetteer.load();
		Chunker.loadModel();
		readTypeDict(typeDictFile);
		Pat.trace = false;
		// (the APF key is parsed by AceDocument, so no XML parser is set up here)

		// open list of files
		BufferedReader reader = new BufferedReader (new FileReader(fileList));
		int docCount = 0;
		try {
			while ((currentDoc = reader.readLine()) != null) {
				// a blank line would otherwise be treated as a document named ".sgm"
				if (currentDoc.trim().length() == 0) continue;
				// process file 'currentDoc'
				docCount++;
				System.out.println ("\nProcessing document " + docCount + ": " + currentDoc);
				out.println ("\nProcessing document " + docCount + ": " + currentDoc);
				// read document
				String textFileName = currentDoc + ".sgm";
				doc = new ExternalDocument("sgml", textFileName);
				doc.setAllTags(true);
				doc.open();
				// process document
				Control.processDocument (doc, null, false, docCount);
				// collect all tokens
				collectTokens();
				// read key file with mention information
				// (populate mentionSet and mentionStartMap)
				String suffix = (AceDocument.ace2004) ?
								".apf.xml" : ".sgm.tmx.rdc.xml";
				String apfFileName = currentDoc + suffix;
				AceDocument aceDoc = new AceDocument(textFileName, apfFileName);
				LearnRelations.findEntityMentions (aceDoc);
				// score the predicted type of each mention against the key
				processMentions(doc);
			}
		} finally {
			// release the file list even if a document fails to process
			reader.close();
		}
		System.out.println (trainingMentions + " training mentions");
		System.out.println (correct + " correct predictions, " + incorrect + " incorrect");
		System.out.println(unknown + " unknown");
		System.out.println( wsdDecided + " decided by WSD, " + mctDecided + " decided by MCT.");
	}
	    

	/**
	 *  Compares the predicted type of each mention in <CODE>doc</CODE> with
	 *  the key, updating the counters <CODE>correct</CODE> and
	 *  <CODE>incorrect</CODE> and printing every disagreement.
	 *  <p>
	 *  Mentions whose head has category "pro", "det" or "name" are skipped.
	 *  The key type of a mention is the type of the APF mention recorded in
	 *  <CODE>LearnRelations.mentionStartMap</CODE> under the start offset of
	 *  the mention's head, or "OTHER" if there is no such entry.
	 *
	 *  @param  doc  the document, already processed by Jet, whose mentions
	 *               are to be scored
	 */
	static void processMentions (ExternalDocument doc) {
		// gather all mentions in document using Resolve.gatherMentions
		Vector mentions = Resolve.gatherMentions(doc, new Span(0, doc.length()));
		// for each mention
		for (int imention=0; imention<mentions.size(); imention++) {
			Annotation mention = (Annotation) mentions.get(imention);
			Annotation head = Resolve.getHeadC(mention);
			String cat = (String) head.get("cat");
			// pronouns, determiners and names are not scored here
			// (constant-first comparison:  a head without a 'cat' feature is
			//  scored like any other mention rather than raising an exception)
			if ("pro".equals(cat) || "det".equals(cat) || "name".equals(cat))
				continue;
			//    look up in key, by starting position of head
			Mention apfMention =
				(Mention) LearnRelations.mentionStartMap.get(new Integer(head.start()));
			//    if in key, use its type, else "OTHER"
			String keyType = "OTHER";
			if (apfMention != null) {
				keyType = apfMention.type;
			}
			//    the prediction is made for every mention, in the key or not
			String prediction = getTypeSubtype(doc, null, mention);
			if (prediction.equals(keyType)) {
				correct++;
			} else {
				incorrect++;
				System.out.print   ("Mention: " + doc.text(mention));
				System.out.println (" predict " + prediction + ", should be " + keyType);
			}
		}
	}


// Previous version:
//	
//	public static String getType (ExternalDocument doc, Annotation entity,
//								  Annotation mention) {
//		Object paobj = SynFun.getPA(mention);
//		FeatureSet pa = (paobj instanceof FeatureSet) ? (FeatureSet)paobj : null;
//		String paHead = null;
//		String det = null;
//		boolean isHumanMention = false;
//		if (pa != null) {
//			paHead = ((String) pa.get("head")).toLowerCase();
//			det = (String) pa.get("det");
//			isHumanMention = pa.get("human") != null;
//		}
//		Annotation headC = Resolve.getHeadC (mention);
//		String headWord = Resolve.normalizeName(doc.text(headC).trim());
//		String name = SynFun.getName(doc, mention);
//		String cat = (String) headC.get("cat");
//		// for named mentions, use type assigned by name tagger
//		if (name != null) {
//			if (paHead != null && !paHead.equalsIgnoreCase("otherName")) {
//				return paHead.toUpperCase();
//			} else {
//				return "OTHER";
//			}
//		}
//		// for phrases such as "group of X", "part of X", "all of X", 
//		// use the type of phrase X.
//		if (in(paHead, partitives)) {
//			// 'of' complement may be at lower level of tree, so go down tree
//			// until we find such a complement
//			Annotation x = mention;
//			while (x != null && x.get("of") == null)
//				x = (Annotation) x.get("headC");
//			if (x != null) {
//				Annotation of = (Annotation) x.get("of");
//				System.out.println ("Using computed type for " + paHead);
//				String type = getType(doc, null, of);
//				// special case:  parts of a GPE are a LOCATION
//				if (type.equals("GPE") &&
//				    (paHead.equals("part") || paHead.equals("portion")))
//				   type = "LOCATION";
//				return type;
//			}
//		}
//		// for pronouns, not in partitives, return "OTHER"
//		// (this suppresses entities whose first mention is a pronoun)
//		if (cat.equals("pro") || cat.equals("det"))
//			return "OTHER";
//		// for some nouns, EDTtype depends on whether they appear with a
//		// determiner;  handle these separately
//		String type = handCodedEDTtype (det, headWord);
//		if (type != null)
//			return type;
//			
//		// For all other nouns, look head up in both the EDT type dictionary and the SenseIndex
//		TypePrediction wsdPrediction = null;
//		try {
//            wsdPrediction = SenseResources.currentSenseIndex.lookUpEDTtype(headC);
//        } catch (SenseResourceException e) {} catch (JWNLException e) {}
//        
//		//    For "most common type" lookup in type dictionary, first use actual (inflected) head
//		TypePrediction mctPrediction = lookUpEDTtype(headWord.toLowerCase());
//		//    then try with regularized head from PA structure		
//		if (mctPrediction == null) mctPrediction = lookUpEDTtype(paHead);
//		// if there is no entry for singular form, check if plural form has entry
//		if (mctPrediction == null) {
//			String[] singular = new String[1];
//			singular[0] = paHead;
//			String[] plural = EnglishLex.nounPlural(singular);
//			mctPrediction = lookUpEDTtype(plural[0]);
//		}
//		
//		if (wsdPrediction != null || mctPrediction != null) {
//			if (wsdPrediction == null || (mctPrediction != null && 
//				mctPrediction.getConfidence() >= wsdPrediction.getConfidence())) {
//				mctDecided++;
//				return validType(mctPrediction.getType().toUpperCase().intern());
//			}
//			wsdDecided++;
//			return validType( wsdPrediction.getType().toUpperCase().intern() );
//		}
//		// if no entries at all, and entity has feature 'human' from Comlex,
//		// treat as a person
//		if (Ace.preferRelations) {
//			if (isHumanMention || (entity != null && entity.get("human") == "t"))
//			return "PERSON";
//		}
//		unknown++;
//		return "OTHER";
//	}

	/**
	 * Determines the ACE type and subtype of mention.
	 * Replaces the pure most-common-type algorithm of EDTtype with a version that first consults
	 * <CODE>EDTtype.getTypeSubtype</CODE>, and then, only if this produces a type of "OTHER,"
	 * returns a type based on the first WordNet sense listed for the mention's head.
	 * <p>
	 * The WordNet fallback is not attempted for mentions which have a name, nor for the head
	 * "us";  these remain "OTHER".  The label of the category assigned to the sense by
	 * <CODE>SenseUtils.ontology</CODE> is upper-cased to form the type ("WEAPON" and "VEHICLE"
	 * being shortened to their first three letters), and is combined with
	 * <CODE>EDTtypeData.bestSubtype</CODE> of that type if the latter is non-empty.
	 *
	 * @param doc      the document containing the mention
	 * @param entity   passed through unchanged to <CODE>EDTtype.getTypeSubtype</CODE>
	 * @param mention  the mention to be classified
	 * @return the type produced by EDTtype if it is not "OTHER";  otherwise the WordNet-based
	 *         type (with subtype, if one is available), or "OTHER" if WordNet offers no
	 *         category for the head
	 *
	 * @see AceJet.EDTtype#getTypeSubtype(Jet.Tipster.ExternalDocument, Jet.Tipster.Annotation, Jet.Tipster.Annotation)
	 */
	public static String getTypeSubtype (ExternalDocument doc,
		Annotation entity, Annotation mention) {
		Annotation headC = Resolve.getHeadC (mention);
		String type = EDTtype.getTypeSubtype(doc,entity,mention);
		if (!type.trim().equalsIgnoreCase("OTHER"))
			return type;
		String headWord = Resolve.normalizeName(doc.text(headC).trim());
		String name = SynFun.getName(doc, mention);
		// named mentions which the base typer left as 'OTHER' stay 'OTHER'
		if (name != null) return "OTHER";
		if (headWord.equals("us")) return "OTHER"; // << patch / rg / Aug 5 2004
		try {
			// NOTE(review): the lookup uses the raw head text, not the
			// trimmed / normalized headWord -- confirm this is intended
			Synset[] senses = SenseUtils.getSynsets(doc.text(headC));
			// no senses at all (null or empty array):  nothing to categorize
			if (senses == null || senses.length == 0) return "OTHER";
			Category cat = SenseUtils.ontology.categorize(senses[0]);
			if (cat == null) return "OTHER";
			String label = cat.getLabel();
			if (label == null) return "OTHER";
			type = label.toUpperCase().trim();
			System.out.println("Using WSD for entity typing.");
			// these two types are represented by their first three letters
			if (type.equalsIgnoreCase("WEAPON") || type.equalsIgnoreCase("VEHICLE"))
				type = type.substring(0,3);
			String subtype = EDTtypeData.bestSubtype(type);
			if (subtype != null && subtype.length() > 0)
				return EDTtype.typeAndSubtype(type,subtype);
		} catch (JWNLException e) {
			// WordNet is a best-effort fallback:  report the failure and
			// return whatever type has been determined so far
			System.err.println ("WordNet lookup failed for '" + headWord + "': " + e);
		}
		return type;
	}
//
//	public static String getMCTType (ExternalDocument doc, Annotation entity,
//								  Annotation mention) {
//		Object paobj = SynFun.getPA(mention);
//		FeatureSet pa = (paobj instanceof FeatureSet) ? (FeatureSet)paobj : null;
//		String paHead = null;
//		String det = null;
//		boolean isHumanMention = false;
//		if (pa != null) {
//			paHead = ((String) pa.get("head")).toLowerCase();
//			det = (String) pa.get("det");
//			isHumanMention = pa.get("human") != null;
//		}
//		Annotation headC = Resolve.getHeadC (mention);
//		String headWord = Resolve.normalizeName(doc.text(headC).trim());
//		String name = SynFun.getName(doc, mention);
//		String cat = (String) headC.get("cat");
//		// for named mentions, use type assigned by name tagger
//		if (name != null) {
//			if (paHead != null && !paHead.equalsIgnoreCase("otherName")) {
//				return paHead.toUpperCase();
//			} else {
//				return "OTHER";
//			}
//		}
//		// for phrases such as "group of X", "part of X", "all of X", 
//		// use the type of phrase X.
//		if (in(paHead, partitives)) {
//			// 'of' complement may be at lower level of tree, so go down tree
//			// until we find such a complement
//			Annotation x = mention;
//			while (x != null && x.get("of") == null)
//				x = (Annotation) x.get("headC");
//			if (x != null) {
//				Annotation of = (Annotation) x.get("of");
//				System.out.println ("Using computed type for " + paHead);
//				String type = getTypeSubtype(doc, null, of);
//				// special case:  parts of a GPE are a LOCATION
//				if (type.equals("GPE") &&
//					(paHead.equals("part") || paHead.equals("portion")))
//				   type = "LOCATION";
//				return type;
//			}
//		}
//		// for pronouns, not in partitives, return "OTHER"
//		// (this suppresses entities whose first mention is a pronoun)
//		if (cat.equals("pro") || cat.equals("det"))
//			return "OTHER";
//		// for some nouns, EDTtype depends on whether they appear with a
//		// determiner;  handle these separately
//		String type = handCodedEDTtype (det, headWord);
//		if (type != null)
//			return type;
//			
//		// For all other nouns, look head up in both the EDT type dictionary.
//        
//		//    For "most common type" lookup in type dictionary, first use actual (inflected) head
//		TypePrediction mctPrediction = lookUpEDTtype(headWord.toLowerCase());
//		//    then try with regularized head from PA structure		
//		if (mctPrediction == null) mctPrediction = lookUpEDTtype(paHead);
//		// if there is no entry for singular form, check if plural form has entry
//		if (mctPrediction == null) {
//			String[] singular = new String[1];
//			singular[0] = paHead;
//			String[] plural = EnglishLex.nounPlural(singular);
//			mctPrediction = lookUpEDTtype(plural[0]);
//		}
//		
//		if (mctPrediction != null) mctDecided++;
//		else {
//			try {
//				mctPrediction = SenseResources.currentSenseIndex.lookUpEDTtype(headC);
//				if (mctPrediction != null && mctPrediction.getType().trim().equalsIgnoreCase("FACILITY")) mctPrediction = null;
//				if (mctPrediction != null) wsdDecided++;
//			} catch (SenseResourceException e) {} catch (JWNLException e) {}
//		}
//		
//		if (mctPrediction != null) return validType(mctPrediction.getType().toUpperCase().intern());
//		// if no entries at all, and entity has feature 'human' from Comlex,
//		// treat as a person
//		if (Ace.preferRelations) {
//			if (isHumanMention || (entity != null && entity.get("human") == "t"))
//			return "PERSON";
//		}
//		unknown++;
//		return "OTHER";
//	}
}