package AceJet;

//Author:       Ralph Grishman
//Date:         July 10, 2003

import java.util.*;
import java.io.*;
import Jet.*;
import Jet.Control;
import Jet.Refres.Resolve;
import Jet.Lex.Tokenizer;
import Jet.Pat.Pat;
import Jet.Lisp.*;
import Jet.Tipster.*;
import Jet.Parser.SynFun;
// import net.didion.jwnl.JWNLException;

/**
 *  procedures for generating ACE EDT output for a Jet document.
 */

public class Ace {
	
	// ---- configuration flags ----

	// if true, main initializes Jet from one of the parser property files
	public static final boolean useParser = true;
	// if true (and useParser is true), main uses "props/ace use parses.properties"
	// and reads each document from a "parses/" (or "perfect parses/") subdirectory of ACEdir
	static final boolean useParseCollection = true;
	// if true, mention types and GPE roles are taken from PerfectAce, and (when
	// useParseCollection is set) main loads a key file for each document
	public static boolean perfectMentions = false;
	// not referenced within this class
	public static boolean perfectEntities = false;
	// if true (and useParser is false), main uses "props/ME ace asr.properties"
	static final boolean asr = false;
	
	// if true, buildEntity gets the type/subtype from EDTtypeEnsemble rather than EDTtype
	static final boolean useWordNet = true;
	// not referenced within this class
	public static boolean preferRelations = false;
	// initialized once as the negation of preferRelations;  not updated if
	// preferRelations is changed later
	public static boolean preferEntities = !preferRelations;
	
	// set by main for each document from allLowerCase(doc)
	public static boolean monocase = false;
	
	// ---- state for the document currently being processed ----

	// writer for the APF output file of the current document
	static PrintWriter apf;
	// the current document
	static ExternalDocument doc;
	// gazetteer, loaded by main from "data/loc.dict"
	public static Gazetteer gazetteer;
	// number of ACE entities generated so far for the current document (reset by writeAPF)
	static int aceEntityNo;
	// not referenced within this class
	static HashMap aceTypeDict;
	// suffix appended to the document path to form the APF output file name
	static final String suffix = ".sgm.apf";
	// for formal evaluation
	// static final String suffix = ".apf.xml";
	// line most recently read from the file list (document path relative to ACEdir, without extension)
	static String currentDocPath;
	// currentDocPath with any leading directories (up to the last '/') removed
	static String currentDoc;
	// source type passed to the AceDocument constructor;  main sets it from
	// substrings ("bnews", "nwire", "npaper") of currentDocPath
	static String sourceType = "text";

	// ---- file locations ----

	// root directory for ACE data (machine-specific absolute path)
	static final String ACEdir =
	    "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/";
	// files passed to EDTtype.readTypeDict and EDTtype.readGenericDict by main
	static final String dictFile = ACEdir + "new EDT type dict.txt";
	static final String genericFile = ACEdir + "new generic dict.txt";
	// fileList names a text file with one document path per line;  exactly one
	// of the alternatives below is active
	// static final String fileList = ACEdir + "nwire-sep02.txt";
	// static final String fileList = ACEdir + "sep02 all.txt";
	// static final String fileList = ACEdir + "aug03 all.txt";
	// static final String fileList = ACEdir + "aug 03 bnews 20.txt";
	// static final String fileList = ACEdir + "bnews-aug03.txt";
	// static final String fileList = ACEdir + "nwire-aug03-25.txt";
	// ------------ sep 03 evaluation corpora -------------
	// static final String fileList = ACEdir + "sep03eval-bnews.txt";
	// static final String fileList = ACEdir + "sep03eval-nwire.txt";
	// ------------- 2004 training corpora
	// static final String fileList = ACEdir + "training04 nwire 20.txt";
	// static final String fileList = ACEdir + "training04 bnews 20.txt";
	// static final String fileList = ACEdir + "training04 nwire.txt";
	// static final String fileList = ACEdir + "training04 bnews.txt";
	// static final String fileList = ACEdir + "asr pilot.txt";
	// static final String fileList = ACEdir + "training04 perfect mention.txt";
	// ------------- 2004 evaluation corpora
	static final String fileList = ACEdir + "eval04 nwire.txt";
	// static final String fileList = ACEdir + "eval04 bnews.txt";
	// static final String fileList = ACEdir + "eval04 asr.txt";
	// static final String fileList = ACEdir + "corefeval04 nwire.txt";
	// static final String fileList = ACEdir + "corefeval04 bnews.txt";
	// static final String fileList = ACEdir + "rdreval04 nwire.txt";
	// static final String fileList = ACEdir + "rdreval04 bnews.txt";
	// relational models (from Adam's file and from corpus)
	// public static final String patternFile1 = ACEdir + "relations/" + "patterns 9-27.log";
	// public static final String patternFile2 = ACEdir + "relations/" + "patterns sep02.log";
	// public static final String patternFile3 = ACEdir + "relations/" + "patterns aug03.log";
	// public static final String handPatternFile = ACEdir + "lisp/" + "patterns.log";
	// static final String generalPatternFile = ACEdir + "lisp/" + "generalPatterns.log";	
	// relation pattern file loaded into 'eve' by main
	public static final String patternFile1 = ACEdir + "relations/" + "patterns04.log";
	// public static RelationPatternSet adam, eve, general;
	// relation pattern set, created and loaded by main
	public static RelationPatternSet eve;
	// public static HashMap partitiveMap;
	
	/**
	 *  batch driver:  initializes Jet and the ACE resources (type
	 *  dictionaries, gazetteer, relation patterns), then processes every
	 *  document named in <CODE>fileList</CODE>, writing one APF file per
	 *  document next to the source (<CODE>ACEdir + path + suffix</CODE>).
	 *  Blank lines in the file list are skipped.
	 *
	 *  @param  args  command-line arguments (not used)
	 *  @throws IOException  if the file list or an APF output file cannot
	 *                       be opened, read, or closed
	 */

	public static void main (String[] args) throws IOException {
		// initialize Jet
		System.out.println("Starting ACE Jet...");
		if (useParser) {
			if (useParseCollection) {
				JetTest.initializeFromConfig("props/ace use parses.properties");
			} else {
				JetTest.initializeFromConfig("props/ace parser.properties");
			}
		} else {
			if (asr) {
				JetTest.initializeFromConfig("props/ME ace asr.properties");
			} else {
				JetTest.initializeFromConfig("props/ME ace.properties");
			}
		}
		// load type dictionary and gazetteer
		EDTtype.readTypeDict(dictFile);
		EDTtype.readGenericDict (genericFile);
		gazetteer = new Gazetteer();
		gazetteer.load("data/loc.dict");
		// load wordnet info
		/*
		if (useWordNet) {
			try {
				SenseResources.initializeSenseResources();
			} catch (JWNLException e) {
				System.out.println("Problem initializing Java WordNet interface. " + e.getMessage());
			} catch (SenseResourceException e) {
				System.out.println("Problem initializing sense resource. " + e.getMessage());
			} catch (IOException e) {
				System.out.println("Problem retrieving sense resource file. " + e.getMessage());
			} catch (ClassNotFoundException e) {
				System.out.println("Problem deserializing sense resource file. " + e.getMessage());
			}
		}
		*/
		// load relational models
		// adam = new RelationPatternSet();
		// adam.load(handPatternFile, 0 );
		eve = new RelationPatternSet();
		eve.load(patternFile1, 0);
		// eve.load(patternFile2, 0);
		// eve.load(patternFile3, 0);
		// BuildRelationModel.buildProbModel (eve);  // << added 9/25
		// general = new RelationPatternSet();
		// general.load(generalPatternFile, 0);
		// new Jet.Console();
		Pat.trace = false;
		Resolve.trace = false;
		// open list of files
		BufferedReader reader = new BufferedReader (new FileReader(fileList));
		try {
			int docCount = 0;
			while ((currentDocPath = reader.readLine()) != null) {
				// a blank line (e.g. at the end of the list) is not a document
				if (currentDocPath.trim().length() == 0)
					continue;
				// process file 'currentDocPath'
				docCount++;
				// if (docCount != 2) continue;
				System.out.println ("\nProcessing document " + docCount + ": " + currentDocPath);
				// read document
				if (useParseCollection) {
					if (perfectMentions) {
						doc = new ExternalDocument("sgml", ACEdir + "perfect parses/" + currentDocPath + ".sgm");
						String textFile = ACEdir + currentDocPath + ".sgm";
						// String keyFile = ACEdir + currentDocPath + ".apf.xml";
						// String keyFile = ACEdir + currentDocPath + ".mentions.apf.xml"; //<< for corefeval
						String keyFile = ACEdir + currentDocPath + ".entities.apf.xml"; //<< for rdreval
						AceDocument keyDoc = new AceDocument(textFile, keyFile);
						PerfectAce.buildEntityMentionMap (doc, keyDoc);
					} else {
						doc = new ExternalDocument("sgml", ACEdir + "parses/" + currentDocPath + ".sgm");
					}
				} else {
					doc = new ExternalDocument("sgml", ACEdir + currentDocPath + ".sgm");
				}
				doc.setAllTags(true);
				doc.setEmptyTags(new String[] {"W", "TURN"});
				doc.open();
				// process document;  single-case text switches the gazetteer
				// and the HMM settings below
				monocase = allLowerCase(doc);
				System.out.println (">>> Monocase is " + monocase);
				gazetteer.setMonocase(monocase);
				Jet.HMM.BigramHMMemitter.useBigrams = monocase;
				Jet.HMM.HMMstate.otherPreference = monocase ? 1.0 : 0.0;
				// note:  'docCount == -1' is always false here
				Control.processDocument (doc, null, docCount == -1, docCount);
				tagReciprocalRelations(doc);
				// new View (doc, docCount);
				// document name = path with leading directories removed
				currentDoc = currentDocPath;
				if (currentDocPath.indexOf('/') >= 0)
					currentDoc = currentDocPath.substring(currentDocPath.lastIndexOf('/')+1);
				// source type is inferred from the path
				sourceType = "text";
				if (currentDocPath.indexOf("bnews") >= 0)
					sourceType = "broadcast news";
				else if (currentDocPath.indexOf("nwire") >= 0)
					sourceType = "newswire";
				else if (currentDocPath.indexOf("npaper") >= 0)
					sourceType = "newspaper";
				// open apf file
				String apfFileName = ACEdir + currentDocPath + suffix;
				apf = new PrintWriter(new BufferedWriter(new FileWriter(apfFileName)));
				try {
					// write entities
					writeAPF();
				} finally {
					// make sure the output is flushed and the file handle is
					// released even if writeAPF fails;  closing a PrintWriter
					// which has already been closed has no effect
					apf.close();
				}
				// break;
			}
		} finally {
			reader.close();
		}
	}
	
	/**
	 *  returns <CODE>true</CODE> if the first TEXT annotation of
	 *  <CODE>doc</CODE> is written in a single case, i.e., it does not
	 *  contain both an upper-case and a lower-case character.  Despite the
	 *  method name, all-upper-case text also yields <CODE>true</CODE>.
	 *  Returns <CODE>false</CODE> if the document has no TEXT annotation.
	 */

	public static boolean allLowerCase (Document doc) {
		Vector textSegments = doc.annotationsOfType ("TEXT");
		if (textSegments == null || textSegments.size() == 0)
			return false;
		// only the first TEXT segment is examined
		Span textSpan = ((Annotation) textSegments.get(0)).span();
		boolean sawUpper = false;
		boolean sawLower = false;
		int posn = textSpan.start();
		while (posn < textSpan.end()) {
			if (Character.isUpperCase(doc.charAt(posn)))
				sawUpper = true;
			if (Character.isLowerCase(doc.charAt(posn)))
				sawLower = true;
			posn++;
		}
		// single-case  <=>  not (both cases seen)
		return !(sawUpper && sawLower);
	}
		
	/**
	 *  builds an AceDocument for the current document <CODE>doc</CODE>:
	 *  one AceEntity for each 'entity' annotation accepted by
	 *  <CODE>buildEntity</CODE>, plus the relations found by LearnRelations,
	 *  and writes it to <CODE>apf</CODE>.  A document with no entity
	 *  annotations produces an AceDocument with no entities.
	 */
	 
	private static void writeAPF () {
		aceEntityNo = 0;
		// partitiveMap = new HashMap();
		LearnRelations.resetMentions(); // for relations
		String docText = doc.text();
		Vector entities = doc.annotationsOfType("entity");
		AceDocument aceDoc = new AceDocument(currentDoc + ".sgm", sourceType, currentDoc, docText);
		// other callers of annotationsOfType in this class treat a null
		// result as 'no annotations of this type';  do the same here
		if (entities != null) {
			for (int ientity=0; ientity<entities.size(); ientity++) {
				Annotation entity = (Annotation) entities.get(ientity);
				AceEntity aceEntity = buildEntity(entity, ientity, docText);
				// buildEntity returns null for entities which are not reported
				if (aceEntity != null)
					aceDoc.addEntity(aceEntity);
			}
		}
		LearnRelations.findAndWriteRelations(currentDoc, doc, aceDoc);
		aceDoc.write(apf);
	}
	
	/**
	 *  builds the AceEntity for Jet entity annotation <CODE>entity</CODE>,
	 *  including one AceEntityMention for each of its mentions and one
	 *  AceEntityName for each mention of type NAME.  Each mention is also
	 *  registered with LearnRelations for subsequent relation finding.
	 *
	 *  @param  entity   the Jet 'entity' annotation
	 *  @param  ientity  index of the entity within the document (used only
	 *                   in progress messages)
	 *  @param  docText  text of the document
	 *  @return the AceEntity, or <CODE>null</CODE> if the entity has no
	 *          mentions or its EDT type is OTHER
	 */
	 
	private static AceEntity buildEntity (Annotation entity, int ientity, String docText) {
		Vector mentions = (Vector) entity.get("mentions");
		// nothing to report for an entity without mentions
		if (mentions == null || mentions.isEmpty())
			return null;
		Annotation firstMention = (Annotation) mentions.get(0);
		String aceTypeSubtype;
		if (useWordNet) {
			aceTypeSubtype = EDTtypeEnsemble.getTypeSubtype (doc, entity, firstMention);
		} else {
			aceTypeSubtype = EDTtype.getTypeSubtype (doc, entity, firstMention);
		}
		String aceType = EDTtype.bareType(aceTypeSubtype);
		System.out.println ("Type of " + Resolve.normalizeName(doc.text(firstMention)) + " is " + aceTypeSubtype);
		if (aceType.equals("OTHER")) return null;
		String aceSubtype = EDTtype.subtype(aceTypeSubtype);
		// don't tag items generic for Ace 2004
		boolean generic = !AceDocument.ace2004 && isGeneric(firstMention);
		
		if (generic) {
			System.out.println ("Identified generic mention " +
			                    Resolve.normalizeName(doc.text(firstMention)));
		}
		
		// only entities which are actually reported consume an ACE entity number
		aceEntityNo++;
		System.out.println("Generating ace entity " + aceEntityNo +
		                   " (internal entity " + ientity + ") = " +
		                   Resolve.normalizeName(doc.text(firstMention)) +
		                   " [" + aceType + "]");
		String entityID = currentDoc + "-" + aceEntityNo;
		AceEntity aceEntity = new AceEntity (entityID, aceType, aceSubtype, generic);
		for (int imention=0; imention<mentions.size(); imention++) {
			Annotation mention = (Annotation) mentions.get(imention);
			Annotation head = Resolve.getHeadC(mention);
			String mentionID = aceEntityNo + "-" + imention;
			AceEntityMention aceMention = buildMention (mention, head, mentionID, aceType, docText);
			aceEntity.addMention(aceMention);
			LearnRelations.addMention
			    (aceType, aceSubtype, getNgHead(mention).span(), head.span(), mentionID, entityID, generic);
			// compare by value:  the type string may come from PerfectAce
			// and need not be the same object as the literal
			boolean isNameMention = "NAME".equals(aceMention.type);
			if (isNameMention) {
				aceEntity.addName(new AceEntityName(head.span(), docText));
			}
		}
		return aceEntity;
	}
	
	// prepositions which mark a GPE mention as having role LOC
	static final String[] locativePrepositions = {"in", "at", "to", "near"};
	
	/**
	 *  builds the AceEntityMention for <CODE>mention</CODE> with head
	 *  <CODE>head</CODE>.  For mentions of GPE entities, the role is also
	 *  set:  from PerfectAce if <CODE>perfectMentions</CODE> is set;
	 *  otherwise LOC if the mention is governed by a locative preposition
	 *  or is in sentence 0 (location in dateline), and GPE if not.
	 */
	 
	private static AceEntityMention buildMention 
			(Annotation mention, Annotation head, String mentionID, String entityType, String docText) {
		Span extent = mention.span();
		Span headExtent = head.span();
		String type = mentionType(head, mention);
		AceEntityMention m = new AceEntityMention (mentionID, type, extent, headExtent, docText);
		// only GPE mentions carry a role
		if (!entityType.equals("GPE"))
			return m;
		if (perfectMentions) {
			m.role = PerfectAce.getMentionRole(head);
			return m;
		}
		String prep = governingPreposition(mention);
		boolean locative = prep != null && in(prep, locativePrepositions);
		// for location in dateline (consulted only if not already locative)
		boolean inDateline = !locative && Resolve.sentenceNumber(mention.start()) == 0;
		m.role = (locative || inDateline) ? "LOC" : "GPE";
		return m;
	}
	
	/**
	 *  determine the mention type of a mention (PRE, NOMINAL, PRONOUN, or
	 *  NAME).  If <CODE>perfectMentions</CODE> is set, the type is obtained
	 *  from PerfectAce;  otherwise it is determined from the 'cat' feature
	 *  of <CODE>head</CODE> and the 'preName-1' / 'nameMod-1' features of
	 *  <CODE>mention</CODE>.  Any category not listed below (including a
	 *  missing category) is treated as a name.
	 */
	 
	private static String mentionType (Annotation head, Annotation mention) {
		if (perfectMentions)
			return PerfectAce.getMentionType (head);
		String cat = (String) head.get("cat");
		// categories are compared by value, so the result does not depend
		// on whether the feature value happens to be an interned string
		if (mention.get("preName-1") != null || mention.get("nameMod-1") != null ||
		    "adj".equals(cat))
			return "PRE";
		else if ("n".equals(cat) || "title".equals(cat) || "tv".equals(cat) || "v".equals(cat))
			return "NOMINAL";
		else if ("pro".equals(cat) || "det".equals(cat) /*for possessives - his, its */ ||
		         "ven".equals(cat) || "q".equals(cat) ||
		         "np".equals(cat) /* for headless np's */ ||
		         "wp".equals(cat) || "wp$".equals(cat))
			return "PRONOUN";
		else // cat == "name"
			return "NAME";
	}
	
	// determiners which, by themselves, mark a noun phrase as generic
	static final String[] genericFriendlyDeterminers =
		{"no", "neither", "any", "many", "every", "each"};
	// pronouns which are always treated as generic
	static final String[] clearGenericPronouns = 
		{"everyone", "anyone", "everybody", "anybody",
		"something", "who", "whoever", "whomever",
		"wherever", "whatever", "where"};
		
	/**
	 *  returns <CODE>true</CODE> if <CODE>mention</CODE> is judged to be
	 *  generic.
	 *  <UL>
	 *  <LI> a mention headed by a common noun is generic if it has a
	 *       generic-friendly determiner, or if it has a generic head (per
	 *       EDTtype.hasGenericHead), no possessive or quantifier, and is the
	 *       subject or object of a verb group whose tense is not past and
	 *       which has no aspect
	 *  <LI> a mention whose head has category pro, np, or det is generic
	 *       if its head word is one of <CODE>clearGenericPronouns</CODE> or
	 *       <CODE>genericFriendlyDeterminers</CODE>
	 *  <LI> any other mention (a name) is not generic
	 *  </UL>
	 */

	private static boolean isGeneric (Annotation mention) {
		Annotation ngHead = getNgHead (mention);
		Annotation headC = Resolve.getHeadC (mention);
		// feature values are compared by value (equals), not by identity
		Object headCat = headC.get("cat");
		if ("n".equals(headCat)) {
			// is always generic
			String det = SynFun.getDet(mention);
			if (det != null && in(det, genericFriendlyDeterminers))
				return true;
			// OR is generic head
			if (!EDTtype.hasGenericHead(doc, mention)) return false;
			// if (pa.get("number") != "plural") return false;
			if (ngHead.get("poss") != null || "poss".equals(det)) return false;
			if (ngHead.get("quant") != null || "q".equals(det)) return false;
			//    AND is in generic environment
			Annotation vg = governingVerbGroup(mention);
			if (vg != null) {
				FeatureSet vpa = (FeatureSet) vg.get("pa");
				if (vpa != null && !"past".equals(vpa.get("tense"))
				                && vpa.get("aspect") == null) {
				    System.out.println ("Governing verb group = " + doc.text(vg));
				    System.out.println ("Verb group pa = " + vpa);
					return true;
				}
			}
			return false;
		} else if ("pro".equals(headCat) || "np".equals(headCat)
		                                 || "det".equals(headCat)) {
			String pronoun = SynFun.getHead(doc, mention);
			return in(pronoun,clearGenericPronouns) ||
			       in(pronoun,genericFriendlyDeterminers);  // << added Oct. 10
		} else /* head is a name */ return false;
	}
	
	/**
	 *  follows the chain of 'headC' features starting at <CODE>ng</CODE>
	 *  for as long as the head constituent has category 'np' and is not
	 *  marked possPrefix = "true", and returns the last constituent reached
	 *  (<CODE>ng</CODE> itself if its head does not qualify).
	 */

	private static Annotation getNgHead (Annotation ng) {
		Annotation hd = ng;
		while (true) {
			Annotation child = (Annotation) hd.get("headC");
			if (child == null) return hd;
			// compare feature values by value, not by identity
			boolean isNp = "np".equals(child.get("cat"));
			boolean isPossPrefix = "true".equals(child.get("possPrefix"));
			if (!isNp || isPossPrefix) return hd;
			hd = child;
		}
	}
	
	/**
	 *  returns <CODE>true</CODE> if some non-null element of
	 *  <CODE>array</CODE> is equal (in the sense of <CODE>equals</CODE>)
	 *  to <CODE>o</CODE>.  Null elements never match, so a null
	 *  <CODE>o</CODE> always yields <CODE>false</CODE>.
	 */

	private static boolean in (Object o, Object[] array) {
		int i = 0;
		while (i < array.length) {
			Object candidate = array[i++];
			if (candidate == null)
				continue;
			if (candidate.equals(o))
				return true;
		}
		return false;
	}
	
	/**
	 *  assigns reciprocal relations subject-1 and object-1:  for every
	 *  'constit' annotation with a 'subject' (or 'object') feature, the
	 *  subject (object) annotation gets a 'subject-1' ('object-1') feature
	 *  pointing back to the constituent.  An inverse link which is already
	 *  present is not overwritten, so the first governing constituent wins.
	 */
	 
	public static void tagReciprocalRelations (Document doc) {
		Vector constits = doc.annotationsOfType("constit");
		if (constits == null)
			return;
		for (int j = 0; j < constits.size();  j++) {
			Annotation governor = (Annotation) constits.elementAt(j);
			Annotation subject = (Annotation) governor.get("subject");
			if (subject != null && subject.get("subject-1") == null)
				subject.put("subject-1", governor);
			Annotation object = (Annotation) governor.get("object");
			if (object != null && object.get("object-1") == null)
				object.put("object-1", governor);
		}
	}
				
	/**
	 *  returns the head (per Resolve.getHeadC) of the constituent of which
	 *  <CODE>ann</CODE> is the subject or, failing that, the object, as
	 *  recorded in the 'subject-1' / 'object-1' features;  returns
	 *  <CODE>null</CODE> if <CODE>ann</CODE> has neither feature.
	 */

	static Annotation governingVerbGroup (Annotation ann) {
		// subject link takes precedence over object link
		Object governor = ann.get("subject-1");
		if (governor == null)
			governor = ann.get("object-1");
		if (governor == null)
			return null;
		return Resolve.getHeadC((Annotation) governor);
	}
	
	/**
	 *  returns the preposition governing <CODE>ann</CODE>:  the trimmed
	 *  text of the first child of the constituent recorded in the 'p-obj-1'
	 *  feature of <CODE>ann</CODE>.  Returns <CODE>null</CODE> if
	 *  <CODE>ann</CODE> has no 'p-obj-1' feature, or if that constituent
	 *  does not have exactly two children.
	 */

	static String governingPreposition (Annotation ann) {
		Annotation pp = (Annotation) ann.get("p-obj-1");
		if (pp == null)
			return null;
		Annotation[] ppChildren = (Annotation[]) pp.get("children");
		// a constituent without a 'children' feature is treated like any
		// other constituent of unexpected shape
		if (ppChildren == null || ppChildren.length != 2)
			return null;
		Annotation prepConstit = ppChildren[0];
		return doc.text(prepConstit).trim();
	}
	
	/**
	 *  converts a Jet character offset to an ACE offset.  The two
	 *  coincide here, so <CODE>posn</CODE> is returned unchanged.
	 */

	static int getACEoffset (int posn) {
		final int aceOffset = posn;
		return aceOffset;
	}
	
	/**
	 *  converts an ACE character offset to a Jet offset.  The two
	 *  coincide here, so <CODE>posn</CODE> is returned unchanged.
	 */

	static int getJetOffset (int posn) {
		final int jetOffset = posn;
		return jetOffset;
	}	
}