package AceJet;

//Author:       Ralph Grishman
//Date:         July 25, 2003

import java.util.*;
import java.io.*;
import Jet.*;
import Jet.Control;
import Jet.Parser.SynFun;
import Jet.Parser.StatParser;
import Jet.Refres.Resolve;
import Jet.Lex.Tokenizer;
import Jet.Lisp.*;
import Jet.Pat.Pat;
import Jet.Tipster.ExternalDocument;
import Jet.Tipster.Span;
import Jet.Tipster.Annotation;
import Jet.Tipster.View;
import Jet.Chunk.Chunker;

import opennlp.maxent.*;
import opennlp.maxent.io.*;

/**
 *  procedures for learning the syntactic indications of ACE relations
 *     1. process document, creating constituents with syntactic relations
 *     2. read in ACE relations, building table with
 *              relation  |  offset of arg1  |  offset of arg2
 *     3. iterate over syntactic relations;
 *          if pair links two candidates, record it as candidate
 *     4. iterate over pairs of consecutive mentions, record as candidate
 *     5. review candidates, report as relations / non-relations
 *     6. report ACE relations not covered
 */

public class LearnRelations {
	
	// when true, main loads one of the parser-based Jet configurations
	static final boolean useParser = true;
	// when true, documents are read from the "parses/" subdirectory of ACEdir
	// (and main loads the "use parses" configuration)
	static final boolean useParseCollection = true;
	
	// document currently being processed
	static ExternalDocument doc;
	// name of the document most recently read from the file list
	static String currentDoc;
	// root directory of the ACE data (machine-specific absolute path)
	static final String ACEdir =
	    "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/";
	// names of the constituent features treated as direct syntactic relations
	static final String[] relations = {"of", "poss", "nameMod"};
	// directory holding the relation pattern files
	static final String rootDir = 
	    ACEdir + "relations/";
	// file onto which main writes the learned patterns
	static final String patternFile = rootDir + "patterns04.log";
	
	// mapping from mention ID to Mention
	static HashMap mentionIDMap;
	// mapping from start of head (as an Integer) to Mention
	static HashMap mentionStartMap;
	// set of mentions (excluding generic)
	static TreeSet mentionSet;
	// set of all mentions, including generics
	static TreeSet allMentionSet;
	// list of relation mentions (read from the APF file when learning,
	// or produced by predictRelations when tagging)
	static ArrayList relMentionList;
	// list of entity mention pairs which are candidates for relations
	static ArrayList candidates;
	// list of relations (formed from relation mentions)
	static ArrayList relationList;
	// generated patterns:  maps each pattern string to the number of
	// times it has been recorded
	static TreeMap patternSet = new TreeMap();
	// file onto which patterns are written
	static PrintStream writer;
	// name of current doc (used in generating ID's)
	static String docName;
	// true if writing relations file
	static boolean writingRelations = false;
	// set true to expand conjuncts
	static final boolean expandConjuncts = true;
	
	/**
	 *  learns relation patterns from the ACE training file lists and writes
	 *  them to 'patternFile'.
	 *  <p>
	 *  NOTE:  the method currently returns immediately (see the first
	 *  statement);  the remainder is retained so that training can be
	 *  re-enabled by removing that statement.
	 *
	 *  @param  args  command-line arguments (not used)
	 *  @throws IOException  if the pattern file or a file list cannot be
	 *                       opened or read
	 */
	public static void main (String[] args) throws IOException {
		// training is deliberately disabled;  'if (true)' keeps the rest
		// of the method compilable
		if (true) return;
		writingRelations = true;
		// initialize Jet
		System.out.println("Starting ACE Jet...");
		if (useParser) {
			if (useParseCollection) {
				JetTest.initializeFromConfig("props/ace use parses.properties");
			} else {
				JetTest.initializeFromConfig("props/ace parser.properties");
			}
		} else {
			JetTest.initializeFromConfig("props/ME ace.properties");
		}
		Chunker.loadModel();
		Ace.gazetteer = new Gazetteer();
		Ace.gazetteer.load("data/loc.dict");
		// new Jet.Console();
		Pat.trace = false;
		Resolve.trace = false;
		
		writer = new PrintStream (new FileOutputStream (patternFile));
		try {
			// learnFromFileList (ACEdir + "training all but eeld.txt");
			// learnFromFileList (ACEdir + "sep02 all.txt");
			// learnFromFileList (ACEdir + "aug03 all.txt");
			// learnFromFileList (ACEdir + "training04 nwire 21andup.txt");
			learnFromFileList (ACEdir + "training04 nwire.txt");
			// learnFromFileList (ACEdir + "training04 bnews 21andup.txt");
			learnFromFileList (ACEdir + "training04 bnews.txt");
			learnFromFileList (ACEdir + "training04 chinese.txt");
			learnFromFileList (ACEdir + "training04 arabic.txt");
			reportPatterns();
		} finally {
			// close the pattern file even if learning fails part-way
			writer.close();
		}
	}
	
	/**
	 *  learns patterns from every document named in 'fileList' (one
	 *  document name per line).  For each document:  the text is read and
	 *  processed by Jet, the relations are read from the corresponding APF
	 *  file, candidate mention pairs are collected from syntactic relations
	 *  and from adjacency, and the resulting patterns are recorded.
	 *
	 *  @param  fileList  name of the file containing the list of documents
	 *  @throws IOException  if the file list cannot be opened or read
	 */
	static void learnFromFileList (String fileList) throws IOException {
		// open list of files
		BufferedReader reader = new BufferedReader (new FileReader(fileList));
		try {
			int docCount = 0;
			while ((currentDoc = reader.readLine()) != null) {
				// process file 'currentDoc'
				docCount++;
				// if (docCount > 1) break;
				System.out.println ("\nProcessing document " + docCount + ": " + currentDoc);
				String textFile;
				if (useParseCollection) {
					textFile = ACEdir + "parses/" + currentDoc + ".sgm";
				} else {
					textFile = ACEdir + currentDoc + ".sgm";
				}
				// read document
				doc = new ExternalDocument("sgml", textFile);
				doc.setAllTags(true);
				doc.open();
				// process document
				Ace.monocase = Ace.allLowerCase(doc);
				System.out.println (">>> Monocase is " + Ace.monocase);
				Control.processDocument (doc, null, docCount < 0, docCount);
				// open a viewer for the first two documents only
				if (docCount < 3) new View(doc, docCount);
				// read key file with relation information
				readACErelations (textFile, ACEdir + currentDoc + ".apf.xml");
				// clear table of candidate mention pairs
				candidates = new ArrayList();
				// iterate over syntactic relations, record candidates for ACE relations
				findSyntacticPatterns (doc);
				// iterate over pairs of adjacent mentions, record candidates for ACE relations
				findAdjacencyPatterns ();
				// process candidates
				processCandidates();
				// process remaining ACE relations (should not be many)
				processLeftovers ();
			}
		} finally {
			// release the file list even if a document fails to process
			reader.close();
		}
	}
	
	/**
	 *  identifies the relations in document 'd' (from file name 'currentDoc') and adds them
	 *  as AceRelations to AceDocument 'aceDoc'.
	 *  <p>
	 *  Sets the static fields 'doc' and 'docName' and rebuilds 'candidates'.
	 *  The mention tables are not rebuilt here, so they must already
	 *  describe the mentions of 'd' when this method is called.
	 *
	 *  @param  currentDoc  name of the document, used in generating relation IDs
	 *  @param  d           the document being tagged
	 *  @param  aceDoc      the AceDocument to which the relations are added
	 */
	
	static void findAndWriteRelations (String currentDoc, ExternalDocument d, AceDocument aceDoc) {
		doc = d;
		docName = currentDoc;
		// System.out.println ("PartitiveMap: " + Ace.partitiveMap);
		// clear table of candidate mention pairs
		candidates = new ArrayList();
		// record conjunct links for mentions
		findConjuncts (doc);
		// iterate over syntactic relations, record candidates for ACE relations
		findSyntacticPatterns (doc);
		// iterate over pairs of adjacent mentions, record candidates for ACE relations
		findAdjacencyPatterns ();
		// use model to determine which are ACE relations
		predictRelations ();
		// combine relation mentions into relations
		relationCoref (aceDoc);
	}
	
	/**
	 *  iterates over all "constit" annotations of 'doc';  for each
	 *  syntactic relation found, checks whether it links two mentions and,
	 *  if so, records the pair as a candidate for an ACE relation.
	 *  <p>
	 *  Three kinds of link are examined:
	 *  the features listed in 'relations';  subject - verb - object;
	 *  and subject / object - verb - preposition - object of preposition.
	 *
	 *  @param  doc  the document whose constituents are to be examined
	 */
	 
	private static void findSyntacticPatterns (ExternalDocument doc) {
		Vector constits = doc.annotationsOfType("constit");
		if (constits == null) return;
		for (int j = 0; j < constits.size();  j++) {
			Annotation ann = (Annotation) constits.elementAt(j);
			// relations recorded directly as features of the constituent
			for (int r = 0; r < relations.length; r++) {
				String relation = relations[r];
				Annotation value = (Annotation) ann.get(relation);
				if (value != null) {
					checkSyntacticRelation (ann, relation, value);
				}
			}
			// relations mediated by a verb
			String verb = SynFun.getImmediateHead(ann);
			if (verb == null) continue;
			Annotation subject = (Annotation) ann.get("subject");
			Annotation object = (Annotation) ann.get("object");
			Annotation pp = (Annotation) ann.get("pp");
			if (subject != null && object != null) {
				/* -- version for use with chunker
				Annotation vp = (Annotation) ann.get("headC");
				if (vp.get("object") != null) {
					Annotation object = (Annotation) vp.get("object");
					String verb = SynFun.getNameOrHead(doc, vp);
					*/
				checkSyntacticRelation (subject, verb, object);
			}
			if (pp != null) {
				Annotation[] ppChildren = StatParser.children(pp);
				// '&&' (not '&'):  the length must not be examined
				// when there are no children at all
				if (ppChildren != null && ppChildren.length == 2) {
					Annotation pNode = ppChildren[0];
					Annotation pObject = ppChildren[1];
					String p = SynFun.getHead(doc, pNode);
					if (subject != null)
						checkSyntacticRelation (subject, "s-" + verb + "-" + p, pObject);
					if (object != null)
						checkSyntacticRelation (object, "o-" + verb + "-" + p, pObject);
				}
			}
		}
	}
	
	/**
	 *  given a syntactic relation 'relation' between two constituents,
	 *  arg1 and arg2, looks up the mention whose head starts where the
	 *  head of each constituent starts.  If both mentions exist and belong
	 *  to different entities, the pair is recorded as a relation candidate,
	 *  with the textually earlier mention first;  when the arguments had
	 *  to be exchanged, "-1" is appended to the name of the link.
	 */
	
	private static void checkSyntacticRelation
		(Annotation arg1, String relation, Annotation arg2) {
		// mention corresponding to first constituent (if any)
		int headStart1 = Resolve.getHeadC (arg1).span().start();
		Mention first = (Mention) mentionStartMap.get(new Integer(headStart1));
		if (first == null) return;
		// mention corresponding to second constituent (if any)
		int headStart2 = Resolve.getHeadC (arg2).span().start();
		Mention second = (Mention) mentionStartMap.get(new Integer(headStart2));
		if (second == null) return;
		// co-referring mentions cannot be in a relation
		if (first.entityID.equals(second.entityID)) return;
		boolean inTextOrder = first.compareTo(second) < 0;
		ArrayList rels;
		String link;
		if (inTextOrder) {
			rels = recordCandidateWithConjuncts (first, second);
			link = relation;
		} else {
			rels = recordCandidateWithConjuncts (second, first);
			link = relation + "-1";
		}
		recordSyntacticLink(rels, link);
	}
	
	// a mention is paired with at most this many following mentions
	private static final int mentionWindow = 3;
	// linear patterns with more elements than this are not recorded
	private static final int maxPatternLength = 100;
	
	/**
	 *  pairs each (non-generic) mention with the mentions which follow it
	 *  within a window of 'mentionWindow';  if a linear pattern can be built
	 *  between the two, records the pair as a relation candidate along
	 *  with that pattern.
	 */
	
	private static void findAdjacencyPatterns () {
		if (mentionSet.isEmpty()) return;
		ArrayList ordered = new ArrayList(mentionSet);
		int count = ordered.size();
		for (int first = 0; first < count - 1; first++) {
			Mention m1 = (Mention) ordered.get(first);
			int limit = Math.min(first + mentionWindow, count - 1);
			for (int second = first + 1; second <= limit; second++) {
				Mention m2 = (Mention) ordered.get(second);
				// co-referring mentions cannot be in a relation
				if (m1.entityID.equals(m2.entityID)) continue;
				// no pattern is returned if the span between the
				// mentions cannot be used (e.g., it crosses a sentence boundary)
				ArrayList pattern = patternBetweenMentions (m1, m2);
				if (pattern == null || pattern.size() > maxPatternLength) continue;
				ArrayList rels = recordCandidateWithConjuncts (m1, m2);
				recordLinearLink(rels, pattern);
			}
		}
	}
	
	/**
	 *  returns the candidate on list 'candidates' for mentions m1 and m2
	 *  (compared by identity), creating and adding a new candidate if
	 *  there is none.  Assumes m1 precedes m2.
	 */
	private static RelationMention recordCandidate (Mention m1, Mention m2) {
		Iterator it = candidates.iterator();
		while (it.hasNext()) {
			RelationMention existing = (RelationMention) it.next();
			if (existing.mention1 == m1 && existing.mention2 == m2)
				return existing;
		}
		RelationMention created = new RelationMention (m1, m2);
		candidates.add(created);
		return created;
	}

	/**
	 *  builds a pattern string for each candidate pair (skipping pairs
	 *  which involve a generic mention) and checks it against the key.
	 *  An empty subtype is written as "*".
	 */
	private static void processCandidates () {
		Iterator it = candidates.iterator();
		while (it.hasNext()) {
			RelationMention candidate = (RelationMention) it.next();
			Mention m1 = candidate.mention1;
			Mention m2 = candidate.mention2;
			if (m1.generic || m2.generic) continue;
			String subtype1 = m1.subtype;
			if (subtype1.equals("")) subtype1 = "*";
			String subtype2 = m2.subtype;
			if (subtype2.equals("")) subtype2 = "*";
			String left = m1.type + " " + subtype1 + " " + getHead(m1);
			String links = " [ " + candidate.syntacticLink + " : "
			               + concat(candidate.linearLink) + " ] ";
			String right = m2.type + " " + subtype2 + " " + getHead(m2);
			checkRelation (m1, "arg1-arg2 " + left + links + right, m2);
		}
	}
			
	/**
	 *  given two mentions, m1 and m2, connected by [syntactic or linear] relation
	 *  'pattern', determines whether they are connected by an ACE relation.
	 *  Each matching relation mention in the key is marked as analyzed and
	 *  a pattern ending in " --> type subtype" is recorded;  if no relation
	 *  matches, the pattern is recorded with " --> 0".
	 */
	
	private static void checkRelation (Mention m1, String pattern, Mention m2) {
		// if these mentions are part of the same entity, ignore
		if (m1.entityID.equals(m2.entityID)) return;
		System.out.println ("For " + doc.text(m1.headSpan) + " and " + doc.text(m2.headSpan));
		boolean found = false;
		for (int i=0; i<relMentionList.size(); i++) {
			RelationMention rel = (RelationMention) relMentionList.get(i);
			// key relation has its arguments in the same order as the candidate
			if (rel.mention1.equals(m1) && rel.mention2.equals(m2)) {
				recordPattern (pattern + " --> " + rel.relationType + " " + rel.relationSubtype);
				rel.setAnalyzed();
				found = true;
			// key relation has its arguments in the opposite order:
			// exchange the 9-character argument-order prefix of the pattern
			} else if (rel.mention1.equals(m2) && rel.mention2.equals(m1)) {
			    String prefix = pattern.substring(0,9);
			    // NOTE(review): 'pattern' itself is reassigned here, so the
			    // exchanged prefix is also seen by later iterations of this
			    // loop -- confirm that this is intended
			    if (prefix.equals("arg1-arg2"))
			    	pattern = "arg2-arg1" + pattern.substring(9);
			    else  // prefix is "arg2-arg1"
			    	pattern = "arg1-arg2" + pattern.substring(9);
				recordPattern (pattern + " --> " + rel.relationType + " " + rel.relationSubtype);
				rel.setAnalyzed();
				found = true;
			}
		}
		if (! found) recordPattern (pattern + " --> 0");
	}
	
	/**
	 *  look for ACE relations from the answer key which have not been processed
	 *  yet -- by syntactic or adjacency patterns.  For each one, a pattern
	 *  with syntactic link "0" and the linear pattern between the two mentions
	 *  is recorded.  The pattern begins "arg1-arg2" if the first argument
	 *  precedes the second in the text and "arg2-arg1" otherwise.
	 *  A relation whose two arguments compare as equal is reported and
	 *  skipped;  the remaining relations are still processed.
	 */
	 
	private static void processLeftovers () {
		for (int i=0; i<relMentionList.size(); i++) {
			RelationMention rel = (RelationMention) relMentionList.get(i);
			if (rel.analyzed) continue;
			Mention m1 = rel.mention1;
			Mention m2 = rel.mention2;
			String subtype1 = (m1.subtype.equals("")) ? "*" : m1.subtype;
			String subtype2 = (m2.subtype.equals("")) ? "*" : m2.subtype;
			int order = m1.compareTo(m2);
			if (order == 0) {
				System.out.println ("*** Relation with two identical arguments -- ignored.");
				// skip only this relation (a 'return' here would silently
				// drop every leftover relation which follows it)
				continue;
			}
			String pattern;
			if (order < 0) {
				pattern = "arg1-arg2 "
				          + m1.type + " " + subtype1 + " " + getHead(m1)
				          + " [ " + "0 : " + concat(patternBetweenMentions (m1, m2)) + " ] "
				          + m2.type + " " + subtype2 + " " + getHead(m2);
			} else {
				pattern = "arg2-arg1 "
				          + m2.type + " " + subtype2 + " " + getHead(m2)
				          + " [ " + "0 : " + concat(patternBetweenMentions (m2, m1)) + " ] "
				          + m1.type + " " + subtype1 + " " + getHead(m1);
			}
			System.out.println ("Leftover -- for " + doc.text(m1.headSpan) + " and " + doc.text(m2.headSpan));
			recordPattern (pattern + " --> " + rel.relationType + " " + rel.relationSubtype);
		}
	}
	
	/**
	 *  returns a linear pattern between Mentions m1 and m2 (or null if
	 *  no pattern can be built).  If the extent of one mention contains
	 *  the extent of the other, the pattern connects the head of the
	 *  containing mention with the full extent of the contained one;
	 *  otherwise it connects the full extents of both.
	 */
	 
	private static ArrayList patternBetweenMentions (Mention m1, Mention m2) {
		boolean m1ContainsM2 = m1.extent.start() <= m2.extent.start()
		                       && m1.extent.end() >= m2.extent.end();
		if (m1ContainsM2)
			return patternBetweenSpans (m1, m2, m1.headSpan, m2.extent);
		boolean m2ContainsM1 = m2.extent.start() <= m1.extent.start()
		                       && m2.extent.end() >= m1.extent.end();
		if (m2ContainsM1)
			return patternBetweenSpans (m1, m2, m1.extent, m2.headSpan);
		return patternBetweenSpans (m1, m2, m1.extent, m2.extent);
	}
	
	/**
	 *  returns the pattern covering the text from the end of span1 to the
	 *  start of span2.  If span1 does not start before span2, a message is
	 *  printed and null is returned.  (The mention arguments are not used.)
	 */
	private static ArrayList patternBetweenSpans 
	    (Mention mention1, Mention mention2, Span span1, Span span2) {
		if (span1.start() >= span2.start()) {
			System.out.println ("*** Unexpected span order.");
			System.out.println ("Span1 = " + doc.text(span1));
			System.out.println ("Span2 = " + doc.text(span2));
			return null;
		}
		return buildPattern(span1.end(), span2.start());
	}
	
	/**
	 *  returns a list of constituents spanning the document from
	 *  'start' to 'end'.  In choosing among constituents, prefer
	 *  the longest constituent at each starting point.  Among
	 *  constituents of the same length, prefer the one of highest
	 *  rank.  Each element of the list is a String:  the head of the
	 *  constituent (prefixed by the category for "adv", "timex" and "q"),
	 *  or the text of a token where there is no constituent.
	 *
	 *  @return  the pattern, or null if the span contains a "." (constituent
	 *           or token), if no constituent at some position fits within
	 *           'end', or if some position has neither constituent nor token
	 */
	
	private static ArrayList buildPattern (int start, int end) {
		int posn = Tokenizer.skipWS (doc, start, doc.length());
		ArrayList pattern = new ArrayList();
		while (posn < end) {
			Vector constits = doc.annotationsAt (posn, "constit");
			Annotation token;
			if (constits != null) {
				// find longest constit which does not go past 'end'
				// among constit's of same length, prefer one with highest rank
				Annotation best = null;
				int furthest = -1;
				int bestRank = -1;
				for (int i=0; i<constits.size(); i++) {
					Annotation constit = (Annotation) constits.get(i);
					String cat = (String) constit.get("cat");
					int constitEnd = constit.span().end();
					if ((constitEnd > furthest ||
					     (constitEnd == furthest && categoryRank(cat) > bestRank))
					    && constitEnd <= end) {
						furthest = constitEnd;
						best = constit;
						bestRank = categoryRank(cat);
					}
				}
				if (best == null) return null;
				String cat = (String) best.get("cat");
				// compare by value (not by reference), with the literal first
				// so that a constituent without a category is tolerated
				if (".".equals(cat)) return null;
				String c;
				if ("adv".equals(cat) || "timex".equals(cat) || "q".equals(cat)) { //<< added Sep. 21
					c =  cat + "(" + SynFun.getHead(doc, best) + ")";
				} else {
					c = SynFun.getHead(doc, best);
				}
				// noise tokens are dropped except when writing relations
				if (writingRelations || !RelationPattern.noiseToken(c)) { // << added Sep. 17
					pattern.add(c);
				}
				posn = furthest;
			} else if ((token = doc.tokenAt(posn)) != null) {
				String text = doc.text(token).trim();
				if (text.equals(".")) return null;
				pattern.add(text);
				posn = token.span().end();
			} else {
				System.out.println ("buildPattern:  no constits at position " + posn);
				return null;
			}
		}
		return pattern;
	}
	
	// maps a constituent category to its rank (an Integer);  categories
	// not in the table have rank 0
	static HashMap categoryRankTable = new HashMap();
	static {
		// categoriesByRank[r] lists the categories of rank r
		String[][] categoriesByRank = {
			{},
			{"name", "timex"},
			{"np", "np-pro", "vgroup", "vgroup-inf", "vgroup-pass",
			 "vgroup-ving", "vgroup-ven"},
			{"vp", "vp-inf", "vingo"},
			{"s"}
		};
		for (int rank = 1; rank < categoriesByRank.length; rank++) {
			String[] categories = categoriesByRank[rank];
			for (int i = 0; i < categories.length; i++) {
				categoryRankTable.put(categories[i], new Integer(rank));
			}
		}
	}

	/**
	 *  returns the rank of constituent category 'category', or 0 if the
	 *  category is not in the rank table.
	 */
	static int categoryRank (String category) {
		Integer rank = (Integer) categoryRankTable.get(category);
		return (rank == null) ? 0 : rank.intValue();
	}
	
	/**
	 *  returns the Strings on list 'strings', separated by single blanks;
	 *  returns null if 'strings' is null and "" if it is empty.
	 */
	static String concat (ArrayList strings) {
		if (strings == null) return null;
		Iterator it = strings.iterator();
		if (!it.hasNext()) return "";
		StringBuffer joined = new StringBuffer((String) it.next());
		while (it.hasNext()) {
			joined.append(" ");
			joined.append((String) it.next());
		}
		return joined.toString();
	}
	
	/**
	 *  returns the pa head associated with a mention, or the text if
	 *  there is no pa.  Only constituents starting at the start of the
	 *  mention's head, and of category n, pro, name, adj, ven, or det
	 *  (with feature tposs = t), are considered.  For a name which the
	 *  gazetteer lists as a country or a nationality, "country" or
	 *  "nationality" is returned instead.  When the text of the head is
	 *  returned, blanks and newlines within it are replaced by "-".
	 */
	
	static String getHead (Mention m) {
		Vector anns = doc.annotationsAt(m.headSpan.start(), "constit");
		if (anns != null) {
			for (int i=0; i<anns.size(); i++) {
				Annotation ann = (Annotation) anns.get(i);
				String cat = (String) ann.get("cat");
				// categories and feature values are compared by value, so
				// that the test does not depend on the strings being interned
				boolean isName = "name".equals(cat);
				boolean headCategory =
				    "n".equals(cat) || "pro".equals(cat) || isName ||
				    "adj".equals(cat) || "ven".equals(cat) ||
				    ("det".equals(cat) && "t".equals(ann.get("tposs")));
				if (!headCategory) continue;
				// added Sept. 17th
				if (isName) {
					String[] name = Resolve.getNameTokens(doc, ann);
					if (Ace.gazetteer.isCountry(name)) return "country";
					if (Ace.gazetteer.isNationality(name)) return "nationality";
				}
				FeatureSet pa = (FeatureSet) ann.get("pa");
				if (pa != null) {
					String head = (String) pa.get("head");
					if (head != null) return head;
				}
			}
		}
		return doc.text(m.headSpan).trim().replace(' ','-').replace('\n', '-');
	}
	
	// ---- CONJUNCTION FUNCTIONS ----
	
	/**
	 *  for each constituent of 'doc' with a "conj" feature, collects the
	 *  constituent and the chain of constituents reached by following the
	 *  "conj" features, and passes the list to recordConjunct.
	 */
	static void findConjuncts (ExternalDocument doc) {
		Vector constits = doc.annotationsOfType("constit");
		if (constits == null) return;
		for (int j = 0; j < constits.size(); j++) {
			Annotation first = (Annotation) constits.elementAt(j);
			Annotation next = (Annotation) first.get("conj");
			if (next == null) continue;
			ArrayList chain = new ArrayList();
			chain.add(first);
			for (; next != null; next = (Annotation) next.get("conj")) {
				chain.add(next);
			}
			recordConjunct (chain);
		}
	}
	
	/**
	 *  given a list of conjoined constituents, links the corresponding
	 *  mentions to one another (through fields conjunctf and conjunctb).
	 *  Nothing is recorded unless every constituent corresponds to a
	 *  mention and all the mentions are of the same type.
	 */
	static void recordConjunct (ArrayList conjuncts) {
		// System.out.println ("### recording conjuncts " + conjuncts);
		ArrayList mentions = new ArrayList();
		String commonType = "";
		for (int i=0; i<conjuncts.size(); i++) {
			Mention m = mentionForAnnotation((Annotation) conjuncts.get(i));
			if (m == null) return;
			mentions.add(m);
			if (i == 0)
				commonType = m.type;
			else if (!commonType.equals(m.type))
				return;
		}
		// link each mention to its successor
		for (int i=1; i<mentions.size(); i++) {
			Mention earlier = (Mention) mentions.get(i-1);
			Mention later = (Mention) mentions.get(i);
			earlier.conjunctf = later;
			later.conjunctb = earlier;
			System.out.println ("Found conjuncts " +
				doc.text(earlier.headSpan) + " and " + doc.text(later.headSpan));
		}
	}	
	
	/**
	 *  returns the mention whose head starts at the start of the head of
	 *  constituent 'a', or null if there is no such mention.
	 */
	static Mention mentionForAnnotation (Annotation a) {
		int headStart = Resolve.getHeadC (a).span().start();
		Object mention = mentionStartMap.get(new Integer(headStart));
		return (Mention) mention;
	}
	
	/**
	 *  returns a list consisting of mention 'm' followed, if
	 *  'expandConjuncts' is set, by the mentions reached through its
	 *  forward conjunct links and then those reached through its backward
	 *  conjunct links.
	 */
	static ArrayList getConjuncts (Mention m) {
		ArrayList result = new ArrayList();
		result.add(m);
		if (expandConjuncts) {
			for (Mention fwd = m.conjunctf; fwd != null; fwd = fwd.conjunctf)
				result.add(fwd);
			for (Mention back = m.conjunctb; back != null; back = back.conjunctb)
				result.add(back);
		}
		return result;
	}
	
	/**
	 *  records mentions m1 and m2 as a relation candidate and returns the
	 *  list of candidates recorded.  If the extents of the two mentions
	 *  are disjoint, a candidate is recorded for every pairing of a
	 *  conjunct of m1 with a conjunct of m2 -- unless m2 is itself among
	 *  the conjuncts of m1, in which case nothing is recorded and an empty
	 *  list is returned.  Otherwise only the pair (m1, m2) is recorded.
	 */
	static ArrayList recordCandidateWithConjuncts (Mention m1, Mention m2) {
		ArrayList recorded = new ArrayList();
		boolean disjoint = m1.extent.end() < m2.extent.start()
		                   || m2.extent.end() < m1.extent.start();
		if (!disjoint) {
			recorded.add(recordCandidate(m1, m2));
			return recorded;
		}
		ArrayList conjuncts1 = getConjuncts (m1);
		ArrayList conjuncts2 = getConjuncts (m2);
		if (conjuncts1.contains(m2))
			return recorded;
		for (int i=0; i<conjuncts1.size(); i++) {
			Mention c1 = (Mention) conjuncts1.get(i);
			for (int j=0; j<conjuncts2.size(); j++) {
				Mention c2 = (Mention) conjuncts2.get(j);
				recorded.add(recordCandidate(c1, c2));
			}
		}
		return recorded;
	}
	
	/**
	 *  sets the syntactic link of every RelationMention on list 'r'
	 *  to 'link'.
	 */
	static void recordSyntacticLink (ArrayList r, String link) {
		Iterator it = r.iterator();
		while (it.hasNext()) {
			((RelationMention) it.next()).syntacticLink = link;
		}
	}
	
	/**
	 *  for every RelationMention on list 'r', replaces its linear link by
	 *  'link' if its current link has more elements than 'link', or
	 *  is absent (i.e., its first element is "0").
	 */
	static void recordLinearLink (ArrayList r, ArrayList link) {
		Iterator it = r.iterator();
		while (it.hasNext()) {
			RelationMention rm = (RelationMention) it.next();
			boolean replace;
			if (rm.linearLink.size() > link.size()) {
				// current link is longer
				replace = true;
			} else {
				// current link is absent
				replace = rm.linearLink.size() > 0 && rm.linearLink.get(0).equals("0");
			}
			if (replace) {
				rm.linearLink = link;
				// System.out.println ("+++ Setting linear link to " + link);
				// System.out.println ("    in " + rm);
			}
		}
	}
	
	/**
	 *  reads the APF file 'apfFile' (for the text in 'textFile') and
	 *  extracts its entity mentions and then its relation mentions.
	 */
	 
	private static void readACErelations (String textFile, String apfFile) {
		final AceDocument key = new AceDocument(textFile, apfFile);
		// entity mentions first:  relation mentions refer to them by ID
		findEntityMentions (key);
		findRelationMentions (key);
	}
	
	// maps an abbreviated entity type to the type name used for Mentions
	static HashMap standardType = new HashMap();
	static {
		String[][] typePairs = {
			{"GSP", "GPE"},
			{"PER", "PERSON"},
			{"ORG", "ORGANIZATION"},
			{"LOC", "LOCATION"},
			{"FAC", "FACILITY"}
		};
		for (int i = 0; i < typePairs.length; i++) {
			standardType.put(typePairs[i][0], typePairs[i][1]);
		}
	}
	     
	/**
	 *  traverses the entities of APF document 'aceDoc' and, after clearing
	 *  the mention tables, adds a Mention for every entity mention
	 *  (see addMention for the tables which are filled).  Entity types
	 *  which appear in table 'standardType' are replaced by their
	 *  standard names.
	 */
	
	static void findEntityMentions (AceDocument aceDoc) {
		resetMentions ();
		Iterator entityIt = aceDoc.entities.iterator();
		while (entityIt.hasNext()) {
			AceEntity entity = (AceEntity) entityIt.next();
			String subtype = entity.subtype;
			String type = entity.type;
			if (standardType.containsKey(type)) {
				type = (String) standardType.get(type);
			}
			Iterator mentionIt = entity.mentions.iterator();
			while (mentionIt.hasNext()) {
				AceEntityMention mention = (AceEntityMention) mentionIt.next();
				Span extent = aceSpanToJetSpan (mention.extent);
				Span head = aceSpanToJetSpan (mention.head);
				addMention (type, subtype, extent, head, mention.id, entity.id, entity.generic);
			}
		}
	}
	
	/**
	 *  convert a Span as used within an AceDocument (where 'end' is the position of the last
	 *  character of the sequence) to a Span as used within Jet (where 'end' is one past the
	 *  last whitespace following the sequence).  The start of the span is unchanged.
	 */
	 
	static Span aceSpanToJetSpan (Span aceSpan) {
		// step past the last character, then past any whitespace which follows
		int firstAfter = aceSpan.end() + 1;
		int jetEnd = Tokenizer.skipWS (doc, firstAfter, doc.length());
		return new Span (aceSpan.start(), jetEnd);
	}		
	
	/**
	 *  replaces the four mention tables (both mention sets and both
	 *  mention maps) by new, empty tables.
	 */
	static void resetMentions () {
		// sets of mentions, ordered by Mention.compareTo
		allMentionSet = new TreeSet();
		mentionSet = new TreeSet();
		// look-up tables
		mentionIDMap = new HashMap();
		mentionStartMap = new HashMap();
	}
		
	/**
	 *  creates a Mention from the arguments and enters it in the mention
	 *  tables:  in allMentionSet, in mentionSet (unless it is generic),
	 *  in mentionStartMap (keyed by the start of its head) and in
	 *  mentionIDMap (keyed by its ID).
	 */
	static void addMention (String type, String subtype, Span span, Span spanHead, String id, 
			String entityID, boolean generic) {
		Mention created = new Mention (type, subtype, span, spanHead, id, entityID);
		created.generic = generic;
		// look-up tables
		mentionIDMap.put(id, created);
		mentionStartMap.put(new Integer(spanHead.start()), created);
		// ordered sets
		allMentionSet.add(created);
		if (!generic) {
			mentionSet.add(created);
		}
	}
	
	/**
	 *  traverses APF document 'aceDoc' and builds ArrayList relMentionList,
	 *  with one entry for each EXPLICIT relation mention recorded in the
	 *  APF document.  For the present, IMPLICIT relations are ignored.
	 *  <p>
	 *  A relation mention with an argument which is not a known entity
	 *  mention is reported and left out of the list:  the procedures
	 *  which consume relMentionList dereference both arguments, so an
	 *  entry with a missing argument would cause a NullPointerException.
	 */
	
	private static void findRelationMentions (AceDocument aceDoc) {
		relMentionList = new ArrayList();
		ArrayList aceRelations = aceDoc.relations;
		for (int i=0; i<aceRelations.size(); i++) {
			AceRelation relation = (AceRelation) aceRelations.get(i);
			String type = relation.type;
			String subtype = relation.subtype;
			// literal first, so that a missing class is treated as not IMPLICIT
			if ("IMPLICIT".equals(relation.relClass)) continue;
			ArrayList relationMentions = relation.mentions;
			for (int j=0; j<relationMentions.size(); j++) {
				AceRelationMention relationMention = (AceRelationMention) relationMentions.get(j);
				Mention m1 = (Mention) mentionIDMap.get(relationMention.arg1.id);
				if (m1 == null) {
					System.out.println ("Undefined mention " + relationMention.arg1.id);
				}
				Mention m2 = (Mention) mentionIDMap.get(relationMention.arg2.id);
				if (m2 == null) {
					System.out.println ("Undefined mention " + relationMention.arg2.id);
				}
				// only complete relation mentions are recorded
				if (m1 == null || m2 == null) continue;
				// create new RelationMention with type & subtype
				RelationMention acerel = new RelationMention (type, subtype);
				acerel.setArg(1, m1);
				acerel.setArg(2, m2);
				relMentionList.add(acerel);
			}
		}
	}
	
	/**
	 *  prints 'pattern' on standard output, increments its count in
	 *  'patternSet', and writes it to the pattern file.
	 */
	private static void recordPattern (String pattern) {
		System.out.println (">> " + pattern);
		Integer previous = (Integer) patternSet.get(pattern);
		int updated = 1;
		if (previous != null) {
			updated = previous.intValue() + 1;
		}
		patternSet.put(pattern, new Integer(updated));
		writer.println(pattern);
	}
	
	/**
	 *  prints, in the order of the keys of 'patternSet', each pattern
	 *  which was recorded more than once, preceded by its count.
	 */
	private static void reportPatterns () {
		for (Iterator it = patternSet.entrySet().iterator(); it.hasNext(); ) {
			Map.Entry entry = (Map.Entry) it.next();
			String pattern = (String) entry.getKey();
			int count = ((Integer) entry.getValue()).intValue();
			if (count <= 1) continue;
			System.out.println (count + "X: " + pattern);
		}
	}
	
	/**
	 *  given a set of candidate RelationMentions (entity mentions which 
	 *  are adjacent in the sentence or co-occur in some syntactic relation), 
	 *  uses models to identify actual RelationMentions.
	 *  <p>
	 *  Rebuilds 'relMentionList' from list 'candidates'.  Candidates which
	 *  involve a generic mention are skipped.  In the code as it stands,
	 *  the only active source of predictions is the match returned by
	 *  Ace.eve.findMatch;  the other pattern sets and the statistical
	 *  models are commented out.  A predicted type ending in "-1" causes
	 *  the arguments of the relation mention to be swapped.
	 */
	 
	private static void predictRelations () {
		relMentionList = new ArrayList();
		/*
		GISModel m = null;
		GISModel[] msub = new GISModel[5];
		try {
		    m  = new SuffixSensitiveGISModelReader(new File(BuildRelationModel.typeModelFile)).getModel();
		    for (int i=0; i<5; i++) {
		    	msub[i] = new SuffixSensitiveGISModelReader
		        	(new File(BuildRelationModel.subtypeModelFile[i])).getModel();
		    }
		    System.out.println ("GIS models loaded.");
		} catch (Exception e) {
		    e.printStackTrace();
		    System.exit(0);
		}
		*/
		for (int i=0; i<candidates.size(); i++) {
			RelationMention rm = (RelationMention) candidates.get(i);
			System.out.println ("For relation mention " + i + " = " + rm);
			if (rm.mention1.generic || rm.mention2.generic) {
				System.out.println (" mention is generic -- suppressed");
				continue;
			}
			// match1 and match3 are disabled (always null), so only
			// match2 can supply a prediction
			RelationPattern match1 = null; // Ace.adam.findMatch(rm, 5);
			RelationPattern match2 = Ace.eve.findMatch(rm, 21);
			RelationPattern match3 = null; // Ace.general.findMatch(rm,5);
			// "0" indicates that no relation is predicted
			String predictedType = "0", predictedSubtype = "";
			/* tried Aug 10 04 - poor results - DISC not consistent
			Integer start1 = new Integer (rm.mention1.extent.start());
			Integer start2 = new Integer (rm.mention2.extent.start());
			System.out.println ("### Spans start at " + start1 + " and " + start2);
			if (start2.equals(Ace.partitiveMap.get(start1))) {
				predictedType = "DISC";
				predictedSubtype = "";
				System.out.println ("### partitive (DISC) relation");
			} else
			*/
			// take the prediction from the first non-null match
			if (match1 != null) {
				predictedType = match1.relationType;
				predictedSubtype = match1.relationSubtype;
				System.out.println ("Best Adam pattern = " + match1.string); // <<<<<
			} else if (match2 != null) {
				System.out.println ("Best corpus pattern = " + match2.string); // <<<<<
				predictedType = match2.relationType;
				predictedSubtype = match2.relationSubtype;
			} else if (match3 != null) {
				System.out.println ("Best general pattern = " + match3.string); // <<<<<
				predictedType = match3.relationType;
				predictedSubtype = match3.relationSubtype;
			} /* else {
				predictedType  = m.getBestOutcome(m.eval(buildPredictFeatures(rm)));
				if (!predictedType.equals("0")) {
					int itype = BuildRelationModel.typeToIndex(predictedType);
					// System.out.println ("+++ type = " + predictedType + " itype= " + itype);
					predictedSubtype  = msub[itype].getBestOutcome(msub[itype].eval(buildPredictFeatures(rm)));
				}
			} */
			/*
			   else {
			   	int j = BuildRelationModel.mostLikelySubtype(rm);
				if (j < 0 || j == BuildRelationModel.NO_RELATION)
					predictedType = "0";
				else {
					predictedType = BuildRelationModel.typeSubtype[j].substring(0,4).trim();
					predictedSubtype = BuildRelationModel.typeSubtype[j].substring(5);
				}
			}
			*/
			if (!predictedType.equals("0")) {
				// a type ending in "-1" is the inverse relation:  swap the
				// arguments and strip the suffix
				if (predictedType.endsWith("-1")) {
					rm.swapArgs();
					predictedType = predictedType.substring(0,predictedType.length()-2);
				}
				rm.relationType = predictedType;
				rm.relationSubtype = predictedSubtype;
				relMentionList.add(rm);
				// ID is the (1-based) position of the mention on relMentionList
				rm.id = relMentionList.size() + "";
			}
			System.out.println ("     Predicting ACE relation " + predictedType);
		}
	}
	
	/**
	 *  returns the seven features describing relation mention 'rm':
	 *  the types and heads of its two mentions, its syntactic link,
	 *  the number of elements of its linear link, and the last element
	 *  of its linear link (a single blank if the linear link is empty).
	 */
	private static String[] buildPredictFeatures (RelationMention rm) {
		return new String[] {
			"mType1=" + rm.mention1.type,
			"mType2=" + rm.mention2.type,
			"mHead1=" + getHead(rm.mention1),
			"mHead2=" + getHead(rm.mention2),
			"synLink=" + rm.syntacticLink,
			"nConstit=" + rm.linearLink.size(),
			(rm.linearLink.size() > 0)
				? "lastConstit=" + rm.linearLink.get(rm.linearLink.size()-1)
				: " "
			// "firstTwo=" and "lastTwo=" features are not currently generated
		};
	}
	
	/**
	 *  takes the set of RelationMentions (in relMentionList) and combines them
	 *  into aceRelations (on relationList).  Relation mentions whose
	 *  arguments belong to the same pair of entities (in the same order)
	 *  are made mentions of a single relation, which takes its type and
	 *  subtype from the first such mention.  Each new relation is also
	 *  added to 'aceDoc'.
	 */
	 
	private static void relationCoref (AceDocument aceDoc) {
		relationList = new ArrayList();
		System.out.println ("RelationCoref: " + relMentionList.size() + " relation mentions"); 
		for (int i=0; i<relMentionList.size(); i++) {
			RelationMention rm = (RelationMention) relMentionList.get(i);
			String eid1 = rm.mention1.entityID;
			String eid2 = rm.mention2.entityID;
			// look for an existing relation between the same two entities
			AceRelation existing = null;
			for (int j=0; j<relationList.size() && existing == null; j++) {
				AceRelation r = (AceRelation) relationList.get(j);
				if (sameId(eid1, r.arg1.id) && sameId(eid2, r.arg2.id)) {
					existing = r;
				}
			}
			if (existing != null) {
				existing.addMention(rm.toAce(aceDoc));
				continue;
			}
			String relID = docName + "-R" + (relationList.size() + 1);
			AceRelation newr = new AceRelation (relID, rm.relationType, rm.relationSubtype, "EXPLICIT",
			  aceDoc.findEntity(eid1), aceDoc.findEntity(eid2));
			newr.addMention(rm.toAce(aceDoc));
			relationList.add(newr);
			aceDoc.addRelation(newr);
		}
		System.out.println ("RelationCoref: " + relationList.size() + " relations");
	}
	
	/**
	 *  returns true if the two IDs are the same object (including both
	 *  null) or have the same value.
	 */
	private static boolean sameId (String id1, String id2) {
		return id1 == id2 || (id1 != null && id1.equals(id2));
	}
	
}

/**
 *  a mention of an entity:  its type and subtype, the spans of its
 *  full extent and of its head, and the IDs of the mention and of
 *  the entity to which it belongs.  Mentions are compared and ordered
 *  by the span of their head only.
 */

class Mention implements Comparable {
	String type;		    // EDT type
	String subtype;     // EDT subtype
	Span extent;        // span of the entire mention
	Span headSpan;      // span of the head of the mention
	String id;          // mention id
	String entityID;    // id of the entity of which this is a mention
	boolean generic;    // true if the mention is generic
	// next and previous mention in a conjunction (null if none)
	Mention conjunctf, conjunctb;
	
	Mention (String t, String st, Span e, Span h, String mentionId, String eid) {
		type = t;
		subtype = st;
		extent = e;
		headSpan = h;
		id = mentionId;
		entityID = eid;
		conjunctf = null;
		conjunctb = null;
	}
	
	/**
	 *  two Mentions are equal if the spans of their heads are equal.
	 */
	public boolean equals (Object o) {
		return (o instanceof Mention) && (((Mention)o).headSpan).equals(headSpan);
	}
	
	/**
	 *  returns a hash code computed from the start and end of the head
	 *  span, so that Mentions with equal head spans have equal hash codes
	 *  (as required of any class which overrides equals).
	 */
	public int hashCode () {
		return 31 * headSpan.start() + headSpan.end();
	}
	
	/**
	 *  orders Mentions by the span of their head.
	 *
	 *  @throws ClassCastException  if 'o' is not a Mention
	 */
	public int compareTo (Object o) {
		if (!(o instanceof Mention)) throw new ClassCastException();
		return headSpan.compareTo(((Mention)o).headSpan);
	}	
}

/**
 *  an instance of a (possible) relation between two mentions, obtained
 *  either from the APF file or from the document.  A new instance has
 *  syntactic link "0" and a linear link consisting of the single
 *  element "0".
 */	

class RelationMention extends RelationInstance {
		
	String id;
	// the two arguments of the relation
	Mention mention1, mention2;
	// true if relation has been paired with candidate (for key relations)
	boolean analyzed = false;
	
	/**
	 *  creates a relation mention of the given type and subtype, whose
	 *  arguments are to be supplied later (through setArg).
	 */
	RelationMention (String t, String s) {
		relationType = t;
		relationSubtype = s;
		clearLinks();
	}
	
	/**
	 *  creates a relation mention with arguments m1 and m2.
	 */
	RelationMention (Mention m1, Mention m2) {
		mention1 = m1;
		mention2 = m2;
		clearLinks();
	}
	
	// marks both the syntactic and the linear link as absent ("0")
	private void clearLinks () {
		syntacticLink = "0";
		linearLink = new ArrayList();
		linearLink.add("0");
	}
	
	/**
	 *  sets argument 'argNum' (1 or 2) of the relation to mention 'm';
	 *  any other argument number is reported and otherwise ignored.
	 */
	void setArg (int argNum, Mention m) {
		switch (argNum) {
			case 1:
				mention1 = m;
				break;
			case 2:
				mention2 = m;
				break;
			default:
				System.out.println ("Invalid argument " + argNum + " to setArg");
		}
	}
	
	void setAnalyzed () {
		analyzed = true;
	}
	
	/**
	 *  exchanges the two arguments of the relation.
	 */
	void swapArgs () {
		Mention formerFirst = mention1;
		mention1 = mention2;
		mention2 = formerFirst;
	}
	
	String getType1 () {
		return mention1.type;
	}
	
	String getType2 () {
		return mention2.type;
	}
	
	/**
	 *  returns the type, subtype and head of the first mention, the
	 *  two links (in brackets), and the type, subtype and head of the
	 *  second mention.
	 */
	public String toString() {
		String first = describe(mention1);
		String links = " [ " + syntacticLink + " : " + LearnRelations.concat(linearLink) + " ] ";
		return first + links + describe(mention2);
	}
	
	// type, subtype and head of a mention, separated by blanks
	private static String describe (Mention m) {
		return m.type + " " + m.subtype + " " + LearnRelations.getHead(m);
	}
	
	/**
	 *  returns an AceRelationMention with the ID of this relation mention,
	 *  whose arguments are the entity mentions of 'ad' with the IDs of
	 *  the two mentions.
	 */
	AceRelationMention toAce (AceDocument ad) {
		AceEntityMention arg1 = ad.findEntityMention(mention1.id);
		AceEntityMention arg2 = ad.findEntityMention(mention2.id);
		return new AceRelationMention (id, arg1, arg2);
	}
}
