package AceJet;

import java.util.*;
import java.io.*;

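/**
 *  Analyzes a set of ACE APF files for coreference relations
 *  between nominals:  for each nominal mention of an entity, counts
 *  whether its head is identical to, a WordNet synonym of, or different
 *  from the heads of the earlier nominal mentions of the same entity.
 */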

public class APFNomAnalyzer {

	/** directory holding the document lists, the source texts, and the APF files. */
	static final String ACEdir =
	    "C:/Documents and Settings/Ralph Grishman/My Documents/ACE/";

	/**
	 *  file listing the documents to analyze, one document name (without
	 *  extension) per line.  The commented-out lines are alternative lists.
	 */
	static final String fileList =
		// ACEdir + "training all.txt";
		// ACEdir + "feb02 all.txt";
		// ACEdir + "sep02 all.txt";
		// ACEdir + "aug03 all.txt";
		// ACEdir + "files-to-process.txt";
		ACEdir + "training nwire.txt";

	/** number of nominal mentions whose head matches the head of a prior nominal mention. */
	static int identityCount = 0;
	/** number of nominal mentions for which WordNetInterface.containsSynonym accepted the prior heads. */
	static int synonymCount = 0;
	/** number of nominal mentions whose head is neither identical nor synonymous to a prior head. */
	static int differentCount = 0;

	/**
	 *  reads the list of documents in 'fileList', analyzes each document,
	 *  and then prints the accumulated counts.
	 *
	 *  @param  args  ignored
	 *  @throws Exception  if the file list cannot be read, or if document
	 *                     analysis fails
	 */
	public static void main (String [] args) throws Exception  {
		// initialize WordNet
		WordNetInterface.initialize();

		// the APF suffix depends only on the name of the file list (lists whose
		// name contains "03" use the newer naming), so determine it once
		boolean newData = fileList.indexOf("03") > 0;
		String apfSuffix = newData ? ".apf.xml" : ".sgm.tmx.rdc.xml";

		// open list of files
		BufferedReader reader = new BufferedReader (new FileReader(fileList));
		try {
			int docCount = 0;
			String currentDoc;
			while ((currentDoc = reader.readLine()) != null) {
				// a blank line names no document;  skip it rather than
				// building file names from an empty document name
				if (currentDoc.trim().length() == 0) continue;
				// process file 'currentDoc'
				docCount++;
				System.out.println ("\nProcessing document " + docCount + ": " + currentDoc);
				String textFileName = ACEdir + currentDoc + ".sgm";
				String APFfileName = ACEdir + currentDoc + apfSuffix;
				analyzeDocument (textFileName, APFfileName);
			}
		} finally {
			// release the file handle even if analysis of a document fails
			reader.close();
		}
		report();
	}

	/**
	 *  loads one document (source text plus APF annotation) and analyzes
	 *  the mentions of its entities.
	 *
	 *  @param textFileName  path of the document's source text file
	 *  @param APFfileName   path of the document's APF file
	 */
	private static void analyzeDocument (String textFileName, String APFfileName) {
		AceDocument aceDoc = new AceDocument (textFileName, APFfileName);
		findEntityMentions (aceDoc);
	}

	/**
	 *  retrieve the mentions for each non-generic entity of 'aceDoc' and
	 *  call 'analyzeMentions' for them.
	 *
	 *  @param aceDoc  the document whose entities are to be analyzed
	 */

	static void findEntityMentions (AceDocument aceDoc) {
		ArrayList entities = aceDoc.entities;
		for (int i=0; i<entities.size(); i++) {
			AceEntity entity = (AceEntity) entities.get(i);
			// entities flagged as generic are excluded from the counts
			if (entity.generic) continue;
			analyzeMentions(entity.type, entity.mentions);
		}
	}

	/**
	 *  classifies each nominal mention (after the first) of one entity by
	 *  comparing its lower-cased head with the heads of the entity's earlier
	 *  nominal mentions, and updates the corresponding counter.  Mentions
	 *  whose heads differ from all prior heads are also printed.
	 *
	 *  @param entityType  the type of the entity (currently unused)
	 *  @param mentions    the AceEntityMentions of the entity, examined in
	 *                     list order
	 */
	private static void analyzeMentions (String entityType, ArrayList mentions) {
		HashSet priorHeads = new HashSet();
		for (int imention = 0; imention < mentions.size(); imention++) {
			AceEntityMention mention = (AceEntityMention) mentions.get(imention);
			if (!mention.type.equals("NOMINAL")) continue;
			// fold case with a fixed locale so that results do not depend
			// on the default locale of the machine (e.g. Turkish dotless i)
			String head = mention.headText.toLowerCase(Locale.ENGLISH);
			if (priorHeads.isEmpty()) {
				// first nominal mention -- ignore
			} else if (priorHeads.contains(head)) {
				identityCount++;
			} else if (WordNetInterface.containsSynonym(priorHeads, head)) {
				// presumably true if some prior head is a WordNet synonym
				// of 'head' -- see WordNetInterface
				synonymCount++;
			} else {
				System.out.println ("different heads: " + head + " and " + priorHeads);
				differentCount++;
			}
			priorHeads.add(head);
		}
	}

	/**
	 *  prints the three accumulated counts to standard output.
	 */
	private static void report () {
		System.out.println (identityCount + " same head pairs");
		System.out.println (synonymCount + " synonymous head pairs");
		System.out.println (differentCount + " different head pairs");
	}

}