package AceJet;

import java.util.*;
import java.io.*;

import Jet.Tipster.Span;

import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.parsers.*;

/**
 *  an Ace Entity Mention, with information from the APF ACE key.
 *  <p>
 *  The spans stored in this class use an inclusive end offset:  the text
 *  covered by a span is <code>fileText.substring(start, end+1)</code>.
 */

public class AceEntityMention {

	/**
	 *  the ID of the mention.
	 */
	String id;
	/**
	 *  the type of the mention:  NAME, NOMINAL, or PRONOUN.
	 */
	String type;
	/**
	 *  the extent of the mention, with start and end positions based on
	 *  ACE offsets (excluding XML tags).
	 */
	Span extent;
	/**
	 *  the text of the extent.
	 */
	String text;
	/**
	 *  the span of the head of the mention, with start and end positions based
	 *  on the ACE offsets (excluding XML tags).
	 */
	public Span head;
	/**
	 *  the text of the head.
	 */
	String headText;
	/**
	 *  for entities of type GPE, the role of the mention (ORG, LOC, GPE, or PER).
	 *  The span-based constructor leaves this field null;  the APF-based
	 *  constructor sets it from the ROLE attribute of the mention element.
	 */
	String role;

	/**
	 *  create an AceEntityMention from Jet spans.  Both spans are passed
	 *  through {@link #convertSpan} before being stored, and the mention and
	 *  head texts are cut out of <CODE>fileText</CODE>.  The role is not set
	 *  by this constructor.
	 *  @param id        the ID of the mention
	 *  @param type      the type of the mention
	 *  @param extent    the Jet span of the full mention
	 *  @param head      the Jet span of the head of the mention
	 *  @param fileText  the text of the document to which the span offsets refer
	 */

	public AceEntityMention (String id, String type, Span extent, Span head,
	    String fileText) {
		this.id = id;
		this.type = type;
		this.extent = convertSpan(extent, fileText);
		this.head = convertSpan(head, fileText);
		// stored spans have an inclusive end, hence the +1
		text = fileText.substring(this.extent.start(), this.extent.end()+1);
		headText = fileText.substring(this.head.start(), this.head.end()+1);
	}

	/**
	 *  create an AceEntityMention from the information in the APF file.
	 *  @param mentionElement the XML element from the APF file containing
	 *                       information about this mention
	 *  @param fileText      the text of the document, including XML tags
	 *  @throws NullPointerException if the mention element has no
	 *                       <CODE>extent</CODE> or <CODE>head</CODE> element
	 */

	public AceEntityMention (Element mentionElement, String fileText) {
		id = mentionElement.getAttribute("ID");
		type = mentionElement.getAttribute("TYPE");
		role = mentionElement.getAttribute("ROLE");
		// defensive:  guarantees a non-null role even if the DOM
		// implementation returns null for a missing attribute
		if (role == null) {
			role = "";
		}
		String context = "entity mention " + id;
		Element extentElement = requireFirst(mentionElement, "extent", context);
		extent = decodeCharseq(extentElement);
		text = fileText.substring(extent.start(), extent.end()+1);
		Element headElement = requireFirst(mentionElement, "head", context);
		head = decodeCharseq(headElement);
		headText = fileText.substring(head.start(), head.end()+1);
	}

	/**
	 *  returns the first descendant of <CODE>parent</CODE> with tag
	 *  <CODE>tagName</CODE>.
	 *  @param parent   the element to search
	 *  @param tagName  the tag of the element being sought
	 *  @param context  a description of the caller, used in the error message
	 *  @throws NullPointerException (with a descriptive message) if there
	 *                  is no such descendant
	 */

	private static Element requireFirst (Element parent, String tagName,
	    String context) {
		NodeList matches = parent.getElementsByTagName(tagName);
		Element first = (Element) matches.item(0);
		if (first == null) {
			// same exception type as the unchecked dereference it replaces,
			// but with a message identifying what is missing
			throw new NullPointerException
				(context + ":  missing <" + tagName + "> element");
		}
		return first;
	}

	/**
	 *  converts a Jet span to the span convention used by this class.
	 *  The end of the result is one less than the end of
	 *  <CODE>jetSpan</CODE>, and is then moved further left past any
	 *  whitespace characters (but never to the left of the start).
	 *  @param jetSpan   the span to be converted
	 *  @param fileText  the text to which the span offsets refer
	 *  @return          a new span with the same start and the adjusted end
	 */

	static Span convertSpan (Span jetSpan, String fileText) {
		int start = jetSpan.start();
		int end = jetSpan.end() - 1;
		while (end > start && Character.isWhitespace(fileText.charAt(end))) {
			end--;
		}
		return new Span(start, end);
	}

	/**
	 *  writes the mention to <CODE>w</CODE> as an APF
	 *  <CODE>entity_mention</CODE> element.  If
	 *  <CODE>AceDocument.ace2004</CODE> is set, the type is abbreviated to
	 *  its first three characters;  a type of three or fewer characters is
	 *  written unchanged.  The ROLE attribute is written only if the role is
	 *  non-null and non-empty.
	 *  @param w  the writer receiving the APF output
	 */

	void write (PrintWriter w) {
		String apfType = type;
		// guard on the length so that a short (e.g., empty) type does not
		// cause substring to throw StringIndexOutOfBoundsException
		if (AceDocument.ace2004 && type.length() > 3) {
			apfType = type.substring(0,3);
		}
		w.print   ("    <entity_mention TYPE=\"" + apfType + "\" ID=\"" + id + "\"");
		if (role != null && !role.equals("")) {
			w.print (" ROLE=\"" + role + "\"");
		}
		w.println (">");
		w.println ("      <extent>");
		writeCharseq (w, extent, text);
		w.println ("      </extent>");
		w.println ("      <head>");
		writeCharseq (w, head, headText);
		w.println ("      </head>");
		w.println ("    </entity_mention>");
	}

	/**
	 *  reads a span from an APF element.  If
	 *  <CODE>AceDocument.ace2004</CODE> is set, the offsets are taken from
	 *  the START and END attributes of the first <CODE>charseq</CODE>
	 *  element within <CODE>e</CODE>;  otherwise they are obtained from
	 *  <CODE>AceDocument.getElementText</CODE> for "start" and "end".
	 *  @param e  the element containing the character sequence
	 *  @return   a span with the decoded start and end offsets
	 *  @throws NullPointerException  if <CODE>e</CODE> is null or (in ace2004
	 *                                mode) contains no <CODE>charseq</CODE>
	 *  @throws NumberFormatException if an offset is not a valid integer
	 */

	static Span decodeCharseq (Element e) {
		if (e == null) {
			throw new NullPointerException
				("decodeCharseq:  element containing charseq is null");
		}
		String startS, endS;
		if (AceDocument.ace2004) {
			Element charseq = requireFirst(e, "charseq", "decodeCharseq");
			startS = charseq.getAttribute("START");
			endS = charseq.getAttribute("END");
		} else {
			startS = AceDocument.getElementText (e, "start");
			endS = AceDocument.getElementText (e, "end");
		}
		int start = Integer.parseInt(startS);
		int end = Integer.parseInt(endS);
		return new Span (start, end);
	}

	/**
	 *  writes a <CODE>charseq</CODE> element for span <CODE>s</CODE>.  If
	 *  <CODE>AceDocument.ace2004</CODE> is set, the offsets are written as
	 *  START and END attributes and <CODE>txt</CODE> as the element content;
	 *  otherwise the offsets are written as <CODE>start</CODE> and
	 *  <CODE>end</CODE> elements and <CODE>txt</CODE> inside an XML comment.
	 *  @param w    the writer receiving the APF output
	 *  @param s    the span whose offsets are written
	 *  @param txt  the text covered by the span
	 */

	static void writeCharseq (PrintWriter w, Span s, String txt) {
		// NOTE(review):  txt is written verbatim, with no XML escaping --
		// presumably the document text already carries entity references
		// such as &amp;  confirm before adding escaping here.
		if (AceDocument.ace2004) {
			w.print   ("        <charseq START=\"" + s.start() + "\"" +
			                             " END=\"" + s.end() + "\">");
			w.print   (txt);
			w.println ("</charseq>");
		} else {
			w.println ("        <charseq>");
			w.println ("          <start>" + s.start() + "</start>");
			w.println ("          <!-- string = \"" + txt + "\" -->");
			w.println ("          <end>" + s.end() + "</end>");
			w.println ("        </charseq>");
		}
	}

}