package AceJet;

import java.util.*;
import java.io.*;

import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.parsers.*;

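/**
 *  an ACE key document:  the text of a source document together with
 *  the entities and relations recorded for it in its APF file.
 */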

public class AceDocument {

	/**
	 *  true for new (2004) APF format
	 */

	public static boolean ace2004 = true;

	/**
	 *  the XML parser used to read APF files;  created by the first call on
	 *  the two-argument constructor and shared by all later ones.
	 */
	private static DocumentBuilder builder = null;

	/**
	 *  the text of the document with XML tags removed (see eraseXML), or
	 *  the text passed directly to the four-argument constructor.
	 */
	private String fileText;

	/**
	 *  the full contents of the source file, tags included.  Only set by
	 *  the two-argument constructor;  otherwise remains null.
	 */
	private StringBuffer fileTextWithXML;

	/**
	 *  the name of the source file
	 */

	public String sourceFile;

	/**
	 *  the type of source:  newswire or bnews
	 */

	public String sourceType;

	/**
	 *  the document ID
	 */

	public String docID;
	/**
	 *  a list of the entities in the document (elements are AceEntity)
	 */
	public ArrayList entities = new ArrayList();
	/**
	 *  a list of the relations in the document (elements are AceRelation)
	 */
	public ArrayList relations = new ArrayList();

	/**
	 *  character encoding used to read source documents
	 */
	private static final String encoding = "ISO-8859-1";  // default:  ISO-LATIN-1

	/**
	 *  create a new AceDocument with the given source file name, source type,
	 *  document ID, and text, and with no entities or relations.
	 *  <p>
	 *  NOTE:  this constructor does not set the XML form of the text, so
	 *  calling JetDocument() on the result throws a NullPointerException.
	 */

	public AceDocument (String sourceFile, String sourceType, String docID, String docText) {
		this.sourceFile = sourceFile;
		this.sourceType = sourceType;
		this.docID = docID;
		this.fileText = docText;
	}

	/**
	 *  create a new AceDocument from the source document in 'textFileName'
	 *  and the APF file 'APFfileName'.
	 *  <p>
	 *  Failures are reported on standard output rather than thrown;  after
	 *  a failure the document is left with whatever fields had been set
	 *  before the error occurred.
	 */

	public AceDocument (String textFileName, String APFfileName) {
		try {
			// initialize APF reader (once, on first use)
			if (builder == null) {
				DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
				factory.setValidating(false);
				builder = factory.newDocumentBuilder();
			}
			analyzeDocument (textFileName, APFfileName);
		} catch (ParserConfigurationException e) {
			System.out.println ("AceDocument:  Exception in initializing APF reader: " + e);
		} catch (SAXException e) {
			// raised by builder.parse, i.e., the APF file is not well-formed XML
			System.out.println ("AceDocument:  Exception in parsing APF file "
			                    + APFfileName + ": " + e);
		} catch (IOException e) {
			// raised while reading either the APF file or the source text
			System.out.println ("AceDocument:  Exception in reading "
			                    + textFileName + " or " + APFfileName + ": " + e);
		}
	}

	/**
	 *  parse the APF file, read the source text (with and without XML
	 *  tags), and build the entities and relations of this document.
	 */

	private void analyzeDocument (String textFileName, String APFfileName)
	    throws SAXException, IOException {
		Document apfDoc = builder.parse(APFfileName);
		fileTextWithXML = readDocument(textFileName);
		fileText = eraseXML(fileTextWithXML);
		readAPFdocument (apfDoc, fileText);
	}

	/**
	 *  read APF document and create entities and relations.
	 *  <p>
	 *  Sets sourceFile and sourceType from the first 'source_file' element
	 *  and docID from the first 'document' element;  a NullPointerException
	 *  results if either element is absent.  If Ace.perfectMentions is set
	 *  and Ace.perfectEntities is not, only mentions are read (see
	 *  readPerfectMentions) and no relations are created.
	 */

	void readAPFdocument (Document apfDoc, String fileText) {
		Element sourceFileElement =
			(Element) apfDoc.getElementsByTagName("source_file").item(0);
		sourceFile = sourceFileElement.getAttribute("URI");
		sourceType = sourceFileElement.getAttribute("SOURCE");

		Element documentElement =
			(Element) apfDoc.getElementsByTagName("document").item(0);
		docID = documentElement.getAttribute("DOCID");

		if (Ace.perfectMentions && !Ace.perfectEntities) {
			readPerfectMentions (apfDoc, fileText);
			return;
		}

		NodeList entityElements = apfDoc.getElementsByTagName("entity");
		for (int i=0; i<entityElements.getLength(); i++) {
			Element entityElement = (Element) entityElements.item(i);
			addEntity(new AceEntity (entityElement, fileText));
		}
		// relations are built after all entities, and are handed this
		// document (presumably to look up their arguments -- see findEntity
		// and findEntityMention)
		NodeList relationElements = apfDoc.getElementsByTagName("relation");
		for (int i=0; i<relationElements.getLength(); i++) {
			Element relationElement = (Element) relationElements.item(i);
			addRelation(new AceRelation (relationElement, this));
		}
	}

	/**
	 *  add 'entity' to the end of the list of entities of this document.
	 */

	public void addEntity (AceEntity entity) {
		entities.add(entity);
	}

	/**
	 *  add 'relation' to the end of the list of relations of this document.
	 */

	public void addRelation (AceRelation relation) {
		relations.add(relation);
	}

	/**
	 *  returns the value of the first child node of the first descendant
	 *  of 'e' with tag 'elementType'.  Assumes elementType is a leaf element
	 *  type;  throws a NullPointerException if 'e' has no such descendant
	 *  or if that descendant has no children.
	 */

	static String getElementText (Element e, String elementType) {
		NodeList typeList = e.getElementsByTagName(elementType);
		Element typeElement = (Element) typeList.item(0);
		return typeElement.getFirstChild().getNodeValue();
	}

	/**
	 *  read only the entity mentions of the APF document, creating a
	 *  separate one-mention AceEntity for each 'entity_mention' element.
	 *  The entity's ID is "E" followed by the ID of the mention.
	 */

	void readPerfectMentions (Document apfDoc, String fileText) {
		NodeList mentionElements = apfDoc.getElementsByTagName("entity_mention");
		for (int i=0; i<mentionElements.getLength(); i++) {
			Element mentionElement = (Element) mentionElements.item(i);
			String entityId = "E" + mentionElement.getAttribute("ID");
			String type = mentionElement.getAttribute("ENTITY_TYPE");
			// map type through AceEntity.standardType where it has an entry
			if (AceEntity.standardType.containsKey(type))
				type = (String) AceEntity.standardType.get(type);
			String subtype = mentionElement.getAttribute("ENTITY_SUBTYPE");
			// adjust for missing subtypes in training data
			if ((!type.equals("PERSON")) && (!type.equals("")) && subtype.equals(""))
				subtype = "Other";
			AceEntityMention mention = new AceEntityMention (mentionElement, fileText);
			AceEntity entity = new AceEntity (entityId, type, subtype, false);
			entity.addMention(mention);
			addEntity(entity);
		}
	}

	/**
	 *  read file 'fileName' and return its contents as a StringBuffer.
	 *  The file is decoded as ISO-8859-1, and every line of the result
	 *  (including the last) ends in a single newline character, whatever
	 *  line terminator the file used.  The file is closed before returning,
	 *  whether or not the read succeeds.
	 *
	 *  @throws IOException  if the file cannot be opened or read
	 */

	static StringBuffer readDocument (String fileName) throws IOException {
		InputStream in = new FileInputStream(new File(fileName));
		try {
			BufferedReader reader = new BufferedReader (
				new InputStreamReader (in, encoding));
			StringBuffer fileText = new StringBuffer();
			String line;
			while ((line = reader.readLine()) != null)
				fileText.append(line).append('\n');
			return fileText;
		} finally {
			// closing the underlying stream releases the file handle even
			// if constructing or reading from the reader failed
			in.close();
		}
	}

	/**
	 *  returns the text of 'fileTextWithXML' with all XML tags removed:
	 *  every character from a '<' up to and including the next '>' is
	 *  dropped, and all other characters are kept in order.  If a '<' has
	 *  no following '>', the remainder of the text is dropped.
	 */

	static String eraseXML (StringBuffer fileTextWithXML) {
		int length = fileTextWithXML.length();
		StringBuffer fileText = new StringBuffer(length);
		boolean inTag = false;
		for (int i=0; i<length; i++) {
			char c = fileTextWithXML.charAt(i);
			if (c == '<') inTag = true;
			if (!inTag) fileText.append(c);
			if (c == '>') inTag = false;
		}
		return fileText.toString();
	}

	/**
	 *  returns a new Jet document built from the source text (XML tags
	 *  included), annotated with tag "TEXT".
	 *  <p>
	 *  NOTE:  only valid for an AceDocument created from files with the
	 *  two-argument constructor;  otherwise the XML text is null and this
	 *  method throws a NullPointerException.
	 */

	public Jet.Tipster.Document JetDocument() {
		Jet.Tipster.Document doc = new Jet.Tipster.Document (fileTextWithXML.toString());
		doc.annotateWithTag("TEXT");
		return doc;
	}

	/**
	 *  returns the AceEntity with ID 'id', or null if no such AceEntity
	 *  (in which case a message is also written to standard output).
	 */

	AceEntity findEntity (String id) {
		for (Iterator it = entities.iterator(); it.hasNext(); ) {
			AceEntity entity = (AceEntity) it.next();
			if (entity.id.equals(id))
				return entity;
		}
		System.out.println ("*** unable to find entity with id " + id);
		return null;
	}

	/**
	 *  returns the AceEntityMention with ID 'id', or null if no such
	 *  AceEntityMention (in which case a message is also written to
	 *  standard output).  The entities are searched in list order.
	 */

	AceEntityMention findEntityMention (String id) {
		for (Iterator it = entities.iterator(); it.hasNext(); ) {
			AceEntity entity = (AceEntity) it.next();
			AceEntityMention mention = entity.findMention(id);
			if (mention != null)
				return mention;
		}
		System.out.println ("*** unable to find entity mention with id " + id);
		return null;
	}

	/**
	 *  writes the AceDocument to 'w' in APF format:  the XML header, then
	 *  all entities, then all relations.  Closes 'w' when done.
	 *  <p>
	 *  NOTE:  sourceFile, sourceType, and docID are written as attribute
	 *  values exactly as stored, with no XML escaping.
	 */

	public void write (PrintWriter w) {
		w.println ("<?xml version=\"1.0\"?>");
		w.println ("<!DOCTYPE source_file SYSTEM \"apf.v4.0.1.dtd\">");
		w.print   ("<source_file URI=\"" + sourceFile + "\"");
		w.println (" SOURCE=\"" + sourceType + "\" TYPE=\"text\" AUTHOR=\"NYU\">");
		w.println ("<document DOCID=\"" + docID + "\">");
		for (int i=0; i<entities.size(); i++) {
			AceEntity entity = (AceEntity) entities.get(i);
			entity.write(w);
		}
		for (int i=0; i<relations.size(); i++) {
			AceRelation relation = (AceRelation) relations.get(i);
			relation.write(w);
		}
		w.println ("</document>");
		w.println ("</source_file>");
		w.close();
	}

	/**
	 *  test driver:  reads a source document and its APF file and writes the
	 *  resulting AceDocument to standard output in APF format.
	 *  <p>
	 *  usage:  AceDocument [textFile APFfile]
	 *  <p>
	 *  With fewer than two arguments, a built-in sample document from the
	 *  2004 training data is used.
	 */

	public static void main (String[] args) {
		String textFile;
		String xmlFile;
		if (args.length >= 2) {
			textFile = args[0];
			xmlFile = args[1];
		} else {
			String home =
				"C:/Documents and Settings/Ralph Grishman/My Documents/";
			String ace = home + "ace/";
			xmlFile =  ace + "training04/English/nwire/APW20001001.2021.0521.apf.xml";
			textFile = ace + "training04/English/nwire/APW20001001.2021.0521.sgm";
			// to test an old-style file instead, set ace2004 = false and use
			//   xmlFile  = ace + "training/nwire/APW19980213.1302.sgm.tmx.rdc.xml"
			//   textFile = ace + "training/nwire/APW19980213.1302.sgm"
		}
		AceDocument ad = new AceDocument(textFile, xmlFile);
		ad.write(new PrintWriter(System.out));
	}
}