package Jet.Sense;

import java.io.*;
import java.util.*;

import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.*;
import net.didion.jwnl.data.list.*;

/**
 * A CategorySorter that associates WordNet noun senses with the seven ACE
 * entity categories: gpe, person, organization, location, facility, vehicle
 * and weapon.
 * <p>
 * The categories are seeded in the constructor from a hand-picked list of
 * WordNet base forms and sense indices. The indices are positions in the
 * array returned by <code>SenseUtils.getSynsetsForBaseForm</code>, so they are
 * only meaningful for the WordNet version this list was written against.
 *
 * @author M. David Westbrook
 * Jun 21, 2004
 */
public class ACECategorySorter extends CategorySorter implements Serializable {
	static final long serialVersionUID = 1L; // Fixed id so that serialized instances stay readable

	/**
	 * Creates the sorter and populates the ACE categories from WordNet.
	 *
	 * @throws JWNLException Indicates a problem with the Java-WordNet interface.
	 */
	public ACECategorySorter() throws JWNLException {
		buildCategories();
	}

	/**
	 * Creates the Category instances representing the ACE categories, populates
	 * them with the appropriate WordNet Synsets, then builds the label index and
	 * registers the three-letter label abbreviations.
	 *
	 * @throws JWNLException Indicates a problem with the Java-WordNet interface.
	 */
	private void buildCategories() throws JWNLException {
		// The position of each category in this array is significant: categorize()
		// scans the array front to back when a Synset belongs to several categories.
		categories = new Category[] {
			new Category("gpe", POS.NOUN),
			new Category("person", POS.NOUN),
			new Category("organization", POS.NOUN),
			new Category("location", POS.NOUN),
			new Category("facility", POS.NOUN),
			new Category("vehicle", POS.NOUN),
			new Category("weapon", POS.NOUN)};
		Category gpe = categories[0];
		Category person = categories[1];
		Category organization = categories[2];
		Category location = categories[3];
		Category facility = categories[4];
		Category vehicle = categories[5];
		Category weapon = categories[6];

		// Note that in some cases a Synset acquired through one word below is also
		// available as a sense of another word used below, or is a hypernym of other
		// relevant Synsets. In such cases the (most general) Synset need only be added
		// once (accidental overlaps may occur, but they won't alter results). So, for
		// example, some gpe-relevant senses of "country" are omitted because they have
		// already been included as senses of "nation."

		// --- gpe ---
		addSenses(gpe, "nation", new int[] {0, 1, 2});
		addSense(gpe, "government", 0);
		addSense(gpe, "country", 0);
		addSenses(gpe, "kingdom", new int[] {1, 2, 3});
		addSense(gpe, "protectorate", 0);
		addSense(gpe, "union", 6);
		addSense(gpe, "domain", 1);
		addSense(gpe, "administrative_district", 0);
		addSense(gpe, "territory", 1);
		addSense(gpe, "municipality", 1);

		// --- person: direct hyponyms of the first sense of "person", minus exclusions ---
		HashSet badPeople = sensesOf(new String[] {
			"face", "national", "abomination", "divider", "counter", "life"});
		Synset[] personSenses = SenseUtils.getSynsetsForBaseForm("person");
		addHyponymsExcept(person, personSenses[0], badPeople);

		// --- organization: direct hyponyms of two senses, each with its own exclusions ---
		Synset[] organizationSenses = SenseUtils.getSynsetsForBaseForm("organization");
		HashSet badOrganizations = sensesOf(new String[] {
			"line_of_defense", "force", "alliance", "mission", "pool"});
		addHyponymsExcept(organization, organizationSenses[0], badOrganizations);
		HashSet badOrganizations2 = sensesOf(new String[] {
			"top_brass", "executive", "government_officials", "management"});
		addHyponymsExcept(organization, organizationSenses[2], badOrganizations2);

		// --- location ---
		addSenses(location, "region", new int[] {0, 2});
		addSense(location, "outer_space", 0);
		addSense(location, "celestial_body", 0);
		addSense(location, "pass", 3);
		addSense(location, "aclinic_line", 0);
		addSense(location, "agonic_line", 0);
		addSense(location, "isogonic_line", 0);
		addSense(location, "timber_line", 0);
		addSense(location, "fault_line", 0);
		addSense(location, "great_circle", 0);
		addSense(location, "snow_line", 0);
		addSense(location, "latitude", 2);
		addSense(location, "celestial_orbit", 0);
		addSenses(location, "trade_route", new int[] {0, 1});
		addSense(location, "watershed", 0);
		addSense(location, "celestial_point", 0);
		addSense(location, "geographic_point", 0);
		addSenses(location, "thalweg", new int[] {0, 1});
		addSense(location, "border", 1);
		addSense(location, "dry_land", 0);
		addSense(location, "body_of_water", 0);

		// --- facility ---
		addSenses(facility, "facility", new int[] {0, 3});
		HashSet badStructures = sensesOf(new String[] {
			"boarding", "body", "coil", "cross", "house_of_cards", "hull",
			"lamination", "sign", "balance", "weapons_platform", "arrangement",
			"superstructure"});
		Synset[] structureSenses = SenseUtils.getSynsetsForBaseForm("structure");
		addHyponymsExcept(facility, structureSenses[0], badStructures);
		addSenses(facility, "chamber", new int[] {2, 4});
		addSense(facility, "airlock", 0);
		addSenses(facility, "scaffold", new int[] {0, 1});
		addSense(facility, "bomb_shelter", 0);
		addSense(facility, "tomb", 0);
		addSense(facility, "hyperbaric_chamber", 0);
		addSense(facility, "compound", 2);
		addSense(facility, "lock", 3);
		addSenses(facility, "pen", new int[] {1, 2});
		addSense(facility, "penal_facility", 0);
		addSense(facility, "pound", 10);
		addSense(facility, "recess", 3);
		addSense(facility, "vivarium", 0);
		addSenses(facility, "yard", new int[] {1, 2, 3, 4});
		addSense(facility, "baseball_diamond", 0);
		addSense(facility, "grounds", 2);
		addSense(facility, "playing_field", 0);
		addSense(facility, "industrial_park", 0);
		addSense(facility, "parade_ground", 0);
		addSense(facility, "fairground", 0);
		addSenses(facility, "park", new int[] {0, 1, 4});
		addSense(facility, "picnic_area", 0);
		addSense(facility, "public_square", 0);
		addSense(facility, "toll_plaza", 0);
		addSense(facility, "subdivision", 0);
		addSense(facility, "track", 2);
		addSense(facility, "trail", 1);
		addSense(facility, "road", 0);

		// --- vehicle ---
		addSense(vehicle, "transport", 0);

		// --- weapon ---
		addSense(weapon, "weapon", 0);
		addSense(weapon, "weaponry", 0);
		addSense(weapon, "explosive", 0);

		buildLabelIndex();
		addAbbreviations();
	}

	/**
	 * Adds a single sense of baseForm to cat.
	 *
	 * @param cat The category to which the Synset will be added.
	 * @param baseForm The WordNet base form to look up.
	 * @param senseIndex Position of the wanted Synset in the array returned for baseForm.
	 * @throws JWNLException Indicates a problem with the Java-WordNet interface.
	 */
	private void addSense(Category cat, String baseForm, int senseIndex) throws JWNLException {
		addSenses(cat, baseForm, new int[] {senseIndex});
	}

	/**
	 * Looks up baseForm once and adds the senses at the given positions to cat,
	 * in the order given.
	 *
	 * @param cat The category to which the Synsets will be added.
	 * @param baseForm The WordNet base form to look up.
	 * @param senseIndices Positions of the wanted Synsets in the array returned for baseForm.
	 * @throws JWNLException Indicates a problem with the Java-WordNet interface.
	 * @throws ArrayIndexOutOfBoundsException if a requested position does not exist
	 *         in the array returned for baseForm.
	 */
	private void addSenses(Category cat, String baseForm, int[] senseIndices) throws JWNLException {
		Synset[] senses = SenseUtils.getSynsetsForBaseForm(baseForm);
		for (int i = 0; i < senseIndices.length; i++) {
			int index = senseIndices[i];
			if (index < 0 || index >= senses.length) {
				// Same exception type a raw array access would raise, but naming the
				// word and index so the failing table entry can be identified.
				throw new ArrayIndexOutOfBoundsException(
					"WordNet lists " + senses.length + " sense(s) for \"" + baseForm
					+ "\"; sense index " + index
					+ " is unavailable (dictionary version mismatch?)");
			}
			cat.add(senses[index]);
		}
	}

	/**
	 * Collects every Synset returned for each of the given base forms into one set.
	 *
	 * @param baseForms The WordNet base forms to look up, in lookup order.
	 * @return A new set holding all Synsets returned for the base forms.
	 * @throws JWNLException Indicates a problem with the Java-WordNet interface.
	 */
	private HashSet sensesOf(String[] baseForms) throws JWNLException {
		HashSet senses = new HashSet();
		for (int i = 0; i < baseForms.length; i++) {
			senses.addAll(Arrays.asList(SenseUtils.getSynsetsForBaseForm(baseForms[i])));
		}
		return senses;
	}

	/**
	 * Add entries to the labelIndex for the three-letter forms of the ACE category
	 * labels. Labels of three characters or fewer (such as "gpe") get no extra entry.
	 */
	private void addAbbreviations() {
		for (int c = 0; c < categories.length; c++) {
			Category category = categories[c];
			String label = category.getLabel();
			if (label.length() > 3) {
				labelIndex.put(label.substring(0, 3), category);
			}
		}
	}

	/**
	 * Adds to cat every <em>direct</em> hyponym of syn that is not present in excluded.
	 * Deeper descendants of syn are not visited here, and the exclusion test is
	 * applied to the direct hyponyms only.
	 *
	 * @param cat The category to which Synsets will be added.
	 * @param syn The Synset whose direct hyponyms will be added to cat
	 * @param excluded A collection of Synsets that will be prevented from being added to cat
	 * @throws JWNLException Indicates a problem with the Java-WordNet interface.
	 */
	public void addHyponymsExcept(Category cat, Synset syn, HashSet excluded) throws JWNLException {
		PointerTargetNodeList directHyponyms = PointerUtils.getDirectHyponyms(syn);
		for (Iterator it = directHyponyms.iterator(); it.hasNext();) {
			PointerTargetNode node = (PointerTargetNode) it.next();
			Synset hyponym = node.getSynset();
			if (!excluded.contains(hyponym)) {
				cat.add(hyponym);
			}
		}
	}

	/**
	 * Finds an ACE category for sense by walking its hypernym tree.
	 * <p>
	 * The lists produced from the hypernym tree are scanned in order, and within
	 * each list the nodes are scanned in order; the search stops at the first node
	 * whose Synset is contained in some category. The order of the ACE categories
	 * matters only as a tie-break for that node: if the same Synset is contained in
	 * both the gpe and the organization category, and gpe precedes organization in
	 * categories, the gpe category is returned. A match on an earlier node always
	 * wins over a match on a later node, whatever the category order.
	 *
	 * @param sense The Synset to categorize.
	 * @return The matching category, or null if no node of the hypernym tree is
	 *         contained in any category.
	 * @throws JWNLException Indicates a problem with the Java-WordNet interface.
	 * @see Jet.Sense.CategorySorter#categorize(net.didion.jwnl.data.Synset)
	 */
	public Category categorize(Synset sense) throws JWNLException {
		List hypernymPaths = PointerUtils.getHypernymTree(sense).toList();
		for (Iterator pathIt = hypernymPaths.iterator(); pathIt.hasNext();) {
			PointerTargetNodeList path = (PointerTargetNodeList) pathIt.next();
			for (Iterator nodeIt = path.iterator(); nodeIt.hasNext();) {
				PointerTargetNode node = (PointerTargetNode) nodeIt.next();
				Synset hypernym = (Synset) node.getPointerTarget();
				for (int c = 0; c < categories.length; c++) {
					if (categories[c].contains(hypernym)) {
						return categories[c];
					}
				}
			}
		}
		return null;
	}
}
