/*
 * Created on Nov 30, 2003
 */

/**
 * Miscellaneous useful functions for working with WordNet and other disambiguation resources.
 *
 * @author M. David Westbrook
 * Version 06_07_04 -- A version specifically for inclusion in Jet -- excludes methods used for 
 * class-based experiments and deprecated synonym testing.
 * Version 06_07_04.07_21_04 -- A branch that removes all references to classes used for anything
 * more complicated than simple Synset lookups and categorization
 */

package Jet.Sense;
import net.didion.jwnl.*;
import net.didion.jwnl.data.*;
import net.didion.jwnl.data.list.*;
import Jet.SenseResources;
import Jet.SenseResourceException;
import Jet.Lex.*;
import Jet.Lisp.*;

import java.io.IOException;
import java.util.*;

/**
 * Static helpers for simple WordNet lookups through JWNL and for base-form
 * lookups through the Jet lexicon.
 *
 * <p>All WordNet lookups go through the static {@link #dict} field. Nothing in
 * this class assigns it, so it must be set by the caller before any lookup
 * method is used.
 */
public class SenseUtils {
    //public static SenseCorpus corpus = null;
    //public static CoarseDictionary courseMapping = null;

    /** Not read or written by any method of this class. */
    public static CategorySorter ontology = null;

    /**
     * The JWNL dictionary consulted by every lookup method in this class.
     * Fully qualified to avoid conflict with java.util.Dictionary.
     */
    public static net.didion.jwnl.dictionary.Dictionary dict = null;

    /** Part of speech used by the lookups that take no POS argument; the noun database by default. */
    public static POS defaultPOS = POS.NOUN;

    /** Not read by any method of this class. */
    private static boolean debug = false;

    /**
     * Tests whether two synsets are equal or directly related by hyponymy.
     *
     * @param syn1 A Synset.
     * @param syn2 A Synset.
     * @return true if syn1 and syn2 are the same Synset or if one is an immediate hyponym
     * of the other; false if either argument is null.
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     * @throws SenseResourceException Declared for callers; not thrown by the current body.
     */
    public static boolean areSynonyms(Synset syn1, Synset syn2)
            throws JWNLException, SenseResourceException {
        if (syn1 == null || syn2 == null) {
            return false;
        }
        return syn1.equals(syn2)
                || isHyponymOf(syn1, syn2)
                || isHyponymOf(syn2, syn1);
    }

    /**
     * Tests whether one synset is listed among the direct hyponyms of another.
     *
     * @param syn1 A Synset.
     * @param syn2 A Synset.
     * @return true if syn1 is a direct hyponym of syn2; false otherwise, including when
     * either argument is null.
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     */
    public static boolean isHyponymOf(Synset syn1, Synset syn2) throws JWNLException {
        // Same null convention as areSynonyms, so the method is safe to call directly.
        if (syn1 == null || syn2 == null) {
            return false;
        }
        PointerTargetNodeList directHyponyms = PointerUtils.getDirectHyponyms(syn2);
        for (Iterator it = directHyponyms.iterator(); it.hasNext();) {
            PointerTargetNode node = (PointerTargetNode) it.next();
            if (syn1.equals(node.getSynset())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Takes a word, a part of speech, and the number of a sense, and returns the offset of the
     * Synset of that sense of the word.
     *
     * @param lemma A word
     * @param pos A part of speech indicating which WordNet database to consult
     * @param sensenum The number of the sense of lemma for which an offset is requested
     * (valid values are 1 through the number of senses of lemma)
     * @return An offset into the WordNet database for the indicated part-of-speech, or -1 if
     * lemma is not found or sensenum is out of range.
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     */
    public static long offsetFromSenseNum(String lemma, POS pos, int sensenum) throws JWNLException {
        Synset sense = synsetFromSenseNum(lemma, pos, sensenum);
        return (sense == null) ? -1 : sense.getOffset();
    }

    /**
     * Takes an inflected word, finds its base form (if possible) using jet's lexicon,
     * and looks this base form up in the WordNet database for the default part-of-speech
     * (NOUN, unless defaultPOS field has been modified).
     *
     * <p>Unlike {@link #getSynsets(String, POS)}, this variant is best-effort: any exception
     * raised during the lookup is reported on standard output and turned into a null result.
     *
     * @param inflectedWord A word, not necessarily in its base form.
     * @return All senses of the base form of inflectedWord from WordNet, or null if no senses
     * were found or the lookup failed.
     * @throws JWNLException Declared for callers; failures are caught and reported instead.
     */
    public static Synset[] getSynsets(String inflectedWord)
            throws JWNLException {
        // Start from the input word so the error report names a token even when
        // the failure happens before a base form has been resolved.
        String base = inflectedWord;
        try {
            String lexiconBase = getBaseForm(defaultPOS, inflectedWord);
            if (lexiconBase != null) {
                base = lexiconBase;
            }
            return lookupSenses(defaultPOS, base);
        } catch (Exception e) {
            System.out.println ("Error in SenseUtils.getSynsets for token " + base);
            System.out.println ("  " + e);
            return null;
        }
    }

    /**
     * Takes an inflected word, finds its base form (if possible) using jet's lexicon,
     * and looks this base form up in the WordNet database for the part-of-speech
     * indicated by pos.
     *
     * @param inflectedWord A word, not necessarily in its base form.
     * @param pos The part-of-speech of the word
     * @return All senses of the base form of inflectedWord from WordNet (null if no senses found).
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     */
    public static Synset[] getSynsets(String inflectedWord, POS pos)
            throws JWNLException {
        String base = getBaseForm(pos, inflectedWord);
        if (base == null) {
            // Word not in the lexicon (or no base form recorded): look it up as given.
            base = inflectedWord;
        }
        return lookupSenses(pos, base);
    }

    /**
     * Looks a word up in WordNet without first transforming the word into a base form
     *
     * @param base The base form of a word
     * @return All senses of base in the WordNet database for the default part-of-speech
     * (NOUN, unless the defaultPOS field has been modified), or null if no senses found.
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     */
    public static Synset[] getSynsetsForBaseForm(String base)
            throws JWNLException {
        return lookupSenses(defaultPOS, base);
    }

    /**
     * Looks word up in the dictionary for part-of-speech pos and returns all of its senses,
     * or null when the dictionary has no index entry for it.
     */
    private static Synset[] lookupSenses(POS pos, String word) throws JWNLException {
        IndexWord entry = dict.lookupIndexWord(pos, word);
        return (entry == null) ? null : entry.getSenses();
    }

    /**
     * Returns the first entry in the jet lexicon for the word inflectedQuery that has the indicated
     * part of speech.
     *
     * <p>Only verbs and nouns are distinguished: POS.VERB matches the lexicon categories
     * v, tv, ving and ven; every other part of speech is treated as a request for category n.
     *
     * @param pos The part-of-speech for which a lexicon entry is sought
     * @param inflectedQuery The word for which a lexicon entry is sought
     * @return The FeatureSet of the first lexicon entry for word inflectedQuery with part-of-speech
     * pos, or null if the lexicon has no such entry.
     */
    public static FeatureSet getPOSDef(POS pos, String inflectedQuery) {
        FeatureSet[] defs = Lexicon.lookUp(new String[] { inflectedQuery });
        if (defs == null) {
            return null;
        }

        // Accepted categories, each delimited by ';' on both sides so that a
        // whole-category match can be tested with a single indexOf.
        String acceptedCats = (pos == POS.VERB) ? ";v;tv;ving;ven;" : ";n;";

        for (int i = 0; i < defs.length; i++) {
            String cat = (String) defs[i].get("cat");
            if (cat != null && acceptedCats.indexOf(";" + cat + ";") >= 0) {
                return defs[i];
            }
        }
        return null;
    }

    /**
     * Returns part-of-speech labels for all parts of speech for which the jet lexicon has
     * an entry for inflectedQuery. The query is lower-cased before the lookup.
     *
     * @param inflectedQuery The word to be looked up.
     * @return An array of part-of-speech labels, one per lexicon entry (an element is null when
     * the entry has no "cat" feature); empty if the query is null or not in the lexicon.
     */
    public static String[] getAllPOS(String inflectedQuery) {
        if (inflectedQuery == null) {
            return new String[0];
        }
        // Fixed locale: the default-locale toLowerCase() can map characters
        // differently (e.g. 'I' under a Turkish locale) and miss lexicon entries.
        String queryStr = inflectedQuery.toLowerCase(Locale.ENGLISH);
        FeatureSet[] defs = Lexicon.lookUp(new String[] { queryStr });
        if (defs == null) {
            return new String[0];
        }
        String[] result = new String[defs.length];
        for (int i = 0; i < defs.length; i++) {
            result[i] = (String) defs[i].get("cat");
        }
        return result;
    }

    /**
     * Uses the jet lexicon to retrieve a base form of the indicated part-of-speech for
     * inflectedQuery
     *
     * @param pos The part-of-speech of inflectedQuery.
     * @param inflectedQuery The word for which we want the base form.
     * @return The "head" of the "pa" feature of the matching lexicon entry, or null if the word
     * is not found in the lexicon or the entry has no "pa" feature.
     */
    public static String getBaseForm(POS pos, String inflectedQuery) {
        FeatureSet def = getPOSDef(pos, inflectedQuery);
        if (def == null) {
            return null;
        }
        FeatureSet pa = (FeatureSet) def.get("pa");
        if (pa == null) {
            return null;
        }
        return (String) pa.get("head");
    }

    /**
     * Converts an array of Synsets into a HashSet of Synsets
     *
     * @param syns The Synsets to be converted; may be null (as returned by the getSynsets
     * methods when no senses are found).
     * @return A HashSet containing all Synsets in syns; empty if syns is null.
     */
    public static HashSet synHashSet(Synset[] syns) {
        if (syns == null) {
            return new HashSet();
        }
        return new HashSet(Arrays.asList(syns));
    }

    /**
     * Combines two Synset arrays into a single, newly allocated array with no duplicates.
     * Neither input array is modified. A null input is treated as an empty array.
     *
     * @param syns1 The first array of Synsets (may be null).
     * @param syns2 The second array of Synsets (may be null).
     * @return An array containing all Synsets in syns1 and syns2, with no duplicates, in
     * first-seen order.
     */
    public static Synset[] synsetUnion(Synset[] syns1, Synset[] syns2) {
        Set union = new LinkedHashSet();
        if (syns1 != null) {
            union.addAll(Arrays.asList(syns1));
        }
        if (syns2 != null) {
            union.addAll(Arrays.asList(syns2));
        }
        // Always copy into a fresh array. Passing syns1 as the destination would
        // overwrite the caller's array whenever the union fits inside it, and
        // leave a null slot plus stale entries when the union is smaller.
        return (Synset[]) union.toArray(new Synset[union.size()]);
    }

    /**
     * Returns an object, representing sense number sensenum of word
     * lemma with part-of-speech pos, that can serve as a HashMap key
     *
     * @param lemma The word.
     * @param pos The part-of-speech of lemma.
     * @param sensenum The number of the sense (valid values are 1 through the number of
     * senses of lemma)
     * @return An Object that can serve as a HashMap key, or null if lemma is not found or
     * sensenum is out of range.
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     */
    public static Object keyFromSenseNum(String lemma, POS pos, int sensenum) throws JWNLException {
        Synset sense = synsetFromSenseNum(lemma, pos, sensenum);
        return (sense == null) ? null : sense.getKey();
    }

    /**
     * Returns an Object, representing the Synset with part-of-speech pos and
     * the indicated offset in WordNet, that can serve as a HashMap key
     *
     * @param offset The offset of the Synset in the WordNet database for pos.
     * @param pos The part-of-speech selecting the WordNet database.
     * @return An Object that can serve as a HashMap key, or null if the dictionary returns
     * no Synset for the offset.
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     */
    public static Object keyFromOffset(long offset, POS pos) throws JWNLException {
        Synset sense = dict.getSynsetAt(pos, offset);
        return (sense == null) ? null : sense.getKey();
    }

    /**
     * Returns a Synset for word lemma with part-of-speech pos, representing the sensenum-th
     * entry in WordNet for lemma.
     *
     * @param lemma The word.
     * @param pos The part-of-speech of lemma.
     * @param sensenum The number of the sense to be retrieved (valid values are 1 through the
     * number of senses of lemma)
     * @return The Synset for the indicated sense, or null if lemma is not found or sensenum
     * is out of range.
     * @throws JWNLException Indicates a problem with the Java-WordNet interface.
     */
    public static Synset synsetFromSenseNum(String lemma, POS pos, int sensenum) throws JWNLException {
        IndexWord lookup = dict.lookupIndexWord(pos, lemma);
        if (lookup == null) {
            return null;
        }
        // Reject both ends of the range up front so a bad sense number yields
        // null rather than an ArrayIndexOutOfBoundsException from getSense.
        if (sensenum < 1 || sensenum > lookup.getSenseCount()) {
            return null;
        }
        return lookup.getSense(sensenum);
    }
}
