/*
 * Decompiled with CFR 0.152.
 */
package tratz.semantics.poss;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import tratz.cmdline.CommandLineOptions;
import tratz.cmdline.CommandLineOptionsParser;
import tratz.cmdline.ParsedCommandLine;
import tratz.featgen.MultiStepFeatureGenerator;
import tratz.jwni.WordNet;
import tratz.parse.io.ConllxSentenceReader;
import tratz.parse.io.SentenceReader;
import tratz.parse.types.Parse;
import tratz.parse.types.Token;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command-line routine that generates feature sets for possessive instances
 * listed in {@code .csv} files and writes them to a single output file.
 *
 * <p>Each output line has the form
 * {@code <id>_<datasource>\u0018<class>\u0018<feature>\u0018...}, where the class
 * label is the upper-cased base name of the {@code .csv} file the instance came from.
 */
public class FeatureExtractionRoutine {
    private final boolean mInvertIds;
    private final Set<String> mIds;
    private final MultiStepFeatureGenerator mFeatGen;
    private final Map<String, List<Parse>> mDatasourceToParses;
    private final PrintWriter mFeatureWriter;
    public static final String OPT_INPUT_DIRECTORY = "inputdir";
    public static final String OPT_OUTPUT_FILE = "outputfile";
    public static final String OPT_IDS_FILE = "ids";
    public static final String OPT_INVERT_IDS = "invertids";
    public static final String OPT_WORDNET_DIR = "wndir";
    public static final String OPT_WFR_FILE = "wfr";
    public static final String OPT_FER_FILE = "fer";
    public static final String OPT_COMBO_FILE = "comborules";

    /**
     * @param invertIds          if {@code false}, only instances whose key is in {@code ids}
     *                           are processed; if {@code true}, only instances whose key is
     *                           NOT in {@code ids} are processed
     * @param ids                instance keys of the form {@code id + "\t" + datasource}
     * @param featGen            generator used to produce the features of each instance
     * @param datasourceToParses parsed sentences per datasource name, indexed by sentence id
     * @param featureWriter      destination of the generated feature lines; not closed by
     *                           this object
     */
    public FeatureExtractionRoutine(boolean invertIds, Set<String> ids, MultiStepFeatureGenerator featGen, Map<String, List<Parse>> datasourceToParses, PrintWriter featureWriter) {
        this.mInvertIds = invertIds;
        this.mIds = ids;
        this.mFeatGen = featGen;
        this.mDatasourceToParses = datasourceToParses;
        this.mFeatureWriter = featureWriter;
    }

    /**
     * Builds the set of command-line options understood by {@link #main(String[])}.
     *
     * @return the option definitions
     */
    public static CommandLineOptions createOptions() {
        // NOTE(review): OPT_INVERT_IDS is read in main() but is not registered here;
        // confirm whether the parser accepts unregistered options before relying on it.
        CommandLineOptions opts = new CommandLineOptions();
        opts.addOption(OPT_INPUT_DIRECTORY, "file", "directory containing the input files");
        opts.addOption(OPT_OUTPUT_FILE, "file", "the output file");
        opts.addOption(OPT_IDS_FILE, "file", "the file containing the list of ids of interest");
        opts.addOption(OPT_WORDNET_DIR, "file", "the dictionary directory (dict) of WordNet");
        opts.addOption(OPT_WFR_FILE, "file", "the file contaning the word-finding rules");
        opts.addOption(OPT_FER_FILE, "file", "the file contaning the feature extraction rules");
        opts.addOption(OPT_COMBO_FILE, "file", "the file contaning the combination feature rules");
        return opts;
    }

    /**
     * Entry point: loads the resources named on the command line, reads the three
     * hard-coded CoNLL-X corpora from the working directory, and writes one feature
     * line per selected instance found in the {@code .csv} files of the input directory.
     *
     * @param args command-line arguments, see {@link #createOptions()}
     * @throws Exception if an input cannot be read, a row is malformed, or the output
     *                   cannot be written
     */
    public static void main(String[] args) throws Exception {
        ParsedCommandLine cmdLine = new CommandLineOptionsParser().parseOptions(FeatureExtractionRoutine.createOptions(), args);
        String inputDirectory = cmdLine.getStringValue(OPT_INPUT_DIRECTORY);
        String outputFileString = cmdLine.getStringValue(OPT_OUTPUT_FILE);
        String idsFile = cmdLine.getStringValue(OPT_IDS_FILE);
        // Boolean.parseBoolean(null) is false, so a missing option means "do not invert".
        boolean invertIds = Boolean.parseBoolean(cmdLine.getStringValue(OPT_INVERT_IDS));
        File wnDir = new File(cmdLine.getStringValue(OPT_WORDNET_DIR));
        String wfrFile = cmdLine.getStringValue(OPT_WFR_FILE);
        String featGenFile = cmdLine.getStringValue(OPT_FER_FILE);
        String comboRulesFile = cmdLine.getStringValue(OPT_COMBO_FILE);

        File outputFile = new File(outputFileString);
        // getParentFile() is null when the output path has no directory component.
        File outputDir = outputFile.getParentFile();
        if (outputDir != null) {
            outputDir.mkdirs();
        }

        System.err.print("Reading WordNet...");
        // The instance is discarded; presumably the constructor registers global
        // state used by the feature rules -- TODO confirm.
        new WordNet(wnDir);
        System.err.println("Done.");

        Set<String> idsToGenerateFor = FeatureExtractionRoutine.readIds(idsFile);
        Map<String, List<Parse>> datasourceToSentences = new HashMap<String, List<Parse>>();

        System.err.print("Reading WSJ...");
        List<Parse> wsjParses = new ArrayList<Parse>();
        FeatureExtractionRoutine.readSentences("AutoparsedPtbFull.conllX", new ConllxSentenceReader(), wsjParses);
        datasourceToSentences.put("PTB_WSJ", wsjParses);
        System.err.println("Done.");

        System.err.print("Reading Jungle Book...");
        List<Parse> jungleBookParses = new ArrayList<Parse>();
        FeatureExtractionRoutine.readSentences("JBrebuilt.conllX", new ConllxSentenceReader(), jungleBookParses);
        datasourceToSentences.put("JB", jungleBookParses);
        System.err.println("Done.");

        System.err.print("Reading HDFRE...");
        List<Parse> hdfreParses = new ArrayList<Parse>();
        FeatureExtractionRoutine.readSentences("HDrebuilt.conllX", new ConllxSentenceReader(), hdfreParses);
        datasourceToSentences.put("HDFRE", hdfreParses);
        // NOTE(review): no "Done." is printed for HDFRE and the next progress label
        // mentions Wiktionary although the feature generator is what gets loaded;
        // the messages are kept unchanged to preserve the existing stderr output.
        System.err.print("Reading Wiktionary...");
        MultiStepFeatureGenerator featGen = new MultiStepFeatureGenerator(wfrFile, featGenFile, comboRulesFile);

        // listFiles() returns null if the path is not a directory or cannot be read;
        // fail before the output file is created/truncated.
        File[] files = new File(inputDirectory).listFiles();
        if (files == null) {
            throw new IOException("Cannot list input directory: " + inputDirectory);
        }

        PrintWriter featureWriter = new PrintWriter(new FileWriter(outputFile));
        try {
            FeatureExtractionRoutine routine = new FeatureExtractionRoutine(invertIds, idsToGenerateFor, featGen, datasourceToSentences, featureWriter);
            Set<PossessiveInstance> allInstances = new HashSet<PossessiveInstance>();
            for (File f : files) {
                if (!f.getName().endsWith(".csv")) {
                    continue;
                }
                FeatureExtractionRoutine.processInstanceFile(f, routine, allInstances);
            }
            // PrintWriter never throws on write failures; surface them explicitly.
            if (featureWriter.checkError()) {
                throw new IOException("Error while writing features to " + outputFile);
            }
        } finally {
            featureWriter.close();
        }
    }

    /**
     * Reads one tab-separated instance file and generates features for each row.
     *
     * <p>Columns (after surrounding double quotes are stripped): id, datasource,
     * token 1 text, token 2 text, 1-based token 1 index, 1-based token 2 index,
     * sentence id, sentence text. Blank lines are ignored.
     *
     * @param file         the {@code .csv} file; its base name (upper-cased) is the class label
     * @param routine      the routine that writes the features of each instance
     * @param allInstances instances seen so far across all files, used to report duplicates
     * @throws Exception if the file cannot be read, a row is malformed, or feature
     *                   generation fails
     */
    private static void processInstanceFile(File file, FeatureExtractionRoutine routine, Set<PossessiveInstance> allInstances) throws Exception {
        String fileName = file.getName();
        System.err.println("Generating features for file: " + fileName);
        String clazz = fileName.substring(0, fileName.indexOf(".")).toUpperCase();
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.equals("")) {
                    continue;
                }
                String[] split = line.split("\\t+");
                for (int i = 0; i < split.length; ++i) {
                    split[i] = FeatureExtractionRoutine.removeQuotes(split[i]);
                }
                int id = Integer.parseInt(split[0]);
                String datasource = split[1];
                String token1Text = split[2];
                String token2Text = split[3];
                // Indices in the file are 1-based; convert to 0-based.
                int token1Index = Integer.parseInt(split[4]) - 1;
                int token2Index = Integer.parseInt(split[5]) - 1;
                // A raw index of -1 (-2 after the shift) is skipped; presumably it
                // marks a token that could not be located -- TODO confirm.
                if (token1Index == -2 || token2Index == -2) {
                    System.err.println("Skipping");
                    continue;
                }
                int sentenceId = Integer.parseInt(split[6]);
                // The eighth column (sentence text) is not used, but rows without it
                // have always been rejected; do so with an explicit message.
                if (split.length < 8) {
                    throw new IllegalArgumentException("Expected at least 8 tab-separated fields but found " + split.length + " in " + fileName + ": " + line);
                }
                PossessiveInstance instance = new PossessiveInstance(id, datasource, token1Text, token2Text, token1Index, token2Index, sentenceId, clazz);
                // add() returns false when an equal instance is already present.
                if (!allInstances.add(instance)) {
                    System.err.println("Duplicate! : " + instance.token1Text + "\t" + instance.token2Text);
                }
                // Duplicates are only reported; features are still generated for them.
                routine.doSomething(instance);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Generates the features of {@code instance} and writes them as one output line,
     * provided the instance is selected by the id list (or by its complement when
     * inversion is enabled). Unselected instances are ignored.
     *
     * @param instance the instance to process
     * @throws IllegalArgumentException  if the instance names an unknown datasource
     * @throws IndexOutOfBoundsException if the sentence id or a token index is out of range
     * @throws Exception                 if feature generation fails
     */
    private void doSomething(PossessiveInstance instance) throws Exception {
        boolean listed = this.mIds.contains(instance.id + "\t" + instance.datasource);
        // Process listed ids normally, or unlisted ids when inverted.
        if (listed == this.mInvertIds) {
            return;
        }
        List<Parse> parses = this.mDatasourceToParses.get(instance.datasource);
        if (parses == null) {
            throw new IllegalArgumentException("Unknown datasource '" + instance.datasource + "' for instance " + instance.id);
        }
        if (instance.sentenceId < 0 || instance.sentenceId >= parses.size()) {
            System.err.println("Uhoh: " + instance.id + "\t" + instance.datasource + "\t" + instance.token1Text + "\t" + instance.token2Text);
            throw new IndexOutOfBoundsException("Sentence id " + instance.sentenceId + " is out of range for datasource '" + instance.datasource + "' (" + parses.size() + " sentences), instance " + instance.id);
        }
        Parse parse = parses.get(instance.sentenceId);
        List<Token> tokens = parse.getSentence().getTokens();
        // Both token positions must exist in the sentence, although only the second
        // one is handed to the feature generator.
        FeatureExtractionRoutine.checkTokenIndex(instance.token1Index, tokens.size(), instance);
        FeatureExtractionRoutine.checkTokenIndex(instance.token2Index, tokens.size(), instance);
        Set<String> features = this.mFeatGen.generateFeatures(tokens, parse, instance.token2Index);
        this.mFeatureWriter.print(instance.id + "_" + instance.datasource + "\u0018" + instance.clazz + "\u0018");
        for (String feat : features) {
            this.mFeatureWriter.print(feat + "\u0018");
        }
        this.mFeatureWriter.println();
    }

    /**
     * Verifies that {@code index} addresses a token of a sentence with
     * {@code numTokens} tokens.
     *
     * @throws IndexOutOfBoundsException if the index is negative or not less than {@code numTokens}
     */
    private static void checkTokenIndex(int index, int numTokens, PossessiveInstance instance) {
        if (index < 0 || index >= numTokens) {
            throw new IndexOutOfBoundsException("Token index " + index + " is out of range (" + numTokens + " tokens) for instance " + instance.id + " of datasource '" + instance.datasource + "', sentence " + instance.sentenceId);
        }
    }

    /**
     * Reads every sentence of {@code inputFile} with {@code sreader} and appends the
     * resulting parses to {@code parses}, in file order.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private static void readSentences(String inputFile, SentenceReader sreader, List<Parse> parses) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(inputFile));
        try {
            Parse parse;
            while ((parse = sreader.readSentence(reader)) != null) {
                parses.add(parse);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Reads the id list: every line of {@code idsFile} becomes one entry, unmodified.
     * Entries are matched against {@code id + "\t" + datasource} keys.
     *
     * @return the set of lines in the file
     * @throws Exception if the file cannot be opened or read
     */
    private static Set<String> readIds(String idsFile) throws Exception {
        Set<String> ids = new HashSet<String>();
        BufferedReader reader = new BufferedReader(new FileReader(idsFile));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                ids.add(line);
            }
        } finally {
            reader.close();
        }
        return ids;
    }

    /**
     * Strips one pair of enclosing double quotes from {@code s}, if present.
     *
     * @param s the text to unquote
     * @return {@code s} without its first and last character when it both starts and
     *         ends with {@code "}; otherwise {@code s} unchanged
     */
    public static String removeQuotes(String s) {
        if (s.startsWith("\"") && s.endsWith("\"")) {
            s = s.substring(1, s.length() - 1);
        }
        return s;
    }

    /**
     * One possessive instance read from an input row.
     *
     * <p>Two instances are equal when they refer to the same token pair of the same
     * sentence of the same datasource; {@code id}, the token texts and {@code clazz}
     * do not take part in equality.
     */
    public static class PossessiveInstance {
        int id;
        String datasource;
        String token1Text;
        String token2Text;
        int token1Index;
        int token2Index;
        int sentenceId;
        String clazz;

        public PossessiveInstance(int id, String datasource, String token1Text, String token2Text, int token1Index, int token2Index, int sentenceId, String clazz) {
            this.id = id;
            this.datasource = datasource;
            this.token1Text = token1Text;
            this.token2Text = token2Text;
            this.token1Index = token1Index;
            this.token2Index = token2Index;
            this.sentenceId = sentenceId;
            this.clazz = clazz;
        }

        /**
         * Hash over exactly the fields compared by {@link #equals(Object)}, so that
         * equal instances always share a hash code (required for HashSet lookups).
         */
        @Override
        public int hashCode() {
            int result = this.datasource == null ? 0 : this.datasource.hashCode();
            result = 31 * result + this.token1Index;
            result = 31 * result + this.token2Index;
            result = 31 * result + this.sentenceId;
            return result;
        }

        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            if (!(other instanceof PossessiveInstance)) {
                return false;
            }
            PossessiveInstance i = (PossessiveInstance)other;
            boolean sameDatasource = this.datasource == null ? i.datasource == null : this.datasource.equals(i.datasource);
            return sameDatasource && this.token1Index == i.token1Index && this.token2Index == i.token2Index && this.sentenceId == i.sentenceId;
        }
    }
}

