/*
 * Decompiled with CFR 0.152.
 */
package tratz.semantics.nn;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import tratz.cmdline.CommandLineOptions;
import tratz.cmdline.CommandLineOptionsParser;
import tratz.cmdline.ParsedCommandLine;
import tratz.featgen.MultiStepFeatureGenerator;
import tratz.jwni.WordNet;
import tratz.parse.types.Arc;
import tratz.parse.types.Parse;
import tratz.parse.types.Sentence;
import tratz.parse.types.Token;

/**
 * Command-line routine that reads two-word (left/right) instances from the files of an
 * input directory and writes one feature record per unique instance to an output file.
 *
 * <p>Input: every regular file in the input directory whose name starts with an upper-case
 * ASCII letter is read. The part of the file name before the first {@code '.'} is used as the
 * class label. Blank lines and lines starting with {@code #} or {@code //} are ignored. Every
 * other line must hold at least two tab-separated columns; the last whitespace-separated word
 * of the first column and of the second column form the instance.
 *
 * <p>Output: one line per instance, with fields separated by the control character U+0018:
 * {@code left_right}, the class label, then each generated feature (a separator follows
 * every field, including the last one).
 */
public class NnFeatureExtractionRoutine {
    /** Name of the command-line option giving the input directory. */
    public static final String PARAM_INPUT_DIR = "indir";
    /** Name of the command-line option giving the output file. */
    public static final String PARAM_OUTPUT_FILE = "outfile";
    /** Name of the command-line option giving WordNet's dictionary directory. */
    public static final String PARAM_WORDNET_DIR = "wndir";
    /** Name of the command-line option giving the word-finding rules file. */
    public static final String PARAM_WFR_RULES_FILE = "wfr";
    /** Name of the command-line option giving the feature-extraction rules file. */
    public static final String PARAM_FER_RULES_FILE = "fer";
    /** Name of the command-line option giving the combination rules file. */
    public static final String PARAM_COMBO_RULES_FILE = "comborules";

    /** Separator written after every field of an output record (control character U+0018). */
    private static final String FIELD_SEPARATOR = "\u0018";

    /**
     * Declares the command-line options understood by {@link #main(String[])}.
     *
     * @return the option definitions
     */
    private static CommandLineOptions createOptions() {
        CommandLineOptions opts = new CommandLineOptions();
        opts.addOption(PARAM_INPUT_DIR, "file", "location of input directory");
        opts.addOption(PARAM_OUTPUT_FILE, "file", "location of output file");
        opts.addOption(PARAM_WORDNET_DIR, "file", "location of WordNet's dictionary (dict) dir");
        opts.addOption(PARAM_WFR_RULES_FILE, "file", "location of the file containing the word-finding rules");
        opts.addOption(PARAM_FER_RULES_FILE, "file", "location of the file containing the feature-extraction rules");
        opts.addOption(PARAM_COMBO_RULES_FILE, "file", "location of the file containing the combination rules");
        return opts;
    }

    /**
     * Entry point: parses the options, then converts every accepted input file into feature
     * records appended to the output file.
     *
     * @param args command-line arguments, parsed against {@link #createOptions()}
     * @throws IllegalArgumentException if the input directory cannot be listed or a data line
     *         does not contain two usable columns
     * @throws IOException if reading an input file or writing the output file fails
     * @throws Exception for any other failure raised by the project classes used here
     */
    public static void main(String[] args) throws Exception {
        ParsedCommandLine cmdLine = new CommandLineOptionsParser().parseOptions(NnFeatureExtractionRoutine.createOptions(), args);
        File inDir = new File(cmdLine.getStringValue(PARAM_INPUT_DIR));
        File outFile = new File(cmdLine.getStringValue(PARAM_OUTPUT_FILE));
        File wnDir = new File(cmdLine.getStringValue(PARAM_WORDNET_DIR));
        String wfrRulesFile = cmdLine.getStringValue(PARAM_WFR_RULES_FILE);
        String ferRulesFile = cmdLine.getStringValue(PARAM_FER_RULES_FILE);
        String comboRulesFile = cmdLine.getStringValue(PARAM_COMBO_RULES_FILE);

        // listFiles() returns null (not an empty array) when the path is not a readable
        // directory; fail with a clear message before any output file is created.
        File[] files = inDir.listFiles();
        if (files == null) {
            throw new IllegalArgumentException("Input directory does not exist or cannot be listed: " + inDir);
        }

        // getParentFile() is null when the output path has no directory component.
        File outParent = outFile.getParentFile();
        if (outParent != null && !outParent.exists()) {
            outParent.mkdirs();
        }

        // The instance is discarded on purpose; presumably the constructor makes WordNet
        // available to the feature rules -- TODO confirm against tratz.jwni.WordNet.
        new WordNet(wnDir);

        PrintWriter featureWriter = new PrintWriter(new FileWriter(outFile));
        try {
            MultiStepFeatureGenerator nnFeatGen = new MultiStepFeatureGenerator(wfrRulesFile, ferRulesFile, comboRulesFile);
            // Instances already written, keyed as "left<TAB>right", shared across all files.
            Set<String> instanceSet = new HashSet<String>();
            for (File f : files) {
                if (!f.getName().matches("[A-Z].*")) {
                    continue;
                }
                if (!f.isFile()) {
                    // A sub-directory cannot be opened as a data file.
                    continue;
                }
                NnFeatureExtractionRoutine.processFile(f, nnFeatGen, instanceSet, featureWriter);
            }
            // PrintWriter never throws on write errors; ask it explicitly so that a
            // truncated output file is not mistaken for a successful run.
            if (featureWriter.checkError()) {
                throw new IOException("Error while writing features to " + outFile);
            }
        } finally {
            featureWriter.close();
        }
    }

    /**
     * Reads one input file and writes a feature record for every instance in it that has not
     * been seen before. Duplicates are reported on standard error and skipped.
     *
     * @param f the input file; its name up to the first '.' is the class label
     * @param nnFeatGen the generator producing the features of each instance
     * @param instanceSet instances written so far; updated with the new ones
     * @param featureWriter destination of the feature records
     */
    private static void processFile(File f, MultiStepFeatureGenerator nnFeatGen, Set<String> instanceSet, PrintWriter featureWriter) throws Exception {
        String fileName = f.getName();
        int periodIndex = fileName.indexOf('.');
        String className = periodIndex == -1 ? fileName : fileName.substring(0, periodIndex);

        BufferedReader reader = new BufferedReader(new FileReader(f));
        try {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                ++lineNumber;
                line = line.trim();
                if (line.equals("") || line.startsWith("#") || line.startsWith("//")) {
                    continue;
                }
                String[] columns = line.split("\\t+");
                String pl = columns.length > 0 ? NnFeatureExtractionRoutine.lastWord(columns[0]) : null;
                String pr = columns.length > 1 ? NnFeatureExtractionRoutine.lastWord(columns[1]) : null;
                if (pl == null || pr == null) {
                    throw new IllegalArgumentException("Expected two tab-separated columns at " + fileName + ":" + lineNumber + " but found: " + line);
                }

                String instanceKey = pl + "\t" + pr;
                if (!instanceSet.add(instanceKey)) {
                    System.err.println("Duplicate: " + instanceKey + "\t" + fileName);
                    continue;
                }

                featureWriter.print(pl + "_" + pr + FIELD_SEPARATOR + className + FIELD_SEPARATOR);
                for (String feat : NnFeatureExtractionRoutine.generateFeatures(nnFeatGen, pl, pr)) {
                    featureWriter.print(feat);
                    featureWriter.print(FIELD_SEPARATOR);
                }
                featureWriter.println();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Builds a two-token sentence (both tagged "NN", joined by a single "nn" arc from the
     * left to the right token) and runs the feature generator on it.
     *
     * @param nnFeatGen the feature generator
     * @param pl text of the left token
     * @param pr text of the right token
     * @return the features produced by the generator
     */
    private static Set<String> generateFeatures(MultiStepFeatureGenerator nnFeatGen, String pl, String pr) throws Exception {
        Token leftToken = new Token(pl, "NN", 1);
        Token rightToken = new Token(pr, "NN", 2);
        ArrayList<Token> tokens = new ArrayList<Token>();
        tokens.add(leftToken);
        tokens.add(rightToken);

        ArrayList<Arc> arcs = new ArrayList<Arc>();
        arcs.add(new Arc(leftToken, rightToken, "nn"));
        Parse parse = new Parse(new Sentence(tokens), null, arcs);

        // Position 0 in the token list is the left token.
        int wordIndex = 0;
        return nnFeatGen.generateFeatures(tokens, parse, wordIndex);
    }

    /**
     * Returns the last whitespace-separated word of a column.
     *
     * @param column the column text
     * @return the last word, or {@code null} if the column contains only whitespace
     */
    private static String lastWord(String column) {
        String[] words = column.split("\\s+");
        return words.length == 0 ? null : words[words.length - 1];
    }
}

