/*
 * Decompiled with CFR 0.152.
 */
package tratz.semantics.srl;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import tratz.cmdline.CommandLineOptions;
import tratz.cmdline.CommandLineOptionsParser;
import tratz.cmdline.ParsedCommandLine;
import tratz.featgen.MultiStepFeatureGenerator;
import tratz.jwni.IndexEntry;
import tratz.jwni.POS;
import tratz.jwni.WordNet;
import tratz.parse.io.ConllxSentenceReader;
import tratz.parse.transform.VchTransformer;
import tratz.parse.types.Parse;
import tratz.parse.types.Token;
import tratz.semantics.srl.SentenceReaderForSRL;

/**
 * Command-line routine that turns SRL-annotated sentences into feature files
 * for predicate classification.
 *
 * <p>For every input file it reads sentences with {@link SentenceReaderForSRL},
 * optionally reads the matching automatically parsed sentence, and for each
 * token that passes the filters below it generates a sorted feature list with
 * {@link MultiStepFeatureGenerator}. Instances are grouped by the token's verb
 * lemma (WordNet index entry lemma, or the lower-cased token text when WordNet
 * has no entry) and each group is written to a file of that name inside the
 * output directory. A separate "truth" file receives one
 * {@code <sentenceNum>_<tokenNum> TAB <lexSense>} line per visited token whose
 * part of speech is not one of IN, TO, RP, RB, PRP.
 */
public class FeatureExtractionRoutinePredicates {
    /** Part-of-speech tags a token must have to be eligible for feature generation. */
    public static Set<String> POS_TO_INCLUDE = new HashSet<String>(Arrays.asList("JJ", "VBN", "VBD", "VB", "VBP", "VBZ", "VBG"));
    public static final String OPT_INPUT_FILES = "input";
    public static final String OPT_OUTPUT_DIR = "outputdir";
    public static final String OPT_AUTOPARSED_FILES = "autoparsedfiles";
    public static final String OPT_WORDNET_DIR = "wndir";
    public static final String OPT_WFR_RULES = "wfr";
    public static final String OPT_FEATURE_RULES = "fer";
    public static final String OPT_COMBO_RULES = "comborules";
    public static final String OPT_IS_FOR_TESTING = "isfortesting";
    public static final String OPT_TRUTH_FILE = "truthfile";
    public static final String OPT_PROPBANK_FRAMES_FILES = "propbankframes";

    /** Regex (full match) of part-of-speech tags that are never emitted, neither to the truth file nor as instances. */
    private static final String EXCLUDED_POS_REGEX = "IN|TO|RP|RB|PRP";

    /** Control character (0x18) separating the fields of an instance line. */
    private static final char FIELD_SEPARATOR = '\u0018';

    /**
     * Declares the command-line options understood by {@link #main(String[])}.
     *
     * @return the option definitions
     */
    private static CommandLineOptions createOptions() {
        CommandLineOptions cmdOpts = new CommandLineOptions();
        cmdOpts.addOption(OPT_INPUT_FILES, "file(s)", "file(s) containing the srl info");
        cmdOpts.addOption(OPT_OUTPUT_DIR, "file", "directory for output files");
        cmdOpts.addOption(OPT_AUTOPARSED_FILES, "file(s)", "file(s) containing versions of the files parsed using an automatic parser");
        cmdOpts.addOption(OPT_WORDNET_DIR, "file", "the dictionary (dict) directory of WordNet");
        cmdOpts.addOption(OPT_WFR_RULES, "file", "the file containing the word-finding rules");
        cmdOpts.addOption(OPT_FEATURE_RULES, "file", "the file containing the feature extraction rules");
        cmdOpts.addOption(OPT_COMBO_RULES, "file", "the file containing the combination rules");
        cmdOpts.addOption(OPT_IS_FOR_TESTING, "boolean", "indicates if the features being generated are for testing");
        cmdOpts.addOption(OPT_TRUTH_FILE, "file", "file that will contain two columns (1) the list of ids (2) the correct assignments");
        cmdOpts.addOption(OPT_PROPBANK_FRAMES_FILES, "file", "directory containg the PropBank frame files");
        return cmdOpts;
    }

    /**
     * Collects the PropBank frame names, i.e. the names of the {@code .xml}
     * files in the given directory with everything from the first
     * {@code ".xml"} onwards removed.
     *
     * @param propBankFramesDir directory holding the frame files
     * @return the set of frame names (possibly empty)
     * @throws IllegalArgumentException if the directory cannot be listed
     */
    private static Set<String> loadFrameNames(File propBankFramesDir) {
        File[] frameFiles = propBankFramesDir.listFiles();
        if (frameFiles == null) {
            // listFiles() yields null for a missing/unreadable directory; fail with a clear message instead of an NPE.
            throw new IllegalArgumentException("Cannot list PropBank frames directory: " + propBankFramesDir);
        }
        Set<String> frames = new HashSet<String>();
        for (File frameFile : frameFiles) {
            String fileName = frameFile.getName();
            if (!fileName.endsWith(".xml")) {
                continue;
            }
            frames.add(fileName.substring(0, fileName.indexOf(".xml")));
        }
        return frames;
    }

    /**
     * Tells whether a lemma contains a character that disqualifies it from
     * being used as an output file name.
     *
     * @param name candidate output file name
     * @return {@code true} if the name contains '.', '/', '\' or ','
     */
    private static boolean hasDisallowedNameChars(String name) {
        return name.contains(".") || name.contains("/") || name.contains("\\") || name.contains(",");
    }

    /**
     * Writes each accumulated instance buffer to a file named after its key
     * inside {@code outputDir}, creating parent directories as needed.
     *
     * @param outputDir directory receiving the files
     * @param buffers   instance lines keyed by output file name
     * @throws Exception if a file cannot be opened for writing
     */
    private static void writeFeatureFiles(File outputDir, Map<String, StringBuilder> buffers) throws Exception {
        for (Map.Entry<String, StringBuilder> buffer : buffers.entrySet()) {
            File outfile = new File(outputDir, buffer.getKey());
            outfile.getAbsoluteFile().getParentFile().mkdirs();
            PrintWriter writer = new PrintWriter(new FileWriter(outfile));
            try {
                writer.println(buffer.getValue().toString());
            } finally {
                writer.close();
            }
        }
    }

    /**
     * Entry point: parses the options declared in {@link #createOptions()},
     * generates the features and writes the truth file and the per-lemma
     * feature files.
     *
     * @param args command-line arguments
     * @throws IllegalArgumentException if the PropBank frames directory cannot
     *         be listed or fewer auto-parsed files than input files are given
     * @throws Exception on any I/O, parsing or feature-generation failure
     */
    public static void main(String[] args) throws Exception {
        ParsedCommandLine cmdLine = new CommandLineOptionsParser().parseOptions(FeatureExtractionRoutinePredicates.createOptions(), args);
        String inputFiles = cmdLine.getStringValue(OPT_INPUT_FILES);
        File outputDir = new File(cmdLine.getStringValue(OPT_OUTPUT_DIR));
        String autoparsedFiles = cmdLine.getStringValue(OPT_AUTOPARSED_FILES);
        String wordNetDir = cmdLine.getStringValue(OPT_WORDNET_DIR);
        String wfrListFile = cmdLine.getStringValue(OPT_WFR_RULES);
        String featRulesFile = cmdLine.getStringValue(OPT_FEATURE_RULES);
        String comboRulesFile = cmdLine.getStringValue(OPT_COMBO_RULES);
        boolean generateUsingAutoParsed = autoparsedFiles != null;
        boolean isForTesting = cmdLine.getBooleanValue(OPT_IS_FOR_TESTING);
        File truthFile = new File(cmdLine.getStringValue(OPT_TRUTH_FILE));
        File propBankFramesDir = new File(cmdLine.getStringValue(OPT_PROPBANK_FRAMES_FILES));

        String[] inFiles = inputFiles.split(File.pathSeparator);
        String[] autoFiles = autoparsedFiles == null ? null : autoparsedFiles.split(File.pathSeparator);
        if (autoFiles != null && autoFiles.length < inFiles.length) {
            // Input and auto-parsed files are paired by position; catch a short list before any output is touched.
            throw new IllegalArgumentException("Expected at least " + inFiles.length + " auto-parsed file(s) but got " + autoFiles.length);
        }

        Set<String> framesSet = loadFrameNames(propBankFramesDir);

        // The instance is discarded; presumably the constructor registers the
        // singleton later obtained through WordNet.getInstance() — TODO confirm.
        new WordNet(new File(wordNetDir));
        VchTransformer vchTransformer = new VchTransformer();
        MultiStepFeatureGenerator featGen = new MultiStepFeatureGenerator(wfrListFile, featRulesFile, comboRulesFile);
        ConllxSentenceReader conllxReader = new ConllxSentenceReader();

        Map<String, StringBuilder> dependencyTypeToWriter = new HashMap<String, StringBuilder>();

        // Opened only after all fallible setup so a setup failure does not truncate the truth file.
        PrintWriter truthWriter = new PrintWriter(new FileWriter(truthFile));
        try {
            for (int f = 0; f < inFiles.length; ++f) {
                BufferedReader goldReader = new BufferedReader(new FileReader(inFiles[f]));
                try {
                    BufferedReader autoParseReader = autoFiles == null ? null : new BufferedReader(new FileReader(autoFiles[f]));
                    try {
                        SentenceReaderForSRL srlReader = new SentenceReaderForSRL();
                        Object[] readResult;
                        int snum = 0;
                        while ((readResult = srlReader.readSentence(goldReader)) != null && readResult[0] != null) {
                            ++snum;
                            Parse goldParse = (Parse)readResult[0];
                            Parse autoParse = null;
                            if (autoParseReader != null) {
                                autoParse = conllxReader.readSentence(autoParseReader);
                                vchTransformer.performTransformation(autoParse);
                            }
                            List<Token> tokens = goldParse.getSentence().getTokens();
                            List<Token> autoTokens = autoParse == null ? null : autoParse.getSentence().getTokens();
                            // Features come from the auto parse whenever auto-parsed files were supplied.
                            Parse featureParse = generateUsingAutoParsed ? autoParse : goldParse;

                            // NOTE(review): tokNum is 1-based and the bound is exclusive, so the last
                            // token of every sentence is never visited. Left unchanged because fixing it
                            // would add lines to the truth file — confirm whether this is intentional.
                            for (int tokNum = 1; tokNum < tokens.size(); ++tokNum) {
                                Token goldToken = tokens.get(tokNum - 1);
                                Token autoToken = autoTokens == null ? null : autoTokens.get(tokNum - 1);
                                IndexEntry entry = WordNet.getInstance().lookupIndexEntry(POS.VERB, goldToken.getText());
                                String depWriterName = entry == null ? goldToken.getText().toLowerCase() : entry.getLemma();
                                if (depWriterName.equals("be")) {
                                    continue;
                                }
                                String clazz = goldToken.getLexSense();
                                String label = snum + "_" + tokNum;
                                String goldPos = goldToken.getPos();
                                boolean excludedPos = goldPos.matches(EXCLUDED_POS_REGEX);
                                if (!excludedPos) {
                                    truthWriter.println(label + "\t" + clazz);
                                }

                                // A token is a gold candidate when it has an eligible POS and either an
                                // annotated sense or a known PropBank frame for its lemma.
                                boolean goldCandidate = (clazz != null || framesSet.contains(depWriterName)) && POS_TO_INCLUDE.contains(goldPos);
                                // In testing mode an eligible POS on the auto-parsed token is enough.
                                // Without an auto token the override cannot apply (previously an NPE).
                                boolean autoCandidate = isForTesting && autoToken != null && POS_TO_INCLUDE.contains(autoToken.getPos());
                                if ((!goldCandidate && !autoCandidate) || excludedPos || hasDisallowedNameChars(depWriterName)) {
                                    continue;
                                }

                                if (!goldToken.getText().toLowerCase().startsWith("" + depWriterName.charAt(0)) && clazz != null) {
                                    System.err.println("Potential mismatch: " + goldToken.getText().toLowerCase() + " " + depWriterName.charAt(0) + " " + clazz);
                                }

                                StringBuilder writer = dependencyTypeToWriter.get(depWriterName);
                                if (writer == null) {
                                    writer = new StringBuilder();
                                    dependencyTypeToWriter.put(depWriterName, writer);
                                }
                                // Instance line: id, class, then the sorted features, each followed by the separator.
                                writer.append(label).append(FIELD_SEPARATOR).append(clazz).append(FIELD_SEPARATOR);
                                Set<String> feats = featGen.generateFeatures(featureParse.getSentence().getTokens(), featureParse, tokNum - 1);
                                List<String> featList = new ArrayList<String>(feats);
                                Collections.sort(featList);
                                for (String feat : featList) {
                                    writer.append(feat);
                                    writer.append(FIELD_SEPARATOR);
                                }
                                writer.append('\n');
                            }
                        }
                    } finally {
                        if (autoParseReader != null) {
                            autoParseReader.close();
                        }
                    }
                } finally {
                    goldReader.close();
                }
            }
        } finally {
            truthWriter.close();
        }

        writeFeatureFiles(outputDir, dependencyTypeToWriter);
    }
}

