/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.classify.tui;

import cc.mallet.pipe.CharSequence2TokenSequence;
import cc.mallet.pipe.FeatureSequence2AugmentableFeatureVector;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Target2Label;
import cc.mallet.pipe.TokenSequence2FeatureSequence;
import cc.mallet.pipe.TokenSequence2FeatureSequenceWithBigrams;
import cc.mallet.pipe.TokenSequenceLowercase;
import cc.mallet.pipe.TokenSequenceRemoveNonAlpha;
import cc.mallet.pipe.TokenSequenceRemoveStopwords;
import cc.mallet.pipe.iterator.CsvIterator;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CharSequenceLexer;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.Reader;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Command-line tool that reads instances from a text file (one instance per
 * line), passes them through a processing pipe and serializes the resulting
 * {@link InstanceList}.
 *
 * <p>Each input line is matched against the {@code --line-regex} pattern; the
 * {@code --name}, {@code --label} and {@code --data} options select which regex
 * group supplies each part of the instance. Passing {@code -} as the input or
 * output file selects standard input or standard output respectively.
 */
public class Csv2Vectors {
    private static final Logger logger = MalletLogger.getLogger(Csv2Vectors.class.getName());

    static CommandOption.File inputFile = new CommandOption.File(
            Csv2Vectors.class, "input", "FILE", true, null,
            "The file containing data to be classified, one instance per line", null);

    static CommandOption.File outputFile = new CommandOption.File(
            Csv2Vectors.class, "output", "FILE", true, new File("text.vectors"),
            "Write the instance list to this file; Using - indicates stdout.", null);

    static CommandOption.String lineRegex = new CommandOption.String(
            Csv2Vectors.class, "line-regex", "REGEX", true, "^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$",
            "Regular expression containing regex-groups for label, name and data.", null);

    static CommandOption.Integer labelOption = new CommandOption.Integer(
            Csv2Vectors.class, "label", "INTEGER", true, 2,
            "The index of the group containing the label string.", null);

    static CommandOption.Integer nameOption = new CommandOption.Integer(
            Csv2Vectors.class, "name", "INTEGER", true, 1,
            "The index of the group containing the instance name.", null);

    static CommandOption.Integer dataOption = new CommandOption.Integer(
            Csv2Vectors.class, "data", "INTEGER", true, 3,
            "The index of the group containing the data.", null);

    static CommandOption.File usePipeFromVectorsFile = new CommandOption.File(
            Csv2Vectors.class, "use-pipe-from", "FILE", true, new File("text.vectors"),
            "Use the pipe and alphabets from a previously created vectors file. Allows the creation, for example, of a test set of vectors that are compatible with a previously created set of training vectors", null);

    static CommandOption.Boolean keepSequence = new CommandOption.Boolean(
            Csv2Vectors.class, "keep-sequence", "[TRUE|FALSE]", false, false,
            "If true, final data will be a FeatureSequence rather than a FeatureVector.", null);

    static CommandOption.Boolean keepSequenceBigrams = new CommandOption.Boolean(
            Csv2Vectors.class, "keep-sequence-bigrams", "[TRUE|FALSE]", false, false,
            "If true, final data will be a FeatureSequenceWithBigrams rather than a FeatureVector.", null);

    static CommandOption.Boolean removeStopWords = new CommandOption.Boolean(
            Csv2Vectors.class, "remove-stopwords", "[TRUE|FALSE]", false, false,
            "If true, remove common \"stop words\" from the text.", null);

    static CommandOption.Boolean preserveCase = new CommandOption.Boolean(
            Csv2Vectors.class, "preserve-case", "[TRUE|FALSE]", false, false,
            "If true, do not force all strings to lowercase.", null);

    /**
     * Entry point: parses the command-line options, builds (or reuses) the
     * instance pipe, reads the input through it and writes the resulting
     * instance list to the output.
     *
     * @param args command-line arguments; when empty, the usage summary is
     *             printed and the JVM exits with status -1
     * @throws FileNotFoundException if the input or an output file cannot be opened
     * @throws IOException if reading the input or writing the output fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        CommandOption.setSummary(Csv2Vectors.class, "A tool for creating instance lists of feature vectors from comma-separated-values");
        CommandOption.process(Csv2Vectors.class, args);
        if (args.length == 0) {
            CommandOption.getList(Csv2Vectors.class).printUsage(false);
            System.exit(-1);
        }
        // The option object itself is a static field and can never be null; what
        // has to be checked is its value, whose default is null when --input is
        // not supplied.
        if (inputFile.value == null) {
            System.err.println("You must include `--input FILE ...' in order to specify a file containing the instances, one per line.");
            System.exit(-1);
        }

        // Either reuse the pipe stored in an earlier vectors file or build a new one.
        InstanceList previousInstanceList = null;
        Pipe instancePipe;
        if (usePipeFromVectorsFile.wasInvoked()) {
            previousInstanceList = InstanceList.load(usePipeFromVectorsFile.value);
            instancePipe = previousInstanceList.getPipe();
        } else {
            instancePipe = buildDefaultPipe();
        }

        InstanceList ilist = new InstanceList(instancePipe);
        Reader fileReader = inputFile.value.toString().equals("-")
                ? new InputStreamReader(System.in)
                : new FileReader(inputFile.value);
        try {
            ilist.addThruPipe(new CsvIterator(fileReader, Pattern.compile(lineRegex.value),
                    dataOption.value, labelOption.value, nameOption.value));
        } finally {
            // Release the input even when a line fails to parse or the pipe throws.
            fileReader.close();
        }

        ObjectOutputStream oos = outputFile.value.toString().equals("-")
                ? new ObjectOutputStream(System.out)
                : new ObjectOutputStream(new FileOutputStream(outputFile.value));
        writeAndClose(oos, ilist);

        if (usePipeFromVectorsFile.wasInvoked()) {
            System.out.println(" output usepipe ilist pipe instance id =" + previousInstanceList.getPipe().getInstanceId());
            // Write the previously loaded list back to the file it came from.
            // NOTE(review): presumably because passing new data through the shared
            // pipe can grow its alphabets -- confirm against InstanceList/Pipe.
            writeAndClose(new ObjectOutputStream(new FileOutputStream(usePipeFromVectorsFile.value)),
                    previousInstanceList);
        }
    }

    /**
     * Builds the processing pipe used when no previous vectors file is supplied.
     * The stages are selected by the keep-sequence, keep-sequence-bigrams,
     * remove-stopwords and preserve-case options; a {@link Noop} stands in for
     * every stage that is switched off.
     */
    private static Pipe buildDefaultPipe() {
        boolean bigrams = keepSequenceBigrams.value;
        return new SerialPipes(new Pipe[]{
            // Convert the target field into a label.
            new Target2Label(),
            // Tokenize; bigram mode uses the LEX_NONWHITESPACE_CLASSES lexer pattern.
            bigrams ? new CharSequence2TokenSequence(CharSequenceLexer.LEX_NONWHITESPACE_CLASSES) : new CharSequence2TokenSequence(),
            // Lowercase unless the caller asked to preserve case.
            preserveCase.value ? new Noop() : new TokenSequenceLowercase(),
            // Non-alphabetic tokens are only removed in bigram mode.
            bigrams ? new TokenSequenceRemoveNonAlpha(true) : new Noop(),
            // Optional stop-word removal.
            removeStopWords.value ? new TokenSequenceRemoveStopwords(false, bigrams) : new Noop(),
            // Map tokens to feature indices, with or without bigrams.
            bigrams ? new TokenSequence2FeatureSequenceWithBigrams() : new TokenSequence2FeatureSequence(),
            // Collapse to a feature vector unless a sequence was requested.
            keepSequence.value || bigrams ? new Noop() : new FeatureSequence2AugmentableFeatureVector()});
    }

    /**
     * Serializes {@code instances} to {@code out} and closes the stream, also
     * when the write fails.
     */
    private static void writeAndClose(ObjectOutputStream out, InstanceList instances) throws IOException {
        try {
            out.writeObject(instances);
        } finally {
            out.close();
        }
    }
}

