/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.topics.tui;

import cc.mallet.topics.PAM4L;
import cc.mallet.topics.ParallelTopicModel;
import cc.mallet.topics.TopicalNGrams;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.Randoms;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;

/**
 * Command-line tool that estimates a topic model from a serialized
 * {@link InstanceList} and writes whichever diagnostics and model files were
 * requested through the options declared below.
 *
 * <p>Exactly one of three trainers is run, selected in this order:
 * <ol>
 *   <li>{@code --use-pam}: {@link PAM4L};</li>
 *   <li>{@code --use-ngrams}: {@link TopicalNGrams};</li>
 *   <li>otherwise: {@link ParallelTopicModel}, optionally resumed from
 *       {@code --input-model}.</li>
 * </ol>
 * If both {@code --use-pam} and {@code --use-ngrams} are set, the PAM branch wins
 * because it is tested first.
 */
public class Vectors2Topics {
    // ---- Input and output files --------------------------------------------
    // NOTE(review): the boolean/default positions of the CommandOption constructors
    // are defined in cc.mallet.util.CommandOption, which is not visible here.
    static CommandOption.String inputFile = new CommandOption.String(Vectors2Topics.class, "input", "FILENAME", true, null, "The filename from which to read the list of training instances.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    // Declared so the option is accepted on the command line; no code in this class reads it.
    static CommandOption.String testingFile = new CommandOption.String(Vectors2Topics.class, "testing", "FILENAME", false, null, "The filename from which to read the list of instances for empirical likelihood calculation.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    static CommandOption.String outputModelFilename = new CommandOption.String(Vectors2Topics.class, "output-model", "FILENAME", true, null, "The filename in which to write the binary topic model at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String inputModelFilename = new CommandOption.String(Vectors2Topics.class, "input-model", "FILENAME", true, null, "The filename from which to read the binary topic model to which the --input will be appended, allowing incremental training.  By default this is null, indicating that no file will be read.", null);
    static CommandOption.String inferencerFilename = new CommandOption.String(Vectors2Topics.class, "inferencer-filename", "FILENAME", true, null, "A topic inferencer applies a previously trained topic model to new documents.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String stateFile = new CommandOption.String(Vectors2Topics.class, "output-state", "FILENAME", true, null, "The filename in which to write the Gibbs sampling state after at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicKeysFile = new CommandOption.String(Vectors2Topics.class, "output-topic-keys", "FILENAME", true, null, "The filename in which to write the top words for each topic and any Dirichlet parameters.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicWordWeightsFile = new CommandOption.String(Vectors2Topics.class, "topic-word-weights-file", "FILENAME", true, null, "The filename in which to write unnormalized weights for every topic and word type.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String wordTopicCountsFile = new CommandOption.String(Vectors2Topics.class, "word-topic-counts-file", "FILENAME", true, null, "The filename in which to write a sparse representation of topic-word assignments.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String docTopicsFile = new CommandOption.String(Vectors2Topics.class, "output-doc-topics", "FILENAME", true, null, "The filename in which to write the topic proportions per document, at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Double docTopicsThreshold = new CommandOption.Double(Vectors2Topics.class, "doc-topics-threshold", "DECIMAL", true, 0.0, "When writing topic proportions per document with --output-doc-topics, do not print topics with proportions less than this threshold value.", null);
    static CommandOption.Integer docTopicsMax = new CommandOption.Integer(Vectors2Topics.class, "doc-topics-max", "INTEGER", true, -1, "When writing topic proportions per document with --output-doc-topics, do not print more than INTEGER number of topics.  A negative value indicates that all topics should be printed.", null);

    // ---- Training, sampling and progress-reporting controls -------------------
    static CommandOption.Integer numTopics = new CommandOption.Integer(Vectors2Topics.class, "num-topics", "INTEGER", true, 10, "The number of topics to fit.", null);
    static CommandOption.Integer numThreads = new CommandOption.Integer(Vectors2Topics.class, "num-threads", "INTEGER", true, 1, "The number of threads for parallel training.", null);
    static CommandOption.Integer numIterations = new CommandOption.Integer(Vectors2Topics.class, "num-iterations", "INTEGER", true, 1000, "The number of iterations of Gibbs sampling.", null);
    static CommandOption.Integer randomSeed = new CommandOption.Integer(Vectors2Topics.class, "random-seed", "INTEGER", true, 0, "The random seed for the Gibbs sampler.  Default is 0, which will use the clock.", null);
    static CommandOption.Integer topWords = new CommandOption.Integer(Vectors2Topics.class, "num-top-words", "INTEGER", true, 20, "The number of most probable words to print for each topic after model estimation.", null);
    static CommandOption.Integer showTopicsInterval = new CommandOption.Integer(Vectors2Topics.class, "show-topics-interval", "INTEGER", true, 50, "The number of iterations between printing a brief summary of the topics so far.", null);
    static CommandOption.Integer outputModelInterval = new CommandOption.Integer(Vectors2Topics.class, "output-model-interval", "INTEGER", true, 0, "The number of iterations between writing the model (and its Gibbs sampling state) to a binary file.  You must also set the --output-model to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.Integer outputStateInterval = new CommandOption.Integer(Vectors2Topics.class, "output-state-interval", "INTEGER", true, 0, "The number of iterations between writing the sampling state to a text file.  You must also set the --output-state to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.Integer optimizeInterval = new CommandOption.Integer(Vectors2Topics.class, "optimize-interval", "INTEGER", true, 0, "The number of iterations between reestimating dirichlet hyperparameters.", null);
    static CommandOption.Integer optimizeBurnIn = new CommandOption.Integer(Vectors2Topics.class, "optimize-burn-in", "INTEGER", true, 200, "The number of iterations to run before first estimating dirichlet hyperparameters.", null);

    // ---- Model selection and model hyperparameters -----------------------------
    static CommandOption.Boolean useNgrams = new CommandOption.Boolean(Vectors2Topics.class, "use-ngrams", "true|false", false, false, "Rather than using LDA, use Topical-N-Grams, which models phrases.", null);
    static CommandOption.Boolean usePAM = new CommandOption.Boolean(Vectors2Topics.class, "use-pam", "true|false", false, false, "Rather than using LDA, use Pachinko Allocation Model, which models topical correlations.You cannot do this and also --use-ngrams.", null);
    static CommandOption.Double alpha = new CommandOption.Double(Vectors2Topics.class, "alpha", "DECIMAL", true, 50.0, "Alpha parameter: smoothing over topic distribution.", null);
    static CommandOption.Double beta = new CommandOption.Double(Vectors2Topics.class, "beta", "DECIMAL", true, 0.01, "Beta parameter: smoothing over unigram distribution.", null);
    static CommandOption.Double gamma = new CommandOption.Double(Vectors2Topics.class, "gamma", "DECIMAL", true, 0.01, "Gamma parameter: smoothing over bigram distribution", null);
    static CommandOption.Double delta = new CommandOption.Double(Vectors2Topics.class, "delta", "DECIMAL", true, 0.03, "Delta parameter: smoothing over choice of unigram/bigram", null);
    static CommandOption.Double delta1 = new CommandOption.Double(Vectors2Topics.class, "delta1", "DECIMAL", true, 0.2, "Topic N-gram smoothing parameter", null);
    static CommandOption.Double delta2 = new CommandOption.Double(Vectors2Topics.class, "delta2", "DECIMAL", true, 1000.0, "Topic N-gram smoothing parameter", null);
    static CommandOption.Integer pamNumSupertopics = new CommandOption.Integer(Vectors2Topics.class, "pam-num-supertopics", "INTEGER", true, 10, "When using the Pachinko Allocation Model (PAM) set the number of supertopics.  Typically this is about half the number of subtopics, although more may help.", null);
    static CommandOption.Integer pamNumSubtopics = new CommandOption.Integer(Vectors2Topics.class, "pam-num-subtopics", "INTEGER", true, 20, "When using the Pachinko Allocation Model (PAM) set the number of subtopics.", null);

    /**
     * Parses the command line into the static options above and runs the selected trainer.
     *
     * @param args command-line arguments, handed to {@link CommandOption#process}
     * @throws IOException if training or writing one of the requested output files fails
     * @throws IllegalArgumentException if the options are inconsistent (missing {@code --input},
     *         {@code --input-model} combined with PAM or n-grams) or the final model cannot be written
     */
    public static void main(String[] args) throws IOException {
        CommandOption.setSummary(Vectors2Topics.class, "A tool for estimating, saving and printing diagnostics for topic models, such as LDA.");
        CommandOption.process(Vectors2Topics.class, args);
        if (usePAM.value) {
            trainPam();
        } else if (useNgrams.value) {
            trainTopicalNGrams();
        } else {
            trainLda();
        }
    }

    /** Trains a {@link PAM4L} model and writes the state, doc-topics and model files that were requested. */
    private static void trainPam() throws IOException {
        // Validate before loading so an unsupported combination fails without reading the data.
        rejectInputModel("--use-pam");
        InstanceList instances = loadTrainingInstances();

        PAM4L pam = new PAM4L(pamNumSupertopics.value, pamNumSubtopics.value);
        // NOTE(review): the literal 50 is PAM4L.estimate's third argument; its meaning is not
        // visible in this file (presumably an optimization interval -- confirm against PAM4L).
        pam.estimate(instances, numIterations.value, 50, showTopicsInterval.value, outputModelInterval.value, outputModelFilename.value, newRandoms());
        pam.printTopWords(topWords.value, true);

        if (stateFile.value != null) {
            pam.printState(new File(stateFile.value));
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = openDocTopicsWriter()) {
                pam.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
            }
        }
        writeModelIfRequested(pam);
    }

    /** Trains a {@link TopicalNGrams} model and writes the state, doc-topics and model files that were requested. */
    private static void trainTopicalNGrams() throws IOException {
        // Validate before loading so an unsupported combination fails without reading the data.
        rejectInputModel("--use-ngrams");
        InstanceList instances = loadTrainingInstances();

        TopicalNGrams tng = new TopicalNGrams(numTopics.value, alpha.value, beta.value, gamma.value, delta.value, delta1.value, delta2.value);
        tng.estimate(instances, numIterations.value, showTopicsInterval.value, outputModelInterval.value, outputModelFilename.value, newRandoms());
        tng.printTopWords(topWords.value, true);

        if (stateFile.value != null) {
            tng.printState(new File(stateFile.value));
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = openDocTopicsWriter()) {
                tng.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
            }
        }
        writeModelIfRequested(tng);
    }

    /**
     * Trains (or resumes) a {@link ParallelTopicModel} and writes every requested output:
     * topic keys, sampling state, doc-topics, topic-word weights, word-topic counts,
     * the serialized model and the serialized inferencer.
     */
    private static void trainLda() throws IOException {
        ParallelTopicModel topicModel = loadOrCreateLdaModel();

        topicModel.setTopicDisplay(showTopicsInterval.value, topWords.value);
        topicModel.setNumIterations(numIterations.value);
        topicModel.setOptimizeInterval(optimizeInterval.value);
        topicModel.setBurninPeriod(optimizeBurnIn.value);
        // A seed of 0 means "not set": the model's own default seeding is left untouched.
        if (randomSeed.value != 0) {
            topicModel.setRandomSeed(randomSeed.value);
        }
        if (outputStateInterval.value != 0) {
            topicModel.setSaveState(outputStateInterval.value, stateFile.value);
        }
        if (outputModelInterval.value != 0) {
            topicModel.setSaveSerializedModel(outputModelInterval.value, outputModelFilename.value);
        }
        topicModel.setNumThreads(numThreads.value);
        topicModel.estimate();

        if (topicKeysFile.value != null) {
            topicModel.printTopWords(new File(topicKeysFile.value), topWords.value, false);
        }
        if (stateFile.value != null) {
            topicModel.printState(new File(stateFile.value));
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = openDocTopicsWriter()) {
                topicModel.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
            }
        }
        if (topicWordWeightsFile.value != null) {
            topicModel.printTopicWordWeights(new File(topicWordWeightsFile.value));
        }
        if (wordTopicCountsFile.value != null) {
            topicModel.printTypeTopicCounts(new File(wordTopicCountsFile.value));
        }
        writeModelIfRequested(topicModel);

        if (inferencerFilename.value != null) {
            // Best effort: a failure here is reported but must not discard the outputs already written.
            try {
                writeSerialized(topicModel.getInferencer(), inferencerFilename.value);
            } catch (Exception e) {
                System.err.println("Couldn't write topic inferencer to filename " + inferencerFilename.value + ": " + e);
            }
        }
    }

    /**
     * Returns the model to train: the one stored in {@code --input-model} when given,
     * otherwise a new model populated from {@code --input}. Exits the JVM with status 1
     * when a saved model cannot be restored.
     */
    private static ParallelTopicModel loadOrCreateLdaModel() throws IOException {
        if (inputModelFilename.value != null) {
            try {
                return ParallelTopicModel.read(new File(inputModelFilename.value));
            } catch (Exception e) {
                System.err.println("Unable to restore saved topic model " + inputModelFilename.value + ": " + e);
                System.exit(1);
                // System.exit does not return normally; this only satisfies the compiler.
                throw new IllegalStateException("System.exit returned unexpectedly", e);
            }
        }
        InstanceList training = loadTrainingInstances();
        ParallelTopicModel fresh = new ParallelTopicModel(numTopics.value, alpha.value, beta.value);
        fresh.addInstances(training);
        return fresh;
    }

    /**
     * Loads the instance list named by {@code --input} and announces it on stdout.
     *
     * @throws IllegalArgumentException if {@code --input} was not supplied
     */
    private static InstanceList loadTrainingInstances() throws IOException {
        if (inputFile.value == null) {
            // Without this check new File(null) fails with an unexplained NullPointerException.
            throw new IllegalArgumentException("--input is required: no training instance file was given.");
        }
        InstanceList instances = InstanceList.load(new File(inputFile.value));
        System.out.println("Data loaded.");
        return instances;
    }

    /** Fails if {@code --input-model} was given together with a trainer (named by {@code flag}) that cannot resume. */
    private static void rejectInputModel(String flag) {
        if (inputModelFilename.value != null) {
            throw new IllegalArgumentException("--input-model not supported with " + flag + ".");
        }
    }

    /** Builds the sampler's random source: clock-seeded when {@code --random-seed} is 0, otherwise seeded with it. */
    private static Randoms newRandoms() {
        return randomSeed.value == 0 ? new Randoms() : new Randoms(randomSeed.value);
    }

    /** Opens the {@code --output-doc-topics} file for writing; the caller is responsible for closing it. */
    private static PrintWriter openDocTopicsWriter() throws IOException {
        return new PrintWriter(new FileWriter(new File(docTopicsFile.value)));
    }

    /**
     * Serializes {@code model} to {@code --output-model} when that option is set.
     *
     * @throws IllegalArgumentException wrapping the underlying failure if the file cannot be written
     */
    private static void writeModelIfRequested(Object model) {
        String filename = outputModelFilename.value;
        if (filename == null) {
            return;
        }
        try {
            writeSerialized(model, filename);
        } catch (Exception e) {
            // Broad catch kept on purpose: any serialization failure is reported the same way.
            throw new IllegalArgumentException("Couldn't write topic model to filename " + filename, e);
        }
    }

    /** Java-serializes {@code object} to {@code filename}, closing the streams even when writing fails. */
    private static void writeSerialized(Object object, String filename) throws IOException {
        try (FileOutputStream fileOut = new FileOutputStream(filename);
             ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
            objectOut.writeObject(object);
        }
    }
}

