cc.mallet.topics
Class LDAHyper

java.lang.Object
  extended by cc.mallet.topics.LDAHyper
All Implemented Interfaces:
java.io.Serializable
Direct Known Subclasses:
DMRTopicModel, LDAStream

Deprecated. Use ParallelTopicModel instead, which uses substantially faster data structures even for non-parallel operation.

public class LDAHyper
extends java.lang.Object
implements java.io.Serializable

Latent Dirichlet Allocation with optimized hyperparameters

Author:
David Mimno, Andrew McCallum
See Also:
Serialized Form

Nested Class Summary
 class LDAHyper.Topication
          Deprecated.  
 
Field Summary
protected  double[] alpha
          Deprecated.  
protected  Alphabet alphabet
          Deprecated.  
protected  double alphaSum
          Deprecated.  
protected  double beta
          Deprecated.  
protected  double betaSum
          Deprecated.  
 int burninPeriod
          Deprecated.  
protected  double[] cachedCoefficients
          Deprecated.  
protected  java.util.ArrayList<LDAHyper.Topication> data
          Deprecated.  
static double DEFAULT_BETA
          Deprecated.  
protected  int[] docLengthCounts
          Deprecated.  
protected  java.text.NumberFormat formatter
          Deprecated.  
 int iterationsSoFar
          Deprecated.  
 int numIterations
          Deprecated.  
protected  int numTopics
          Deprecated.  
protected  int numTypes
          Deprecated.  
protected  int[] oneDocTopicCounts
          Deprecated.  
 int optimizeInterval
          Deprecated.  
protected  java.lang.String outputModelFilename
          Deprecated.  
protected  int outputModelInterval
          Deprecated.  
protected  boolean printLogLikelihood
          Deprecated.  
protected  Randoms random
          Deprecated.  
 int saveSampleInterval
          Deprecated.  
protected  int saveStateInterval
          Deprecated.  
 int showTopicsInterval
          Deprecated.  
protected  double smoothingOnlyMass
          Deprecated.  
protected  java.lang.String stateFilename
          Deprecated.  
protected  InstanceList testing
          Deprecated.  
protected  int[] tokensPerTopic
          Deprecated.  
protected  LabelAlphabet topicAlphabet
          Deprecated.  
protected  int[][] topicDocCounts
          Deprecated.  
protected  gnu.trove.TIntIntHashMap[] typeTopicCounts
          Deprecated.  
 int wordsPerTopic
          Deprecated.  
 
Constructor Summary
LDAHyper(int numberOfTopics)
          Deprecated.  
LDAHyper(int numberOfTopics, double alphaSum, double beta)
          Deprecated.  
LDAHyper(int numberOfTopics, double alphaSum, double beta, Randoms random)
          Deprecated.  
LDAHyper(LabelAlphabet topicAlphabet, double alphaSum, double beta, Randoms random)
          Deprecated.  
 
Method Summary
 void addInstances(InstanceList training)
          Deprecated.  
 void addInstances(InstanceList training, java.util.List<LabelSequence> topics)
          Deprecated.  
 double empiricalLikelihood(int numSamples, InstanceList testing)
          Deprecated.  
 void estimate()
          Deprecated.  
 void estimate(int iterationsThisRound)
          Deprecated.  
 Alphabet getAlphabet()
          Deprecated.  
 int getCountFeatureTopic(int featureIndex, int topicIndex)
          Deprecated.  
 int getCountTokensPerTopic(int topicIndex)
          Deprecated.  
 java.util.ArrayList<LDAHyper.Topication> getData()
          Deprecated.  
 int getNumTopics()
          Deprecated.  
 IDSorter[] getSortedTopicWords(int topic)
          Deprecated.  
 LabelAlphabet getTopicAlphabet()
          Deprecated.  
protected  void initializeHistogramsAndCachedValues()
          Deprecated. Gather statistics on the size of documents and create histograms for use in Dirichlet hyperparameter optimization.
protected  int instanceLength(Instance instance)
          Deprecated.  
static void main(java.lang.String[] args)
          Deprecated.  
 double modelLogLikelihood()
          Deprecated.  
 void printDocumentTopics(java.io.File f)
          Deprecated.  
 void printDocumentTopics(java.io.PrintWriter pw)
          Deprecated.  
 void printDocumentTopics(java.io.PrintWriter pw, double threshold, int max)
          Deprecated.  
 void printState(java.io.File f)
          Deprecated.  
 void printState(java.io.PrintStream out)
          Deprecated.  
 void printTopWords(java.io.File file, int numWords, boolean useNewLines)
          Deprecated.  
 void printTopWords(java.io.PrintStream out, int numWords, boolean usingNewLines)
          Deprecated.  
static LDAHyper read(java.io.File f)
          Deprecated.  
protected  void sampleTopicsForOneDoc(FeatureSequence tokenSequence, FeatureSequence topicSequence, boolean shouldSaveState, boolean readjustTopicsAndStats)
          Deprecated.  
 void setBurninPeriod(int burninPeriod)
          Deprecated.  
 void setModelOutput(int interval, java.lang.String filename)
          Deprecated.  
 void setNumIterations(int numIterations)
          Deprecated.  
 void setOptimizeInterval(int interval)
          Deprecated.  
 void setRandomSeed(int seed)
          Deprecated.  
 void setSaveState(int interval, java.lang.String filename)
          Deprecated. Defines how often and where to save the state.
 void setTestingInstances(InstanceList testing)
          Deprecated. Sets the held-out instances used for empirical likelihood calculation.
 void setTopicDisplay(int interval, int n)
          Deprecated.  
 double topicLabelMutualInformation()
          Deprecated.  
 void topicXMLReport(java.io.PrintWriter out, int numWords)
          Deprecated.  
 void topicXMLReportPhrases(java.io.PrintStream out, int numWords)
          Deprecated.  
 void write(java.io.File f)
          Deprecated.  
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

data

protected java.util.ArrayList<LDAHyper.Topication> data
Deprecated. 

alphabet

protected Alphabet alphabet
Deprecated. 

topicAlphabet

protected LabelAlphabet topicAlphabet
Deprecated. 

numTopics

protected int numTopics
Deprecated. 

numTypes

protected int numTypes
Deprecated. 

alpha

protected double[] alpha
Deprecated. 

alphaSum

protected double alphaSum
Deprecated. 

beta

protected double beta
Deprecated. 

betaSum

protected double betaSum
Deprecated. 

DEFAULT_BETA

public static final double DEFAULT_BETA
Deprecated. 
See Also:
Constant Field Values

smoothingOnlyMass

protected double smoothingOnlyMass
Deprecated. 

cachedCoefficients

protected double[] cachedCoefficients
Deprecated. 

testing

protected InstanceList testing
Deprecated. 

oneDocTopicCounts

protected int[] oneDocTopicCounts
Deprecated. 

typeTopicCounts

protected gnu.trove.TIntIntHashMap[] typeTopicCounts
Deprecated. 

tokensPerTopic

protected int[] tokensPerTopic
Deprecated. 

docLengthCounts

protected int[] docLengthCounts
Deprecated. 

topicDocCounts

protected int[][] topicDocCounts
Deprecated. 

iterationsSoFar

public int iterationsSoFar
Deprecated. 

numIterations

public int numIterations
Deprecated. 

burninPeriod

public int burninPeriod
Deprecated. 

saveSampleInterval

public int saveSampleInterval
Deprecated. 

optimizeInterval

public int optimizeInterval
Deprecated. 

showTopicsInterval

public int showTopicsInterval
Deprecated. 

wordsPerTopic

public int wordsPerTopic
Deprecated. 

outputModelInterval

protected int outputModelInterval
Deprecated. 

outputModelFilename

protected java.lang.String outputModelFilename
Deprecated. 

saveStateInterval

protected int saveStateInterval
Deprecated. 

stateFilename

protected java.lang.String stateFilename
Deprecated. 

random

protected Randoms random
Deprecated. 

formatter

protected java.text.NumberFormat formatter
Deprecated. 

printLogLikelihood

protected boolean printLogLikelihood
Deprecated. 
Constructor Detail

LDAHyper

public LDAHyper(int numberOfTopics)
Deprecated. 

LDAHyper

public LDAHyper(int numberOfTopics,
                double alphaSum,
                double beta)
Deprecated. 

LDAHyper

public LDAHyper(int numberOfTopics,
                double alphaSum,
                double beta,
                Randoms random)
Deprecated. 

LDAHyper

public LDAHyper(LabelAlphabet topicAlphabet,
                double alphaSum,
                double beta,
                Randoms random)
Deprecated. 
Method Detail

getAlphabet

public Alphabet getAlphabet()
Deprecated. 

getTopicAlphabet

public LabelAlphabet getTopicAlphabet()
Deprecated. 

getNumTopics

public int getNumTopics()
Deprecated. 

getData

public java.util.ArrayList<LDAHyper.Topication> getData()
Deprecated. 

getCountFeatureTopic

public int getCountFeatureTopic(int featureIndex,
                                int topicIndex)
Deprecated. 

getCountTokensPerTopic

public int getCountTokensPerTopic(int topicIndex)
Deprecated. 

setTestingInstances

public void setTestingInstances(InstanceList testing)
Deprecated. 
Sets the held-out instances used for empirical likelihood calculation.


setNumIterations

public void setNumIterations(int numIterations)
Deprecated. 

setBurninPeriod

public void setBurninPeriod(int burninPeriod)
Deprecated. 

setTopicDisplay

public void setTopicDisplay(int interval,
                            int n)
Deprecated. 

setRandomSeed

public void setRandomSeed(int seed)
Deprecated. 

setOptimizeInterval

public void setOptimizeInterval(int interval)
Deprecated. 

setModelOutput

public void setModelOutput(int interval,
                           java.lang.String filename)
Deprecated. 

setSaveState

public void setSaveState(int interval,
                         java.lang.String filename)
Deprecated. 
Defines how often and where to save the state.

Parameters:
interval - The number of iterations between saved copies of the state; a copy is saved every interval iterations.
filename - The file to which the state is saved, with the iteration number appended as a suffix.

instanceLength

protected int instanceLength(Instance instance)
Deprecated. 

addInstances

public void addInstances(InstanceList training)
Deprecated. 

addInstances

public void addInstances(InstanceList training,
                         java.util.List<LabelSequence> topics)
Deprecated. 

initializeHistogramsAndCachedValues

protected void initializeHistogramsAndCachedValues()
Deprecated. 
Gather statistics on the size of documents and create histograms for use in Dirichlet hyperparameter optimization.


estimate

public void estimate()
              throws java.io.IOException
Deprecated. 
Throws:
java.io.IOException

estimate

public void estimate(int iterationsThisRound)
              throws java.io.IOException
Deprecated. 
Throws:
java.io.IOException

sampleTopicsForOneDoc

protected void sampleTopicsForOneDoc(FeatureSequence tokenSequence,
                                     FeatureSequence topicSequence,
                                     boolean shouldSaveState,
                                     boolean readjustTopicsAndStats)
Deprecated. 

getSortedTopicWords

public IDSorter[] getSortedTopicWords(int topic)
Deprecated. 

printTopWords

public void printTopWords(java.io.File file,
                          int numWords,
                          boolean useNewLines)
                   throws java.io.IOException
Deprecated. 
Throws:
java.io.IOException

printTopWords

public void printTopWords(java.io.PrintStream out,
                          int numWords,
                          boolean usingNewLines)
Deprecated. 

topicXMLReport

public void topicXMLReport(java.io.PrintWriter out,
                           int numWords)
Deprecated. 

topicXMLReportPhrases

public void topicXMLReportPhrases(java.io.PrintStream out,
                                  int numWords)
Deprecated. 

printDocumentTopics

public void printDocumentTopics(java.io.File f)
                         throws java.io.IOException
Deprecated. 
Throws:
java.io.IOException

printDocumentTopics

public void printDocumentTopics(java.io.PrintWriter pw)
Deprecated. 

printDocumentTopics

public void printDocumentTopics(java.io.PrintWriter pw,
                                double threshold,
                                int max)
Deprecated. 
Parameters:
pw - A print writer
threshold - Only topics with a proportion greater than this number are printed.
max - The maximum number of topics to print.

printState

public void printState(java.io.File f)
                throws java.io.IOException
Deprecated. 
Throws:
java.io.IOException

printState

public void printState(java.io.PrintStream out)
Deprecated. 

write

public void write(java.io.File f)
Deprecated. 

read

public static LDAHyper read(java.io.File f)
Deprecated. 

topicLabelMutualInformation

public double topicLabelMutualInformation()
Deprecated. 

empiricalLikelihood

public double empiricalLikelihood(int numSamples,
                                  InstanceList testing)
Deprecated. 

modelLogLikelihood

public double modelLogLikelihood()
Deprecated. 

main

public static void main(java.lang.String[] args)
                 throws java.io.IOException
Deprecated. 
Throws:
java.io.IOException