|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object cc.mallet.topics.LDAHyper
public class LDAHyper
Latent Dirichlet Allocation with optimized hyperparameters
Nested Class Summary | |
---|---|
class |
LDAHyper.Topication
Deprecated. |
Field Summary | |
---|---|
protected double[] |
alpha
Deprecated. |
protected Alphabet |
alphabet
Deprecated. |
protected double |
alphaSum
Deprecated. |
protected double |
beta
Deprecated. |
protected double |
betaSum
Deprecated. |
int |
burninPeriod
Deprecated. |
protected double[] |
cachedCoefficients
Deprecated. |
protected java.util.ArrayList<LDAHyper.Topication> |
data
Deprecated. |
static double |
DEFAULT_BETA
Deprecated. |
protected int[] |
docLengthCounts
Deprecated. |
protected java.text.NumberFormat |
formatter
Deprecated. |
int |
iterationsSoFar
Deprecated. |
int |
numIterations
Deprecated. |
protected int |
numTopics
Deprecated. |
protected int |
numTypes
Deprecated. |
protected int[] |
oneDocTopicCounts
Deprecated. |
int |
optimizeInterval
Deprecated. |
protected java.lang.String |
outputModelFilename
Deprecated. |
protected int |
outputModelInterval
Deprecated. |
protected boolean |
printLogLikelihood
Deprecated. |
protected Randoms |
random
Deprecated. |
int |
saveSampleInterval
Deprecated. |
protected int |
saveStateInterval
Deprecated. |
int |
showTopicsInterval
Deprecated. |
protected double |
smoothingOnlyMass
Deprecated. |
protected java.lang.String |
stateFilename
Deprecated. |
protected InstanceList |
testing
Deprecated. |
protected int[] |
tokensPerTopic
Deprecated. |
protected LabelAlphabet |
topicAlphabet
Deprecated. |
protected int[][] |
topicDocCounts
Deprecated. |
protected gnu.trove.TIntIntHashMap[] |
typeTopicCounts
Deprecated. |
int |
wordsPerTopic
Deprecated. |
Constructor Summary | |
---|---|
LDAHyper(int numberOfTopics)
Deprecated. |
|
LDAHyper(int numberOfTopics,
double alphaSum,
double beta)
Deprecated. |
|
LDAHyper(int numberOfTopics,
double alphaSum,
double beta,
Randoms random)
Deprecated. |
|
LDAHyper(LabelAlphabet topicAlphabet,
double alphaSum,
double beta,
Randoms random)
Deprecated. |
Method Summary | |
---|---|
void |
addInstances(InstanceList training)
Deprecated. |
void |
addInstances(InstanceList training,
java.util.List<LabelSequence> topics)
Deprecated. |
double |
empiricalLikelihood(int numSamples,
InstanceList testing)
Deprecated. |
void |
estimate()
Deprecated. |
void |
estimate(int iterationsThisRound)
Deprecated. |
Alphabet |
getAlphabet()
Deprecated. |
int |
getCountFeatureTopic(int featureIndex,
int topicIndex)
Deprecated. |
int |
getCountTokensPerTopic(int topicIndex)
Deprecated. |
java.util.ArrayList<LDAHyper.Topication> |
getData()
Deprecated. |
int |
getNumTopics()
Deprecated. |
IDSorter[] |
getSortedTopicWords(int topic)
Deprecated. |
LabelAlphabet |
getTopicAlphabet()
Deprecated. |
protected void |
initializeHistogramsAndCachedValues()
Deprecated. Gather statistics on the size of documents and create histograms for use in Dirichlet hyperparameter optimization. |
protected int |
instanceLength(Instance instance)
Deprecated. |
static void |
main(java.lang.String[] args)
Deprecated. |
double |
modelLogLikelihood()
Deprecated. |
void |
printDocumentTopics(java.io.File f)
Deprecated. |
void |
printDocumentTopics(java.io.PrintWriter pw)
Deprecated. |
void |
printDocumentTopics(java.io.PrintWriter pw,
double threshold,
int max)
Deprecated. |
void |
printState(java.io.File f)
Deprecated. |
void |
printState(java.io.PrintStream out)
Deprecated. |
void |
printTopWords(java.io.File file,
int numWords,
boolean useNewLines)
Deprecated. |
void |
printTopWords(java.io.PrintStream out,
int numWords,
boolean usingNewLines)
Deprecated. |
static LDAHyper |
read(java.io.File f)
Deprecated. |
protected void |
sampleTopicsForOneDoc(FeatureSequence tokenSequence,
FeatureSequence topicSequence,
boolean shouldSaveState,
boolean readjustTopicsAndStats)
Deprecated. |
void |
setBurninPeriod(int burninPeriod)
Deprecated. |
void |
setModelOutput(int interval,
java.lang.String filename)
Deprecated. |
void |
setNumIterations(int numIterations)
Deprecated. |
void |
setOptimizeInterval(int interval)
Deprecated. |
void |
setRandomSeed(int seed)
Deprecated. |
void |
setSaveState(int interval,
java.lang.String filename)
Deprecated. Define how often and where to save the state |
void |
setTestingInstances(InstanceList testing)
Deprecated. Held-out instances for empirical likelihood calculation |
void |
setTopicDisplay(int interval,
int n)
Deprecated. |
double |
topicLabelMutualInformation()
Deprecated. |
void |
topicXMLReport(java.io.PrintWriter out,
int numWords)
Deprecated. |
void |
topicXMLReportPhrases(java.io.PrintStream out,
int numWords)
Deprecated. |
void |
write(java.io.File f)
Deprecated. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected java.util.ArrayList<LDAHyper.Topication> data
protected Alphabet alphabet
protected LabelAlphabet topicAlphabet
protected int numTopics
protected int numTypes
protected double[] alpha
protected double alphaSum
protected double beta
protected double betaSum
public static final double DEFAULT_BETA
protected double smoothingOnlyMass
protected double[] cachedCoefficients
protected InstanceList testing
protected int[] oneDocTopicCounts
protected gnu.trove.TIntIntHashMap[] typeTopicCounts
protected int[] tokensPerTopic
protected int[] docLengthCounts
protected int[][] topicDocCounts
public int iterationsSoFar
public int numIterations
public int burninPeriod
public int saveSampleInterval
public int optimizeInterval
public int showTopicsInterval
public int wordsPerTopic
protected int outputModelInterval
protected java.lang.String outputModelFilename
protected int saveStateInterval
protected java.lang.String stateFilename
protected Randoms random
protected java.text.NumberFormat formatter
protected boolean printLogLikelihood
Constructor Detail |
---|
public LDAHyper(int numberOfTopics)
public LDAHyper(int numberOfTopics, double alphaSum, double beta)
public LDAHyper(int numberOfTopics, double alphaSum, double beta, Randoms random)
public LDAHyper(LabelAlphabet topicAlphabet, double alphaSum, double beta, Randoms random)
Method Detail |
---|
public Alphabet getAlphabet()
public LabelAlphabet getTopicAlphabet()
public int getNumTopics()
public java.util.ArrayList<LDAHyper.Topication> getData()
public int getCountFeatureTopic(int featureIndex, int topicIndex)
public int getCountTokensPerTopic(int topicIndex)
public void setTestingInstances(InstanceList testing)
public void setNumIterations(int numIterations)
public void setBurninPeriod(int burninPeriod)
public void setTopicDisplay(int interval, int n)
public void setRandomSeed(int seed)
public void setOptimizeInterval(int interval)
public void setModelOutput(int interval, java.lang.String filename)
public void setSaveState(int interval, java.lang.String filename)
interval - Save a copy of the state every `interval` iterations.
filename - Save the state to this file, with the iteration number as a suffix
protected int instanceLength(Instance instance)
public void addInstances(InstanceList training)
public void addInstances(InstanceList training, java.util.List<LabelSequence> topics)
protected void initializeHistogramsAndCachedValues()
public void estimate() throws java.io.IOException
java.io.IOException
public void estimate(int iterationsThisRound) throws java.io.IOException
java.io.IOException
protected void sampleTopicsForOneDoc(FeatureSequence tokenSequence, FeatureSequence topicSequence, boolean shouldSaveState, boolean readjustTopicsAndStats)
public IDSorter[] getSortedTopicWords(int topic)
public void printTopWords(java.io.File file, int numWords, boolean useNewLines) throws java.io.IOException
java.io.IOException
public void printTopWords(java.io.PrintStream out, int numWords, boolean usingNewLines)
public void topicXMLReport(java.io.PrintWriter out, int numWords)
public void topicXMLReportPhrases(java.io.PrintStream out, int numWords)
public void printDocumentTopics(java.io.File f) throws java.io.IOException
java.io.IOException
public void printDocumentTopics(java.io.PrintWriter pw)
public void printDocumentTopics(java.io.PrintWriter pw, double threshold, int max)
pw - A print writer
threshold - Only print topics with proportion greater than this number
max - Print no more than this many topics
public void printState(java.io.File f) throws java.io.IOException
java.io.IOException
public void printState(java.io.PrintStream out)
public void write(java.io.File f)
public static LDAHyper read(java.io.File f)
public double topicLabelMutualInformation()
public double empiricalLikelihood(int numSamples, InstanceList testing)
public double modelLogLikelihood()
public static void main(java.lang.String[] args) throws java.io.IOException
java.io.IOException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |