cc.mallet.topics
Class PolylingualTopicModel
java.lang.Object
cc.mallet.topics.PolylingualTopicModel
- All Implemented Interfaces:
- java.io.Serializable
public class PolylingualTopicModel
- extends java.lang.Object
- implements java.io.Serializable
Latent Dirichlet Allocation for loosely parallel corpora in arbitrary languages
- Author:
- David Mimno, Andrew McCallum
- See Also:
- Serialized Form
Method Summary |
void |
addInstances(InstanceList[] training)
|
void |
estimate()
|
void |
estimate(int iterationsThisRound)
|
java.util.ArrayList<PolylingualTopicModel.TopicAssignment> |
getData()
|
TopicInferencer |
getInferencer(int language)
Return a tool for estimating topic distributions for new documents |
int |
getNumTopics()
|
LabelAlphabet |
getTopicAlphabet()
|
void |
loadTestingIDs(java.io.File testingIDFile)
|
static void |
main(java.lang.String[] args)
|
double |
modelLogLikelihood()
|
void |
optimizeBetas()
|
void |
printDocumentTopics(java.io.File f)
|
void |
printDocumentTopics(java.io.PrintWriter pw)
|
void |
printDocumentTopics(java.io.PrintWriter pw,
double threshold,
int max)
|
void |
printState(java.io.File f)
|
void |
printState(java.io.PrintStream out)
|
void |
printTopWords(java.io.File file,
int numWords,
boolean useNewLines)
|
void |
printTopWords(java.io.PrintStream out,
int numWords,
boolean usingNewLines)
|
static PolylingualTopicModel |
read(java.io.File f)
|
protected void |
sampleTopicsForOneDoc(PolylingualTopicModel.TopicAssignment topicAssignment,
boolean shouldSaveState)
|
void |
setBurninPeriod(int burninPeriod)
|
void |
setModelOutput(int interval,
java.lang.String filename)
|
void |
setNumIterations(int numIterations)
|
void |
setOptimizeInterval(int interval)
|
void |
setRandomSeed(int seed)
|
void |
setSaveState(int interval,
java.lang.String filename)
Define how often and where to save the state |
void |
setTopicDisplay(int interval,
int n)
|
void |
write(java.io.File serializedModelFile)
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
data
protected java.util.ArrayList<PolylingualTopicModel.TopicAssignment> data
topicAlphabet
protected LabelAlphabet topicAlphabet
numStopwords
protected int numStopwords
numTopics
protected int numTopics
topicMask
protected int topicMask
topicBits
protected int topicBits
alphabets
protected Alphabet[] alphabets
vocabularySizes
protected int[] vocabularySizes
alpha
protected double[] alpha
alphaSum
protected double alphaSum
betas
protected double[] betas
betaSums
protected double[] betaSums
languageMaxTypeCounts
protected int[] languageMaxTypeCounts
DEFAULT_BETA
public static final double DEFAULT_BETA
- See Also:
- Constant Field Values
languageSmoothingOnlyMasses
protected double[] languageSmoothingOnlyMasses
languageCachedCoefficients
protected double[][] languageCachedCoefficients
oneDocTopicCounts
protected int[] oneDocTopicCounts
languageTypeTopicCounts
protected int[][][] languageTypeTopicCounts
languageTokensPerTopic
protected int[][] languageTokensPerTopic
docLengthCounts
protected int[] docLengthCounts
topicDocCounts
protected int[][] topicDocCounts
iterationsSoFar
protected int iterationsSoFar
numIterations
public int numIterations
burninPeriod
public int burninPeriod
saveSampleInterval
public int saveSampleInterval
optimizeInterval
public int optimizeInterval
showTopicsInterval
public int showTopicsInterval
wordsPerTopic
public int wordsPerTopic
saveModelInterval
protected int saveModelInterval
modelFilename
protected java.lang.String modelFilename
saveStateInterval
protected int saveStateInterval
stateFilename
protected java.lang.String stateFilename
random
protected Randoms random
formatter
protected java.text.NumberFormat formatter
printLogLikelihood
protected boolean printLogLikelihood
PolylingualTopicModel
public PolylingualTopicModel(int numberOfTopics)
PolylingualTopicModel
public PolylingualTopicModel(int numberOfTopics,
double alphaSum)
PolylingualTopicModel
public PolylingualTopicModel(int numberOfTopics,
double alphaSum,
Randoms random)
PolylingualTopicModel
public PolylingualTopicModel(LabelAlphabet topicAlphabet,
double alphaSum,
Randoms random)
loadTestingIDs
public void loadTestingIDs(java.io.File testingIDFile)
throws java.io.IOException
- Throws:
java.io.IOException
getTopicAlphabet
public LabelAlphabet getTopicAlphabet()
getNumTopics
public int getNumTopics()
getData
public java.util.ArrayList<PolylingualTopicModel.TopicAssignment> getData()
setNumIterations
public void setNumIterations(int numIterations)
setBurninPeriod
public void setBurninPeriod(int burninPeriod)
setTopicDisplay
public void setTopicDisplay(int interval,
int n)
setRandomSeed
public void setRandomSeed(int seed)
setOptimizeInterval
public void setOptimizeInterval(int interval)
setModelOutput
public void setModelOutput(int interval,
java.lang.String filename)
setSaveState
public void setSaveState(int interval,
java.lang.String filename)
- Define how often and where to save the state
- Parameters:
interval
- Save a copy of the state every interval iterations.
filename
- Save the state to this file, with the iteration number as a suffix
addInstances
public void addInstances(InstanceList[] training)
estimate
public void estimate()
throws java.io.IOException
- Throws:
java.io.IOException
estimate
public void estimate(int iterationsThisRound)
throws java.io.IOException
- Throws:
java.io.IOException
optimizeBetas
public void optimizeBetas()
sampleTopicsForOneDoc
protected void sampleTopicsForOneDoc(PolylingualTopicModel.TopicAssignment topicAssignment,
boolean shouldSaveState)
printTopWords
public void printTopWords(java.io.File file,
int numWords,
boolean useNewLines)
throws java.io.IOException
- Throws:
java.io.IOException
printTopWords
public void printTopWords(java.io.PrintStream out,
int numWords,
boolean usingNewLines)
printDocumentTopics
public void printDocumentTopics(java.io.File f)
throws java.io.IOException
- Throws:
java.io.IOException
printDocumentTopics
public void printDocumentTopics(java.io.PrintWriter pw)
printDocumentTopics
public void printDocumentTopics(java.io.PrintWriter pw,
double threshold,
int max)
- Parameters:
pw
- A print writer
threshold
- Only print topics with proportion greater than this number
max
- Print no more than this many topics
printState
public void printState(java.io.File f)
throws java.io.IOException
- Throws:
java.io.IOException
printState
public void printState(java.io.PrintStream out)
modelLogLikelihood
public double modelLogLikelihood()
getInferencer
public TopicInferencer getInferencer(int language)
- Return a tool for estimating topic distributions for new documents
write
public void write(java.io.File serializedModelFile)
read
public static PolylingualTopicModel read(java.io.File f)
throws java.lang.Exception
- Throws:
java.lang.Exception
main
public static void main(java.lang.String[] args)
throws java.io.IOException
- Throws:
java.io.IOException