|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object cc.mallet.classify.FeatureConstraintUtil
public class FeatureConstraintUtil
Utility functions for creating feature constraints that can be used with GE training.
Constructor Summary | |
---|---|
FeatureConstraintUtil()
|
Method Summary | |
---|---|
static double[][] |
getFeatureLabelCounts(InstanceList list,
boolean useValues)
|
static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> |
labelFeatures(InstanceList list,
java.util.ArrayList<java.lang.Integer> features)
|
static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> |
labelFeatures(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean reject)
Label features using the heuristic described in "Learning from Labeled Features using Generalized Expectation Criteria" by Gregory Druck, Gideon Mann, and Andrew McCallum. |
static java.util.HashMap<java.lang.Integer,double[]> |
readConstraintsFromFile(java.lang.String filename,
InstanceList data)
Reads feature constraints from a file, whether they are stored using Strings or indices. |
static java.util.HashMap<java.lang.Integer,double[]> |
readConstraintsFromFileIndex(java.lang.String filename,
InstanceList data)
Reads feature constraints stored using indices from a file. |
static java.util.HashMap<java.lang.Integer,double[]> |
readConstraintsFromFileString(java.lang.String filename,
InstanceList data)
Reads feature constraints stored using strings from a file. |
static java.util.HashMap<java.lang.Integer,double[][]> |
readRangeConstraintsFromFile(java.lang.String filename,
InstanceList data)
Reads range constraints stored using strings from a file. |
static java.util.ArrayList<java.lang.Integer> |
selectFeaturesByInfoGain(InstanceList list,
int numFeatures)
Select features with the highest information gain. |
static java.util.ArrayList<java.lang.Integer> |
selectTopLDAFeatures(int numSelFeatures,
ParallelTopicModel lda,
Alphabet alphabet)
Select top features in LDA topics. |
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features)
|
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean normalize)
|
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean useValues,
boolean normalize)
Set target distributions using estimates from data. |
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingFeatureVoting(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures,
InstanceList trainingData)
Set target distributions using the feature voting heuristic described in "Learning from Labeled Features using Generalized Expectation Criteria" by Gregory Druck, Gideon Mann, and Andrew McCallum. |
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingHeuristic(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures,
int numLabels,
double majorityProb)
Set target distributions using the "Schapire" heuristic described in "Learning from Labeled Features using Generalized Expectation Criteria" by Gregory Druck, Gideon Mann, and Andrew McCallum. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public FeatureConstraintUtil()
Method Detail |
---|
public static java.util.HashMap<java.lang.Integer,double[][]> readRangeConstraintsFromFile(java.lang.String filename, InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.HashMap<java.lang.Integer,double[]> readConstraintsFromFile(java.lang.String filename, InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.HashMap<java.lang.Integer,double[]> readConstraintsFromFileString(java.lang.String filename, InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.HashMap<java.lang.Integer,double[]> readConstraintsFromFileIndex(java.lang.String filename, InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.ArrayList<java.lang.Integer> selectFeaturesByInfoGain(InstanceList list, int numFeatures)
list - InstanceList for computing information gain.
numFeatures - Number of features to select.
public static java.util.ArrayList<java.lang.Integer> selectTopLDAFeatures(int numSelFeatures, ParallelTopicModel lda, Alphabet alphabet)
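numSelFeatures - Number of features to select.
lda - The LDA model (a ParallelTopicModel in the signature; the original comment names this parameter ldaEst and describes it as an LDAEstimatePr which provides an interface to an LDA model).
seqAlphabet - The alphabet for the sequence dataset, which may be different from the vector dataset alphabet. (Note: this parameter does not appear in the method signature above.)
alphabet - The vector dataset alphabet.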
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingData(InstanceList list, java.util.ArrayList<java.lang.Integer> features)
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingData(InstanceList list, java.util.ArrayList<java.lang.Integer> features, boolean normalize)
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingData(InstanceList list, java.util.ArrayList<java.lang.Integer> features, boolean useValues, boolean normalize)
list - InstanceList used to estimate targets.
features - List of features for constraints.
normalize - Whether to normalize by feature counts.
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingHeuristic(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures, int numLabels, double majorityProb)
labeledFeatures - HashMap of feature indices to lists of label indices for that feature.
numLabels - Total number of labels.
majorityProb - Probability mass divided among majority labels.
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingFeatureVoting(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures, InstanceList trainingData)
labeledFeatures - HashMap of feature indices to lists of label indices for that feature.
trainingData - InstanceList to use for computing expectations with feature voting.
public static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labelFeatures(InstanceList list, java.util.ArrayList<java.lang.Integer> features, boolean reject)
list - InstanceList used to compute statistics for labeling features.
features - List of features to label.
reject - Whether to reject labeling features.
public static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labelFeatures(InstanceList list, java.util.ArrayList<java.lang.Integer> features)
public static double[][] getFeatureLabelCounts(InstanceList list, boolean useValues)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |