|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  cc.mallet.classify.FeatureConstraintUtil
public class FeatureConstraintUtil
Utility functions for creating feature constraints that can be used with GE training.
| Constructor Summary | |
|---|---|
FeatureConstraintUtil()
|
|
| Method Summary | |
|---|---|
static double[][] |
getFeatureLabelCounts(InstanceList list,
boolean useValues)
|
static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> |
labelFeatures(InstanceList list,
java.util.ArrayList<java.lang.Integer> features)
|
static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> |
labelFeatures(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean reject)
Label features using the heuristic described in "Learning from Labeled Features using Generalized Expectation Criteria" by Gregory Druck, Gideon Mann, Andrew McCallum. |
static java.util.HashMap<java.lang.Integer,double[]> |
readConstraintsFromFile(java.lang.String filename,
InstanceList data)
Reads feature constraints from a file, whether they are stored using Strings or indices. |
static java.util.HashMap<java.lang.Integer,double[]> |
readConstraintsFromFileIndex(java.lang.String filename,
InstanceList data)
Reads feature constraints stored using indices from a file. |
static java.util.HashMap<java.lang.Integer,double[]> |
readConstraintsFromFileString(java.lang.String filename,
InstanceList data)
Reads feature constraints stored using strings from a file. |
static java.util.HashMap<java.lang.Integer,double[][]> |
readRangeConstraintsFromFile(java.lang.String filename,
InstanceList data)
Reads range constraints stored using strings from a file. |
static java.util.ArrayList<java.lang.Integer> |
selectFeaturesByInfoGain(InstanceList list,
int numFeatures)
Select features with the highest information gain. |
static java.util.ArrayList<java.lang.Integer> |
selectTopLDAFeatures(int numSelFeatures,
ParallelTopicModel lda,
Alphabet alphabet)
Select top features in LDA topics. |
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features)
|
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean normalize)
|
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean useValues,
boolean normalize)
Set target distributions using estimates from data. |
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingFeatureVoting(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures,
InstanceList trainingData)
Set target distributions using the feature voting heuristic described in "Learning from Labeled Features using Generalized Expectation Criteria" by Gregory Druck, Gideon Mann, Andrew McCallum. |
static java.util.HashMap<java.lang.Integer,double[]> |
setTargetsUsingHeuristic(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures,
int numLabels,
double majorityProb)
Set target distributions using the "Schapire" heuristic described in "Learning from Labeled Features using Generalized Expectation Criteria" by Gregory Druck, Gideon Mann, Andrew McCallum. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public FeatureConstraintUtil()
| Method Detail |
|---|
public static java.util.HashMap<java.lang.Integer,double[][]> readRangeConstraintsFromFile(java.lang.String filename,
InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.HashMap<java.lang.Integer,double[]> readConstraintsFromFile(java.lang.String filename,
InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.HashMap<java.lang.Integer,double[]> readConstraintsFromFileString(java.lang.String filename,
InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.HashMap<java.lang.Integer,double[]> readConstraintsFromFileIndex(java.lang.String filename,
InstanceList data)
filename - File with feature constraints.
data - InstanceList used for alphabets.
public static java.util.ArrayList<java.lang.Integer> selectFeaturesByInfoGain(InstanceList list,
int numFeatures)
list - InstanceList for computing information gain.
numFeatures - Number of features to select.
public static java.util.ArrayList<java.lang.Integer> selectTopLDAFeatures(int numSelFeatures,
ParallelTopicModel lda,
Alphabet alphabet)
numSelFeatures - Number of features to select.
lda - ParallelTopicModel which provides an interface to an LDA model.
seqAlphabet - The alphabet for the sequence dataset, which may be different from the vector dataset alphabet.
alphabet - The vector dataset alphabet.
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features)
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean normalize)
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingData(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean useValues,
boolean normalize)
list - InstanceList used to estimate targets.
features - List of features for constraints.
normalize - Whether to normalize by feature counts.
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingHeuristic(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures,
int numLabels,
double majorityProb)
labeledFeatures - HashMap of feature indices to lists of label indices for that feature.
numLabels - Total number of labels.
majorityProb - Probability mass divided among majority labels.
public static java.util.HashMap<java.lang.Integer,double[]> setTargetsUsingFeatureVoting(java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labeledFeatures,
InstanceList trainingData)
labeledFeatures - HashMap of feature indices to lists of label indices for that feature.
trainingData - InstanceList to use for computing expectations with feature voting.
public static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labelFeatures(InstanceList list,
java.util.ArrayList<java.lang.Integer> features,
boolean reject)
list - InstanceList used to compute statistics for labeling features.
features - List of features to label.
reject - Whether to reject labeling features.
public static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> labelFeatures(InstanceList list,
java.util.ArrayList<java.lang.Integer> features)
public static double[][] getFeatureLabelCounts(InstanceList list,
boolean useValues)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||