Documentation
¶
Overview ¶
Package plugin provides plugin functions for sticker.
Index ¶
- Constants
- Variables
- func BinaryClassifierTrainer_L1SVC_DualCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
- func BinaryClassifierTrainer_L2SVC_PrimalCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
- func BinaryRankerTrainer_L1SVC_PrimalSGD(X sticker.FeatureVectors, pairIndices [][2]int, pairMargins []float32, ...) (*sticker.BinaryClassifier, error)
- func BipartitionWeightedGraph(n uint64, minLeftRight uint64, A []float32) ([]bool, error)
- func DatasetNoneFeatureSubSampler(ds *sticker.Dataset, seed int64) (*sticker.Dataset, error)
- func DatasetSqrtFeatureSubSampler(ds *sticker.Dataset, seed int64) (*sticker.Dataset, error)
- func DecodeLabelBoost(model *LabelBoost, r io.Reader) error
- func DecodeLabelBoostWithGobDecoder(model *LabelBoost, decoder *gob.Decoder) error
- func DecodeLabelForest(forest *LabelForest, r io.Reader) error
- func DecodeLabelForestWithGobDecoder(forest *LabelForest, decoder *gob.Decoder) error
- func DecodeLabelTree(tree *LabelTree, r io.Reader) error
- func DecodeLabelTreeWithGobDecoder(tree *LabelTree, decoder *gob.Decoder) error
- func EncodeLabelBoost(model *LabelBoost, w io.Writer) error
- func EncodeLabelBoostWithGobEncoder(model *LabelBoost, encoder *gob.Encoder) error
- func EncodeLabelForest(forest *LabelForest, w io.Writer) error
- func EncodeLabelForestWithGobEncoder(forest *LabelForest, encoder *gob.Encoder) error
- func EncodeLabelTree(tree *LabelTree, w io.Writer) error
- func EncodeLabelTreeWithGobEncoder(tree *LabelTree, encoder *gob.Encoder) error
- func InitializePlugin()
- func LeftRightAssignInitializer_topLabelGraph(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, ...) []bool
- func LeftRightAssignInitializer_topLabelTree(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, ...) []bool
- func LeftRightAssignInitializer_uniform(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, ...) []bool
- func LeftRightAssigner_greedyBottomRanks(ds *sticker.Dataset, delta []bool, debug *log.Logger) error
- func LeftRightAssigner_nDCG(ds *sticker.Dataset, delta []bool, debug *log.Logger) error
- func LeftRightAssigner_none(ds *sticker.Dataset, delta []bool, debug *log.Logger) error
- func Painter_TopLabelSubSet(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32
- func Painter_TopLabels(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32
- func SelectItemsAMAP(weights []float32, W float32) (float32, []int, error)
- type BinaryRankerTrainer
- type DatasetEntrySubSampler
- type DatasetFeatureSubSampler
- type DeterministicDatasetEntrySubSampler
- type LabelBoost
- type LabelBoostParameters
- type LabelForest
- func (forest *LabelForest) Classify(x sticker.FeatureVector) []uint64
- func (forest *LabelForest) ClassifyAll(X sticker.FeatureVectors) [][]uint64
- func (forest *LabelForest) ClassifyAllWithWeight(X sticker.FeatureVectors) ([][]uint64, [][]float32)
- func (forest *LabelForest) ClassifyWithWeight(x sticker.FeatureVector) ([]uint64, []float32)
- func (forest *LabelForest) GobEncode() ([]byte, error)
- func (forest *LabelForest) Predict(leafIds []uint64, K uint) sticker.LabelVector
- func (forest *LabelForest) PredictAll(leafIdsSlice [][]uint64, K uint) sticker.LabelVectors
- func (forest *LabelForest) PredictAllWithWeight(leafIdsSlice [][]uint64, weightsSlice [][]float32, K uint) sticker.LabelVectors
- func (forest *LabelForest) PredictWithWeight(leafIds []uint64, weights []float32, K uint) sticker.LabelVector
- type LabelTree
- func (tree *LabelTree) Classify(x sticker.FeatureVector) uint64
- func (tree *LabelTree) ClassifyAll(X sticker.FeatureVectors) []uint64
- func (tree *LabelTree) ClassifyAllWithWeight(X sticker.FeatureVectors) ([]uint64, []float32)
- func (tree *LabelTree) ClassifyWithWeight(x sticker.FeatureVector) (uint64, float32)
- func (tree *LabelTree) GobEncode() ([]byte, error)
- func (tree *LabelTree) IsTerminalLeaf(leafId uint64) bool
- func (tree *LabelTree) IsValidLeaf(leafId uint64) bool
- func (tree *LabelTree) Predict(leafId uint64, K uint) sticker.LabelVector
- func (tree *LabelTree) PredictAll(leafIdSlice []uint64, K uint) sticker.LabelVectors
- type LabelTreeParameters
- type LeftRightAssignInitializer
- type LeftRightAssigner
- type QueuePrioritizedByFloat32
- type QueuePrioritizedByFloat32Item
- type RandomDatasetEntrySubSampler
Constants ¶
const DefaultDatasetFeatureSubSamplerName = "none"
DefaultDatasetFeatureSubSamplerName is the default DatasetFeatureSubSampler name.
const DefaultLeftRightAssignInitializerName = "uniform"
DefaultLeftRightAssignInitializerName is the default LeftRightAssignInitializer name.
const DefaultLeftRightAssignerName = "nDCG"
DefaultLeftRightAssignerName is the default LeftRightAssigner name.
Variables ¶
var BinaryRankerTrainers = map[string]BinaryRankerTrainer{ "L1SVC_PrimalSGD": BinaryRankerTrainer_L1SVC_PrimalSGD, }
BinaryRankerTrainers is the map from the binary ranker trainer name to the corresponding binary ranker trainer.
var DatasetFeatureSubSamplers = map[string]DatasetFeatureSubSampler{ "none": DatasetNoneFeatureSubSampler, "sqrt": DatasetSqrtFeatureSubSampler, }
DatasetFeatureSubSamplers is the map from the sub-sampler name to the corresponding sub-sampler.
var LeftRightAssignInitializers = map[string]LeftRightAssignInitializer{ "topLabelGraph": LeftRightAssignInitializer_topLabelGraph, "topLabelTree": LeftRightAssignInitializer_topLabelTree, "uniform": LeftRightAssignInitializer_uniform, }
LeftRightAssignInitializers is the map from the initializer name to the corresponding left/right assignment initializer.
var LeftRightAssigners = map[string]LeftRightAssigner{ "greedyBottomRanks": LeftRightAssigner_greedyBottomRanks, "nDCG": LeftRightAssigner_nDCG, "none": LeftRightAssigner_none, }
LeftRightAssigners is the map from the assigner name to the left/right assigner.
var Painters = map[string]func(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32{ "topLabels": Painter_TopLabels, "topLabelSubSet": Painter_TopLabelSubSet, }
Painters is the map from the painter's name to the painter function. A painter takes the dataset, the corresponding margin matrix, and the maximum number of requested labels, then returns the slice of selected labels. debug is used for debug logs.
Functions ¶
func BinaryClassifierTrainer_L1SVC_DualCD ¶
func BinaryClassifierTrainer_L1SVC_DualCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
BinaryClassifierTrainer_L1SVC_DualCD trains a L1-Support Vector Classifier with Dual Coordinate Descent. This is registered to sticker.BinaryClassifierTrainers.
This function returns no error currently.
Reference: C. Hsieh, K. Chang, C. Lin, S. S. Keerthi, and S. Sundararajan. "A Dual Coordinate Descent Method for Large-Scale Linear SVM." Proceedings of the 25th international conference on Machine learning, ACM, 2008.
func BinaryClassifierTrainer_L2SVC_PrimalCD ¶
func BinaryClassifierTrainer_L2SVC_PrimalCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
BinaryClassifierTrainer_L2SVC_PrimalCD trains a L2-Support Vector Classifier with Primal Coordinate Descent. This is registered to sticker.BinaryClassifierTrainers.
This is not recommended, because even if the dataset is normalized while preserving its sparsity, it is too slow to converge due to its piece-wise quadratic form. It is difficult to control the scaling such that the magnitude of the first derivative equals its corresponding Newton step. Otherwise, the optimization would be slow even when the first derivative is not small enough. Furthermore, even if the optimization stops early, its performance is much worse than L1SVC_DualCD.
This function returns no error currently.
Reference: K. Chang, C. Hsieh, and C. Lin. "Coordinate Descent Method for Large-Scale L2-loss Linear Support Vector Machines." Journal of Machine Learning Research, vol. 9, pp. 1369-1398, 2008.
func BinaryRankerTrainer_L1SVC_PrimalSGD ¶
func BinaryRankerTrainer_L1SVC_PrimalSGD(X sticker.FeatureVectors, pairIndices [][2]int, pairMargins []float32, pairCs []float32, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
BinaryRankerTrainer_L1SVC_PrimalSGD trains a L1-Support Vector Ranker with primal stochastic gradient descent. This is registered to BinaryRankerTrainers.
This is the optimized implementation based on sticker.BinaryClassifierTrainer_L1SVC_PrimalSGD.
This function returns no error currently.
func BipartitionWeightedGraph ¶
BipartitionWeightedGraph bipartitions the given weighted graph with the 1-spectral clustering method, and returns the bool slice whose entry is true if the corresponding node is in the right. If the size of left or right is less than minLeftRight, this function won't try to bipartition.
This function returns an error if A is not a legal adjacency matrix.
func DatasetNoneFeatureSubSampler ¶
DatasetNoneFeatureSubSampler does not perform any feature sub-sampling, and returns the given dataset itself.
This function returns no error currently.
func DatasetSqrtFeatureSubSampler ¶
DatasetSqrtFeatureSubSampler sub-samples sqrt(J) features (J is the number of the used features), and returns the feature sub-sampled dataset. This is registered to sticker.DatasetFeatureSubSamplers.
This function returns no error currently.
func DecodeLabelBoost ¶
func DecodeLabelBoost(model *LabelBoost, r io.Reader) error
DecodeLabelBoost decodes LabelBoost from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelBoostWithGobDecoder.
This function returns an error in decoding.
func DecodeLabelBoostWithGobDecoder ¶
func DecodeLabelBoostWithGobDecoder(model *LabelBoost, decoder *gob.Decoder) error
DecodeLabelBoostWithGobDecoder decodes LabelBoost using decoder.
This function returns an error in decoding.
func DecodeLabelForest ¶
func DecodeLabelForest(forest *LabelForest, r io.Reader) error
DecodeLabelForest decodes LabelForest from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelForestWithGobDecoder.
This function returns an error in decoding.
func DecodeLabelForestWithGobDecoder ¶
func DecodeLabelForestWithGobDecoder(forest *LabelForest, decoder *gob.Decoder) error
DecodeLabelForestWithGobDecoder decodes LabelForest using decoder.
This function returns an error in decoding.
func DecodeLabelTree ¶
DecodeLabelTree decodes LabelTree from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelTreeWithGobDecoder.
This function returns an error in decoding.
func DecodeLabelTreeWithGobDecoder ¶
DecodeLabelTreeWithGobDecoder decodes LabelTree using decoder.
This function returns an error in decoding.
func EncodeLabelBoost ¶
func EncodeLabelBoost(model *LabelBoost, w io.Writer) error
EncodeLabelBoost encodes LabelBoost to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelBoostWithGobEncoder.
This function returns an error in encoding.
func EncodeLabelBoostWithGobEncoder ¶
func EncodeLabelBoostWithGobEncoder(model *LabelBoost, encoder *gob.Encoder) error
EncodeLabelBoostWithGobEncoder encodes LabelBoost using encoder.
This function returns an error in encoding.
func EncodeLabelForest ¶
func EncodeLabelForest(forest *LabelForest, w io.Writer) error
EncodeLabelForest encodes LabelForest to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelForestWithGobEncoder.
This function returns an error in encoding.
func EncodeLabelForestWithGobEncoder ¶
func EncodeLabelForestWithGobEncoder(forest *LabelForest, encoder *gob.Encoder) error
EncodeLabelForestWithGobEncoder encodes LabelForest using encoder.
This function returns an error in encoding.
func EncodeLabelTree ¶
EncodeLabelTree encodes LabelTree to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelTreeWithGobEncoder.
This function returns an error in encoding.
func EncodeLabelTreeWithGobEncoder ¶
EncodeLabelTreeWithGobEncoder encodes LabelTree using encoder.
This function returns an error in encoding.
func InitializePlugin ¶
func InitializePlugin()
InitializePlugin does nothing, because the init functions in this package register functions to sticker. Thus it is unnecessary for users to call any function in this package. Users can call this function for avoiding any import error.
func LeftRightAssignInitializer_topLabelGraph ¶
func LeftRightAssignInitializer_topLabelGraph(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool
LeftRightAssignInitializer_topLabelGraph returns the delta slice initialized with the cutting of the top-label graph. This is registered to sticker.LeftRightAssignInitializers.
func LeftRightAssignInitializer_topLabelTree ¶
func LeftRightAssignInitializer_topLabelTree(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool
LeftRightAssignInitializer_topLabelTree returns the delta slice initialized with the cutting of the top-labels tree. This is registered to sticker.LeftRightAssignInitializers.
func LeftRightAssignInitializer_uniform ¶
func LeftRightAssignInitializer_uniform(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool
LeftRightAssignInitializer_uniform returns the delta slice initialized with the samples from uniform probability distribution. This is registered to LeftRightAssignInitializers.
func LeftRightAssigner_greedyBottomRanks ¶
func LeftRightAssigner_greedyBottomRanks(ds *sticker.Dataset, delta []bool, debug *log.Logger) error
LeftRightAssigner_greedyBottomRanks assigns left or right labels by moving each entry that has the bottom-ranked labels from left to right. This is registered to sticker.LeftRightAssigners.
This function returns no error currently.
func LeftRightAssigner_nDCG ¶
LeftRightAssigner_nDCG assigns left or right on each label as maximizing the sum of left and right utilities with nDCGs. This is registered to LeftRightAssigners.
This function returns no error currently.
NOTICE: In calculating nDCG, this function uses 2 as the base of the logarithm because of precision.
func LeftRightAssigner_none ¶
LeftRightAssigner_none assigns left or right on each label with the given initialized delta, so the label assignment won't change from the given initialized delta. This is registered to LeftRightAssigners.
This function returns no error.
func Painter_TopLabelSubSet ¶
func Painter_TopLabelSubSet(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32
Painter_TopLabelSubSet is the painter that returns the most frequent top-K mis-classified co-occurring labels.
This is registered to Painters.
func Painter_TopLabels ¶
func Painter_TopLabels(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32
Painter_TopLabels is the painter that returns the most frequent top-K mis-classified labels.
This is registered to Painters.
func SelectItemsAMAP ¶
SelectItemsAMAP selects as many items as possible under the constraint that the sum of weights is at most W, and returns the sum of weights and the selected item ID slice.
This function returns an error if some weights are negative.
Types ¶
type BinaryRankerTrainer ¶
type BinaryRankerTrainer func(X sticker.FeatureVectors, pairIndices [][2]int, pairMargins []float32, C []float32, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
BinaryRankerTrainer is the type of binary ranker trainers. A trainer returns a new BinaryClassifier on positive/negative pair indices pairIndices on X with the specified pair margins. A negative value in pairIndices means the zero-vector. C is the penalty parameter slice for reweighting each entry. epsilon is the tolerance parameter for checking the convergence. debug is used for debug logs.
type DatasetEntrySubSampler ¶
type DatasetEntrySubSampler interface { // SubSample returns the index slice contained in the sub-sample with the given seed. SubSample(ds *sticker.Dataset, seed uint) []int }
DatasetEntrySubSampler is the interface for entry sub-sampler on the dataset.
func NewDeterministicDatasetEntrySubSampler ¶
func NewDeterministicDatasetEntrySubSampler(n uint) DatasetEntrySubSampler
NewDeterministicDatasetEntrySubSampler returns a new DeterministicDatasetEntrySubSampler.
func NewRandomDatasetEntrySubSampler ¶
func NewRandomDatasetEntrySubSampler(n uint) DatasetEntrySubSampler
NewRandomDatasetEntrySubSampler returns a new RandomDatasetEntrySubSampler.
type DatasetFeatureSubSampler ¶
DatasetFeatureSubSampler is the type of feature sub-samplers. A sub-sampler returns the dataset whose features are sub-sampled.
type DeterministicDatasetEntrySubSampler ¶
type DeterministicDatasetEntrySubSampler struct {
// contains filtered or unexported fields
}
DeterministicDatasetEntrySubSampler is a deterministic DatasetEntrySubSampler. The sub-sampler simply returns the sub-dataset with the given size in order of the given dataset. The seed is used as the sub-sample start index.
This implements interface DatasetEntrySubSampler.
type LabelBoost ¶
type LabelBoost struct { // Params is the used LabelBoostParameters. Params *LabelBoostParameters // Biases is the bias slice used by splitters on each boosting round. Biases []float32 // WeightLists is the weight sparse matrix used by splitters on each boosting round. // WeightLists is the map from the feature key to the (roundID, the weight on the feature of #roundID splitter) slice. // This data structure reduces the number of times that the classifier accesses the golang's map a lot. WeightLists map[uint32]sticker.KeyValues32 // LabelLists is the label list slice used in each boosting round. // Each label list has the labels stickered to the entry if the classifier at the round returns positive score on the entry. LabelLists []sticker.LabelVector // The following members are not required. // // Summaries is the summary object slice for each boosting round. // The entries in this summary are considered to provide compact and useful information in best-effort, so this specification would be loose and rapidly changing. Summaries []map[string]interface{} }
LabelBoost is the multi-label boosting model.
func TrainLabelBoost ¶
func TrainLabelBoost(ds *sticker.Dataset, params *LabelBoostParameters, debug *log.Logger) (*LabelBoost, error)
TrainLabelBoost returns a trained LabelBoost on the given dataset ds.
func (*LabelBoost) GobEncode ¶
func (model *LabelBoost) GobEncode() ([]byte, error)
GobEncode returns the error always, because users should encode large LabelBoost objects with EncodeLabelBoost.
func (*LabelBoost) Nrounds ¶
func (model *LabelBoost) Nrounds() uint
Nrounds returns the number of rounds.
func (*LabelBoost) Predict ¶
func (model *LabelBoost) Predict(x sticker.FeatureVector, K uint, T uint) sticker.LabelVector
Predict returns the top-K predicted labels for the given data point x with the first T rounds.
func (*LabelBoost) PredictAll ¶
func (model *LabelBoost) PredictAll(X sticker.FeatureVectors, K uint, T uint) sticker.LabelVectors
PredictAll returns the slice of the top-K predicted labels for each data point in X with the first T rounds.
type LabelBoostParameters ¶
type LabelBoostParameters struct { // RankerTrainerName is the used BinaryRankerTrainer name. RankerTrainerName string // C is the penalty parameter for BinaryRankerTrainer. C float32 // Epsilon is the tolerance parameter for BinaryClassifierTrainer. Epsilon float32 // NegativeSampleSize is the size of each negative sample for Multi-Label Ranking Hinge Boosting. // Specify 0 for Multi-Label Hinge Boosting. NegativeSampleSize uint // PainterK is the maximum number of the painted target label. PainterK uint // PainterName is the used Painter name. PainterName string // T is the maximum number of boosting rounds. T uint }
LabelBoostParameters is the parameters for LabelBoost.
func NewLabelBoostParameters ¶
func NewLabelBoostParameters() *LabelBoostParameters
NewLabelBoostParameters returns a LabelBoostParameters initialized with the default values.
type LabelForest ¶
type LabelForest struct { // TreeParams is the parameters for training each LabelTree. TreeParams *LabelTreeParameters // Trees is the slice of trained trees. Trees []*LabelTree // The following members are not required. // // Summary is the sub-sampling summary. // This summary is considered to provide compact and useful information in best-effort, so this specification would be loose and rapidly changing. Summary map[string]interface{} }
LabelForest is variously-modified FastXML (Prabhu+ 2014).
References:
(Prabhu+ 2014) Y. Prabhu, and M. Varma. "FastXML: A Fast, Accurate and Stable Tree-Classifier for Extreme Multi-Label Learning." Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 263--272, 2014.
func TrainLabelForest ¶
func TrainLabelForest(ds *sticker.Dataset, ntrees uint, subSampler DatasetEntrySubSampler, params *LabelTreeParameters, debug *log.Logger) (*LabelForest, error)
TrainLabelForest returns a trained LabelForest on ds with multiple go-routines. The number of go-routines is runtime.GOMAXPROCS.
This function returns the last error in training each tree in the forest by multiple go-routines.
func (*LabelForest) Classify ¶
func (forest *LabelForest) Classify(x sticker.FeatureVector) []uint64
Classify returns the leaf id slice for the given feature vector.
func (*LabelForest) ClassifyAll ¶
func (forest *LabelForest) ClassifyAll(X sticker.FeatureVectors) [][]uint64
ClassifyAll returns the slice of the leaf id slices for each feature vector.
func (*LabelForest) ClassifyAllWithWeight ¶
func (forest *LabelForest) ClassifyAllWithWeight(X sticker.FeatureVectors) ([][]uint64, [][]float32)
ClassifyAllWithWeight returns the slice of the leaf id slices and the weight slices for each feature vector.
func (*LabelForest) ClassifyWithWeight ¶
func (forest *LabelForest) ClassifyWithWeight(x sticker.FeatureVector) ([]uint64, []float32)
ClassifyWithWeight returns the leaf id slice and the weight slice for the given feature vector.
func (*LabelForest) GobEncode ¶
func (forest *LabelForest) GobEncode() ([]byte, error)
GobEncode returns the error always, because users should encode large LabelForest objects with EncodeLabelForest.
func (*LabelForest) Predict ¶
func (forest *LabelForest) Predict(leafIds []uint64, K uint) sticker.LabelVector
Predict returns the top-K labels for the given result of Classify.
func (*LabelForest) PredictAll ¶
func (forest *LabelForest) PredictAll(leafIdsSlice [][]uint64, K uint) sticker.LabelVectors
PredictAll returns the top-K labels for the given result of ClassifyAll.
func (*LabelForest) PredictAllWithWeight ¶
func (forest *LabelForest) PredictAllWithWeight(leafIdsSlice [][]uint64, weightsSlice [][]float32, K uint) sticker.LabelVectors
PredictAllWithWeight returns the top-K labels for the given result of ClassifyAllWithWeight.
func (*LabelForest) PredictWithWeight ¶
func (forest *LabelForest) PredictWithWeight(leafIds []uint64, weights []float32, K uint) sticker.LabelVector
PredictWithWeight returns the top-K labels for the given result of ClassifyWithWeight.
type LabelTree ¶
type LabelTree struct { // SplitterSet is the map from a leaf id to the splitter used in the leaf. // If the splitter of the leaf is not nil, the splitter is used for deciding whether x goes to the left or the right. // If it is nil, the leaf is terminal. SplitterSet map[uint64]*sticker.BinaryClassifier // LabelFreqSet is the map from a leaf id to the label frequency table in the leaf. // The table is constructed from the training dataset. // In the terminal leaf, it is used for prediction. LabelFreqSet map[uint64]sticker.SparseVector // The following members are not required. // // SummarySet is the map from a leaf id to the summary for the non-terminal leaf. // The entries in this summary are considered to provide compact and useful information in best-effort, so this specification would be loose and rapidly changing. SummarySet map[uint64]map[string]interface{} }
LabelTree is the data structure for trees in LabelForest. LabelTree can have at most 2^64 - 1 leaves.
func TrainLabelTree ¶
func TrainLabelTree(ds *sticker.Dataset, params *LabelTreeParameters, seed int64, debug *log.Logger) (*LabelTree, error)
TrainLabelTree returns a trained LabelTree on the given dataset. The 16 MSBs of seed are used as the tree id which is reported in the debug log.
This function returns an error if the height of the tree is greater than 64 or in training the tree.
func (*LabelTree) Classify ¶
func (tree *LabelTree) Classify(x sticker.FeatureVector) uint64
Classify returns the id of the leaf into which x falls.
func (*LabelTree) ClassifyAll ¶
func (tree *LabelTree) ClassifyAll(X sticker.FeatureVectors) []uint64
ClassifyAll returns the slice of the IDs of the leaves into which each entry of X falls.
func (*LabelTree) ClassifyAllWithWeight ¶
func (tree *LabelTree) ClassifyAllWithWeight(X sticker.FeatureVectors) ([]uint64, []float32)
ClassifyAllWithWeight returns the leaf ID slice and the weight slice for the leaves into which each entry of X falls.
func (*LabelTree) ClassifyWithWeight ¶
func (tree *LabelTree) ClassifyWithWeight(x sticker.FeatureVector) (uint64, float32)
ClassifyWithWeight returns the ID and the weight of the leaf into which x falls. The weight does not affect any prediction result on single trees; it affects ensembled trees.
func (*LabelTree) GobEncode ¶
GobEncode returns the error always, because users should encode large LabelTree objects with EncodeLabelTree.
func (*LabelTree) IsTerminalLeaf ¶
IsTerminalLeaf returns true if the leaf is terminal, otherwise false.
func (*LabelTree) IsValidLeaf ¶
IsValidLeaf returns true if the leaf id is valid, otherwise false.
func (*LabelTree) Predict ¶
func (tree *LabelTree) Predict(leafId uint64, K uint) sticker.LabelVector
Predict returns the top-K labels for the given result of Classify.
func (*LabelTree) PredictAll ¶
func (tree *LabelTree) PredictAll(leafIdSlice []uint64, K uint) sticker.LabelVectors
PredictAll returns the top-K labels for the given result of ClassifyAll.
type LabelTreeParameters ¶
type LabelTreeParameters struct { // AssignerName is the used LeftRightAssigner name. AssignerName string // AssignInitializerName is the used LeftRightAssignInitializer name. AssignInitializerName string // ClassifierTrainerName is the used BinaryClassifierTrainer name. ClassifierTrainerName string // C is the inverse of the penalty parameter used by BinaryClassifierTrainer. C float32 // Epsilon is the tolerance parameter used by BinaryClassifierTrainer. Epsilon float32 // FeatureSubSamplerName is the used DatasetFeatureSubSampler name. FeatureSubSamplerName string // K is the maximum number of labels in the distribution in each terminal leaf. K uint // MaxEntriesInLeaf is the maximum number of entries in each terminal leaf. MaxEntriesInLeaf uint // SuppVecK is the maximum number of support vectors in summary of LabelTree. SuppVecK uint }
LabelTreeParameters has parameters of label trees.
func NewLabelTreeParameters ¶
func NewLabelTreeParameters() *LabelTreeParameters
NewLabelTreeParameters returns a new LabelTreeParameters with default values.
type LeftRightAssignInitializer ¶
type LeftRightAssignInitializer func(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool
LeftRightAssignInitializer is the type of the left/right assignment initializers. An initializer returns the initialized left/right assignment slice.
type LeftRightAssigner ¶
LeftRightAssigner is the type of the left/right assigners. An assigner modifies delta to store the result of the assignment. delta is also used as the initial value.
type QueuePrioritizedByFloat32 ¶
type QueuePrioritizedByFloat32 []QueuePrioritizedByFloat32Item
QueuePrioritizedByFloat32 is the queue prioritized by float32. This implements interface heap.Interface.
func (QueuePrioritizedByFloat32) Len ¶
func (q QueuePrioritizedByFloat32) Len() int
Len is for interface heap.Interface.
func (QueuePrioritizedByFloat32) Less ¶
func (q QueuePrioritizedByFloat32) Less(i, j int) bool
Less is for interface heap.Interface.
func (*QueuePrioritizedByFloat32) Pop ¶
func (q *QueuePrioritizedByFloat32) Pop() interface{}
Pop is for interface heap.Interface.
func (*QueuePrioritizedByFloat32) Push ¶
func (q *QueuePrioritizedByFloat32) Push(item interface{})
Push is for interface heap.Interface.
func (QueuePrioritizedByFloat32) Swap ¶
func (q QueuePrioritizedByFloat32) Swap(i, j int)
Swap is for interface heap.Interface.
type QueuePrioritizedByFloat32Item ¶
type QueuePrioritizedByFloat32Item struct {
// contains filtered or unexported fields
}
QueuePrioritizedByFloat32Item is the item structure of QueuePrioritizedByFloat32.
func NewQueuePrioritizedByFloat32Item ¶
func NewQueuePrioritizedByFloat32Item(priority float32, item interface{}) QueuePrioritizedByFloat32Item
NewQueuePrioritizedByFloat32Item returns a new QueuePrioritizedByFloat32Item.
func (QueuePrioritizedByFloat32Item) Item ¶
func (item QueuePrioritizedByFloat32Item) Item() interface{}
Item returns item.
func (QueuePrioritizedByFloat32Item) Priority ¶
func (item QueuePrioritizedByFloat32Item) Priority() float32
Priority returns priority.
type RandomDatasetEntrySubSampler ¶
type RandomDatasetEntrySubSampler struct {
// contains filtered or unexported fields
}
RandomDatasetEntrySubSampler is a random DatasetEntrySubSampler. This sub-sampler returns the sub-dataset with the given size with replacement from the given dataset. seed is used as the seed of the random number generator.
This implements interface DatasetEntrySubSampler.