plugin

package

v0.0.0-...-5d0fad7 Latest Latest Go to latest Published: May 17, 2018 License: MIT Imports: 11 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/hiro4bbh/sticker

Links

Open Source Insights

Documentation ¶

Overview ¶

Package plugin provides plugin functions for sticker.

Index ¶

Constants
Variables
func BinaryClassifierTrainer_L1SVC_DualCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
func BinaryClassifierTrainer_L2SVC_PrimalCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)
func BinaryRankerTrainer_L1SVC_PrimalSGD(X sticker.FeatureVectors, pairIndices [][2]int, pairMargins []float32, ...) (*sticker.BinaryClassifier, error)
func BipartitionWeightedGraph(n uint64, minLeftRight uint64, A []float32) ([]bool, error)
func DatasetNoneFeatureSubSampler(ds *sticker.Dataset, seed int64) (*sticker.Dataset, error)
func DatasetSqrtFeatureSubSampler(ds *sticker.Dataset, seed int64) (*sticker.Dataset, error)
func DecodeLabelBoost(model *LabelBoost, r io.Reader) error
func DecodeLabelBoostWithGobDecoder(model *LabelBoost, decoder *gob.Decoder) error
func DecodeLabelForest(forest *LabelForest, r io.Reader) error
func DecodeLabelForestWithGobDecoder(forest *LabelForest, decoder *gob.Decoder) error
func DecodeLabelTree(tree *LabelTree, r io.Reader) error
func DecodeLabelTreeWithGobDecoder(tree *LabelTree, decoder *gob.Decoder) error
func EncodeLabelBoost(model *LabelBoost, w io.Writer) error
func EncodeLabelBoostWithGobEncoder(model *LabelBoost, encoder *gob.Encoder) error
func EncodeLabelForest(forest *LabelForest, w io.Writer) error
func EncodeLabelForestWithGobEncoder(forest *LabelForest, encoder *gob.Encoder) error
func EncodeLabelTree(tree *LabelTree, w io.Writer) error
func EncodeLabelTreeWithGobEncoder(tree *LabelTree, encoder *gob.Encoder) error
func InitializePlugin()
func LeftRightAssignInitializer_topLabelGraph(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, ...) []bool
func LeftRightAssignInitializer_topLabelTree(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, ...) []bool
func LeftRightAssignInitializer_uniform(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, ...) []bool
func LeftRightAssigner_greedyBottomRanks(ds *sticker.Dataset, delta []bool, debug *log.Logger) error
func LeftRightAssigner_nDCG(ds *sticker.Dataset, delta []bool, debug *log.Logger) error
func LeftRightAssigner_none(ds *sticker.Dataset, delta []bool, debug *log.Logger) error
func Painter_TopLabelSubSet(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32
func Painter_TopLabels(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32
func SelectItemsAMAP(weights []float32, W float32) (float32, []int, error)
type BinaryRankerTrainer
type DatasetEntrySubSampler
- func NewDeterministicDatasetEntrySubSampler(n uint) DatasetEntrySubSampler
- func NewRandomDatasetEntrySubSampler(n uint) DatasetEntrySubSampler
type DatasetFeatureSubSampler
type DeterministicDatasetEntrySubSampler
- func (sampler *DeterministicDatasetEntrySubSampler) SubSample(ds *sticker.Dataset, seed uint) []int
type LabelBoost
- func TrainLabelBoost(ds *sticker.Dataset, params *LabelBoostParameters, debug *log.Logger) (*LabelBoost, error)
- func (model *LabelBoost) GobEncode() ([]byte, error)
- func (model *LabelBoost) Nrounds() uint
- func (model *LabelBoost) Predict(x sticker.FeatureVector, K uint, T uint) sticker.LabelVector
- func (model *LabelBoost) PredictAll(X sticker.FeatureVectors, K uint, T uint) sticker.LabelVectors
type LabelBoostParameters
- func NewLabelBoostParameters() *LabelBoostParameters
type LabelForest
- func TrainLabelForest(ds *sticker.Dataset, ntrees uint, subSampler DatasetEntrySubSampler, ...) (*LabelForest, error)
- func (forest *LabelForest) Classify(x sticker.FeatureVector) []uint64
- func (forest *LabelForest) ClassifyAll(X sticker.FeatureVectors) [][]uint64
- func (forest *LabelForest) ClassifyAllWithWeight(X sticker.FeatureVectors) ([][]uint64, [][]float32)
- func (forest *LabelForest) ClassifyWithWeight(x sticker.FeatureVector) ([]uint64, []float32)
- func (forest *LabelForest) GobEncode() ([]byte, error)
- func (forest *LabelForest) Predict(leafIds []uint64, K uint) sticker.LabelVector
- func (forest *LabelForest) PredictAll(leafIdsSlice [][]uint64, K uint) sticker.LabelVectors
- func (forest *LabelForest) PredictAllWithWeight(leafIdsSlice [][]uint64, weightsSlice [][]float32, K uint) sticker.LabelVectors
- func (forest *LabelForest) PredictWithWeight(leafIds []uint64, weights []float32, K uint) sticker.LabelVector
type LabelTree
- func TrainLabelTree(ds *sticker.Dataset, params *LabelTreeParameters, seed int64, ...) (*LabelTree, error)
- func (tree *LabelTree) Classify(x sticker.FeatureVector) uint64
- func (tree *LabelTree) ClassifyAll(X sticker.FeatureVectors) []uint64
- func (tree *LabelTree) ClassifyAllWithWeight(X sticker.FeatureVectors) ([]uint64, []float32)
- func (tree *LabelTree) ClassifyWithWeight(x sticker.FeatureVector) (uint64, float32)
- func (tree *LabelTree) GobEncode() ([]byte, error)
- func (tree *LabelTree) IsTerminalLeaf(leafId uint64) bool
- func (tree *LabelTree) IsValidLeaf(leafId uint64) bool
- func (tree *LabelTree) Predict(leafId uint64, K uint) sticker.LabelVector
- func (tree *LabelTree) PredictAll(leafIdSlice []uint64, K uint) sticker.LabelVectors
type LabelTreeParameters
- func NewLabelTreeParameters() *LabelTreeParameters
type LeftRightAssignInitializer
type LeftRightAssigner
type QueuePrioritizedByFloat32
- func (q QueuePrioritizedByFloat32) Len() int
- func (q QueuePrioritizedByFloat32) Less(i, j int) bool
- func (q *QueuePrioritizedByFloat32) Pop() interface{}
- func (q *QueuePrioritizedByFloat32) Push(item interface{})
- func (q QueuePrioritizedByFloat32) Swap(i, j int)
type QueuePrioritizedByFloat32Item
- func NewQueuePrioritizedByFloat32Item(priority float32, item interface{}) QueuePrioritizedByFloat32Item
- func (item QueuePrioritizedByFloat32Item) Item() interface{}
- func (item QueuePrioritizedByFloat32Item) Priority() float32
type RandomDatasetEntrySubSampler
- func (sampler *RandomDatasetEntrySubSampler) SubSample(ds *sticker.Dataset, seed uint) []int

Constants ¶

View Source

const DefaultDatasetFeatureSubSamplerName = "none"

DefaultDatasetFeatureSubSamplerName is the default DatasetFeatureSubSampler name.

View Source

const DefaultLeftRightAssignInitializerName = "uniform"

DefaultLeftRightAssignInitializerName is the default LeftRightAssignInitializer name.

View Source

const DefaultLeftRightAssignerName = "nDCG"

DefaultLeftRightAssignerName is the default LeftRightAssigner name.

Variables ¶

View Source

var BinaryRankerTrainers = map[string]BinaryRankerTrainer{
	"L1SVC_PrimalSGD": BinaryRankerTrainer_L1SVC_PrimalSGD,
}

BinaryRankerTrainers is the map from the binary ranker trainer name to the corresponding binary ranker trainer.

View Source

var DatasetFeatureSubSamplers = map[string]DatasetFeatureSubSampler{
	"none": DatasetNoneFeatureSubSampler,
	"sqrt": DatasetSqrtFeatureSubSampler,
}

DatasetFeatureSubSamplers is the map from the sub-sampler name to the corresponding sub-sampler.

View Source

var LeftRightAssignInitializers = map[string]LeftRightAssignInitializer{
	"topLabelGraph": LeftRightAssignInitializer_topLabelGraph,
	"topLabelTree":  LeftRightAssignInitializer_topLabelTree,
	"uniform":       LeftRightAssignInitializer_uniform,
}

LeftRightAssignInitializers is the map from the initializer name to the corresponding left/right assignment initializer.

View Source

var LeftRightAssigners = map[string]LeftRightAssigner{
	"greedyBottomRanks": LeftRightAssigner_greedyBottomRanks,
	"nDCG":              LeftRightAssigner_nDCG,
	"none":              LeftRightAssigner_none,
}

LeftRightAssigners is the map from the assigner name to the left/right assigner.

View Source

var Painters = map[string]func(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32{
	"topLabels":      Painter_TopLabels,
	"topLabelSubSet": Painter_TopLabelSubSet,
}

Painters is the map from the painter's name to the painter function. A painter takes the dataset, the corresponding margin matrix, and the maximum number of requested labels, then returns the slice of selected labels. debug is used for debug logs.

Functions ¶

func BinaryClassifierTrainer_L1SVC_DualCD ¶

func BinaryClassifierTrainer_L1SVC_DualCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)

BinaryClassifierTrainer_L1SVC_DualCD trains a L1-Support Vector Classifier with Dual Coordinate Descent. This is registered to sticker.BinaryClassifierTrainers.

This function returns no error currently.

Reference: C. Hsieh, K. Chang, C. Lin, S. S. Keerthi, and S. Sundararajan. "A Dual Coordinate Descent Method for Large-Scale Linear SVM." Proceedings of the 25th international conference on Machine learning, ACM, 2008.

func BinaryClassifierTrainer_L2SVC_PrimalCD ¶

func BinaryClassifierTrainer_L2SVC_PrimalCD(X sticker.FeatureVectors, Y []bool, C, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)

BinaryClassifierTrainer_L2SVC_PrimalCD trains a L2-Support Vector Classifier with Primal Coordinate Descent. This is registered to sticker.BinaryClassifierTrainers.

This is not recommended: even if the dataset is normalized while preserving its sparsity, it is too slow to converge due to its piece-wise quadratic form. It is difficult to control the scaling such that the magnitude of the first derivative equals its corresponding Newton step. Otherwise, the optimization would be slow even when the first derivative is not small enough. Furthermore, even if the optimization stops early, its performance is much worse than L1SVC_DualCD.

This function returns no error currently.

Reference: K. Chang, C. Hsieh, and C. Lin. "Coordinate Descent Method for Large-Scale L2-loss Linear Support Vector Machines." Journal of Machine Learning Research, vol. 9, pp. 1369-1398, 2008.

func BinaryRankerTrainer_L1SVC_PrimalSGD ¶

func BinaryRankerTrainer_L1SVC_PrimalSGD(X sticker.FeatureVectors, pairIndices [][2]int, pairMargins []float32, pairCs []float32, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)

BinaryRankerTrainer_L1SVC_PrimalSGD trains a L1-Support Vector Ranker with primal stochastic gradient descent. This is registered to BinaryRankerTrainers.

This is the optimized implementation based on sticker.BinaryClassifierTrainer_L1SVC_PrimalSGD.

This function returns no error currently.

func BipartitionWeightedGraph ¶

func BipartitionWeightedGraph(n uint64, minLeftRight uint64, A []float32) ([]bool, error)

BipartitionWeightedGraph bipartitions the given weighted graph with 1-spectral clustering method, and returns the bool slice which is true if it is in right. If the size of left or right is less than minLeftRight, this function won't try to bipartition.

This function returns an error if A is not legal adjacency matrix.

func DatasetNoneFeatureSubSampler ¶

func DatasetNoneFeatureSubSampler(ds *sticker.Dataset, seed int64) (*sticker.Dataset, error)

DatasetNoneFeatureSubSampler does not perform any feature sub-sampling, and returns the given dataset itself.

This function returns no error currently.

func DatasetSqrtFeatureSubSampler ¶

func DatasetSqrtFeatureSubSampler(ds *sticker.Dataset, seed int64) (*sticker.Dataset, error)

DatasetSqrtFeatureSubSampler sub-samples sqrt(J) features (J is the number of the used features), and returns the feature sub-sampled dataset. This is registered to DatasetFeatureSubSamplers.

This function returns no error currently.

func DecodeLabelBoost ¶

func DecodeLabelBoost(model *LabelBoost, r io.Reader) error

DecodeLabelBoost decodes LabelBoost from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelBoostWithGobDecoder.

This function returns an error in decoding.

func DecodeLabelBoostWithGobDecoder ¶

func DecodeLabelBoostWithGobDecoder(model *LabelBoost, decoder *gob.Decoder) error

DecodeLabelBoostWithGobDecoder decodes LabelBoost using decoder.

This function returns an error in decoding.

func DecodeLabelForest ¶

func DecodeLabelForest(forest *LabelForest, r io.Reader) error

DecodeLabelForest decodes LabelForest from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelForestWithGobDecoder.

This function returns an error in decoding.

func DecodeLabelForestWithGobDecoder ¶

func DecodeLabelForestWithGobDecoder(forest *LabelForest, decoder *gob.Decoder) error

DecodeLabelForestWithGobDecoder decodes LabelForest using decoder.

This function returns an error in decoding.

func DecodeLabelTree ¶

func DecodeLabelTree(tree *LabelTree, r io.Reader) error

DecodeLabelTree decodes LabelTree from r. Directly passing *os.File used by a gob.Decoder to this function causes mysterious errors. Thus, if users use gob.Decoder, then they should call DecodeLabelTreeWithGobDecoder.

This function returns an error in decoding.

func DecodeLabelTreeWithGobDecoder ¶

func DecodeLabelTreeWithGobDecoder(tree *LabelTree, decoder *gob.Decoder) error

DecodeLabelTreeWithGobDecoder decodes LabelTree using decoder.

This function returns an error in decoding.

func EncodeLabelBoost ¶

func EncodeLabelBoost(model *LabelBoost, w io.Writer) error

EncodeLabelBoost encodes LabelBoost to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelBoostWithGobEncoder.

This function returns an error in encoding.

func EncodeLabelBoostWithGobEncoder ¶

func EncodeLabelBoostWithGobEncoder(model *LabelBoost, encoder *gob.Encoder) error

EncodeLabelBoostWithGobEncoder encodes LabelBoost using encoder.

This function returns an error in encoding.

func EncodeLabelForest ¶

func EncodeLabelForest(forest *LabelForest, w io.Writer) error

EncodeLabelForest encodes LabelForest to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelForestWithGobEncoder.

This function returns an error in encoding.

func EncodeLabelForestWithGobEncoder ¶

func EncodeLabelForestWithGobEncoder(forest *LabelForest, encoder *gob.Encoder) error

EncodeLabelForestWithGobEncoder encodes LabelForest using encoder.

This function returns an error in encoding.

func EncodeLabelTree ¶

func EncodeLabelTree(tree *LabelTree, w io.Writer) error

EncodeLabelTree encodes LabelTree to w. Directly passing *os.File used by a gob.Encoder to this function causes mysterious errors. Thus, if users use gob.Encoder, then they should call EncodeLabelTreeWithGobEncoder.

This function returns an error in encoding.

func EncodeLabelTreeWithGobEncoder ¶

func EncodeLabelTreeWithGobEncoder(tree *LabelTree, encoder *gob.Encoder) error

EncodeLabelTreeWithGobEncoder encodes LabelTree using encoder.

This function returns an error in encoding.

func InitializePlugin ¶

func InitializePlugin()

InitializePlugin does nothing, because the init functions in this package register functions to sticker. Thus it is unnecessary for users to call any function in this package. Users can call this function to avoid an unused-import error.

func LeftRightAssignInitializer_topLabelGraph ¶

func LeftRightAssignInitializer_topLabelGraph(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool

LeftRightAssignInitializer_topLabelGraph returns the delta slice initialized with the cutting of the top-label graph. This is registered to LeftRightAssignInitializers.

func LeftRightAssignInitializer_topLabelTree ¶

func LeftRightAssignInitializer_topLabelTree(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool

LeftRightAssignInitializer_topLabelTree returns the delta slice initialized with the cutting of the top-labels tree. This is registered to LeftRightAssignInitializers.

func LeftRightAssignInitializer_uniform ¶

func LeftRightAssignInitializer_uniform(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool

LeftRightAssignInitializer_uniform returns the delta slice initialized with the samples from uniform probability distribution. This is registered to LeftRightAssignInitializers.

func LeftRightAssigner_greedyBottomRanks ¶

func LeftRightAssigner_greedyBottomRanks(ds *sticker.Dataset, delta []bool, debug *log.Logger) error

LeftRightAssigner_greedyBottomRanks assigns the left or right label by moving each entry that has the bottom-ranked labels from left to right. This is registered to LeftRightAssigners.

This function returns no error currently.

func LeftRightAssigner_nDCG ¶

func LeftRightAssigner_nDCG(ds *sticker.Dataset, delta []bool, debug *log.Logger) error

LeftRightAssigner_nDCG assigns left or right to each label so as to maximize the sum of the left and right utilities measured with nDCG. This is registered to LeftRightAssigners.

This function returns no error currently.

NOTICE: In calculating nDCG, this function uses 2 as the base of the logarithm for precision reasons.

func LeftRightAssigner_none ¶

func LeftRightAssigner_none(ds *sticker.Dataset, delta []bool, debug *log.Logger) error

LeftRightAssigner_none assigns left or right on each label with the given initialized delta, so the label assignment won't change from the given initialized delta. This is registered to LeftRightAssigners.

This function returns no error.

func Painter_TopLabelSubSet ¶

func Painter_TopLabelSubSet(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32

Painter_TopLabelSubSet is the painter that returns the most frequent top-K mis-classified co-occurring labels.

This is registered to Painters.

func Painter_TopLabels ¶

func Painter_TopLabels(ds *sticker.Dataset, Z []sticker.KeyValues32, K uint, debug *log.Logger) []uint32

Painter_TopLabels is the painter that returns the most frequent top-K mis-classified labels.

This is registered to Painters.

func SelectItemsAMAP ¶

func SelectItemsAMAP(weights []float32, W float32) (float32, []int, error)

SelectItemsAMAP selects as many items as possible under the constraint that the sum of their weights is at most W, and returns the sum of the selected weights and the slice of the selected item IDs.

This function returns an error if some weights are negative.

Types ¶

type BinaryRankerTrainer ¶

type BinaryRankerTrainer func(X sticker.FeatureVectors, pairIndices [][2]int, pairMargins []float32, C []float32, epsilon float32, debug *log.Logger) (*sticker.BinaryClassifier, error)

BinaryRankerTrainer is the type of binary ranker trainers. A trainer returns a new BinaryClassifier on positive/negative pair indices pairIndices on X with the specified pair margins. A negative value in pairIndices means the zero-vector. C is the penalty parameter slice for reweighting each entry. epsilon is the tolerance parameter for checking the convergence. debug is used for debug logs.

type DatasetEntrySubSampler ¶

type DatasetEntrySubSampler interface {
	// SubSample returns the index slice contained in the sub-sample with the given seed.
	SubSample(ds *sticker.Dataset, seed uint) []int
}

DatasetEntrySubSampler is the interface for entry sub-sampler on the dataset.

func NewDeterministicDatasetEntrySubSampler ¶

func NewDeterministicDatasetEntrySubSampler(n uint) DatasetEntrySubSampler

NewDeterministicDatasetEntrySubSampler returns a new DeterministicDatasetEntrySubSampler.

func NewRandomDatasetEntrySubSampler ¶

func NewRandomDatasetEntrySubSampler(n uint) DatasetEntrySubSampler

NewRandomDatasetEntrySubSampler returns a new RandomDatasetEntrySubSampler.

type DatasetFeatureSubSampler ¶

type DatasetFeatureSubSampler func(ds *sticker.Dataset, seed int64) (*sticker.Dataset, error)

DatasetFeatureSubSampler is the type of feature sub-samplers. A sub-sampler returns the dataset whose features are sub-sampled.

type DeterministicDatasetEntrySubSampler ¶

type DeterministicDatasetEntrySubSampler struct {
	// contains filtered or unexported fields
}

DeterministicDatasetEntrySubSampler is a deterministic DatasetEntrySubSampler. The sub-sampler simply returns the sub-dataset with the given size in order of the given dataset. The seed is used as the sub-sample start index.

This implements interface DatasetEntrySubSampler.

func (*DeterministicDatasetEntrySubSampler) SubSample ¶

func (sampler *DeterministicDatasetEntrySubSampler) SubSample(ds *sticker.Dataset, seed uint) []int

SubSample is for interface DatasetEntrySubSampler.

type LabelBoost ¶

type LabelBoost struct {
	// Params is the used LabelBoostParameters.
	Params *LabelBoostParameters
	// Biases is the bias slice used by splitters on each boosting round.
	Biases []float32
	// WeightLists is the weight sparse matrix used by splitters on each boosting round.
	// WeightLists is the map from the feature key to the (roundID, the weight on the feature of #roundID splitter) slice.
	// This data structure greatly reduces the number of times that the classifier accesses Go's map.
	WeightLists map[uint32]sticker.KeyValues32
	// LabelLists is the label list slice used in each boosting round.
	// Each label list has the labels stickered to the entry if the classifier at the round returns positive score on the entry.
	LabelLists []sticker.LabelVector
	// The following members are not required.
	//
	// Summaries is the summary object slice for each boosting round.
	// The entries in this summary are considered to provide compact and useful information in best-effort, so this specification would be loose and rapidly changing.
	Summaries []map[string]interface{}
}

LabelBoost is the multi-label boosting model.

func TrainLabelBoost ¶

func TrainLabelBoost(ds *sticker.Dataset, params *LabelBoostParameters, debug *log.Logger) (*LabelBoost, error)

TrainLabelBoost returns a trained LabelBoost on the given dataset ds.

func (*LabelBoost) GobEncode ¶

func (model *LabelBoost) GobEncode() ([]byte, error)

GobEncode always returns an error, because users should encode large LabelBoost objects with EncodeLabelBoost.

func (*LabelBoost) Nrounds ¶

func (model *LabelBoost) Nrounds() uint

Nrounds returns the number of the rounds.

func (*LabelBoost) Predict ¶

func (model *LabelBoost) Predict(x sticker.FeatureVector, K uint, T uint) sticker.LabelVector

Predict returns the top-K predicted labels for the given data point x with the first T rounds.

func (*LabelBoost) PredictAll ¶

func (model *LabelBoost) PredictAll(X sticker.FeatureVectors, K uint, T uint) sticker.LabelVectors

PredictAll returns the slice of the top-K predicted labels for each data point in X with the first T rounds.

type LabelBoostParameters ¶

type LabelBoostParameters struct {
	// RankerTrainerName is the used BinaryRankerTrainer name.
	RankerTrainerName string
	// C is the penalty parameter for BinaryRankerTrainer.
	C float32
	// Epsilon is the tolerance parameter for BinaryRankerTrainer.
	Epsilon float32
	// NegativeSampleSize is the size of each negative sample for Multi-Label Ranking Hinge Boosting.
	// Specify 0 for Multi-Label Hinge Boosting.
	NegativeSampleSize uint
	// PainterK is the maximum number of the painted target label.
	PainterK uint
	// PainterName is the used Painter name.
	PainterName string
	// T is the maximum number of boosting rounds.
	T uint
}

LabelBoostParameters is the parameters for LabelBoost.

func NewLabelBoostParameters ¶

func NewLabelBoostParameters() *LabelBoostParameters

NewLabelBoostParameters returns a LabelBoostParameters initialized with the default values.

type LabelForest ¶

type LabelForest struct {
	// TreeParams is the parameters for training each LabelTree.
	TreeParams *LabelTreeParameters
	// Trees is the slice of trained trees.
	Trees []*LabelTree
	// The following members are not required.
	//
	// Summary is the sub-sampling summary.
	// This summary is considered to provide compact and useful information in best-effort, so this specification would be loose and rapidly changing.
	Summary map[string]interface{}
}

LabelForest is variously-modified FastXML (Prabhu+ 2014).

References:

(Prabhu+ 2014) Y. Prabhu, and M. Varma. "FastXML: A Fast, Accurate and Stable Tree-Classifier for Extreme Multi-Label Learning." Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 263--272, 2014.

func TrainLabelForest ¶

func TrainLabelForest(ds *sticker.Dataset, ntrees uint, subSampler DatasetEntrySubSampler, params *LabelTreeParameters, debug *log.Logger) (*LabelForest, error)

TrainLabelForest returns a trained LabelForest on ds with multiple go-routines. The number of go-routines is runtime.GOMAXPROCS.

This function returns the last error in training each tree in the forest by multiple go-routines.

func (*LabelForest) Classify ¶

func (forest *LabelForest) Classify(x sticker.FeatureVector) []uint64

Classify returns the leaf id slice for the given feature vector.

func (*LabelForest) ClassifyAll ¶

func (forest *LabelForest) ClassifyAll(X sticker.FeatureVectors) [][]uint64

ClassifyAll returns the slice of the leaf id slices for each feature vector.

func (*LabelForest) ClassifyAllWithWeight ¶

func (forest *LabelForest) ClassifyAllWithWeight(X sticker.FeatureVectors) ([][]uint64, [][]float32)

ClassifyAllWithWeight returns the slice of the leaf id slices and the weight slices for each feature vector.

func (*LabelForest) ClassifyWithWeight ¶

func (forest *LabelForest) ClassifyWithWeight(x sticker.FeatureVector) ([]uint64, []float32)

ClassifyWithWeight returns the leaf id slice and the weight slice for the given feature vector.

func (*LabelForest) GobEncode ¶

func (forest *LabelForest) GobEncode() ([]byte, error)

GobEncode always returns an error, because users should encode large LabelForest objects with EncodeLabelForest.

func (*LabelForest) Predict ¶

func (forest *LabelForest) Predict(leafIds []uint64, K uint) sticker.LabelVector

Predict returns the top-K labels for the given result of Classify.

func (*LabelForest) PredictAll ¶

func (forest *LabelForest) PredictAll(leafIdsSlice [][]uint64, K uint) sticker.LabelVectors

PredictAll returns the top-K labels for the given result of ClassifyAll.

func (*LabelForest) PredictAllWithWeight ¶

func (forest *LabelForest) PredictAllWithWeight(leafIdsSlice [][]uint64, weightsSlice [][]float32, K uint) sticker.LabelVectors

PredictAllWithWeight returns the top-K labels for the given result of ClassifyAllWithWeight.

func (*LabelForest) PredictWithWeight ¶

func (forest *LabelForest) PredictWithWeight(leafIds []uint64, weights []float32, K uint) sticker.LabelVector

PredictWithWeight returns the top-K labels for the given result of ClassifyWithWeight.

type LabelTree ¶

type LabelTree struct {
	// SplitterSet is the map from a leaf id to the splitter used in the leaf.
	// If the splitter of the leaf is not nil, the splitter is used for deciding whether x goes to the left or the right.
	// If it is nil, the leaf is terminal.
	SplitterSet map[uint64]*sticker.BinaryClassifier
	// LabelFreqSet is the map from a leaf id to the label frequency table in the leaf.
	// The table is constructed from the training dataset.
	// In the terminal leaf, it is used for prediction.
	LabelFreqSet map[uint64]sticker.SparseVector
	// The following members are not required.
	//
	// SummarySet is the map from a leaf id to the summary for the non-terminal leaf.
	// The entries in this summary are considered to provide compact and useful information in best-effort, so this specification would be loose and rapidly changing.
	SummarySet map[uint64]map[string]interface{}
}

LabelTree is the data structure for trees in LabelForest. LabelTree can have at most 2^64 - 1 leaves.

func TrainLabelTree ¶

func TrainLabelTree(ds *sticker.Dataset, params *LabelTreeParameters, seed int64, debug *log.Logger) (*LabelTree, error)

TrainLabelTree returns a trained LabelTree on the given dataset. The 16 MSBs of seed are used as the tree id which is reported in the debug log.

This function returns an error if the height of the tree is greater than 64 or in training the tree.

func (*LabelTree) Classify ¶

func (tree *LabelTree) Classify(x sticker.FeatureVector) uint64

Classify returns the id of the leaf into which x falls.

func (*LabelTree) ClassifyAll ¶

func (tree *LabelTree) ClassifyAll(X sticker.FeatureVectors) []uint64

ClassifyAll returns the slice of the leaf IDs into which each entry of X falls.

func (*LabelTree) ClassifyAllWithWeight ¶

func (tree *LabelTree) ClassifyAllWithWeight(X sticker.FeatureVectors) ([]uint64, []float32)

ClassifyAllWithWeight returns the leaf ID slice and the weight slice for the leaves into which each entry of X falls.

func (*LabelTree) ClassifyWithWeight ¶

func (tree *LabelTree) ClassifyWithWeight(x sticker.FeatureVector) (uint64, float32)

ClassifyWithWeight returns the ID of the leaf into which x falls and the corresponding weight. The weight does not affect any prediction result on a single tree; it only affects ensembled trees.

func (*LabelTree) GobEncode ¶

func (tree *LabelTree) GobEncode() ([]byte, error)

GobEncode always returns an error, because users should encode large LabelTree objects with EncodeLabelTree.

func (*LabelTree) IsTerminalLeaf ¶

func (tree *LabelTree) IsTerminalLeaf(leafId uint64) bool

IsTerminalLeaf returns true if the leaf is terminal, otherwise false.

func (*LabelTree) IsValidLeaf ¶

func (tree *LabelTree) IsValidLeaf(leafId uint64) bool

IsValidLeaf returns true if the leaf id is valid, otherwise false.

func (*LabelTree) Predict ¶

func (tree *LabelTree) Predict(leafId uint64, K uint) sticker.LabelVector

Predict returns the top-K labels for the given result of Classify.

func (*LabelTree) PredictAll ¶

func (tree *LabelTree) PredictAll(leafIdSlice []uint64, K uint) sticker.LabelVectors

PredictAll returns the top-K labels for the given result of ClassifyAll.

type LabelTreeParameters ¶

type LabelTreeParameters struct {
	// AssignerName is the used LeftRightAssigner name.
	AssignerName string
	// AssignInitializerName is the used LeftRightAssignInitializer name.
	AssignInitializerName string
	// ClassifierTrainerName is the used BinaryClassifierTrainer name.
	ClassifierTrainerName string
	// C is the inverse of the penalty parameter used by BinaryClassifierTrainer.
	C float32
	// Epsilon is the tolerance parameter used by BinaryClassifierTrainer.
	Epsilon float32
	// FeatureSubSamplerName is the used DatasetFeatureSubSampler name.
	FeatureSubSamplerName string
	// K is the maximum number of labels in the distribution in each terminal leaf.
	K uint
	// MaxEntriesInLeaf is the maximum number of entries in each terminal leaf.
	MaxEntriesInLeaf uint
	// SuppVecK is the maximum number of support vectors in summary of LabelTree.
	SuppVecK uint
}

LabelTreeParameters has parameters of label trees.

func NewLabelTreeParameters ¶

func NewLabelTreeParameters() *LabelTreeParameters

NewLabelTreeParameters returns a new LabelTreeParameters with default values.

type LeftRightAssignInitializer ¶

type LeftRightAssignInitializer func(ds *sticker.Dataset, params *LabelTreeParameters, rng *rand.Rand, debug *log.Logger) []bool

LeftRightAssignInitializer is the type of the left/right assignment initializers. An initializer returns the initialized left/right assignment slice.

type LeftRightAssigner ¶

type LeftRightAssigner func(ds *sticker.Dataset, delta []bool, debug *log.Logger) error

LeftRightAssigner is the type of the left/right assigners. An assigner modifies delta to store the result of the assignment. delta is also used as the initial value.

type QueuePrioritizedByFloat32 ¶

type QueuePrioritizedByFloat32 []QueuePrioritizedByFloat32Item

QueuePrioritizedByFloat32 is the queue prioritized by float32. This implements interface heap.Interface.

func (QueuePrioritizedByFloat32) Len ¶

func (q QueuePrioritizedByFloat32) Len() int

Len is for interface heap.Interface.

func (QueuePrioritizedByFloat32) Less ¶

func (q QueuePrioritizedByFloat32) Less(i, j int) bool

Less is for interface heap.Interface.

func (*QueuePrioritizedByFloat32) Pop ¶

func (q *QueuePrioritizedByFloat32) Pop() interface{}

Pop is for interface heap.Interface.

func (*QueuePrioritizedByFloat32) Push ¶

func (q *QueuePrioritizedByFloat32) Push(item interface{})

Push is for interface heap.Interface.

func (QueuePrioritizedByFloat32) Swap ¶

func (q QueuePrioritizedByFloat32) Swap(i, j int)

Swap is for interface heap.Interface.

type QueuePrioritizedByFloat32Item ¶

type QueuePrioritizedByFloat32Item struct {
	// contains filtered or unexported fields
}

QueuePrioritizedByFloat32Item is the item structure of QueuePrioritizedByFloat32.

func NewQueuePrioritizedByFloat32Item ¶

func NewQueuePrioritizedByFloat32Item(priority float32, item interface{}) QueuePrioritizedByFloat32Item

NewQueuePrioritizedByFloat32Item returns a new QueuePrioritizedByFloat32Item.

func (QueuePrioritizedByFloat32Item) Item ¶

func (item QueuePrioritizedByFloat32Item) Item() interface{}

Item returns item.

func (QueuePrioritizedByFloat32Item) Priority ¶

func (item QueuePrioritizedByFloat32Item) Priority() float32

Priority returns priority.

type RandomDatasetEntrySubSampler ¶

type RandomDatasetEntrySubSampler struct {
	// contains filtered or unexported fields
}

RandomDatasetEntrySubSampler is a random DatasetEntrySubSampler. This sub-sampler returns the sub-dataset with the given size with replacement from the given dataset. seed is used as the seed of the random number generator.

This implements interface DatasetEntrySubSampler.

func (*RandomDatasetEntrySubSampler) SubSample ¶

func (sampler *RandomDatasetEntrySubSampler) SubSample(ds *sticker.Dataset, seed uint) []int

SubSample is for interface DatasetEntrySubSampler.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
next

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL