Documentation ¶
Overview ¶
Package base provides base data structures and utils.
The base data structures and utils include:
- Parallel Scheduler
- Hyper-parameters Management
- Random Generator
- Similarity Metrics
- Sparse Data Structures
Index ¶
- Constants
- func Concatenate(vectors ...[]int) []int
- func CosineSimilarity(a, b *MarginalSubSet) float64
- func FillZeroMatrix(mat [][]float64)
- func FillZeroVector(vec []float64)
- func ImplicitSimilarity(a, b *MarginalSubSet) float64
- func MSDSimilarity(a, b *MarginalSubSet) float64
- func Max(a []int) int
- func Min(a []int) int
- func NewMatrix(row, col int) [][]float64
- func NewMatrixInt(row, col int) [][]int
- func Parallel(nTask int, nJob int, worker func(begin, end int))
- func ParallelFor(begin, end int, worker func(i int))
- func ParallelForSum(begin, end int, worker func(i int) float64) float64
- func ParallelMean(nTask int, nJob int, worker func(begin, end int) (sum float64)) float64
- func PearsonSimilarity(a, b *MarginalSubSet) float64
- type FuncSimilarity
- type Indexer
- type MarginalSubSet
- func (set *MarginalSubSet) Contain(id string) bool
- func (set *MarginalSubSet) Count() int
- func (set *MarginalSubSet) ForEach(f func(i int, id string, value float64))
- func (set *MarginalSubSet) ForEachIndex(f func(i, index int, value float64))
- func (set *MarginalSubSet) ForIntersection(other *MarginalSubSet, f func(id string, a, b float64))
- func (set *MarginalSubSet) GetID(i int) string
- func (set *MarginalSubSet) GetIndex(i int) int
- func (set *MarginalSubSet) Len() int
- func (set *MarginalSubSet) Less(i, j int) bool
- func (set *MarginalSubSet) Mean() float64
- func (set *MarginalSubSet) Swap(i, j int)
- type MaxHeap
- func (maxHeap *MaxHeap) Add(elem interface{}, score float64)
- func (maxHeap *MaxHeap) Len() int
- func (maxHeap *MaxHeap) Less(i, j int) bool
- func (maxHeap *MaxHeap) Pop() interface{}
- func (maxHeap *MaxHeap) Push(x interface{})
- func (maxHeap *MaxHeap) Swap(i, j int)
- func (maxHeap *MaxHeap) ToSorted() ([]interface{}, []float64)
- type ParamName
- type Params
- func (parameters Params) Copy() Params
- func (parameters Params) GetBool(name ParamName, _default bool) bool
- func (parameters Params) GetFloat64(name ParamName, _default float64) float64
- func (parameters Params) GetInt(name ParamName, _default int) int
- func (parameters Params) GetInt64(name ParamName, _default int64) int64
- func (parameters Params) GetString(name ParamName, _default string) string
- func (parameters Params) Merge(params Params) Params
- type RandomGenerator
- func (rng RandomGenerator) NewNormalMatrix(row, col int, mean, stdDev float64) [][]float64
- func (rng RandomGenerator) NewNormalVector(size int, mean, stdDev float64) []float64
- func (rng RandomGenerator) NewUniformMatrix(row, col int, low, high float64) [][]float64
- func (rng RandomGenerator) NewUniformVector(size int, low, high float64) []float64
- func (rng RandomGenerator) NewUniformVectorInt(size, low, high int) []int
- type RuntimeOptions
- func (options *RuntimeOptions) GetCVJobs() int
- func (options *RuntimeOptions) GetFitJobs() int
- func (options *RuntimeOptions) GetVerbose() bool
- func (options *RuntimeOptions) Log(v ...interface{})
- func (options *RuntimeOptions) Logf(format string, v ...interface{})
- func (options *RuntimeOptions) Logln(v ...interface{})
- type SparseVector
- func (vec *SparseVector) Add(index int, value float64)
- func (vec *SparseVector) ForEach(f func(i, index int, value float64))
- func (vec *SparseVector) ForIntersection(other *SparseVector, f func(index int, a, b float64))
- func (vec *SparseVector) Len() int
- func (vec *SparseVector) Less(i, j int) bool
- func (vec *SparseVector) SortIndex()
- func (vec *SparseVector) Swap(i, j int)
- type StringIndexer
Constants ¶
const ( Basic string = "basic" // Basic KNN Centered string = "centered" // KNN with centered ratings ZScore string = "z_score" // KNN with standardized ratings Baseline string = "baseline" // KNN with baseline ratings )
Predefined values for hyper-parameter Type.
const ( SGDOptimizer string = "sgd" // Fit model (FM) with stochastic gradient descent. BPROptimizer string = "bpr" // Fit model (FM) with bayesian personal ranking. )
Predefined values for hyper-parameter Optimizer.
const ( Pearson string = "pearson" // Pearson similarity Cosine string = "cosine" // Cosine similarity MSD string = "msd" // MSD similarity )
Predefined values for hyper-parameter Similarity.
const NotId = -1
NotId represents an ID that doesn't exist.
Variables ¶
This section is empty.
Functions ¶
func Concatenate ¶
Concatenate merges vectors of integers to one.
func CosineSimilarity ¶
func CosineSimilarity(a, b *MarginalSubSet) float64
CosineSimilarity computes the cosine similarity between a pair of vectors.
func FillZeroMatrix ¶
func FillZeroMatrix(mat [][]float64)
FillZeroMatrix fills a matrix with zeros.
func ImplicitSimilarity ¶
func ImplicitSimilarity(a, b *MarginalSubSet) float64
ImplicitSimilarity computes similarity between two vectors with implicit feedback.
func MSDSimilarity ¶
func MSDSimilarity(a, b *MarginalSubSet) float64
MSDSimilarity computes the Mean Squared Difference similarity between a pair of vectors.
func NewMatrixInt ¶
NewMatrixInt creates a matrix of integers.
func Parallel ¶
Parallel schedules and runs tasks in parallel. nTask is the number of tasks. nJob is the number of executors. worker is the executed function which is passed a range of task IDs (begin, end).
func ParallelFor ¶
ParallelFor runs a for loop in parallel.
func ParallelForSum ¶
ParallelForSum runs a for loop in parallel and returns the sum of the values returned by worker.
func ParallelMean ¶
ParallelMean schedules and runs tasks in parallel, then returns the mean of returned values. nTask is the number of tasks. nJob is the number of executors. worker is the executed function which is passed a range of task IDs (begin, end) and returns a float64 value.
func PearsonSimilarity ¶
func PearsonSimilarity(a, b *MarginalSubSet) float64
PearsonSimilarity computes the absolute Pearson correlation coefficient between a pair of vectors.
Types ¶
type FuncSimilarity ¶
type FuncSimilarity func(a, b *MarginalSubSet) float64
FuncSimilarity computes the similarity between a pair of vectors.
type Indexer ¶
type Indexer struct { Indices map[string]int // sparse ID -> dense index IDs []string // dense index -> sparse ID }
Indexer manages the map between sparse IDs and dense indices. A sparse ID is a user ID or item ID. The dense index is the internal user index or item index optimized for faster parameter access and less memory usage.
type MarginalSubSet ¶
type MarginalSubSet struct { Indexer *Indexer // the indexer Indices []int // the full list of indices Values []float64 // the full list of values SubSet []int // indices of the subset }
MarginalSubSet constructs a subset over a list of IDs, indices and values.
func NewMarginalSubSet ¶
func NewMarginalSubSet(indexer *Indexer, indices []int, values []float64, subset []int) *MarginalSubSet
NewMarginalSubSet creates a MarginalSubSet.
func (*MarginalSubSet) Contain ¶
func (set *MarginalSubSet) Contain(id string) bool
Contain returns true if an ID exists in the subset.
func (*MarginalSubSet) Count ¶
func (set *MarginalSubSet) Count() int
Count gets the size of marginal subset.
func (*MarginalSubSet) ForEach ¶
func (set *MarginalSubSet) ForEach(f func(i int, id string, value float64))
ForEach iterates items in the subset with IDs.
func (*MarginalSubSet) ForEachIndex ¶
func (set *MarginalSubSet) ForEachIndex(f func(i, index int, value float64))
ForEachIndex iterates items in the subset with indices.
func (*MarginalSubSet) ForIntersection ¶
func (set *MarginalSubSet) ForIntersection(other *MarginalSubSet, f func(id string, a, b float64))
ForIntersection iterates items in the intersection of two subsets. The method finds items with common indices in linear time.
func (*MarginalSubSet) GetID ¶
func (set *MarginalSubSet) GetID(i int) string
GetID returns the ID of i-th item.
func (*MarginalSubSet) GetIndex ¶
func (set *MarginalSubSet) GetIndex(i int) int
GetIndex returns the index of i-th item.
func (*MarginalSubSet) Less ¶
func (set *MarginalSubSet) Less(i, j int) bool
Less compares two items.
func (*MarginalSubSet) Mean ¶
func (set *MarginalSubSet) Mean() float64
Mean of ratings in the subset.
type MaxHeap ¶
type MaxHeap struct { Elem []interface{} // store elements Score []float64 // store scores K int // the size of heap }
MaxHeap is designed to store the K maximal elements. A heap is used to reduce time complexity and memory complexity in top-K searching.
func (*MaxHeap) Less ¶
Less returns true if the score of i-th item is less than the score of j-th item. It is a method of heap.Interface.
func (*MaxHeap) Pop ¶
func (maxHeap *MaxHeap) Pop() interface{}
Pop the last item (the element with minimal score) in the MaxHeap. It is a method of heap.Interface.
func (*MaxHeap) Push ¶
func (maxHeap *MaxHeap) Push(x interface{})
Push pushes a neighbor into the MaxHeap. It is a method of heap.Interface.
type ParamName ¶
type ParamName string
ParamName is the type of hyper-parameter names.
const ( Lr ParamName = "Lr" // learning rate Reg ParamName = "Reg" // regularization strength NEpochs ParamName = "NEpochs" // number of epochs NFactors ParamName = "NFactors" // number of factors RandomState ParamName = "RandomState" // random state (seed) UseBias ParamName = "UseBias" // use bias InitMean ParamName = "InitMean" // mean of gaussian initial parameter InitStdDev ParamName = "InitStdDev" // standard deviation of gaussian initial parameter InitLow ParamName = "InitLow" // lower bound of uniform initial parameter InitHigh ParamName = "InitHigh" // upper bound of uniform initial parameter NUserClusters ParamName = "NUserClusters" // number of user cluster NItemClusters ParamName = "NItemClusters" // number of item cluster Type ParamName = "Type" // type for KNN UserBased ParamName = "UserBased" // user based if true. otherwise item based. Similarity ParamName = "Similarity" // similarity metrics K ParamName = "K" // number of neighbors MinK ParamName = "MinK" // least number of neighbors Optimizer ParamName = "Optimizer" // optimizer for optimization (SGD/ALS/BPR) Shrinkage ParamName = "Shrinkage" // shrinkage strength of similarity Alpha ParamName = "Alpha" // alpha value, depend on context )
Predefined hyper-parameter names
type Params ¶
type Params map[ParamName]interface{}
Params stores hyper-parameters for a model. It is a map between strings (names) and interface{}s (values). For example, hyper-parameters for SVD are given by:
base.Params{ base.Lr: 0.007, base.NEpochs: 100, base.NFactors: 80, base.Reg: 0.1, }
func (Params) GetBool ¶
GetBool gets a bool parameter by name. Returns _default if not exists or type doesn't match.
func (Params) GetFloat64 ¶
GetFloat64 gets a float parameter by name. Returns _default if not exists or type doesn't match. The type will be converted if given int.
func (Params) GetInt ¶
GetInt gets an integer parameter by name. Returns _default if not exists or type doesn't match.
func (Params) GetInt64 ¶
GetInt64 gets an int64 parameter by name. Returns _default if not exists or type doesn't match. The type will be converted if given int.
type RandomGenerator ¶
RandomGenerator is the random generator for gorse.
func NewRandomGenerator ¶
func NewRandomGenerator(seed int64) RandomGenerator
NewRandomGenerator creates a RandomGenerator.
func (RandomGenerator) NewNormalMatrix ¶
func (rng RandomGenerator) NewNormalMatrix(row, col int, mean, stdDev float64) [][]float64
NewNormalMatrix makes a matrix filled with normal random floats.
func (RandomGenerator) NewNormalVector ¶
func (rng RandomGenerator) NewNormalVector(size int, mean, stdDev float64) []float64
NewNormalVector makes a vec filled with normal random floats.
func (RandomGenerator) NewUniformMatrix ¶
func (rng RandomGenerator) NewUniformMatrix(row, col int, low, high float64) [][]float64
NewUniformMatrix makes a matrix filled with uniform random floats.
func (RandomGenerator) NewUniformVector ¶
func (rng RandomGenerator) NewUniformVector(size int, low, high float64) []float64
NewUniformVector makes a vec filled with uniform random floats.
func (RandomGenerator) NewUniformVectorInt ¶
func (rng RandomGenerator) NewUniformVectorInt(size, low, high int) []int
NewUniformVectorInt makes a vec filled with uniform random integers.
type RuntimeOptions ¶
type RuntimeOptions struct { Verbose bool // Verbose switch FitJobs int // Number of jobs for model fitting CVJobs int // Number of jobs for cross validation }
RuntimeOptions defines options used for runtime.
func (*RuntimeOptions) GetCVJobs ¶
func (options *RuntimeOptions) GetCVJobs() int
GetCVJobs returns the number of concurrent jobs for cross validation.
func (*RuntimeOptions) GetFitJobs ¶
func (options *RuntimeOptions) GetFitJobs() int
GetFitJobs returns the number of concurrent jobs for model fitting.
func (*RuntimeOptions) GetVerbose ¶
func (options *RuntimeOptions) GetVerbose() bool
GetVerbose returns the indicator of verbose.
func (*RuntimeOptions) Logf ¶
func (options *RuntimeOptions) Logf(format string, v ...interface{})
Logf writes a formatted message to the log.
func (*RuntimeOptions) Logln ¶
func (options *RuntimeOptions) Logln(v ...interface{})
Logln writes a message to the log, followed by a newline.
type SparseVector ¶
SparseVector is the data structure for the sparse vector.
func NewDenseSparseMatrix ¶
func NewDenseSparseMatrix(row int) []*SparseVector
NewDenseSparseMatrix creates an array of SparseVectors.
func (*SparseVector) ForEach ¶
func (vec *SparseVector) ForEach(f func(i, index int, value float64))
ForEach iterates items in the sparse vector.
func (*SparseVector) ForIntersection ¶
func (vec *SparseVector) ForIntersection(other *SparseVector, f func(index int, a, b float64))
ForIntersection iterates items in the intersection of two vectors. The method sorts two vectors by indices first, then finds common indices in linear time.
func (*SparseVector) Less ¶
func (vec *SparseVector) Less(i, j int) bool
Less returns true if the index of i-th item is less than the index of j-th item.
func (*SparseVector) SortIndex ¶
func (vec *SparseVector) SortIndex()
SortIndex sorts items by indices.
type StringIndexer ¶
StringIndexer manages the map between names and indices. The index is the internal index optimized for faster parameter access and less memory usage.
func NewStringIndexer ¶
func NewStringIndexer() *StringIndexer
NewStringIndexer creates a StringIndexer.
func (*StringIndexer) Add ¶
func (set *StringIndexer) Add(name string)
Add adds a new ID to the indexer.
func (*StringIndexer) Len ¶
func (set *StringIndexer) Len() int
Len returns the number of indexed IDs.
func (*StringIndexer) ToIndex ¶
func (set *StringIndexer) ToIndex(name string) int
ToIndex converts a sparse ID to a dense index.
func (*StringIndexer) ToName ¶
func (set *StringIndexer) ToName(index int) string
ToName converts an index to a name.