base

package

v0.1.1 Latest Latest Go to latest Published: Nov 24, 2019 License: Apache-2.0 Imports: 9 Imported by: 2

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/zhenghaoz/gorse

Documentation ¶

Overview ¶

Package base provides base data structures and utils.

The base data structures and utils include:

Parallel Scheduler
Hyper-parameters Management
Random Generator
Similarity Metrics
Sparse Data Structures

Index ¶

Constants
func Concatenate(vectors ...[]int) []int
func CosineSimilarity(a, b *MarginalSubSet) float64
func FillZeroMatrix(mat [][]float64)
func FillZeroVector(vec []float64)
func ImplicitSimilarity(a, b *MarginalSubSet) float64
func MSDSimilarity(a, b *MarginalSubSet) float64
func Max(a []int) int
func Min(a []int) int
func NewMatrix(row, col int) [][]float64
func NewMatrixInt(row, col int) [][]int
func Parallel(nTask int, nJob int, worker func(begin, end int))
func ParallelFor(begin, end int, worker func(i int))
func ParallelForSum(begin, end int, worker func(i int) float64) float64
func ParallelMean(nTask int, nJob int, worker func(begin, end int) (sum float64)) float64
func PearsonSimilarity(a, b *MarginalSubSet) float64
type FuncSimilarity
type Indexer
- func NewIndexer() *Indexer
- func (set *Indexer) Add(ID int)
- func (set *Indexer) Len() int
- func (set *Indexer) ToID(index int) int
- func (set *Indexer) ToIndex(ID int) int
type MarginalSubSet
- func NewMarginalSubSet(indexer *Indexer, indices []int, values []float64, subset []int) *MarginalSubSet
- func (set *MarginalSubSet) Contain(id int) bool
- func (set *MarginalSubSet) Count() int
- func (set *MarginalSubSet) ForEach(f func(i, id int, value float64))
- func (set *MarginalSubSet) ForEachIndex(f func(i, index int, value float64))
- func (set *MarginalSubSet) ForIntersection(other *MarginalSubSet, f func(id int, a, b float64))
- func (set *MarginalSubSet) GetID(i int) int
- func (set *MarginalSubSet) GetIndex(i int) int
- func (set *MarginalSubSet) Len() int
- func (set *MarginalSubSet) Less(i, j int) bool
- func (set *MarginalSubSet) Mean() float64
- func (set *MarginalSubSet) Swap(i, j int)
type MaxHeap
- func NewMaxHeap(k int) *MaxHeap
- func (maxHeap *MaxHeap) Add(elem interface{}, score float64)
- func (maxHeap *MaxHeap) Len() int
- func (maxHeap *MaxHeap) Less(i, j int) bool
- func (maxHeap *MaxHeap) Pop() interface{}
- func (maxHeap *MaxHeap) Push(x interface{})
- func (maxHeap *MaxHeap) Swap(i, j int)
- func (maxHeap *MaxHeap) ToSorted() ([]interface{}, []float64)
type ParamName
type Params
- func (parameters Params) Copy() Params
- func (parameters Params) GetBool(name ParamName, _default bool) bool
- func (parameters Params) GetFloat64(name ParamName, _default float64) float64
- func (parameters Params) GetInt(name ParamName, _default int) int
- func (parameters Params) GetInt64(name ParamName, _default int64) int64
- func (parameters Params) GetString(name ParamName, _default string) string
- func (parameters Params) Merge(params Params) Params
type RandomGenerator
- func NewRandomGenerator(seed int64) RandomGenerator
- func (rng RandomGenerator) NewNormalMatrix(row, col int, mean, stdDev float64) [][]float64
- func (rng RandomGenerator) NewNormalVector(size int, mean, stdDev float64) []float64
- func (rng RandomGenerator) NewUniformMatrix(row, col int, low, high float64) [][]float64
- func (rng RandomGenerator) NewUniformVector(size int, low, high float64) []float64
- func (rng RandomGenerator) NewUniformVectorInt(size, low, high int) []int
type RuntimeOptions
- func (options *RuntimeOptions) GetCVJobs() int
- func (options *RuntimeOptions) GetFitJobs() int
- func (options *RuntimeOptions) GetVerbose() bool
- func (options *RuntimeOptions) Log(v ...interface{})
- func (options *RuntimeOptions) Logf(format string, v ...interface{})
- func (options *RuntimeOptions) Logln(v ...interface{})
type SparseVector
- func NewDenseSparseMatrix(row int) []*SparseVector
- func NewSparseVector() *SparseVector
- func (vec *SparseVector) Add(index int, value float64)
- func (vec *SparseVector) ForEach(f func(i, index int, value float64))
- func (vec *SparseVector) ForIntersection(other *SparseVector, f func(index int, a, b float64))
- func (vec *SparseVector) Len() int
- func (vec *SparseVector) Less(i, j int) bool
- func (vec *SparseVector) SortIndex()
- func (vec *SparseVector) Swap(i, j int)
type StringIndexer
- func NewStringIndexer() *StringIndexer
- func (set *StringIndexer) Add(name string)
- func (set *StringIndexer) Len() int
- func (set *StringIndexer) ToIndex(name string) int
- func (set *StringIndexer) ToName(index int) string

Constants ¶

View Source

const (
	Basic    string = "basic"    // Basic KNN
	Centered string = "centered" // KNN with centered ratings
	ZScore   string = "z_score"  // KNN with standardized ratings
	Baseline string = "baseline" // KNN with baseline ratings
)

Predefined values for hyper-parameter Type.

View Source

const (
	SGDOptimizer string = "sgd" // Fit model (FM) with stochastic gradient descent.
	BPROptimizer string = "bpr" // Fit model (FM) with bayesian personal ranking.
)

Predefined values for hyper-parameter Optimizer.

View Source

const (
	Pearson string = "pearson" // Pearson similarity
	Cosine  string = "cosine"  // Cosine similarity
	MSD     string = "msd"     // MSD similarity
)

Predefined values for hyper-parameter Similarity.

View Source

const NotId = -1

NotId represents an ID that doesn't exist.

Variables ¶

This section is empty.

Functions ¶

func Concatenate ¶

func Concatenate(vectors ...[]int) []int

Concatenate merges vectors of integers to one.

func CosineSimilarity ¶

func CosineSimilarity(a, b *MarginalSubSet) float64

CosineSimilarity computes the cosine similarity between a pair of vectors.

func FillZeroMatrix ¶

func FillZeroMatrix(mat [][]float64)

FillZeroMatrix fills a matrix with zeros.

func FillZeroVector ¶

func FillZeroVector(vec []float64)

FillZeroVector fills a vector with zeros.

func ImplicitSimilarity ¶

func ImplicitSimilarity(a, b *MarginalSubSet) float64

ImplicitSimilarity computes similarity between two vectors with implicit feedback.

func MSDSimilarity ¶

func MSDSimilarity(a, b *MarginalSubSet) float64

MSDSimilarity computes the Mean Squared Difference similarity between a pair of vectors.

func Max ¶

func Max(a []int) int

Max finds the maximum in a vector of integers. Panics if the slice is empty.

func Min ¶

func Min(a []int) int

Min finds the minimum in a vector of integers. Panics if the slice is empty.

func NewMatrixInt ¶

func NewMatrixInt(row, col int) [][]int

NewMatrixInt creates a matrix of integers.

func Parallel ¶

func Parallel(nTask int, nJob int, worker func(begin, end int))

Parallel schedules and runs tasks in parallel. nTask is the number of tasks. nJob is the number of executors. worker is the executed function, which is passed a range of task IDs (begin, end).

func ParallelForSum ¶

func ParallelForSum(begin, end int, worker func(i int) float64) float64

ParallelForSum runs for loop in parallel.

func ParallelMean ¶

func ParallelMean(nTask int, nJob int, worker func(begin, end int) (sum float64)) float64

ParallelMean schedules and runs tasks in parallel, then returns the mean of the returned values. nTask is the number of tasks. nJob is the number of executors. worker is the executed function, which is passed a range of task IDs (begin, end) and returns a float64 value.

func PearsonSimilarity ¶

func PearsonSimilarity(a, b *MarginalSubSet) float64

PearsonSimilarity computes the absolute Pearson correlation coefficient between a pair of vectors.

Types ¶

type FuncSimilarity ¶

type FuncSimilarity func(a, b *MarginalSubSet) float64

FuncSimilarity computes the similarity between a pair of vectors.

type Indexer ¶

type Indexer struct {
	Indices map[int]int // sparse ID -> dense index
	IDs     []int       // dense index -> sparse ID
}

Indexer manages the map between sparse IDs and dense indices. A sparse ID is a user ID or item ID. The dense index is the internal user index or item index optimized for faster parameter access and less memory usage.

func NewIndexer ¶

func NewIndexer() *Indexer

NewIndexer creates an Indexer.

func (*Indexer) Add ¶

func (set *Indexer) Add(ID int)

Add adds a new ID to the indexer.

func (*Indexer) Len ¶

func (set *Indexer) Len() int

Len returns the number of indexed IDs.

func (*Indexer) ToID ¶

func (set *Indexer) ToID(index int) int

ToID converts a dense index to a sparse ID.

func (*Indexer) ToIndex ¶

func (set *Indexer) ToIndex(ID int) int

ToIndex converts a sparse ID to a dense index.

type MarginalSubSet ¶

type MarginalSubSet struct {
	Indexer *Indexer  // the indexer
	Indices []int     // the full list of indices
	Values  []float64 // the full list of values
	SubSet  []int     // indices of the subset
}

MarginalSubSet constructs a subset over a list of IDs, indices and values.

func NewMarginalSubSet ¶

func NewMarginalSubSet(indexer *Indexer, indices []int, values []float64, subset []int) *MarginalSubSet

NewMarginalSubSet creates a MarginalSubSet.

func (*MarginalSubSet) Contain ¶

func (set *MarginalSubSet) Contain(id int) bool

Contain returns true if an ID exists in the subset.

func (*MarginalSubSet) Count ¶

func (set *MarginalSubSet) Count() int

Count gets the size of marginal subset.

func (*MarginalSubSet) ForEach ¶

func (set *MarginalSubSet) ForEach(f func(i, id int, value float64))

ForEach iterates items in the subset with IDs.

func (*MarginalSubSet) ForEachIndex ¶

func (set *MarginalSubSet) ForEachIndex(f func(i, index int, value float64))

ForEachIndex iterates items in the subset with indices.

func (*MarginalSubSet) ForIntersection ¶

func (set *MarginalSubSet) ForIntersection(other *MarginalSubSet, f func(id int, a, b float64))

ForIntersection iterates items in the intersection of two subsets. The method finds items with common indices in linear time.

func (*MarginalSubSet) GetID ¶

func (set *MarginalSubSet) GetID(i int) int

GetID returns the ID of i-th item.

func (*MarginalSubSet) GetIndex ¶

func (set *MarginalSubSet) GetIndex(i int) int

GetIndex returns the index of i-th item.

func (*MarginalSubSet) Len ¶

func (set *MarginalSubSet) Len() int

Len returns the number of items.

func (*MarginalSubSet) Less ¶

func (set *MarginalSubSet) Less(i, j int) bool

Less compares two items.

func (*MarginalSubSet) Mean ¶

func (set *MarginalSubSet) Mean() float64

Mean of ratings in the subset.

func (*MarginalSubSet) Swap ¶

func (set *MarginalSubSet) Swap(i, j int)

Swap two items.

type MaxHeap ¶

type MaxHeap struct {
	Elem  []interface{} // store elements
	Score []float64     // store scores
	K     int           // the size of heap
}

MaxHeap is designed to store the K maximal elements. A heap is used to reduce time complexity and memory complexity in top-K searching.

func NewMaxHeap ¶

func NewMaxHeap(k int) *MaxHeap

NewMaxHeap creates a MaxHeap.

func (*MaxHeap) Add ¶

func (maxHeap *MaxHeap) Add(elem interface{}, score float64)

Add a new element to the MaxHeap.

func (*MaxHeap) Len ¶

func (maxHeap *MaxHeap) Len() int

Len returns the size of heap. It is a method of heap.Interface.

func (*MaxHeap) Less ¶

func (maxHeap *MaxHeap) Less(i, j int) bool

Less returns true if the score of i-th item is less than the score of j-th item. It is a method of heap.Interface.

func (*MaxHeap) Pop ¶

func (maxHeap *MaxHeap) Pop() interface{}

Pop the last item (the element with minimal score) in the MaxHeap. It is a method of heap.Interface.

func (*MaxHeap) Push ¶

func (maxHeap *MaxHeap) Push(x interface{})

Push a neighbor into the MaxHeap. It is a method of heap.Interface.

func (*MaxHeap) Swap ¶

func (maxHeap *MaxHeap) Swap(i, j int)

Swap the i-th item with the j-th item. It is a method of heap.Interface.

func (*MaxHeap) ToSorted ¶

func (maxHeap *MaxHeap) ToSorted() ([]interface{}, []float64)

ToSorted returns a sorted slice of elements in the heap.

type ParamName ¶

type ParamName string

ParamName is the type of hyper-parameter names.

const (
	Lr            ParamName = "Lr"            // learning rate
	Reg           ParamName = "Reg"           // regularization strength
	NEpochs       ParamName = "NEpochs"       // number of epochs
	NFactors      ParamName = "NFactors"      // number of factors
	RandomState   ParamName = "RandomState"   // random state (seed)
	UseBias       ParamName = "UseBias"       // use bias
	InitMean      ParamName = "InitMean"      // mean of gaussian initial parameter
	InitStdDev    ParamName = "InitStdDev"    // standard deviation of gaussian initial parameter
	InitLow       ParamName = "InitLow"       // lower bound of uniform initial parameter
	InitHigh      ParamName = "InitHigh"      // upper bound of uniform initial parameter
	NUserClusters ParamName = "NUserClusters" // number of user cluster
	NItemClusters ParamName = "NItemClusters" // number of item cluster
	Type          ParamName = "Type"          // type for KNN
	UserBased     ParamName = "UserBased"     // user based if true. otherwise item based.
	Similarity    ParamName = "Similarity"    // similarity metrics
	K             ParamName = "K"             // number of neighbors
	MinK          ParamName = "MinK"          // least number of neighbors
	Optimizer     ParamName = "Optimizer"     // optimizer for optimization (SGD/ALS/BPR)
	Shrinkage     ParamName = "Shrinkage"     // shrinkage strength of similarity
	Alpha         ParamName = "Alpha"         // alpha value, depend on context
)

Predefined hyper-parameter names

type Params ¶

type Params map[ParamName]interface{}

Params stores hyper-parameters for a model. It is a map from hyper-parameter names (ParamName) to values (interface{}). For example, hyper-parameters for SVD are given by:

 base.Params{
		base.Lr:       0.007,
		base.NEpochs:  100,
		base.NFactors: 80,
		base.Reg:      0.1,
	}

func (Params) Copy ¶

func (parameters Params) Copy() Params

Copy hyper-parameters.

func (Params) GetBool ¶

func (parameters Params) GetBool(name ParamName, _default bool) bool

GetBool gets a bool parameter by name. Returns _default if not exists or type doesn't match.

func (Params) GetFloat64 ¶

func (parameters Params) GetFloat64(name ParamName, _default float64) float64

GetFloat64 gets a float parameter by name. Returns _default if not exists or type doesn't match. The type will be converted if given int.

func (Params) GetInt ¶

func (parameters Params) GetInt(name ParamName, _default int) int

GetInt gets an integer parameter by name. Returns _default if not exists or type doesn't match.

func (Params) GetInt64 ¶

func (parameters Params) GetInt64(name ParamName, _default int64) int64

GetInt64 gets an int64 parameter by name. Returns _default if not exists or type doesn't match. The type will be converted if given int.

func (Params) GetString ¶

func (parameters Params) GetString(name ParamName, _default string) string

GetString gets a string parameter. Returns _default if not exists or type doesn't match.

func (Params) Merge ¶

func (parameters Params) Merge(params Params) Params

Merge another group of hyper-parameters to current group of hyper-parameters.

type RandomGenerator ¶

type RandomGenerator struct {
	*rand.Rand
}

RandomGenerator is the random generator for gorse.

func NewRandomGenerator ¶

func NewRandomGenerator(seed int64) RandomGenerator

NewRandomGenerator creates a RandomGenerator.

func (RandomGenerator) NewNormalMatrix ¶

func (rng RandomGenerator) NewNormalMatrix(row, col int, mean, stdDev float64) [][]float64

NewNormalMatrix makes a matrix filled with normal random floats.

func (RandomGenerator) NewNormalVector ¶

func (rng RandomGenerator) NewNormalVector(size int, mean, stdDev float64) []float64

NewNormalVector makes a vec filled with normal random floats.

func (RandomGenerator) NewUniformMatrix ¶

func (rng RandomGenerator) NewUniformMatrix(row, col int, low, high float64) [][]float64

NewUniformMatrix makes a matrix filled with uniform random floats.

func (RandomGenerator) NewUniformVector ¶

func (rng RandomGenerator) NewUniformVector(size int, low, high float64) []float64

NewUniformVector makes a vec filled with uniform random floats.

func (RandomGenerator) NewUniformVectorInt ¶

func (rng RandomGenerator) NewUniformVectorInt(size, low, high int) []int

NewUniformVectorInt makes a vec filled with uniform random integers.

type RuntimeOptions ¶

type RuntimeOptions struct {
	Verbose bool // Verbose switch
	FitJobs int  // Number of jobs for model fitting
	CVJobs  int  // Number of jobs for cross validation
}

RuntimeOptions defines options used for runtime.

func (*RuntimeOptions) GetCVJobs ¶

func (options *RuntimeOptions) GetCVJobs() int

GetCVJobs returns the number of concurrent jobs for cross validation.

func (*RuntimeOptions) GetFitJobs ¶

func (options *RuntimeOptions) GetFitJobs() int

GetFitJobs returns the number of concurrent jobs for model fitting.

func (*RuntimeOptions) GetVerbose ¶

func (options *RuntimeOptions) GetVerbose() bool

GetVerbose returns the indicator of verbose.

func (*RuntimeOptions) Log ¶

func (options *RuntimeOptions) Log(v ...interface{})

Log to logs.

func (*RuntimeOptions) Logf ¶

func (options *RuntimeOptions) Logf(format string, v ...interface{})

Logf to logs with format.

func (*RuntimeOptions) Logln ¶

func (options *RuntimeOptions) Logln(v ...interface{})

Logln to logs with newline.

type SparseVector ¶

type SparseVector struct {
	Indices []int
	Values  []float64
	Sorted  bool
}

SparseVector is the data structure for the sparse vector.

func NewDenseSparseMatrix ¶

func NewDenseSparseMatrix(row int) []*SparseVector

NewDenseSparseMatrix creates an array of SparseVectors.

func NewSparseVector ¶

func NewSparseVector() *SparseVector

NewSparseVector creates a SparseVector.

func (*SparseVector) Add ¶

func (vec *SparseVector) Add(index int, value float64)

Add a new item.

func (*SparseVector) ForEach ¶

func (vec *SparseVector) ForEach(f func(i, index int, value float64))

ForEach iterates items in the sparse vector.

func (*SparseVector) ForIntersection ¶

func (vec *SparseVector) ForIntersection(other *SparseVector, f func(index int, a, b float64))

ForIntersection iterates items in the intersection of two vectors. The method sorts two vectors by indices first, then finds common indices in linear time.

func (*SparseVector) Len ¶

func (vec *SparseVector) Len() int

Len returns the number of items.

func (*SparseVector) Less ¶

func (vec *SparseVector) Less(i, j int) bool

Less returns true if the index of i-th item is less than the index of j-th item.

func (*SparseVector) SortIndex ¶

func (vec *SparseVector) SortIndex()

SortIndex sorts items by indices.

func (*SparseVector) Swap ¶

func (vec *SparseVector) Swap(i, j int)

Swap two items.

type StringIndexer ¶

type StringIndexer struct {
	Indices map[string]int
	Names   []string
}

StringIndexer manages the map between names and indices. The index is the internal index optimized for faster parameter access and less memory usage.

func NewStringIndexer ¶

func NewStringIndexer() *StringIndexer

NewStringIndexer creates a StringIndexer.

func (*StringIndexer) Add ¶

func (set *StringIndexer) Add(name string)

Add adds a new name to the indexer.

func (*StringIndexer) Len ¶

func (set *StringIndexer) Len() int

Len returns the number of indexed names.

func (*StringIndexer) ToIndex ¶

func (set *StringIndexer) ToIndex(name string) int

ToIndex converts a name to an index.

func (*StringIndexer) ToName ¶

func (set *StringIndexer) ToName(index int) string

ToName converts an index to a name.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Overview ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func Concatenate ¶

func CosineSimilarity ¶

func FillZeroMatrix ¶

func FillZeroVector ¶

func ImplicitSimilarity ¶

func MSDSimilarity ¶

func Max ¶

func Min ¶

func NewMatrix ¶

func NewMatrixInt ¶

func Parallel ¶

func ParallelFor ¶

func ParallelForSum ¶

func ParallelMean ¶

func PearsonSimilarity ¶

Types ¶

type FuncSimilarity ¶

type Indexer ¶

func NewIndexer ¶

func (*Indexer) Add ¶

func (*Indexer) Len ¶

func (*Indexer) ToID ¶

func (*Indexer) ToIndex ¶

type MarginalSubSet ¶

func NewMarginalSubSet ¶

func (*MarginalSubSet) Contain ¶

func (*MarginalSubSet) Count ¶

func (*MarginalSubSet) ForEach ¶

func (*MarginalSubSet) ForEachIndex ¶

func (*MarginalSubSet) ForIntersection ¶

func (*MarginalSubSet) GetID ¶

func (*MarginalSubSet) GetIndex ¶

func (*MarginalSubSet) Len ¶

func (*MarginalSubSet) Less ¶

func (*MarginalSubSet) Mean ¶

func (*MarginalSubSet) Swap ¶

type MaxHeap ¶

func NewMaxHeap ¶

func (*MaxHeap) Add ¶

func (*MaxHeap) Len ¶

func (*MaxHeap) Less ¶

func (*MaxHeap) Pop ¶

func (*MaxHeap) Push ¶

func (*MaxHeap) Swap ¶

func (*MaxHeap) ToSorted ¶

type ParamName ¶

type Params ¶

func (Params) Copy ¶

func (Params) GetBool ¶

func (Params) GetFloat64 ¶

func (Params) GetInt ¶

func (Params) GetInt64 ¶

func (Params) GetString ¶

func (Params) Merge ¶

type RandomGenerator ¶

func NewRandomGenerator ¶

func (RandomGenerator) NewNormalMatrix ¶

func (RandomGenerator) NewNormalVector ¶

func (RandomGenerator) NewUniformMatrix ¶

func (RandomGenerator) NewUniformVector ¶

func (RandomGenerator) NewUniformVectorInt ¶

type RuntimeOptions ¶

func (*RuntimeOptions) GetCVJobs ¶

func (*RuntimeOptions) GetFitJobs ¶

func (*RuntimeOptions) GetVerbose ¶

func (*RuntimeOptions) Log ¶

func (*RuntimeOptions) Logf ¶

func (*RuntimeOptions) Logln ¶

type SparseVector ¶

func NewDenseSparseMatrix ¶

func NewSparseVector ¶

func (*SparseVector) Add ¶

func (*SparseVector) ForEach ¶

func (*SparseVector) ForIntersection ¶