Documentation
¶
Overview ¶
Package core provides core components for gorse.
Core components include:
- Dataset: used to train and test models.
- Splitter: used to split dataset.
- Evaluator: evaluate models.
- Validation: cross validation.
Index ¶
- Variables
- func Copy(dst, src interface{}) error
- func EvaluateAUC(estimator ModelInterface, testSet, excludeSet DataSetInterface) float64
- func EvaluateRank(estimator ModelInterface, testSet DataSetInterface, ...) []float64
- func EvaluateRating(estimator ModelInterface, testSet DataSetInterface, metrics ...RatingMetric) []float64
- func Items(dataSet ...DataSetInterface) map[string]bool
- func LoadEntityFromCSV(filePath string, fieldSep string, tagSep string, header bool, names []string, ...) []map[string]interface{}
- func MAE(groundTruth []float64, prediction []float64) float64
- func MAP(targetSet *base.MarginalSubSet, rankList []string) float64
- func MRR(targetSet *base.MarginalSubSet, rankList []string) float64
- func NDCG(targetSet *base.MarginalSubSet, rankList []string) float64
- func Neighbors(dataSet DataSetInterface, itemId string, n int, similarity base.FuncSimilarity) ([]string, []float64)
- func Popularity(dataSet DataSetInterface) (itemId []string, popularity []float64)
- func Precision(targetSet *base.MarginalSubSet, rankList []string) float64
- func RMSE(groundTruth []float64, prediction []float64) float64
- func Recall(targetSet *base.MarginalSubSet, rankList []string) float64
- func Top(items map[string]bool, userId string, n int, exclude *base.MarginalSubSet, ...) ([]string, []float64)
- type CrossValidateResult
- type CrossValidationEvaluator
- type DataSet
- func (set *DataSet) Count() int
- func (set *DataSet) FeatureCount() int
- func (set *DataSet) Get(i int) (string, string, float64)
- func (set *DataSet) GetWithIndex(i int) (int, int, float64)
- func (set *DataSet) GlobalMean() float64
- func (set *DataSet) Item(itemId string) *base.MarginalSubSet
- func (set *DataSet) ItemByIndex(itemIndex int) *base.MarginalSubSet
- func (set *DataSet) ItemCount() int
- func (set *DataSet) ItemFeatures() []*base.SparseVector
- func (set *DataSet) ItemIndexer() *base.Indexer
- func (set *DataSet) Items() []*base.MarginalSubSet
- func (set *DataSet) SetItemFeature(items []map[string]interface{}, features []string, idName string)
- func (set *DataSet) SetUserFeatures(users []map[string]interface{}, features []string, idName string)
- func (set *DataSet) SubSet(subset []int) DataSetInterface
- func (set *DataSet) User(userId string) *base.MarginalSubSet
- func (set *DataSet) UserByIndex(userIndex int) *base.MarginalSubSet
- func (set *DataSet) UserCount() int
- func (set *DataSet) UserFeatures() []*base.SparseVector
- func (set *DataSet) UserIndexer() *base.Indexer
- func (set *DataSet) Users() []*base.MarginalSubSet
- type DataSetInterface
- type ModelInterface
- type ModelSelectionResult
- type ParameterGrid
- type RankMetric
- type RatingMetric
- type Splitter
- type SubSet
- func (set *SubSet) Count() int
- func (set *SubSet) Get(i int) (string, string, float64)
- func (set *SubSet) GetWithIndex(i int) (int, int, float64)
- func (set *SubSet) GlobalMean() float64
- func (set *SubSet) Item(itemId string) *base.MarginalSubSet
- func (set *SubSet) ItemByIndex(itemIndex int) *base.MarginalSubSet
- func (set *SubSet) Items() []*base.MarginalSubSet
- func (set *SubSet) SubSet(indices []int) DataSetInterface
- func (set *SubSet) User(userId string) *base.MarginalSubSet
- func (set *SubSet) UserByIndex(userIndex int) *base.MarginalSubSet
- func (set *SubSet) Users() []*base.MarginalSubSet
Constants ¶
This section is empty.
Variables ¶
var ( GorseDir string DataSetDir string TempDir string )
The Data directories
Functions ¶
func EvaluateAUC ¶
func EvaluateAUC(estimator ModelInterface, testSet, excludeSet DataSetInterface) float64
EvaluateAUC evaluates a model by AUC.
func EvaluateRank ¶
func EvaluateRank(estimator ModelInterface, testSet DataSetInterface, excludeSet DataSetInterface, n int, metrics ...RankMetric) []float64
EvaluateRank evaluates a model in top-n tasks.
func EvaluateRating ¶
func EvaluateRating(estimator ModelInterface, testSet DataSetInterface, metrics ...RatingMetric) []float64
EvaluateRating evaluates a model in rating prediction tasks.
func Items ¶
func Items(dataSet ...DataSetInterface) map[string]bool
Items gets all items from the test set and the training set.
func LoadEntityFromCSV ¶
func LoadEntityFromCSV(filePath string, fieldSep string, tagSep string, header bool, names []string, index int) []map[string]interface{}
LoadEntityFromCSV loads entities (items or users) from a CSV file.
func MAP ¶
func MAP(targetSet *base.MarginalSubSet, rankList []string) float64
MAP means Mean Average Precision. mAP: http://sdsawtelle.github.io/blog/output/mean-average-precision-MAP-for-recommender-systems.html
func MRR ¶
func MRR(targetSet *base.MarginalSubSet, rankList []string) float64
MRR means Mean Reciprocal Rank.
The mean reciprocal rank is a statistical measure for evaluating any process that produces a list of possible responses to a sample of queries, ordered by probability of correctness. The reciprocal rank of a query response is the multiplicative inverse of the rank of the first correct answer: 1 for first place, 1⁄2 for second place, 1⁄3 for third place and so on. The mean reciprocal rank is the average of the reciprocal ranks of results for a sample of queries Q:
MRR = \frac{1}{|Q|} \sum_{i=1}^{|Q|} \frac{1}{rank_i}
func NDCG ¶
func NDCG(targetSet *base.MarginalSubSet, rankList []string) float64
NDCG means Normalized Discounted Cumulative Gain.
func Neighbors ¶
func Neighbors(dataSet DataSetInterface, itemId string, n int, similarity base.FuncSimilarity) ([]string, []float64)
Neighbors finds the N nearest neighbors of an item. It returns an unordered slice of items (sparse ID) and the corresponding similarities.
func Popularity ¶
func Popularity(dataSet DataSetInterface) (itemId []string, popularity []float64)
Popularity computes the popularity of all items.
func Precision ¶
func Precision(targetSet *base.MarginalSubSet, rankList []string) float64
Precision is the fraction of relevant items among the recommended items.
\frac{|\{relevant documents\} \cap \{retrieved documents\}|}{|\{retrieved documents\}|}
Types ¶
type CrossValidateResult ¶
CrossValidateResult contains the result of cross validation.
func CrossValidate ¶
func CrossValidate(model ModelInterface, dataSet DataSetInterface, splitter Splitter, seed int64, options *base.RuntimeOptions, evaluators ...CrossValidationEvaluator) []CrossValidateResult
CrossValidate evaluates a model by k-fold cross validation.
func (CrossValidateResult) MeanAndMargin ¶
func (sv CrossValidateResult) MeanAndMargin() (float64, float64)
MeanAndMargin returns the mean and the margin of cross validation scores.
type CrossValidationEvaluator ¶
type CrossValidationEvaluator func(estimator ModelInterface, testSet, trainSet DataSetInterface) (scores, costs []float64)
CrossValidationEvaluator is the evaluator for cross-validation.
func NewRankEvaluator ¶
func NewRankEvaluator(n int, metrics ...RankMetric) CrossValidationEvaluator
NewRankEvaluator creates an evaluator for personalized ranking cross-validation.
func NewRatingEvaluator ¶
func NewRatingEvaluator(metrics ...RatingMetric) CrossValidationEvaluator
NewRatingEvaluator creates an evaluator for rating prediction cross-validation.
type DataSet ¶
type DataSet struct {
// contains filtered or unexported fields
}
DataSet contains preprocessed data structures for recommendation models.
func LoadDataFromBuiltIn ¶
LoadDataFromBuiltIn loads a built-in data set. Currently supported:
ml-100k - MovieLens 100K ml-1m - MovieLens 1M ml-10m - MovieLens 10M ml-20m - MovieLens 20M netflix - Netflix filmtrust - FilmTrust epinions - Epinions
func LoadDataFromCSV ¶
LoadDataFromCSV loads data from a CSV file. The CSV file should be formatted as:
[optional header] <userId 1> <sep> <itemId 1> <sep> <rating 1> <sep> <extras> <userId 2> <sep> <itemId 2> <sep> <rating 2> <sep> <extras> <userId 3> <sep> <itemId 3> <sep> <rating 3> <sep> <extras> ...
For example, the `u.data` file from MovieLens 100K is:
196\t242\t3\t881250949 186\t302\t3\t891717742 22\t377\t1\t878887116
func LoadDataFromNetflix ¶
LoadDataFromNetflix loads data from the Netflix dataset. The file should be formatted as:
<itemId 1>: <userId 1>, <rating 1>, <date> <userId 2>, <rating 2>, <date> <userId 3>, <rating 3>, <date> ...
func NewDataSet ¶
NewDataSet creates a data set.
func (*DataSet) FeatureCount ¶
FeatureCount returns the number of additional features.
func (*DataSet) GetWithIndex ¶
GetWithIndex gets the i-th record by <user index, item index, rating>.
func (*DataSet) GlobalMean ¶
GlobalMean computes the global mean of ratings.
func (*DataSet) Item ¶
func (set *DataSet) Item(itemId string) *base.MarginalSubSet
Item returns the subset of an item.
func (*DataSet) ItemByIndex ¶
func (set *DataSet) ItemByIndex(itemIndex int) *base.MarginalSubSet
ItemByIndex gets ratings of an item by index.
func (*DataSet) ItemFeatures ¶
func (set *DataSet) ItemFeatures() []*base.SparseVector
ItemFeatures returns additional features of items.
func (*DataSet) ItemIndexer ¶
ItemIndexer returns the item indexer.
func (*DataSet) Items ¶
func (set *DataSet) Items() []*base.MarginalSubSet
Items returns the rating subsets of all items.
func (*DataSet) SetItemFeature ¶
func (set *DataSet) SetItemFeature(items []map[string]interface{}, features []string, idName string)
SetItemFeature sets features of items.
func (*DataSet) SetUserFeatures ¶
func (set *DataSet) SetUserFeatures(users []map[string]interface{}, features []string, idName string)
SetUserFeatures sets features of users.
func (*DataSet) SubSet ¶
func (set *DataSet) SubSet(subset []int) DataSetInterface
SubSet returns a subset of current dataset.
func (*DataSet) User ¶
func (set *DataSet) User(userId string) *base.MarginalSubSet
User returns the subset of a user.
func (*DataSet) UserByIndex ¶
func (set *DataSet) UserByIndex(userIndex int) *base.MarginalSubSet
UserByIndex gets ratings of a user by index.
func (*DataSet) UserFeatures ¶
func (set *DataSet) UserFeatures() []*base.SparseVector
UserFeatures returns additional features of users.
func (*DataSet) UserIndexer ¶
UserIndexer returns the user indexer.
func (*DataSet) Users ¶
func (set *DataSet) Users() []*base.MarginalSubSet
Users returns the rating subsets of all users.
type DataSetInterface ¶
type DataSetInterface interface { // GlobalMean returns the global mean of ratings in the dataset. GlobalMean() float64 // Count returns the number of ratings in the dataset. Count() int // UserCount returns the number of users in the dataset. UserCount() int // ItemCount returns the number of items in the dataset. ItemCount() int // FeatureCount returns the number of additional features. FeatureCount() int // Get i-th rating by (user ID, item ID, rating). Get(i int) (string, string, float64) // GetWithIndex gets i-th rating by (user index, item index, rating). GetWithIndex(i int) (int, int, float64) // UserIndexer returns the user indexer. UserIndexer() *base.Indexer // ItemIndexer returns the item indexer. ItemIndexer() *base.Indexer // SubSet gets a subset of current dataset. SubSet(subset []int) DataSetInterface // Users returns subsets of users. Users() []*base.MarginalSubSet // Items returns subsets of items. Items() []*base.MarginalSubSet // UserFeatures returns additional features of users. UserFeatures() []*base.SparseVector // ItemFeatures returns additional features of items. ItemFeatures() []*base.SparseVector // User returns the subset of a user. User(userId string) *base.MarginalSubSet // Item returns the subset of a item. Item(itemId string) *base.MarginalSubSet // UserByIndex returns the subset of a user by the index. UserByIndex(userIndex int) *base.MarginalSubSet // ItemByIndex returns the subset of a item by the index. ItemByIndex(itemIndex int) *base.MarginalSubSet }
DataSetInterface is the interface for a dataset object.
func NewSubSet ¶
func NewSubSet(dataSet *DataSet, subset []int) DataSetInterface
NewSubSet creates a subset of a dataset.
func Split ¶
func Split(data DataSetInterface, testRatio float64) (train, test DataSetInterface)
Split splits a dataset into a training set and a test set according to testRatio.
type ModelInterface ¶
type ModelInterface interface { // Set parameters. SetParams(params base.Params) // Get parameters. GetParams() base.Params // Predict the rating given by a user (userId) to a item (itemId). Predict(userId, itemId string) float64 // Fit a model with a train set and parameters. Fit(trainSet DataSetInterface, options *base.RuntimeOptions) }
ModelInterface is the interface for all models. Any model in this package should implement it.
type ModelSelectionResult ¶
type ModelSelectionResult struct { BestScore float64 BestCost float64 BestParams base.Params BestIndex int CVResults []CrossValidateResult AllParams []base.Params }
ModelSelectionResult contains the result of a hyper-parameter search such as grid search.
func GridSearchCV ¶
func GridSearchCV(estimator ModelInterface, dataSet DataSetInterface, paramGrid ParameterGrid, splitter Splitter, seed int64, options *base.RuntimeOptions, evaluators ...CrossValidationEvaluator) []ModelSelectionResult
GridSearchCV finds the best parameters for a model.
func RandomSearchCV ¶
func RandomSearchCV(estimator ModelInterface, dataSet DataSetInterface, paramGrid ParameterGrid, splitter Splitter, trial int, seed int64, options *base.RuntimeOptions, evaluators ...CrossValidationEvaluator) []ModelSelectionResult
RandomSearchCV searches for the best hyper-parameters by random sampling.
type ParameterGrid ¶
ParameterGrid contains candidates for grid search.
type RankMetric ¶
type RankMetric func(targetSet *base.MarginalSubSet, rankList []string) float64
RankMetric is used by evaluators in personalized ranking tasks.
type RatingMetric ¶
RatingMetric is used by evaluators in rating prediction tasks.
type Splitter ¶
type Splitter func(set DataSetInterface, seed int64) ([]DataSetInterface, []DataSetInterface)
Splitter splits data into train sets and test sets.
func NewKFoldSplitter ¶
NewKFoldSplitter creates a k-fold splitter.
func NewRatioSplitter ¶
NewRatioSplitter creates a ratio splitter.
func NewUserLOOSplitter ¶
NewUserLOOSplitter creates a per-user leave-one-out data splitter.
type SubSet ¶
type SubSet struct { *DataSet // the existed dataset. // contains filtered or unexported fields }
SubSet creates a subset index over an existing dataset.
func (*SubSet) GetWithIndex ¶
GetWithIndex gets the i-th record by <user index, item index, rating>.
func (*SubSet) GlobalMean ¶
GlobalMean computes the global mean of ratings.
func (*SubSet) Item ¶
func (set *SubSet) Item(itemId string) *base.MarginalSubSet
Item returns the ratings subset of an item.
func (*SubSet) ItemByIndex ¶
func (set *SubSet) ItemByIndex(itemIndex int) *base.MarginalSubSet
ItemByIndex gets ratings of an item by index.
func (*SubSet) Items ¶
func (set *SubSet) Items() []*base.MarginalSubSet
Items returns the rating subsets of all items.
func (*SubSet) SubSet ¶
func (set *SubSet) SubSet(indices []int) DataSetInterface
SubSet returns a subset of current dataset.
func (*SubSet) User ¶
func (set *SubSet) User(userId string) *base.MarginalSubSet
User returns ratings subset of a user.
func (*SubSet) UserByIndex ¶
func (set *SubSet) UserByIndex(userIndex int) *base.MarginalSubSet
UserByIndex gets ratings of a user by index.
func (*SubSet) Users ¶
func (set *SubSet) Users() []*base.MarginalSubSet
Users returns the rating subsets of all users.