Documentation
¶
Overview ¶
Package modelselection contains KFold, GridSearchCV, CrossValidate
Index ¶
- func ParameterGrid(paramGrid map[string][]interface{}) (out []map[string]interface{})
- func TrainTestSplit(X, Y mat.Matrix, testsize float64, randomstate uint64) (Xtrain, Xtest, ytrain, ytest *mat.Dense)
- type CrossValidateResult
- type GridSearchCV
- func (gscv *GridSearchCV) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
- func (gscv *GridSearchCV) GetNOutputs() int
- func (gscv *GridSearchCV) IsClassifier() bool
- func (gscv *GridSearchCV) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense
- func (gscv *GridSearchCV) PredicterClone() base.Predicter
- func (gscv *GridSearchCV) Score(X, Y mat.Matrix) float64
- type KFold
- type RandomState
- type Split
- type Splitter
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ParameterGrid ¶
ParameterGrid ...
Example ¶
paramArray := ParameterGrid(map[string][]interface{}{"a": {1, 2, 3}, "b": {10, 11}}) sortParamArray(paramArray) for _, m := range paramArray { fmt.Println(m["a"], m["b"]) }
Output: 1 10 1 11 2 10 2 11 3 10 3 11
func TrainTestSplit ¶
func TrainTestSplit(X, Y mat.Matrix, testsize float64, randomstate uint64) (Xtrain, Xtest, ytrain, ytest *mat.Dense)
TrainTestSplit splits X and Y into a test set and a train set. testsize must be between 0 and 1. It produces the same sets as scikit-learn.
Example ¶
/* >>> import numpy as np >>> from sklearn.model_selection import train_test_split >>> X, y = np.arange(10).reshape((5, 2)), range(5) >>> X_train, X_test, y_train, y_test = train_test_split( ... X, y, test_size=0.33, random_state=42) ... >>> X_train array([[4, 5], [0, 1], [6, 7]]) >>> y_train [2, 0, 3] >>> X_test array([[2, 3], [8, 9]]) >>> y_test [1, 4] */ X := mat.NewDense(5, 2, []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) Y := mat.NewDense(5, 1, []float64{0, 1, 2, 3, 4}) RandomState := uint64(42) Xtrain, Xtest, Ytrain, Ytest := TrainTestSplit(X, Y, .33, RandomState) fmt.Printf("X_train:\n%g\n", mat.Formatted(Xtrain)) fmt.Printf("Y_train:\n%g\n", mat.Formatted(Ytrain)) fmt.Printf("X_test:\n%g\n", mat.Formatted(Xtest)) fmt.Printf("Y_test:\n%g\n", mat.Formatted(Ytest))
Output: X_train: ⎡4 5⎤ ⎢0 1⎥ ⎣6 7⎦ Y_train: ⎡2⎤ ⎢0⎥ ⎣3⎦ X_test: ⎡2 3⎤ ⎣8 9⎦ Y_test: ⎡1⎤ ⎣4⎦
Types ¶
type CrossValidateResult ¶
type CrossValidateResult struct { TestScore []float64 FitTime, ScoreTime []time.Duration Estimator []base.Predicter }
CrossValidateResult is the struct result of CrossValidate. It includes TestScore, FitTime, ScoreTime, and Estimator.
func CrossValidate ¶
func CrossValidate(estimator base.Predicter, X, Y *mat.Dense, groups []int, scorer func(Ytrue, Ypred mat.Matrix) float64, cv Splitter, NJobs int) (res CrossValidateResult)
CrossValidate evaluates a score by cross-validation. scorer is a func(Ytrue, Ypred) float64 (only mean_squared_error for now). NJobs is the number of goroutines; if <= 0, runtime.NumCPU is used.
Example ¶
// example adapted from https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_validate.html#sklearn.model_selection.cross_validate for _, NJobs := range []int{1, 3} { randomState := rand.New(base.NewLockedSource(5)) diabetes := datasets.LoadDiabetes() X, y := diabetes.X.Slice(0, 150, 0, diabetes.X.RawMatrix().Cols).(*mat.Dense), diabetes.Y.Slice(0, 150, 0, 1).(*mat.Dense) lasso := linearModel.NewLasso() scorer := func(Y, Ypred mat.Matrix) float64 { e := metrics.R2Score(Y, Ypred, nil, "").At(0, 0) return e } cvresults := CrossValidate(lasso, X, y, nil, scorer, &KFold{NSplits: 3, Shuffle: true, RandomState: randomState}, NJobs) sort.Sort(cvresults) fmt.Printf("%.8f\n", cvresults.TestScore) }
Output: [0.29391770 0.25681807 0.24695688] [0.29391770 0.25681807 0.24695688]
func (CrossValidateResult) Len ¶
func (r CrossValidateResult) Len() int
Len for CrossValidateResult to implement sort.Interface
func (CrossValidateResult) Less ¶
func (r CrossValidateResult) Less(i, j int) bool
Less for CrossValidateResult to implement sort.Interface
func (CrossValidateResult) Swap ¶
func (r CrossValidateResult) Swap(i, j int)
Swap for CrossValidateResult to implement sort.Interface
type GridSearchCV ¶
type GridSearchCV struct { Estimator base.Predicter ParamGrid map[string][]interface{} Scorer func(Ytrue, Ypred mat.Matrix) float64 CV Splitter Verbose bool NJobs int LowerScoreIsBetter bool UseChannels bool RandomState rand.Source CVResults map[string][]interface{} BestEstimator base.Predicter BestScore float64 BestParams map[string]interface{} BestIndex int NOutputs int }
GridSearchCV ... Estimator is the base estimator; it must implement base.Predicter. Scorer is a function __returning a higher score when Ypred is better__. CV is a splitter (defaults to KFold).
Example ¶
RandomState := base.NewLockedSource(7) ds := datasets.LoadBoston() X, Y := preprocessing.NewStandardScaler().FitTransform(ds.X, ds.Y) mlp := neuralnetwork.NewMLPRegressor([]int{20}, "relu", "adam", 1e-4) mlp.RandomState = RandomState mlp.Shuffle = false mlp.BatchSize = 20 mlp.LearningRateInit = .005 mlp.MaxIter = 100 scorer := func(Y, Ypred mat.Matrix) float64 { return metrics.MeanSquaredError(Y, Ypred, nil, "").At(0, 0) } gscv := &GridSearchCV{ Estimator: mlp, ParamGrid: map[string][]interface{}{ "Alpha": {1e-4, 2e-4, 5e-4, 1e-3}, "WeightDecay": {1e-4, 1e-5, 1e-6, 5e-7, 2e-7, 1e-7, 5e-8, 2e-8, 1e-8, 0}, }, Scorer: scorer, LowerScoreIsBetter: true, // CV: &KFold{NSplits: 3, RandomState: RandomState, Shuffle: true}, Verbose: true, NJobs: -1} gscv.Fit(X, Y) fmt.Println("Alpha", gscv.BestParams["Alpha"]) fmt.Println("WeightDecay", gscv.BestParams["WeightDecay"]) // fmt.Println(gscv.CVResults["score"])
Output: Alpha 0.0001 WeightDecay 5e-08
func (*GridSearchCV) Fit ¶
func (gscv *GridSearchCV) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
Fit ...
func (*GridSearchCV) GetNOutputs ¶
func (gscv *GridSearchCV) GetNOutputs() int
GetNOutputs returns output columns number for Y to pass to predict
func (*GridSearchCV) IsClassifier ¶
func (gscv *GridSearchCV) IsClassifier() bool
IsClassifier returns the underlying estimator's IsClassifier.
func (*GridSearchCV) PredicterClone ¶
func (gscv *GridSearchCV) PredicterClone() base.Predicter
PredicterClone ...
type KFold ¶
type KFold struct { NSplits int Shuffle bool RandomState base.RandomState }
KFold ...
Example ¶
randomState := rand.New(base.NewLockedSource(7)) X := mat.NewDense(6, 1, []float64{1, 2, 3, 4, 5, 6}) subtest := func(shuffle bool) { fmt.Println("shuffle", shuffle) kf := &KFold{NSplits: 3, Shuffle: shuffle, RandomState: randomState} for sp := range kf.Split(X, nil) { fmt.Printf("%#v\n", sp) } } subtest(false) subtest(true)
Output: shuffle false modelselection.Split{TrainIndex:[]int{0, 1, 2, 3}, TestIndex:[]int{4, 5}} modelselection.Split{TrainIndex:[]int{4, 5, 2, 3}, TestIndex:[]int{0, 1}} modelselection.Split{TrainIndex:[]int{0, 4, 5, 3}, TestIndex:[]int{1, 2}} shuffle true modelselection.Split{TrainIndex:[]int{5, 0, 2, 3}, TestIndex:[]int{4, 1}} modelselection.Split{TrainIndex:[]int{5, 3, 2, 0}, TestIndex:[]int{1, 4}} modelselection.Split{TrainIndex:[]int{2, 4, 1, 0}, TestIndex:[]int{5, 3}}
func (*KFold) GetNSplits ¶
GetNSplits for KFold
type RandomState ¶
RandomState is used to initialize a new random source for reproducibility.