Documentation
¶
Overview ¶
Package modelselection contains KFold, GridSearchCV, CrossValidate
Index ¶
- func ParameterGrid(paramGrid map[string][]interface{}) (out []map[string]interface{})
- func TrainTestSplit(X, Y mat.Matrix, testsize float64, randomstate uint64) (Xtrain, Xtest, ytrain, ytest *mat.Dense)
- type CrossValidateResult
- type GridSearchCV
- func (gscv *GridSearchCV) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
- func (gscv *GridSearchCV) GetNOutputs() int
- func (gscv *GridSearchCV) IsClassifier() bool
- func (gscv *GridSearchCV) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense
- func (gscv *GridSearchCV) PredicterClone() base.Predicter
- func (gscv *GridSearchCV) Score(X, Y mat.Matrix) float64
- type KFold
- type RandomState
- type Split
- type Splitter
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ParameterGrid ¶
ParameterGrid ...
Example ¶
paramArray := ParameterGrid(map[string][]interface{}{"a": {1, 2, 3}, "b": {10, 11}}) sortParamArray(paramArray) for _, m := range paramArray { fmt.Println(m["a"], m["b"]) }
Output: 1 10 1 11 2 10 2 11 3 10 3 11
func TrainTestSplit ¶
func TrainTestSplit(X, Y mat.Matrix, testsize float64, randomstate uint64) (Xtrain, Xtest, ytrain, ytest *mat.Dense)
TrainTestSplit splits X and Y into a test set and a train set. testsize must be between 0 and 1. It produces the same sets as scikit-learn.
Example ¶
/* >>> import numpy as np >>> from sklearn.model_selection import train_test_split >>> X, y = np.arange(10).reshape((5, 2)), range(5) >>> X_train, X_test, y_train, y_test = train_test_split( ... X, y, test_size=0.33, random_state=42) ... >>> X_train array([[4, 5], [0, 1], [6, 7]]) >>> y_train [2, 0, 3] >>> X_test array([[2, 3], [8, 9]]) >>> y_test [1, 4] */ X := mat.NewDense(5, 2, []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) Y := mat.NewDense(5, 1, []float64{0, 1, 2, 3, 4}) RandomState := uint64(42) Xtrain, Xtest, Ytrain, Ytest := TrainTestSplit(X, Y, .33, RandomState) fmt.Printf("X_train:\n%g\n", mat.Formatted(Xtrain)) fmt.Printf("Y_train:\n%g\n", mat.Formatted(Ytrain)) fmt.Printf("X_test:\n%g\n", mat.Formatted(Xtest)) fmt.Printf("Y_test:\n%g\n", mat.Formatted(Ytest))
Output: X_train: ⎡4 5⎤ ⎢0 1⎥ ⎣6 7⎦ Y_train: ⎡2⎤ ⎢0⎥ ⎣3⎦ X_test: ⎡2 3⎤ ⎣8 9⎦ Y_test: ⎡1⎤ ⎣4⎦
Types ¶
type CrossValidateResult ¶
type CrossValidateResult struct { TestScore []float64 FitTime, ScoreTime []time.Duration Estimator []base.Predicter }
CrossValidateResult is the struct result of CrossValidate. It includes TestScore, FitTime, ScoreTime, and Estimator.
func CrossValidate ¶
func CrossValidate(estimator base.Predicter, X, Y *mat.Dense, groups []int, scorer func(Ytrue, Ypred mat.Matrix) float64, cv Splitter, NJobs int) (res CrossValidateResult)
CrossValidate evaluates a score by cross-validation. scorer is a func(Ytrue, Ypred) float64 (only mean_squared_error for now). NJobs is the number of goroutines; if <= 0, runtime.NumCPU is used.
Example ¶
// example adapted from https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_validate.html#sklearn.model_selection.cross_validate for _, NJobs := range []int{1, 3} { randomState := rand.New(base.NewLockedSource(5)) diabetes := datasets.LoadDiabetes() X, y := diabetes.X.Slice(0, 150, 0, diabetes.X.RawMatrix().Cols).(*mat.Dense), diabetes.Y.Slice(0, 150, 0, 1).(*mat.Dense) lasso := linearModel.NewLasso() scorer := func(Y, Ypred mat.Matrix) float64 { e := metrics.R2Score(Y, Ypred, nil, "").At(0, 0) return e } cvresults := CrossValidate(lasso, X, y, nil, scorer, &KFold{NSplits: 3, Shuffle: true, RandomState: randomState}, NJobs) sort.Sort(cvresults) fmt.Printf("%.8f\n", cvresults.TestScore) }
Output: [0.29391770 0.25681807 0.24695688] [0.29391770 0.25681807 0.24695688]
func (CrossValidateResult) Len ¶
func (r CrossValidateResult) Len() int
Len for CrossValidateResult to implement sort.Interface
func (CrossValidateResult) Less ¶
func (r CrossValidateResult) Less(i, j int) bool
Less for CrossValidateResult to implement sort.Interface
func (CrossValidateResult) Swap ¶
func (r CrossValidateResult) Swap(i, j int)
Swap for CrossValidateResult to implement sort.Interface
type GridSearchCV ¶
type GridSearchCV struct { Estimator base.Predicter ParamGrid map[string][]interface{} Scorer func(Ytrue, Ypred mat.Matrix) float64 CV Splitter Verbose bool NJobs int LowerScoreIsBetter bool UseChannels bool RandomState rand.Source CVResults map[string][]interface{} BestEstimator base.Predicter BestScore float64 BestParams map[string]interface{} BestIndex int NOutputs int }
GridSearchCV ... Estimator is the base estimator; it must implement base.Predicter. Scorer is a function __returning a higher score when Ypred is better__. CV is a splitter (defaults to KFold).
Example ¶
RandomState := base.NewLockedSource(7) ds := datasets.LoadBoston() X, Y := preprocessing.NewStandardScaler().FitTransform(ds.X, ds.Y) mlp := neuralnetwork.NewMLPRegressor([]int{20}, "relu", "adam", 1e-4) mlp.RandomState = RandomState mlp.Shuffle = false mlp.BatchSize = 20 mlp.LearningRateInit = .005 mlp.MaxIter = 100 scorer := func(Y, Ypred mat.Matrix) float64 { return metrics.MeanSquaredError(Y, Ypred, nil, "").At(0, 0) } gscv := &GridSearchCV{ Estimator: mlp, ParamGrid: map[string][]interface{}{ "Alpha": {1e-4, 2e-4, 5e-4, 1e-3}, "WeightDecay": {1e-4, 1e-5, 1e-6, 5e-7, 2e-7, 1e-7, 5e-8, 2e-8, 1e-8, 0}, }, Scorer: scorer, LowerScoreIsBetter: true, // CV: &KFold{NSplits: 3, RandomState: RandomState, Shuffle: true}, Verbose: true, NJobs: -1} gscv.Fit(X, Y) fmt.Println("Alpha", gscv.BestParams["Alpha"]) fmt.Println("WeightDecay", gscv.BestParams["WeightDecay"]) // fmt.Println(gscv.CVResults["score"])
Output: Alpha 0.0001 WeightDecay 5e-08
func (*GridSearchCV) Fit ¶
func (gscv *GridSearchCV) Fit(Xmatrix, Ymatrix mat.Matrix) base.Fiter
Fit ...
func (*GridSearchCV) GetNOutputs ¶
func (gscv *GridSearchCV) GetNOutputs() int
GetNOutputs returns output columns number for Y to pass to predict
func (*GridSearchCV) IsClassifier ¶
func (gscv *GridSearchCV) IsClassifier() bool
IsClassifier returns the underlying estimator's IsClassifier.
func (*GridSearchCV) PredicterClone ¶
func (gscv *GridSearchCV) PredicterClone() base.Predicter
PredicterClone ...
type KFold ¶
type KFold struct { NSplits int Shuffle bool RandomState base.RandomState }
KFold ...
Example ¶
randomState := rand.New(base.NewLockedSource(7)) X := mat.NewDense(6, 1, []float64{1, 2, 3, 4, 5, 6}) subtest := func(shuffle bool) { fmt.Println("shuffle", shuffle) kf := &KFold{NSplits: 3, Shuffle: shuffle, RandomState: randomState} for sp := range kf.Split(X, nil) { fmt.Printf("%#v\n", sp) } } subtest(false) subtest(true)
Output: shuffle false modelselection.Split{TrainIndex:[]int{0, 1, 2, 3}, TestIndex:[]int{4, 5}} modelselection.Split{TrainIndex:[]int{4, 5, 2, 3}, TestIndex:[]int{0, 1}} modelselection.Split{TrainIndex:[]int{0, 4, 5, 3}, TestIndex:[]int{1, 2}} shuffle true modelselection.Split{TrainIndex:[]int{5, 0, 2, 3}, TestIndex:[]int{4, 1}} modelselection.Split{TrainIndex:[]int{5, 3, 2, 0}, TestIndex:[]int{1, 4}} modelselection.Split{TrainIndex:[]int{2, 4, 1, 0}, TestIndex:[]int{5, 3}}
func (*KFold) GetNSplits ¶
GetNSplits for KFold
type RandomState ¶
RandomState is used to initialize a new random source for reproducibility.