datasets

package
v0.0.0-...-fcddba5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 19, 2023 License: MIT Imports: 22 Imported by: 0

Documentation

Overview

Package datasets includes utilities to load datasets, including methods to load and fetch popular reference datasets. It also features some artificial data generators.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func LoadExamScore

func LoadExamScore() (X, Y *mat.Dense)

LoadExamScore loads data from ex2data1 from Andrew Ng machine learning course

Example
X, Y := LoadExamScore()
xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Println(matstr(X.Slice(0, 3, 0, 2), Y.Slice(0, 3, 0, 1)))
Output:

X 100,2 Y 100,1
34.62365962451697	78.0246928153624	0
30.28671076822607	43.89499752400101	0
35.84740876993872	72.90219802708364	0

func LoadInternationalAirlinesPassengers

func LoadInternationalAirlinesPassengers() (Y *mat.Dense)

LoadInternationalAirlinesPassengers loads the international airline passengers dataset and returns it as a single-column matrix Y (144x1).

Example
Y := LoadInternationalAirlinesPassengers()
fmt.Println(Y.Dims())
Output:

144 1

func LoadMicroChipTest

func LoadMicroChipTest() (X, Y *mat.Dense)

LoadMicroChipTest loads data from ex2data2 from Andrew Ng machine learning course

Example
X, Y := LoadMicroChipTest()
xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Println(matstr(X.Slice(0, 3, 0, 2), Y.Slice(0, 3, 0, 1)))
Output:

X 118,2 Y 118,1
0.051267	0.69956	1
-0.092742	0.68494	1
-0.21371	0.69225	1

func LoadMnist

func LoadMnist() (X, Y *mat.Dense)

LoadMnist loads the MNIST data: X is 5000x400 and Y is 5000x1.

Example
X, Y := LoadMnist()
xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Printf("%.6f %.6f\n", mat.Sum(X), mat.Sum(Y))
Output:

X 5000,400 Y 5000,1
262678.260160 27500.000000

func LoadMnistWeights

func LoadMnistWeights() (Theta1, Theta2 *mat.Dense)

LoadMnistWeights loads mnist weights

Example
Theta1, Theta2 := LoadMnistWeights()
xr, xc := Theta1.Dims()
yr, yc := Theta2.Dims()
fmt.Printf("Theta1 %d,%d Theta2 %d,%d\n", xr, xc, yr, yc)
fmt.Printf("%.6f %.6f\n", mat.Sum(Theta1), mat.Sum(Theta2))
Output:

Theta1 25,401 Theta2 10,26
9.242644 -100.083444

func LoadOctaveBin

func LoadOctaveBin(filename string) map[string]*mat.Dense

LoadOctaveBin reads a (possibly gzipped) Octave binary file into a map of *mat.Dense.

func MakeBlobs

func MakeBlobs(config *MakeBlobsConfig) (X, Y *mat.Dense)

MakeBlobs generates isotropic Gaussian blobs for clustering. config may be nil or a preinitialised config. config.Centers may be an int or a mat.Matrix. Unlike scikit-learn's make_blobs, Shuffle is false by default.

Example
X, Y := MakeBlobs(&MakeBlobsConfig{})
rx, cx := X.Dims()
ry, cy := Y.Dims()
fmt.Printf("rx=%d cx=%d ry=%d cy=%d\n", rx, cx, ry, cy)
Output:

rx=100 cx=2 ry=100 cy=1

func MakeRegression

func MakeRegression(kwargs map[string]interface{}) (X, y, Coef *mat.Dense)

MakeRegression generates a random regression problem. Parameters (keys of kwargs): n_samples : int, optional (default=100). The number of samples. n_features : int, optional (default=100). The number of features. n_informative : int, optional (default=10). The number of informative features, i.e., the number of features used to build the linear model used to generate the output. n_targets : int, optional (default=1). The number of regression targets, i.e., the dimension of the y output vector associated with a sample. By default, the output is a scalar. bias : float64 or []float64 or mat.Matrix, optional (default=0.0). The bias term in the underlying linear model. effective_rank : int, optional (default=None). Currently unused. tail_strength : float between 0.0 and 1.0, optional (default=0.5). Currently unused. shuffle : boolean, optional (default=True). coef : boolean. The coefficients of the underlying linear model are returned regardless of its value. random_state : *math.Rand, optional (default=nil).

Example
X, Y, _ := MakeRegression(map[string]interface{}{"n_samples": 200, "n_features": 3, "n_informative": 2, "n_targets": 2,
	"bias":    []float64{1., 2.},
	"shuffle": true,
})
xr, xc := X.Dims()
fmt.Println("X", xr, xc)
yr, yc := Y.Dims()
fmt.Println("Y", yr, yc)
Output:

X 200 3
Y 200 2

Types

type MLDataset

type MLDataset struct {
	Data         [][]float64 `json:"data,omitempty"`
	Target       []float64   `json:"target,omitempty"`
	TargetNames  []string    `json:"target_names,omitempty"`
	DESCR        string      `json:"DESCR,omitempty"`
	FeatureNames []string    `json:"feature_names,omitempty"`
	X, Y         *mat.Dense
}

MLDataset is the structure returned by LoadIris, LoadBreastCancer, LoadDiabetes, LoadBoston and LoadWine.

func LoadBoston

func LoadBoston() (ds *MLDataset)

LoadBoston loads the Boston housing dataset.

Example
X, Y := LoadBoston().GetXY()
xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Println(matstr(X.Slice(0, 3, 0, 4), Y.Slice(0, 3, 0, 1)))
Output:

X 506,13 Y 506,1
0.00632	18	2.31	0	24
0.02731	0	7.07	0	21.6
0.02729	0	7.07	0	34.7

func LoadBreastCancer

func LoadBreastCancer() (ds *MLDataset)

LoadBreastCancer loads the breast cancer dataset.

Example
ds := LoadBreastCancer()
X, Y := ds.X, ds.Y
xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Println(matstr(X.Slice(0, 3, 0, 4), Y.Slice(0, 3, 0, 1)))
Output:

X 569,30 Y 569,1
17.99	10.38	122.8	1001	0
20.57	17.77	132.9	1326	0
19.69	21.25	130	1203	0

func LoadDiabetes

func LoadDiabetes() (ds *MLDataset)

LoadDiabetes loads the diabetes dataset.

Example
ds := LoadDiabetes()
X, Y := ds.X, ds.Y

xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Println(matstr(X.Slice(0, 3, 0, 4), Y.Slice(0, 3, 0, 1)))
Output:

X 442,10 Y 442,1
0.0380759064334241	0.0506801187398187	0.0616962065186885	0.0218723549949558	151
-0.00188201652779104	-0.044641636506989	-0.0514740612388061	-0.0263278347173518	75
0.0852989062966783	0.0506801187398187	0.0444512133365941	-0.00567061055493425	141

func LoadIris

func LoadIris() (ds *MLDataset)

LoadIris loads the iris dataset.

Example
ds := LoadIris()
X, Y := ds.X, ds.Y
xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Println(matstr(X.Slice(0, 3, 0, 4), Y.Slice(0, 3, 0, 1)))
Output:

X 150,4 Y 150,1
5.1	3.5	1.4	0.2	0
4.9	3	1.4	0.2	0
4.7	3.2	1.3	0.2	0

func LoadWine

func LoadWine() (ds *MLDataset)

LoadWine loads the wine dataset.

Example
X, Y := LoadWine().GetXY()
xr, xc := X.Dims()
yr, yc := Y.Dims()
fmt.Printf("X %d,%d Y %d,%d\n", xr, xc, yr, yc)
fmt.Println(matstr(X.Slice(0, 3, 0, xc), Y.Slice(0, 3, 0, yc)))
Output:

X 178,13 Y 178,1
14.23	1.71	2.43	15.6	127	2.8	3.06	0.28	2.29	5.64	1.04	3.92	1065	0
13.2	1.78	2.14	11.2	100	2.65	2.76	0.26	1.28	4.38	1.05	3.4	1050	0
13.16	2.36	2.67	18.6	101	2.8	3.24	0.3	2.81	5.68	1.03	3.17	1185	0

func (*MLDataset) GetXY

func (ds *MLDataset) GetXY() (X, Y *mat.Dense)

GetXY returns X,Y matrices for dataset

type MakeBlobsConfig

type MakeBlobsConfig struct {
	NSamples    int
	NFeatures   int
	Centers     interface{} // integer or mat.Matrix(NCenters,NFeatures)
	ClusterStd  float64
	CenterBox   []float64
	Shuffle     bool
	RandomState base.RandomState
}

MakeBlobsConfig is the struct of MakeBlobs params

Directories

Path Synopsis
Package data is a dummy package for folder of data files used in tests
Package data is a dummy package for folder of data files used in tests

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL