Documentation ¶
Index ¶
- func ClosedForm(X *mat64.Dense, Y *mat64.Vector) *mat64.Vector
- func GradientDescent(X *mat64.Dense, Y *mat64.Vector, step, tolerance float64, maxIterations int) (*mat64.Vector, int)
- func Predict(X *mat64.Dense, W *mat64.Vector) *mat64.Vector
- func RMSE(rss float64, n int) float64
- func RSS(Y *mat64.Vector, X *mat64.Dense, W *mat64.Vector) float64
- func RSSGradient(Y *mat64.Vector, X *mat64.Dense, W *mat64.Vector, step float64) *mat64.Vector
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ClosedForm ¶
func ClosedForm(X *mat64.Dense, Y *mat64.Vector) *mat64.Vector
ClosedForm computes coefficients using a closed-form solution defined by the equation W=inverse(X'*X)*X'*Y. Important: this solution is very inefficient, as it requires computing the inverse of the matrix X'*X, which is O(N^3). Also, for the inverse of the matrix to exist, the number of ROWS must be greater than the number of FEATURES.
func GradientDescent ¶
func GradientDescent(X *mat64.Dense, Y *mat64.Vector, step, tolerance float64, maxIterations int) (*mat64.Vector, int)
GradientDescent algorithm for the linear regression computes the parameters of the function by taking a gradient (partial derivatives) of the RSS and going down the slope iteratively. Iteration stops when maxIterations is exceeded or when the change in the gradient is less than the given tolerance.
Example ¶
package main

import (
	"encoding/csv"
	"fmt"
	"io"
	"os"

	"github.com/mennanov/mlearn"
	"github.com/mennanov/mlearn/features"
	"github.com/mennanov/mlearn/regression"
)

// loadCSVFile reads the csv file and extracts its features and target vectors
// as slices of strings. The first row is treated as a header and skipped.
// targetColumn is the index of the target value in each row; featureColumns
// lists the indices of the columns to use as features.
// It panics on any file-open or read error (previously non-EOF read errors
// were silently swallowed, truncating the dataset without warning).
func loadCSVFile(file string, targetColumn int, featureColumns []int) ([][]string, []string) {
	f, err := os.Open(file)
	if err != nil {
		panic(err)
	}
	defer f.Close()
	csvReader := csv.NewReader(f)
	featuresMatrix := [][]string{}
	targetsVector := []string{}
	for i := 0; ; i++ {
		row, err := csvReader.Read()
		if err != nil {
			if err == io.EOF {
				break
			}
			// Bug fix: a read error other than EOF must not be ignored.
			panic(err)
		}
		// Skip the headers row.
		if i == 0 {
			continue
		}
		// Add a target value for the current row.
		targetsVector = append(targetsVector, row[targetColumn])
		// Add a features vector (a slice of strings) for the current row.
		rowFeatures := make([]string, len(featureColumns))
		for j, c := range featureColumns {
			rowFeatures[j] = row[c]
		}
		featuresMatrix = append(featuresMatrix, rowFeatures)
	}
	return featuresMatrix, targetsVector
}

// main trains linear-regression weights on the King County house sales
// training set with gradient descent, then reports RSS and RMSE on both
// the training and the test data sets.
func main() {
	columnIdx := []int{3, 4, 5, 6, 7, 14, 15, 17, 18}
	targetColumnIdx := 2
	// Encoders turn the raw string columns into numeric features; column
	// indices here refer to positions within columnIdx, not the raw CSV.
	encoders := []features.Encoder{
		&features.NumericMultiplicationEncoder{Columns: []int{0, 0}, ColumnName: "bedrooms_square"},
		&features.NumericMultiplicationEncoder{Columns: []int{0, 1}, ColumnName: "bedrooms_bathrooms"},
		&features.NumericEncoder{Column: 1, ColumnName: "bathrooms"},
		&features.NumericEncoder{Column: 2, ColumnName: "sqft_living"},
		&features.NumericMultiplicationEncoder{Columns: []int{4, 4}, ColumnName: "floors_square"},
		&features.NumericEncoder{Column: 5, ColumnName: "yr_built"},
		&features.NumericEncoder{Column: 6, ColumnName: "yr_renovated"},
		&features.NumericSumEncoder{Columns: []int{7, 8}, ColumnName: "lat_lng"},
	}
	featuresTrainStr, targetsTrainStr := loadCSVFile("../data/kc_house_train_data.csv", targetColumnIdx, columnIdx)
	featuresTrain, columns, err := mlearn.NewMatrixFromData(featuresTrainStr, encoders...)
	if err != nil {
		panic(err)
	}
	targetsTrain, err := mlearn.NewVectorFromStringData(targetsTrainStr)
	if err != nil {
		panic(err)
	}
	fmt.Println(columns)
	r, _ := featuresTrain.Dims()
	weights, iterations := regression.GradientDescent(featuresTrain, targetsTrain, 1.1e-12, 5e-2, 1000)
	fmt.Println("Gradient Descend converged after iterations: ", iterations)
	rssTrain := regression.RSS(targetsTrain, featuresTrain, weights)
	rmseTrain := regression.RMSE(rssTrain, r)
	fmt.Println("Train RSS:", rssTrain, "Train RMSE:", rmseTrain)
	// Load the test data set.
	featuresTestStr, targetsTestStr := loadCSVFile("../data/kc_house_test_data.csv", targetColumnIdx, columnIdx)
	featuresTest, _, err := mlearn.NewMatrixFromData(featuresTestStr, encoders...)
	if err != nil {
		panic(err)
	}
	targetsTest, err := mlearn.NewVectorFromStringData(targetsTestStr)
	if err != nil {
		panic(err)
	}
	r, _ = featuresTest.Dims()
	rssTest := regression.RSS(targetsTest, featuresTest, weights)
	rmseTest := regression.RMSE(rssTest, r)
	fmt.Println("Test RSS:", rssTest, "Test RMSE:", rmseTest)
}
Output: [intercept bedrooms_square bedrooms_bathrooms bathrooms sqft_living floors_square yr_built yr_renovated lat_lng] Gradient Descend converged after iterations: 366 Train RSS: 1.183078156129154e+15 Train RMSE: 260874.63908916235 Test RSS: 2.7031485613954122e+14 Test RMSE: 252822.72892846845
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.