Documentation ¶
Overview ¶
Package base provides base interfaces for GoLearn objects to implement. It also provides a raw base for those objects.
Index ¶
- Constants
- func CheckNewInstancesFromRaw(attrs []Attribute, rows int, data []float64) error
- func ParseCSV(filepath string, label int, columns []int) (int, int, []string, []string, []float64)
- func ParseCSVGetRows(filepath string) int
- func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string
- func SaveEstimatorToGob(path string, e *Estimator)
- type Attribute
- type BaseClassifier
- type BaseEstimator
- type BaseRegressor
- type CategoricalAttribute
- func (Attr *CategoricalAttribute) Equals(other Attribute) bool
- func (Attr *CategoricalAttribute) GetName() string
- func (Attr *CategoricalAttribute) GetStringFromSysVal(val float64) string
- func (Attr *CategoricalAttribute) GetSysVal(userVal string) float64
- func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) float64
- func (Attr *CategoricalAttribute) GetType() int
- func (Attr *CategoricalAttribute) GetUsrVal(sysVal float64) string
- func (Attr *CategoricalAttribute) SetName(name string)
- func (Attr *CategoricalAttribute) String() string
- type Classifier
- type Estimator
- type FloatAttribute
- func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) (float64, error)
- func (Attr *FloatAttribute) Equals(other Attribute) bool
- func (Attr *FloatAttribute) GetName() string
- func (Attr *FloatAttribute) GetStringFromSysVal(rawVal float64) string
- func (Attr *FloatAttribute) GetSysVal(userVal float64) float64
- func (Attr *FloatAttribute) GetSysValFromString(rawVal string) float64
- func (Attr *FloatAttribute) GetType() int
- func (Attr *FloatAttribute) GetUsrVal(sysVal float64) float64
- func (Attr *FloatAttribute) SetName(name string)
- func (Attr *FloatAttribute) String() string
- type Instances
- func InstancesTrainTestSplit(src *Instances, prop float64) [2](*Instances)
- func NewInstances(attrs []Attribute, rows int) *Instances
- func NewInstancesFromDense(attrs []Attribute, rows int, mat *mat64.Dense) *Instances
- func NewInstancesFromRaw(attrs []Attribute, rows int, data []float64) *Instances
- func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *Instances, err error)
- func (inst *Instances) CountAttrValues(a Attribute) map[string]int
- func (inst *Instances) CountClassValues() map[string]int
- func (inst *Instances) DecomposeOnAttributeValues(at Attribute) map[string]*Instances
- func (inst *Instances) Equal(other *Instances) bool
- func (inst *Instances) GeneratePredictionVector() *Instances
- func (inst *Instances) Get(row int, col int) float64
- func (inst *Instances) GetAttr(attrIndex int) Attribute
- func (inst *Instances) GetAttrIndex(of Attribute) int
- func (inst *Instances) GetAttrStr(row int, attr int) string
- func (inst *Instances) GetAttributeCount() int
- func (inst *Instances) GetClass(row int) string
- func (Inst *Instances) GetClassAttr() Attribute
- func (Inst *Instances) GetClassAttrPtr() *Attribute
- func (inst *Instances) GetClassDistribution() map[string]int
- func (inst *Instances) GetClassDistributionAfterSplit(at Attribute) map[string]map[string]int
- func (inst *Instances) GetRowVector(row int) []float64
- func (inst *Instances) GetRowVectorWithoutClass(row int) []float64
- func (inst *Instances) ReplaceAttr(index int, a Attribute)
- func (inst *Instances) RowStr(row int) string
- func (inst *Instances) SampleWithReplacement(size int) *Instances
- func (inst *Instances) SelectAttributes(attrs []Attribute) *Instances
- func (inst *Instances) Set(row int, col int, val float64)
- func (inst *Instances) SetAttrStr(row int, attr int, val string)
- func (inst *Instances) Shuffle()
- func (inst *Instances) Sort(direction SortDirection, attrs []int)
- func (inst *Instances) String() string
- type Model
- type Predictor
- type SortDirection
Constants ¶
const ( // CategoricalType is for Attributes which represent values distinctly. CategoricalType = iota // Float64Type should be replaced with a FractionalNumeric type [DEPRECATED]. Float64Type )
Variables ¶
This section is empty.
Functions ¶
func CheckNewInstancesFromRaw ¶
CheckNewInstancesFromRaw checks whether a call to NewInstancesFromRaw is likely to produce an error-free result.
func ParseCSV ¶
ParseCSV parses a CSV file and returns the number of columns and rows, the headers, the labels associated with classification, and the data that will be used for training.
func ParseCSVGetRows ¶
ParseCSVGetRows returns the number of rows in a given file.
func ParseCSVSniffAttributeNames ¶
ParseCSVSniffAttributeNames returns a slice containing the top row of a given CSV file, or placeholders if hasHeaders is false.
func SaveEstimatorToGob ¶
Serialises an estimator to a provided filepath, in gob format. See http://golang.org/pkg/encoding/gob for further details.
Types ¶
type Attribute ¶
type Attribute interface { // Returns the general characterstics of this Attribute . // to avoid the overhead of casting GetType() int // Returns the human-readable name of this Attribute. GetName() string // Sets the human-readable name of this Attribute. SetName(string) // Gets a human-readable overview of this Attribute for debugging. String() string // Converts a value given from a human-readable string into a system // representation. For example, a CategoricalAttribute with values // ["iris-setosa", "iris-virginica"] would return the float64 // representation of 0 when given "iris-setosa". GetSysValFromString(string) float64 // Converts a given value from a system representation into a human // representation. For example, a CategoricalAttribute with values // ["iris-setosa", "iris-viriginica"] might return "iris-setosa" // when given 0.0 as the argument. GetStringFromSysVal(float64) string // Tests for equality with another Attribute. Other Attributes are // considered equal if: // * They have the same type (i.e. FloatAttribute <> CategoricalAttribute) // * They have the same name // * If applicable, they have the same categorical values (though not // necessarily in the same order). Equals(Attribute) bool }
Attributes disambiguate columns of the feature matrix and declare their types.
func ParseCSVGetAttributes ¶
ParseCSVGetAttributes returns an ordered slice of appropriately typed and named Attributes.
func ParseCSVSniffAttributeTypes ¶
ParseCSVSniffAttributeTypes returns a slice of appropriately-typed Attributes.
The type of a given attribute is determined by looking at the first data row of the CSV.
type BaseClassifier ¶
type BaseClassifier struct {
TrainingData *Instances
}
BaseClassifier stores options common to every classifier.
type BaseEstimator ¶
type CategoricalAttribute ¶
type CategoricalAttribute struct { Name string // contains filtered or unexported fields }
CategoricalAttribute is an Attribute implementation which stores discrete string values - useful for representing classes.
func NewCategoricalAttribute ¶
func NewCategoricalAttribute() *CategoricalAttribute
func (*CategoricalAttribute) Equals ¶
func (Attr *CategoricalAttribute) Equals(other Attribute) bool
Equals checks equality against another Attribute.
Two CategoricalAttributes are considered equal if they contain the same values and have the same name. Otherwise, this function returns false.
func (*CategoricalAttribute) GetName ¶
func (Attr *CategoricalAttribute) GetName() string
GetName returns the human-readable name assigned to this attribute.
func (*CategoricalAttribute) GetStringFromSysVal ¶
func (Attr *CategoricalAttribute) GetStringFromSysVal(val float64) string
GetStringFromSysVal returns a human-readable value from the given system-representation value val.
IMPORTANT: This function calls panic() if the value is greater than the length of the array. TODO: Return a user-configurable default instead.
func (*CategoricalAttribute) GetSysVal ¶
func (Attr *CategoricalAttribute) GetSysVal(userVal string) float64
GetSysVal returns the system representation of userVal as an index into the Values slice. If the userVal can't be found, it returns -1.
func (*CategoricalAttribute) GetSysValFromString ¶
func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) float64
GetSysValFromString returns the system representation of rawVal as an index into the Values slice. If rawVal is not inside the Values slice, it is appended.
IMPORTANT: If no system representation yet exists, this function adds it. If you need to determine whether rawVal exists: use GetSysVal and check for a -1 return value.
Example: if the CategoricalAttribute contains the values ["iris-setosa", "iris-virginica"] and "iris-versicolor" is provided as the argument, the Values slice becomes ["iris-setosa", "iris-virginica", "iris-versicolor"] and 2.00 is returned as the system representation.
func (*CategoricalAttribute) GetType ¶
func (Attr *CategoricalAttribute) GetType() int
GetType returns CategoricalType to avoid casting overhead.
func (*CategoricalAttribute) GetUsrVal ¶
func (Attr *CategoricalAttribute) GetUsrVal(sysVal float64) string
GetUsrVal returns a human-readable representation of the given sysVal.
IMPORTANT: this function doesn't check the boundaries of the array.
func (*CategoricalAttribute) SetName ¶
func (Attr *CategoricalAttribute) SetName(name string)
SetName sets the human-readable name on this attribute.
func (*CategoricalAttribute) String ¶
func (Attr *CategoricalAttribute) String() string
String returns a human-readable summary of this Attribute.
Returns a string containing the list of human-readable values this CategoricalAttribute can take.
type Classifier ¶
type Classifier interface { // Takes a set of Instances, copies the class Attribute // and constructs a new set of Instances of equivalent // length with only the class Attribute and fills it in // with predictions. Predict(*Instances) *Instances // Takes a set of instances and updates the Classifier's // internal structures to enable prediction Fit(*Instances) // Why not make every classifier return a nice-looking string? String() string }
Classifier implementations predict categorical class labels.
type Estimator ¶
type Estimator interface {
Fit()
}
An object that can ingest some data and train on it.
type FloatAttribute ¶
FloatAttribute is an implementation which stores floating point representations of numbers.
func NewFloatAttribute ¶
func NewFloatAttribute() *FloatAttribute
NewFloatAttribute returns a new FloatAttribute with a default precision of 2 decimal places
func (*FloatAttribute) CheckSysValFromString ¶
func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) (float64, error)
CheckSysValFromString confirms whether a given rawVal can be converted into a valid system representation.
func (*FloatAttribute) Equals ¶
func (Attr *FloatAttribute) Equals(other Attribute) bool
Equals tests a FloatAttribute for equality with another Attribute.
Returns false if the other Attribute has a different name or if the other Attribute is not a FloatAttribute.
func (*FloatAttribute) GetName ¶
func (Attr *FloatAttribute) GetName() string
GetName returns this FloatAttribute's human-readable name.
func (*FloatAttribute) GetStringFromSysVal ¶
func (Attr *FloatAttribute) GetStringFromSysVal(rawVal float64) string
GetStringFromSysVal converts a given system value to a string with two decimal places of precision [TODO: revise this and allow more precision].
func (*FloatAttribute) GetSysVal ¶
func (Attr *FloatAttribute) GetSysVal(userVal float64) float64
GetSysVal returns the system representation of userVal.
Because FloatAttribute represents float64 types, this just returns its argument.
func (*FloatAttribute) GetSysValFromString ¶
func (Attr *FloatAttribute) GetSysValFromString(rawVal string) float64
GetSysValFromString parses the given rawVal string to a float64 and returns it.
float64 happens to be a 1-to-1 mapping to the system representation. IMPORTANT: This function panic()s if rawVal is not a valid float. Use CheckSysValFromString to confirm.
func (*FloatAttribute) GetType ¶
func (Attr *FloatAttribute) GetType() int
GetType returns Float64Type.
func (*FloatAttribute) GetUsrVal ¶
func (Attr *FloatAttribute) GetUsrVal(sysVal float64) float64
GetUsrVal returns the user representation of sysVal.
Because FloatAttribute represents float64 types, this just returns its argument.
func (*FloatAttribute) SetName ¶
func (Attr *FloatAttribute) SetName(name string)
SetName sets this FloatAttribute's human-readable name.
func (*FloatAttribute) String ¶
func (Attr *FloatAttribute) String() string
String returns a human-readable summary of this Attribute. e.g. "FloatAttribute(Sepal Width)"
type Instances ¶
type Instances struct { Rows int Cols int ClassIndex int // contains filtered or unexported fields }
Instances represents a grid of numbers (typed by Attributes) stored internally in mat.DenseMatrix as float64's. See docs/instances.md for more information.
func InstancesTrainTestSplit ¶
InstancesTrainTestSplit takes a given Instances (src) and a train-test fraction (prop) and returns an array of two new Instances, one containing approximately that fraction and the other containing what's left.
IMPORTANT: this function is only meaningful when prop is between 0.0 and 1.0. Using any other values may result in odd behaviour.
func NewInstances ¶
NewInstances returns a preallocated Instances structure with some helpful values pre-filled.
func NewInstancesFromDense ¶
NewInstancesFromDense creates a set of Instances from a mat64.Dense matrix
func NewInstancesFromRaw ¶
NewInstancesFromRaw wraps a slice of float64 numbers in a mat64.Dense structure, reshaping it with the given number of rows and representing it with the given attrs (Attribute slice)
IMPORTANT: if the |attrs| * |rows| value doesn't equal len(data) then panic()s may occur. Use CheckNewInstancesFromRaw to confirm.
func ParseCSVToInstances ¶
ParseCSVToInstances reads the CSV file given by filepath and returns the read Instances.
func (*Instances) CountAttrValues ¶
CountAttrValues returns the distribution of values of a given Attribute. IMPORTANT: calls panic() if the attribute index of a cannot be determined. Call GetAttrIndex(a) and check for a -1 return value.
func (*Instances) CountClassValues ¶
CountClassValues returns the class distribution of this Instances set
func (*Instances) DecomposeOnAttributeValues ¶
DecomposeOnAttributeValues divides the instance set depending on the value of a given Attribute, constructs child instances, and returns them in a map keyed on the string value of that Attribute. IMPORTANT: calls panic() if the attribute index of at cannot be determined. Use GetAttrIndex(at) and check for a -1 return value.
func (*Instances) Equal ¶
Equal checks whether a given Instance set is exactly the same as another: same size and same values (as determined by the Attributes)
IMPORTANT: does not explicitly check if the Attributes are considered equal.
func (*Instances) GeneratePredictionVector ¶
GeneratePredictionVector generates a new set of Instances with the same number of rows, but only this Instance set's class Attribute.
func (*Instances) Get ¶
Get returns the system representation (float64) of the value stored at the given row and col coordinate.
func (*Instances) GetAttr ¶
GetAttr returns information about an attribute at given index in the attributes slice.
func (*Instances) GetAttrIndex ¶
GetAttrIndex returns the index of the given Attribute `of' in the attributes slice.
func (*Instances) GetAttrStr ¶
GetAttrStr returns a human-readable string value stored in column `attr' and row `row', as determined by the appropriate Attribute function.
func (*Instances) GetAttributeCount ¶
GetAttributeCount returns the number of attributes represented.
func (*Instances) GetClass ¶
GetClass returns the string representation of the given row's class, as determined by the Attribute at the ClassIndex position from GetAttr
func (*Instances) GetClassAttr ¶
func (*Instances) GetClassAttrPtr ¶
func (*Instances) GetClassDistribution ¶
GetClassDistribution returns a map containing the count of each class type (indexed by the class' string representation)
func (*Instances) GetClassDistributionAfterSplit ¶
func (*Instances) GetRowVector ¶
GetRowVector returns a row of system representation values at the given row index.
func (*Instances) GetRowVectorWithoutClass ¶
GetRowVectorWithoutClass returns a row of system representation values at the given row index, excluding the class attribute
func (*Instances) ReplaceAttr ¶
ReplaceAttr overwrites the attribute at `index' with `a'
func (*Instances) SampleWithReplacement ¶
SampleWithReplacement returns a new set of Instances of size `size' containing random rows from this set of Instances.
IMPORTANT: There's a high chance of seeing duplicate rows whenever size is close to the row count.
func (*Instances) SelectAttributes ¶
SelectAttributes returns a new instance set containing the values from this one with only the Attributes specified
func (*Instances) Set ¶
Set sets the system representation (float64) to val at the given row and column coordinate.
func (*Instances) SetAttrStr ¶
SetAttrStr sets the system-representation value of row in column attr to value val, implicitly converting the string to system-representation via the appropriate Attribute function.
func (*Instances) Shuffle ¶
func (inst *Instances) Shuffle()
Shuffle randomizes the row order in place
func (*Instances) Sort ¶
func (inst *Instances) Sort(direction SortDirection, attrs []int)
Sort does an in-place radix sort of Instances, using SortDirection direction (Ascending or Descending) with attrs as a slice of Attribute indices that you want to sort by.
IMPORTANT: Radix sort is not stable, so ordering outside the attributes used for sorting is arbitrary.
type Model ¶
type Model interface {
Score()
}
A supervised learning object that is capable of scoring accuracy against a test set.
type SortDirection ¶
type SortDirection int
SortDirection specifies sorting direction...
const ( // Descending says that Instances should be sorted high to low... Descending SortDirection = 1 // Ascending states that Instances should be sorted low to high... Ascending SortDirection = 2 )