Documentation ¶
Overview ¶
Package base provides base interfaces for GoLearn objects to implement. It also provides a raw base for those objects.
Index ¶
- Constants
- Variables
- func DecomposeOnAttributeValues(inst FixedDataGrid, at Attribute) map[string]FixedDataGrid
- func GetClass(from DataGrid, row int) string
- func GetClassDistribution(inst FixedDataGrid) map[string]int
- func GetClassDistributionAfterSplit(inst FixedDataGrid, at Attribute) map[string]map[string]int
- func InstancesTrainTestSplit(src FixedDataGrid, prop float64) (FixedDataGrid, FixedDataGrid)
- func PackFloatToBytes(val float64) []byte
- func PackFloatToBytesInline(val float64, ret []byte)
- func PackU64ToBytes(val uint64) []byte
- func PackU64ToBytesInline(val uint64, ret []byte)
- func ParseCSVBuildInstances(filepath string, hasHeaders bool, u UpdatableDataGrid)
- func ParseCSVGetRows(filepath string) (int, error)
- func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string
- func SaveEstimatorToGob(path string, e *Estimator)
- func SetClass(at UpdatableDataGrid, row int, class string)
- func SetLogger(logger *log.Logger)
- func SetLoggerOut(out io.Writer)
- func Silent()
- func UnpackBytesToFloat(val []byte) float64
- func UnpackBytesToU64(val []byte) uint64
- type Attribute
- func AttributeDifference(a1, a2 []Attribute) []Attribute
- func AttributeDifferenceReferences(a1, a2 []Attribute) []Attribute
- func AttributeIntersect(a1, a2 []Attribute) []Attribute
- func AttributeIntersectReferences(a1, a2 []Attribute) []Attribute
- func CheckCompatible(s1 FixedDataGrid, s2 FixedDataGrid) []Attribute
- func NonClassAttributes(d DataGrid) []Attribute
- func NonClassFloatAttributes(d DataGrid) []Attribute
- func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute
- func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute
- type AttributeGroup
- type AttributeGroupStorageRef
- type AttributeSpec
- type BaseClassifier
- type BaseEstimator
- type BaseRegressor
- type BinaryAttribute
- func (b *BinaryAttribute) Compatible(other Attribute) bool
- func (b *BinaryAttribute) Equals(other Attribute) bool
- func (b *BinaryAttribute) GetName() string
- func (b *BinaryAttribute) GetStringFromSysVal(val []byte) string
- func (b *BinaryAttribute) GetSysValFromString(userVal string) []byte
- func (b *BinaryAttribute) GetType() int
- func (b *BinaryAttribute) SetName(name string)
- func (b *BinaryAttribute) String() string
- type BinaryAttributeGroup
- type CategoricalAttribute
- func (Attr *CategoricalAttribute) Compatible(other Attribute) bool
- func (Attr *CategoricalAttribute) Equals(other Attribute) bool
- func (Attr *CategoricalAttribute) GetName() string
- func (Attr *CategoricalAttribute) GetStringFromSysVal(rawVal []byte) string
- func (Attr *CategoricalAttribute) GetSysVal(userVal string) []byte
- func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) []byte
- func (Attr *CategoricalAttribute) GetType() int
- func (Attr *CategoricalAttribute) GetUsrVal(sysVal []byte) string
- func (Attr *CategoricalAttribute) GetValues() []string
- func (Attr *CategoricalAttribute) SetName(name string)
- func (Attr *CategoricalAttribute) String() string
- type Classifier
- type DataGrid
- type DenseInstances
- func (inst *DenseInstances) AddAttribute(a Attribute) AttributeSpec
- func (inst *DenseInstances) AddClassAttribute(a Attribute) error
- func (inst *DenseInstances) AllAttributes() []Attribute
- func (inst *DenseInstances) AllClassAttributes() []Attribute
- func (inst *DenseInstances) CreateAttributeGroup(name string, size int) (err error)
- func (inst *DenseInstances) Equal(other DataGrid) bool
- func (inst *DenseInstances) Extend(rows int) error
- func (inst *DenseInstances) Get(a AttributeSpec, row int) []byte
- func (inst *DenseInstances) GetAttribute(get Attribute) (AttributeSpec, error)
- func (inst *DenseInstances) GetAttributeGroup(name string) (AttributeGroup, error)
- func (inst *DenseInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
- func (inst *DenseInstances) RemoveClassAttribute(a Attribute) error
- func (inst *DenseInstances) RowString(row int) string
- func (inst *DenseInstances) Set(a AttributeSpec, row int, val []byte)
- func (inst *DenseInstances) Size() (int, int)
- func (inst *DenseInstances) String() string
- type Estimator
- type Filter
- type FilteredAttribute
- type FixedAttributeGroup
- type FixedDataGrid
- func LazyShuffle(from FixedDataGrid) FixedDataGrid
- func LazySort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
- func SampleWithReplacement(from FixedDataGrid, size int) FixedDataGrid
- func Shuffle(from FixedDataGrid) FixedDataGrid
- func Sort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
- type FloatAttribute
- func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) ([]byte, error)
- func (Attr *FloatAttribute) Compatible(other Attribute) bool
- func (Attr *FloatAttribute) Equals(other Attribute) bool
- func (Attr *FloatAttribute) GetFloatFromSysVal(rawVal []byte) float64
- func (Attr *FloatAttribute) GetName() string
- func (Attr *FloatAttribute) GetStringFromSysVal(rawVal []byte) string
- func (Attr *FloatAttribute) GetSysValFromString(rawVal string) []byte
- func (Attr *FloatAttribute) GetType() int
- func (Attr *FloatAttribute) SetName(name string)
- func (Attr *FloatAttribute) String() string
- type InstancesView
- func (v *InstancesView) AddClassAttribute(a Attribute) error
- func (v *InstancesView) AllAttributes() []Attribute
- func (v *InstancesView) AllClassAttributes() []Attribute
- func (v *InstancesView) Get(as AttributeSpec, row int) []byte
- func (v *InstancesView) GetAttribute(a Attribute) (AttributeSpec, error)
- func (v *InstancesView) MapOverRows(as []AttributeSpec, rowFunc func([][]byte, int) (bool, error)) error
- func (v *InstancesView) RemoveClassAttribute(a Attribute) error
- func (v *InstancesView) RowString(row int) string
- func (v *InstancesView) Size() (int, int)
- func (v *InstancesView) String() string
- type LazilyFilteredInstances
- func (l *LazilyFilteredInstances) AddClassAttribute(cls Attribute) error
- func (l *LazilyFilteredInstances) AllAttributes() []Attribute
- func (l *LazilyFilteredInstances) AllClassAttributes() []Attribute
- func (l *LazilyFilteredInstances) Get(as AttributeSpec, row int) []byte
- func (l *LazilyFilteredInstances) GetAttribute(target Attribute) (AttributeSpec, error)
- func (l *LazilyFilteredInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
- func (l *LazilyFilteredInstances) RemoveClassAttribute(cls Attribute) error
- func (l *LazilyFilteredInstances) RowString(row int) string
- func (l *LazilyFilteredInstances) Size() (int, int)
- func (l *LazilyFilteredInstances) String() string
- type Model
- type Predictor
- type SortDirection
- type UpdatableDataGrid
Constants ¶
const ( // CategoricalType is for Attributes which represent values distinctly. CategoricalType = iota // Float64Type should be replaced with a FractionalNumeric type [DEPRECATED]. Float64Type BinaryType )
Variables ¶
Logger is the default logger for the entire golearn package. It writes to stdout and has no prefix and no flags.
Functions ¶
func DecomposeOnAttributeValues ¶
func DecomposeOnAttributeValues(inst FixedDataGrid, at Attribute) map[string]FixedDataGrid
DecomposeOnAttributeValues divides the instance set depending on the value of a given Attribute, constructs child instances, and returns them in a map keyed on the string value of that Attribute.
IMPORTANT: calls panic() if the AttributeSpec of at cannot be determined.
func GetClass ¶
GetClass is a shortcut for returning the string value of the current class on a given row.
IMPORTANT: GetClass will panic if the number of ClassAttributes is set to anything other than one.
func GetClassDistribution ¶
func GetClassDistribution(inst FixedDataGrid) map[string]int
GetClassDistribution returns a map containing the count of each class type (indexed by the class' string representation).
func GetClassDistributionAfterSplit ¶
func GetClassDistributionAfterSplit(inst FixedDataGrid, at Attribute) map[string]map[string]int
GetClassDistributionAfterSplit returns the class distribution after a speculative split on a given Attribute.
func InstancesTrainTestSplit ¶
func InstancesTrainTestSplit(src FixedDataGrid, prop float64) (FixedDataGrid, FixedDataGrid)
InstancesTrainTestSplit takes a given Instances (src) and a train-test fraction (prop) and returns an array of two new Instances, one containing approximately that fraction and the other containing what's left.
IMPORTANT: this function is only meaningful when prop is between 0.0 and 1.0. Using any other values may result in odd behaviour.
func PackFloatToBytes ¶
PackFloatToBytes returns an 8-byte slice containing the byte values of a float64.
func PackFloatToBytesInline ¶
PackFloatToBytesInline fills ret with the byte values of the float64 argument. ret must be at least 8 bytes in size.
func PackU64ToBytes ¶
PackU64ToBytes allocates a return value of appropriate length and fills it with the values of val.
func PackU64ToBytesInline ¶
PackU64ToBytesInline fills ret with the byte values of val. Ret must have length at least 8.
func ParseCSVBuildInstances ¶
func ParseCSVBuildInstances(filepath string, hasHeaders bool, u UpdatableDataGrid)
ParseCSVBuildInstances updates an UpdatableDataGrid from a filepath in place.
func ParseCSVGetRows ¶
ParseCSVGetRows returns the number of rows in a given file.
func ParseCSVSniffAttributeNames ¶
ParseCSVSniffAttributeNames returns a slice containing the top row of a given CSV file, or placeholders if hasHeaders is false.
func SaveEstimatorToGob ¶
SaveEstimatorToGob serialises an estimator to a provided filepath, in gob format. See http://golang.org/pkg/encoding/gob for further details.
func SetClass ¶
func SetClass(at UpdatableDataGrid, row int, class string)
SetClass is a shortcut for updating the given class of a row.
IMPORTANT: SetClass will panic if the number of class Attributes is anything other than one.
func SetLoggerOut ¶
SetLoggerOut creates a new base logger for the entire golearn package using the given out instead of the default, os.Stdout. The other log options are set to the default, i.e. no prefix and no flags.
func Silent ¶
func Silent()
Silent turns off logging throughout the golearn package by setting the logger to write to /dev/null.
func UnpackBytesToFloat ¶
UnpackBytesToFloat converts a given byte slice into an equivalent float64.
func UnpackBytesToU64 ¶
UnpackBytesToU64 converts a given byte slice into a uint64 value.
Types ¶
type Attribute ¶
type Attribute interface { // Returns the general characterstics of this Attribute . // to avoid the overhead of casting GetType() int // Returns the human-readable name of this Attribute. GetName() string // Sets the human-readable name of this Attribute. SetName(string) // Gets a human-readable overview of this Attribute for debugging. String() string // Converts a value given from a human-readable string into a system // representation. For example, a CategoricalAttribute with values // ["iris-setosa", "iris-virginica"] would return the float64 // representation of 0 when given "iris-setosa". GetSysValFromString(string) []byte // Converts a given value from a system representation into a human // representation. For example, a CategoricalAttribute with values // ["iris-setosa", "iris-viriginica"] might return "iris-setosa" // when given 0.0 as the argument. GetStringFromSysVal([]byte) string // Tests for equality with another Attribute. Other Attributes are // considered equal if: // * They have the same type (i.e. FloatAttribute <> CategoricalAttribute) // * They have the same name // * If applicable, they have the same categorical values (though not // necessarily in the same order). Equals(Attribute) bool // Tests whether two Attributes can be represented in the same pond // i.e. they're the same size, and their byte order makes them meaningful // when considered together Compatible(Attribute) bool }
Attributes disambiguate columns of the feature matrix and declare their types.
func AttributeDifference ¶
AttributeDifference returns the difference between two Attribute slices: i.e. all the values in a1 which do not occur in a2.
IMPORTANT: result is ordered the same as a1.
IMPORTANT: result only contains values from a1.
func AttributeDifferenceReferences ¶
AttributeDifferenceReferences returns the difference between two Attribute slices: i.e. all the values in a1 which do not occur in a2.
IMPORTANT: result is not guaranteed to be ordered.
IMPORTANT: done using pointers for speed, use AttributeDifference if the Attributes originate from different DataGrids.
func AttributeIntersect ¶
AttributeIntersect returns the intersection of two Attribute slices.
IMPORTANT: result is ordered in order of the first []Attribute argument.
IMPORTANT: result contains only Attributes from a1.
func AttributeIntersectReferences ¶
AttributeIntersectReferences returns the intersection of two Attribute slices.
IMPORTANT: result is not guaranteed to be ordered.
IMPORTANT: done using pointers for speed, use AttributeIntersect if the Attributes originate from different DataGrids.
func CheckCompatible ¶
func CheckCompatible(s1 FixedDataGrid, s2 FixedDataGrid) []Attribute
CheckCompatible checks whether two DataGrids have the same Attributes and if they do, it returns them.
func NonClassAttributes ¶
NonClassAttributes returns all Attributes which aren't designated as a class Attribute.
func NonClassFloatAttributes ¶
NonClassFloatAttributes returns all FloatAttributes which aren't designated as a class Attribute.
func ParseCSVGetAttributes ¶
ParseCSVGetAttributes returns an ordered slice of appropriately typed and named Attributes.
func ParseCSVSniffAttributeTypes ¶
ParseCSVSniffAttributeTypes returns a slice of appropriately-typed Attributes.
The type of a given attribute is determined by looking at the first data row of the CSV.
type AttributeGroup ¶
type AttributeGroup interface { // Adds a new Attribute AddAttribute(Attribute) error // Returns all Attributes Attributes() []Attribute // Gets the size of each row in bytes (rounded up) RowSize() int // Gets references to underlying memory Storage() []AttributeGroupStorageRef // Returns a human-readable summary String() string // contains filtered or unexported methods }
AttributeGroups store related sequences of system values in memory for the DenseInstances structure.
type AttributeGroupStorageRef ¶
AttributeGroupStorageRef is a reference to a particular set of allocated rows within a FixedAttributeGroup
type AttributeSpec ¶
type AttributeSpec struct {
// contains filtered or unexported fields
}
AttributeSpec is a pointer to a particular Attribute within a particular Instance structure and encodes position and storage information associated with that Attribute.
func ResolveAllAttributes ¶
func ResolveAllAttributes(d DataGrid) []AttributeSpec
ResolveAllAttributes returns every AttributeSpec
func ResolveAttributes ¶
func ResolveAttributes(d DataGrid, attrs []Attribute) []AttributeSpec
ResolveAttributes returns AttributeSpecs describing all of the Attributes.
func (*AttributeSpec) GetAttribute ¶
func (a *AttributeSpec) GetAttribute() Attribute
GetAttribute returns the Attribute which this AttributeSpec refers to.
func (*AttributeSpec) String ¶
func (a *AttributeSpec) String() string
String returns a human-readable description of this AttributeSpec.
type BaseClassifier ¶
type BaseClassifier struct {
TrainingData *DataGrid
}
BaseClassifier stores options common to every classifier.
type BaseEstimator ¶
type BinaryAttribute ¶
type BinaryAttribute struct {
Name string
}
BinaryAttributes can only represent 1 or 0.
func NewBinaryAttribute ¶
func NewBinaryAttribute(name string) *BinaryAttribute
NewBinaryAttribute creates a BinaryAttribute with the given name
func (*BinaryAttribute) Compatible ¶
func (b *BinaryAttribute) Compatible(other Attribute) bool
Compatible checks whether this Attribute can be represented in the same pond as another.
func (*BinaryAttribute) Equals ¶
func (b *BinaryAttribute) Equals(other Attribute) bool
Equals checks for equality with another BinaryAttribute.
func (*BinaryAttribute) GetName ¶
func (b *BinaryAttribute) GetName() string
GetName returns the name of this Attribute.
func (*BinaryAttribute) GetStringFromSysVal ¶
func (b *BinaryAttribute) GetStringFromSysVal(val []byte) string
GetStringFromSysVal returns either 1 or 0.
func (*BinaryAttribute) GetSysValFromString ¶
func (b *BinaryAttribute) GetSysValFromString(userVal string) []byte
GetSysValFromString returns either 1 or 0 in a single byte.
func (*BinaryAttribute) GetType ¶
func (b *BinaryAttribute) GetType() int
GetType returns BinaryType.
func (*BinaryAttribute) SetName ¶
func (b *BinaryAttribute) SetName(name string)
SetName sets the name of this Attribute.
func (*BinaryAttribute) String ¶
func (b *BinaryAttribute) String() string
String returns a human-readable representation.
type BinaryAttributeGroup ¶
type BinaryAttributeGroup struct {
FixedAttributeGroup
}
BinaryAttributeGroups contain only BinaryAttributes. They compact each Attribute to a bit for better storage.
func (*BinaryAttributeGroup) RowSize ¶
func (b *BinaryAttributeGroup) RowSize() int
func (*BinaryAttributeGroup) String ¶
func (b *BinaryAttributeGroup) String() string
type CategoricalAttribute ¶
type CategoricalAttribute struct { Name string // contains filtered or unexported fields }
CategoricalAttribute is an Attribute implementation which stores discrete string values - useful for representing classes.
func NewCategoricalAttribute ¶
func NewCategoricalAttribute() *CategoricalAttribute
NewCategoricalAttribute creates a blank CategoricalAttribute.
func (*CategoricalAttribute) Compatible ¶
func (Attr *CategoricalAttribute) Compatible(other Attribute) bool
Compatible checks that this CategoricalAttribute has the same values as another, in the same order.
func (*CategoricalAttribute) Equals ¶
func (Attr *CategoricalAttribute) Equals(other Attribute) bool
Equals checks equality against another Attribute.
Two CategoricalAttributes are considered equal if they contain the same values and have the same name. Otherwise, this function returns false.
func (*CategoricalAttribute) GetName ¶
func (Attr *CategoricalAttribute) GetName() string
GetName returns the human-readable name assigned to this attribute.
func (*CategoricalAttribute) GetStringFromSysVal ¶
func (Attr *CategoricalAttribute) GetStringFromSysVal(rawVal []byte) string
GetStringFromSysVal returns a human-readable value from the given system-representation value val.
IMPORTANT: This function calls panic() if the value is greater than the length of the array. TODO: Return a user-configurable default instead.
func (*CategoricalAttribute) GetSysVal ¶
func (Attr *CategoricalAttribute) GetSysVal(userVal string) []byte
GetSysVal returns the system representation of userVal as an index into the Values slice. If the userVal can't be found, it returns nothing.
func (*CategoricalAttribute) GetSysValFromString ¶
func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) []byte
GetSysValFromString returns the system representation of rawVal as an index into the Values slice. If rawVal is not inside the Values slice, it is appended.
IMPORTANT: If no system representation yet exists, this function adds it. If you need to determine whether rawVal exists: use GetSysVal and check for a zero-length return value.
Example: if the CategoricalAttribute contains the values ["iris-setosa", "iris-virginica"] and "iris-versicolor" is provided as the argument, the Values slice becomes ["iris-setosa", "iris-virginica", "iris-versicolor"] and 2.00 is returned as the system representation.
func (*CategoricalAttribute) GetType ¶
func (Attr *CategoricalAttribute) GetType() int
GetType returns CategoricalType to avoid casting overhead.
func (*CategoricalAttribute) GetUsrVal ¶
func (Attr *CategoricalAttribute) GetUsrVal(sysVal []byte) string
GetUsrVal returns a human-readable representation of the given sysVal.
IMPORTANT: this function doesn't check the boundaries of the array.
func (*CategoricalAttribute) GetValues ¶
func (Attr *CategoricalAttribute) GetValues() []string
GetValues returns all the values currently defined
func (*CategoricalAttribute) SetName ¶
func (Attr *CategoricalAttribute) SetName(name string)
SetName sets the human-readable name on this attribute.
func (*CategoricalAttribute) String ¶
func (Attr *CategoricalAttribute) String() string
String returns a human-readable summary of this Attribute.
Returns a string containing the list of human-readable values this CategoricalAttribute can take.
type Classifier ¶
type Classifier interface { // Takes a set of Instances, copies the class Attribute // and constructs a new set of Instances of equivalent // length with only the class Attribute and fills it in // with predictions. Predict(FixedDataGrid) (FixedDataGrid, error) // Takes a set of instances and updates the Classifier's // internal structures to enable prediction Fit(FixedDataGrid) error // Why not make every classifier return a nice-looking string? String() string }
Classifier implementations predict categorical class labels.
type DataGrid ¶
type DataGrid interface { // Retrieves a given Attribute's specification GetAttribute(Attribute) (AttributeSpec, error) // Retrieves details of every Attribute AllAttributes() []Attribute // Marks an Attribute as a class Attribute AddClassAttribute(Attribute) error // Unmarks an Attribute as a class Attribute RemoveClassAttribute(Attribute) error // Returns details of all class Attributes AllClassAttributes() []Attribute // Gets the bytes at a given position or nil Get(AttributeSpec, int) []byte // Convenience function for iteration. MapOverRows([]AttributeSpec, func([][]byte, int) (bool, error)) error }
DataGrid implementations represent data addressable by rows and columns.
type DenseInstances ¶
type DenseInstances struct {
// contains filtered or unexported fields
}
DenseInstances stores each Attribute value explicitly in a large grid.
func NewDenseInstances ¶
func NewDenseInstances() *DenseInstances
NewDenseInstances generates a new DenseInstances set with an anonymous EDF mapping and default settings.
func ParseCSVToInstances ¶
func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInstances, err error)
ParseCSVToInstances reads the CSV file given by filepath and returns the read Instances.
func (*DenseInstances) AddAttribute ¶
func (inst *DenseInstances) AddAttribute(a Attribute) AttributeSpec
AddAttribute adds an Attribute to this set of DenseInstances. Creates a default AttributeGroup for it if a suitable one doesn't exist. Returns an AttributeSpec for subsequent Set() calls.
IMPORTANT: will panic if storage has been allocated via Extend.
func (*DenseInstances) AddClassAttribute ¶
func (inst *DenseInstances) AddClassAttribute(a Attribute) error
AddClassAttribute sets an Attribute to be a class Attribute.
func (*DenseInstances) AllAttributes ¶
func (inst *DenseInstances) AllAttributes() []Attribute
AllAttributes returns a slice of all Attributes.
func (*DenseInstances) AllClassAttributes ¶
func (inst *DenseInstances) AllClassAttributes() []Attribute
AllClassAttributes returns a slice of Attributes which have been designated class Attributes.
func (*DenseInstances) CreateAttributeGroup ¶
func (inst *DenseInstances) CreateAttributeGroup(name string, size int) (err error)
CreateAttributeGroup adds a new AttributeGroup to this set of instances with a given name. If the size is 0, a bit-ag is added; if the size is not 0, then the size of each ag attribute is set to that number of bytes.
func (*DenseInstances) Equal ¶
func (inst *DenseInstances) Equal(other DataGrid) bool
Equal checks whether a given Instance set is exactly the same as another: same size and same values (as determined by the Attributes)
IMPORTANT: does not explicitly check if the Attributes are considered equal.
func (*DenseInstances) Extend ¶
func (inst *DenseInstances) Extend(rows int) error
Extend extends this set of Instances to store rows additional rows. It's recommended to set rows to something quite large.
IMPORTANT: panics if the allocation fails
func (*DenseInstances) Get ¶
func (inst *DenseInstances) Get(a AttributeSpec, row int) []byte
Get gets a particular Attribute (given as an AttributeSpec) on a particular row. AttributeSpecs can be obtained using GetAttribute() or AddAttribute()
func (*DenseInstances) GetAttribute ¶
func (inst *DenseInstances) GetAttribute(get Attribute) (AttributeSpec, error)
GetAttribute returns the AttributeSpec of an Attribute equal to the argument.
TODO: Write a function to pre-compute this once we've allocated TODO: Write a utility function which retrieves all AttributeSpecs for a given instance set.
func (*DenseInstances) GetAttributeGroup ¶
func (inst *DenseInstances) GetAttributeGroup(name string) (AttributeGroup, error)
GetAttributeGroup returns a reference to an AttributeGroup of a given name.
func (*DenseInstances) MapOverRows ¶
func (inst *DenseInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
MapOverRows passes each row map into a function. First argument is a list of AttributeSpec in the order they're needed in for the function. The second is the function to call on each row.
func (*DenseInstances) RemoveClassAttribute ¶
func (inst *DenseInstances) RemoveClassAttribute(a Attribute) error
RemoveClassAttribute removes an Attribute from the set of class Attributes.
func (*DenseInstances) RowString ¶
func (inst *DenseInstances) RowString(row int) string
RowString returns a string representation of a given row.
func (*DenseInstances) Set ¶
func (inst *DenseInstances) Set(a AttributeSpec, row int, val []byte)
Set sets a particular Attribute (given as an AttributeSpec) on a particular row to a particular value.
AttributeSpecs can be obtained using GetAttribute() or AddAttribute().
IMPORTANT: Will panic() if the AttributeSpec isn't valid
IMPORTANT: Will panic() if the row is too large
IMPORTANT: Will panic() if the val is not the right length
func (*DenseInstances) Size ¶
func (inst *DenseInstances) Size() (int, int)
Size returns the number of Attributes as the first return value and the maximum allocated row as the second value.
func (*DenseInstances) String ¶
func (inst *DenseInstances) String() string
String returns a human-readable summary of this dataset.
type Estimator ¶
type Estimator interface {
Fit()
}
An Estimator is an object that can ingest some data and train on it.
type Filter ¶
type Filter interface { // Adds an Attribute to the filter AddAttribute(Attribute) error // Allows mapping old to new Attributes GetAttributesAfterFiltering() []FilteredAttribute // Gets a string for printing String() string // Accepts an old Attribute, the new one and returns a sequence Transform(Attribute, Attribute, []byte) []byte // Builds the filter Train() error }
Filters transform the byte sequences stored in DataGrid implementations.
type FilteredAttribute ¶
FilteredAttributes represent a mapping from the output generated by a filter to the original value.
type FixedAttributeGroup ¶
type FixedAttributeGroup struct {
// contains filtered or unexported fields
}
FixedAttributeGroups contain a particular number of rows of a particular number of Attributes, all of a given type.
func (*FixedAttributeGroup) AddAttribute ¶
func (f *FixedAttributeGroup) AddAttribute(a Attribute) error
AddAttribute adds an attribute to this FixedAttributeGroup
func (*FixedAttributeGroup) Attributes ¶
func (f *FixedAttributeGroup) Attributes() []Attribute
Attributes returns a slice of Attributes in this FixedAttributeGroup
func (*FixedAttributeGroup) RowSize ¶
func (f *FixedAttributeGroup) RowSize() int
RowSize returns the size of each row in bytes
func (*FixedAttributeGroup) Storage ¶
func (f *FixedAttributeGroup) Storage() []AttributeGroupStorageRef
Storage returns a slice of AttributeGroupStorageRefs which can be used to access the memory in this pond.
func (*FixedAttributeGroup) String ¶
func (f *FixedAttributeGroup) String() string
String gets a human-readable summary
type FixedDataGrid ¶
type FixedDataGrid interface { DataGrid // Returns a string representation of a given row RowString(int) string // Returns the number of Attributes and rows currently allocated Size() (int, int) }
FixedDataGrid implementations have a size known in advance and implement all of the functionality offered by DataGrid implementations.
func LazyShuffle ¶
func LazyShuffle(from FixedDataGrid) FixedDataGrid
LazyShuffle randomizes the row order without re-ordering the rows via an InstancesView.
func LazySort ¶
func LazySort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
LazySort also does a sort, but returns an InstancesView and doesn't actually reorder the rows, just makes it look like they've been reordered. See also: Sort.
func SampleWithReplacement ¶
func SampleWithReplacement(from FixedDataGrid, size int) FixedDataGrid
SampleWithReplacement returns a new FixedDataGrid containing an equal number of random rows drawn from the original FixedDataGrid
IMPORTANT: There's a high chance of seeing duplicate rows whenever size is close to the row count.
func Shuffle ¶
func Shuffle(from FixedDataGrid) FixedDataGrid
Shuffle randomizes the row order either in place (if DenseInstances) or using LazyShuffle.
func Sort ¶
func Sort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
Sort does a radix sort of DenseInstances, using SortDirection direction (Ascending or Descending) with attrs as a slice of Attribute indices that you want to sort by.
IMPORTANT: Radix sort is not stable, so ordering outside the attributes used for sorting is arbitrary.
type FloatAttribute ¶
FloatAttribute is an implementation which stores floating point representations of numbers.
func NewFloatAttribute ¶
func NewFloatAttribute(name string) *FloatAttribute
NewFloatAttribute returns a new FloatAttribute with a default precision of 2 decimal places
func (*FloatAttribute) CheckSysValFromString ¶
func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) ([]byte, error)
CheckSysValFromString confirms whether a given rawVal can be converted into a valid system representation. If it can't, the returned value is nil.
func (*FloatAttribute) Compatible ¶
func (Attr *FloatAttribute) Compatible(other Attribute) bool
Compatible checks whether this FloatAttribute can be ponded with another Attribute (checks if they're both FloatAttributes)
func (*FloatAttribute) Equals ¶
func (Attr *FloatAttribute) Equals(other Attribute) bool
Equals tests a FloatAttribute for equality with another Attribute.
Returns false if the other Attribute has a different name or if the other Attribute is not a FloatAttribute.
func (*FloatAttribute) GetFloatFromSysVal ¶
func (Attr *FloatAttribute) GetFloatFromSysVal(rawVal []byte) float64
GetFloatFromSysVal converts a given system value to a float
func (*FloatAttribute) GetName ¶
func (Attr *FloatAttribute) GetName() string
GetName returns this FloatAttribute's human-readable name.
func (*FloatAttribute) GetStringFromSysVal ¶
func (Attr *FloatAttribute) GetStringFromSysVal(rawVal []byte) string
GetStringFromSysVal converts a given system value to a string with two decimal places of precision.
func (*FloatAttribute) GetSysValFromString ¶
func (Attr *FloatAttribute) GetSysValFromString(rawVal string) []byte
GetSysValFromString parses the given rawVal string to a float64 and returns it.
float64 happens to be a 1-to-1 mapping to the system representation. IMPORTANT: This function panic()s if rawVal is not a valid float. Use CheckSysValFromString to confirm.
func (*FloatAttribute) GetType ¶
func (Attr *FloatAttribute) GetType() int
GetType returns Float64Type.
func (*FloatAttribute) SetName ¶
func (Attr *FloatAttribute) SetName(name string)
SetName sets this FloatAttribute's human-readable name.
func (*FloatAttribute) String ¶
func (Attr *FloatAttribute) String() string
String returns a human-readable summary of this Attribute. e.g. "FloatAttribute(Sepal Width)"
type InstancesView ¶
type InstancesView struct {
// contains filtered or unexported fields
}
InstancesViews hide or re-order Attributes and rows from a given DataGrid to make it appear that they've been deleted.
func NewInstancesViewFromAttrs ¶
func NewInstancesViewFromAttrs(src FixedDataGrid, attrs []Attribute) *InstancesView
NewInstancesViewFromAttrs creates a new InstancesView from a source FixedDataGrid and a slice of Attributes.
Only the Attributes specified will appear in this InstancesView.
func NewInstancesViewFromRows ¶
func NewInstancesViewFromRows(src FixedDataGrid, rows map[int]int) *InstancesView
NewInstancesViewFromRows creates a new InstancesView from a source FixedDataGrid and row -> row mapping. The key of the rows map is the row as it exists within this mapping: for example an entry like 5 -> 1 means that row 1 in src will appear at row 5 in the InstancesView.
Rows are not masked in this implementation, meaning that all rows which are left unspecified appear as normal.
func NewInstancesViewFromVisible ¶
func NewInstancesViewFromVisible(src FixedDataGrid, rows []int, attrs []Attribute) *InstancesView
NewInstancesViewFromVisible creates a new InstancesView from a source FixedDataGrid, a slice of row numbers and a slice of Attributes.
Only the rows specified will appear in this InstancesView, and they will appear in the same order they appear within the rows array.
Only the Attributes specified will appear in this InstancesView. Retrieving Attribute specifications from this InstancesView will maintain their order.
func (*InstancesView) AddClassAttribute ¶
func (v *InstancesView) AddClassAttribute(a Attribute) error
AddClassAttribute adds the given Attribute to the set of defined class Attributes, if it hasn't been filtered.
func (*InstancesView) AllAttributes ¶
func (v *InstancesView) AllAttributes() []Attribute
AllAttributes returns every Attribute which hasn't been filtered.
func (*InstancesView) AllClassAttributes ¶
func (v *InstancesView) AllClassAttributes() []Attribute
AllClassAttributes returns all the Attributes currently defined as being class Attributes.
func (*InstancesView) Get ¶
func (v *InstancesView) Get(as AttributeSpec, row int) []byte
Get returns a sequence of bytes stored under a given Attribute on a given row.
IMPORTANT: The AttributeSpec is unverified, meaning it's possible to return values from Attributes filtered by this InstancesView if the underlying AttributeSpec is known.
func (*InstancesView) GetAttribute ¶
func (v *InstancesView) GetAttribute(a Attribute) (AttributeSpec, error)
GetAttribute returns an Attribute specification matching an Attribute if it has not been filtered.
The AttributeSpecs returned are the same as those returned by the source FixedDataGrid.
func (*InstancesView) MapOverRows ¶
func (v *InstancesView) MapOverRows(as []AttributeSpec, rowFunc func([][]byte, int) (bool, error)) error
MapOverRows, see DenseInstances.MapOverRows.
IMPORTANT: MapOverRows is never guaranteed to visit rows in order, and this implementation is especially unlikely to do so.
func (*InstancesView) RemoveClassAttribute ¶
func (v *InstancesView) RemoveClassAttribute(a Attribute) error
RemoveClassAttribute removes the given Attribute from the set of class Attributes.
func (*InstancesView) RowString ¶
func (v *InstancesView) RowString(row int) string
RowString returns a string representation of a given row.
func (*InstancesView) Size ¶
func (v *InstancesView) Size() (int, int)
Size returns the number of Attributes and rows this InstancesView contains.
func (*InstancesView) String ¶
func (v *InstancesView) String() string
String returns a human-readable summary of this InstancesView.
type LazilyFilteredInstances ¶
type LazilyFilteredInstances struct {
// contains filtered or unexported fields
}
LazilyFilteredInstances map a Filter over an underlying FixedDataGrid and are a memory-efficient way of applying Filters.
func NewLazilyFilteredInstances ¶
func NewLazilyFilteredInstances(src FixedDataGrid, f Filter) *LazilyFilteredInstances
NewLazilyFilteredInstances returns a new FixedDataGrid after applying the given Filter to the Attributes it includes. Unfiltered Attributes are passed through without modification.
func (*LazilyFilteredInstances) AddClassAttribute ¶
func (l *LazilyFilteredInstances) AddClassAttribute(cls Attribute) error
AddClassAttribute adds a given Attribute (either before or after filtering) to the set of defined class Attributes.
func (*LazilyFilteredInstances) AllAttributes ¶
func (l *LazilyFilteredInstances) AllAttributes() []Attribute
AllAttributes returns every Attribute defined in the source datagrid, in addition to the revised Attributes created by the filter.
func (*LazilyFilteredInstances) AllClassAttributes ¶
func (l *LazilyFilteredInstances) AllClassAttributes() []Attribute
AllClassAttributes returns details of all Attributes currently specified as being class Attributes.
If applicable, the Attributes returned are those after modification by the Filter.
func (*LazilyFilteredInstances) Get ¶
func (l *LazilyFilteredInstances) Get(as AttributeSpec, row int) []byte
Get returns a transformed byte slice stored at a given AttributeSpec and row.
func (*LazilyFilteredInstances) GetAttribute ¶
func (l *LazilyFilteredInstances) GetAttribute(target Attribute) (AttributeSpec, error)
GetAttribute returns an AttributeSpec for a given Attribute.
func (*LazilyFilteredInstances) MapOverRows ¶
func (l *LazilyFilteredInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
MapOverRows maps an iteration mapFunc over the bytes contained in the source FixedDataGrid, after modification by the filter.
func (*LazilyFilteredInstances) RemoveClassAttribute ¶
func (l *LazilyFilteredInstances) RemoveClassAttribute(cls Attribute) error
RemoveClassAttribute removes a given Attribute (either before or after filtering) from the set of defined class Attributes.
func (*LazilyFilteredInstances) RowString ¶
func (l *LazilyFilteredInstances) RowString(row int) string
RowString returns a string representation of a given row after filtering.
func (*LazilyFilteredInstances) Size ¶
func (l *LazilyFilteredInstances) Size() (int, int)
Size returns the number of Attributes and rows of the underlying FixedDataGrid.
func (*LazilyFilteredInstances) String ¶
func (l *LazilyFilteredInstances) String() string
String returns a human-readable summary of this FixedDataGrid after filtering.
type Model ¶
type Model interface {
Score()
}
A Model is a supervised learning object that is capable of scoring its accuracy against a test set.
type Predictor ¶
type Predictor interface {
Predict()
}
A Predictor is an object that provides predictions.
type SortDirection ¶
type SortDirection int
SortDirection specifies sorting direction...
const (
// Descending says that Instances should be sorted high to low...
Descending SortDirection = 1
// Ascending states that Instances should be sorted low to high...
Ascending SortDirection = 2
)
type UpdatableDataGrid ¶
type UpdatableDataGrid interface {
FixedDataGrid
// Sets a given Attribute and row to a byte sequence.
Set(AttributeSpec, int, []byte)
// Adds an Attribute to the grid.
AddAttribute(Attribute) AttributeSpec
// Allocates additional room to hold a number of rows
Extend(int) error
}
UpdatableDataGrid implementations can be changed in addition to implementing all of the functionality offered by FixedDataGrid implementations.
func GeneratePredictionVector ¶
func GeneratePredictionVector(from FixedDataGrid) UpdatableDataGrid
GeneratePredictionVector selects the class Attributes from a given FixedDataGrid and returns something which can hold the predictions.