Documentation ¶
Overview ¶
Package base provides base interfaces for GoLearn objects to implement. It also provides a raw base for those objects.
Index ¶
- Constants
- Variables
- func CheckStrictlyCompatible(s1 FixedDataGrid, s2 FixedDataGrid) bool
- func ConvertAllRowsToMat64(attrs []Attribute, f FixedDataGrid) ([]*mat.Dense, error)
- func ConvertRowToMat64(attrs []Attribute, f FixedDataGrid, r int) (*mat.Dense, error)
- func DecomposeOnAttributeValues(inst FixedDataGrid, at Attribute) map[string]FixedDataGrid
- func DecomposeOnNumericAttributeThreshold(inst FixedDataGrid, at Attribute, val float64) map[string]FixedDataGrid
- func DescribeError(description string, err error) error
- func FormatError(err error, format string, args ...interface{}) error
- func GetClass(from DataGrid, row int) string
- func GetClassDistribution(inst FixedDataGrid) map[string]int
- func GetClassDistributionAfterSplit(inst FixedDataGrid, at Attribute) map[string]map[string]int
- func GetClassDistributionAfterThreshold(inst FixedDataGrid, at Attribute, val float64) map[string]map[string]int
- func GetClassDistributionByBinaryFloatValue(inst FixedDataGrid) []int
- func GetClassDistributionByCategoricalValue(inst FixedDataGrid) []int
- func InstancesAreEqual(inst, other FixedDataGrid) bool
- func InstancesTrainTestSplit(src FixedDataGrid, prop float64) (FixedDataGrid, FixedDataGrid)
- func MarshalAttribute(a Attribute) (map[string]interface{}, error)
- func PackFloatToBytes(val float64) []byte
- func PackFloatToBytesInline(val float64, ret []byte)
- func PackU64ToBytes(val uint64) []byte
- func PackU64ToBytesInline(val uint64, ret []byte)
- func ParseARFFGetRows(filepath string) (int, error)
- func ParseCSVBuildInstancesFromReader(r io.ReadSeeker, attrs []Attribute, hasHeader bool, u UpdatableDataGrid) (err error)
- func ParseCSVEstimateFilePrecision(filepath string) (int, error)
- func ParseCSVEstimateFilePrecisionFromReader(r io.ReadSeeker) (int, error)
- func ParseCSVGetRows(filepath string) (int, error)
- func ParseCSVGetRowsFromReader(r io.ReadSeeker) (int, error)
- func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string
- func ParseCSVSniffAttributeNamesFromReader(r io.ReadSeeker, hasHeaders bool) []string
- func ParseDenseARFFBuildInstancesFromReader(r io.Reader, attrs []Attribute, u UpdatableDataGrid) (err error)
- func ParseMatchAttributes(attrs, templateAttrs []Attribute)
- func ParseSQLBuildInstancesFromData(data [][]string, headers []string, attrs []Attribute, u UpdatableDataGrid) (err error)
- func ParseSQLEstimateFilePrecisionFromData(data [][]string) (int, error)
- func SaveEstimatorToGob(path string, e *Estimator)
- func SerializeAttribute(attr Attribute) ([]byte, error)
- func SerializeInstances(inst FixedDataGrid, f io.Writer) error
- func SerializeInstancesToCSV(inst FixedDataGrid, path string) error
- func SerializeInstancesToCSVStream(inst FixedDataGrid, f io.Writer) error
- func SerializeInstancesToDenseARFF(inst FixedDataGrid, path, relation string) error
- func SerializeInstancesToDenseARFFWithAttributes(inst FixedDataGrid, rawAttrs []Attribute, path, relation string) error
- func SerializeInstancesToFile(inst FixedDataGrid, path string) error
- func SerializeInstancesToTarWriter(inst FixedDataGrid, tw *tar.Writer, prefix string, includeData bool) error
- func SerializeInstancesToWriterDenseARFFWithAttributes(w io.Writer, inst FixedDataGrid, rawAttrs []Attribute, relation string) error
- func SetClass(at UpdatableDataGrid, row int, class string)
- func SetLogger(logger *log.Logger)
- func SetLoggerOut(out io.Writer)
- func Silent()
- func UnpackBytesToFloat(val []byte) float64
- func UnpackBytesToU64(val []byte) uint64
- func WrapError(err error) error
- type Attribute
- func AttributeDifference(a1, a2 []Attribute) []Attribute
- func AttributeDifferenceReferences(a1, a2 []Attribute) []Attribute
- func AttributeIntersect(a1, a2 []Attribute) []Attribute
- func AttributeIntersectReferences(a1, a2 []Attribute) []Attribute
- func CheckCompatible(s1 FixedDataGrid, s2 FixedDataGrid) []Attribute
- func DeserializeAttribute(data []byte) (Attribute, error)
- func DeserializeAttributes(data []byte) ([]Attribute, error)
- func GetAttributeByName(inst FixedDataGrid, name string) Attribute
- func NonClassAttributes(d DataGrid) []Attribute
- func NonClassFloatAttributes(d DataGrid) []Attribute
- func ParseARFFGetAttributes(filepath string) []Attribute
- func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute
- func ParseCSVGetAttributesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute
- func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute
- func ParseCSVSniffAttributeTypesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute
- func ParseSQLGetAttributesFromData(data [][]string, headers []string) []Attribute
- func ParseSQLSniffAttributeTypesFromData(data [][]string) []Attribute
- func ReplaceDeserializedAttributeWithVersionFromInstances(deserialized Attribute, matchingWith FixedDataGrid) (Attribute, error)
- func ReplaceDeserializedAttributesWithVersionsFromInstances(deserialized []Attribute, matchingWith FixedDataGrid) ([]Attribute, error)
- type AttributeGroup
- type AttributeSpec
- type BaseClassifier
- type BaseEstimator
- type BaseRegressor
- type BinaryAttribute
- func (b *BinaryAttribute) Compatible(other Attribute) bool
- func (b *BinaryAttribute) Equals(other Attribute) bool
- func (b *BinaryAttribute) GetName() string
- func (b *BinaryAttribute) GetStringFromSysVal(val []byte) string
- func (b *BinaryAttribute) GetSysValFromString(userVal string) []byte
- func (b *BinaryAttribute) GetType() int
- func (b *BinaryAttribute) MarshalJSON() ([]byte, error)
- func (b *BinaryAttribute) SetName(name string)
- func (b *BinaryAttribute) String() string
- func (b *BinaryAttribute) UnmarshalJSON(data []byte) error
- type BinaryAttributeGroup
- type CategoricalAttribute
- func (Attr *CategoricalAttribute) Compatible(other Attribute) bool
- func (Attr *CategoricalAttribute) Equals(other Attribute) bool
- func (Attr *CategoricalAttribute) GetName() string
- func (Attr *CategoricalAttribute) GetStringFromSysVal(rawVal []byte) string
- func (Attr *CategoricalAttribute) GetSysVal(userVal string) []byte
- func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) []byte
- func (Attr *CategoricalAttribute) GetType() int
- func (Attr *CategoricalAttribute) GetUsrVal(sysVal []byte) string
- func (Attr *CategoricalAttribute) GetValues() []string
- func (Attr *CategoricalAttribute) MarshalJSON() ([]byte, error)
- func (Attr *CategoricalAttribute) SetName(name string)
- func (Attr *CategoricalAttribute) String() string
- func (Attr *CategoricalAttribute) UnmarshalJSON(data []byte) error
- type Classifier
- type ClassifierDeserializer
- func (c *ClassifierDeserializer) Close()
- func (c *ClassifierDeserializer) GetAttributeForKey(key string) (Attribute, error)
- func (c *ClassifierDeserializer) GetAttributesForKey(key string) ([]Attribute, error)
- func (c *ClassifierDeserializer) GetBytesForKey(key string) ([]byte, error)
- func (c *ClassifierDeserializer) GetInstancesForKey(key string) (FixedDataGrid, error)
- func (c *ClassifierDeserializer) GetJSONForKey(key string, v interface{}) error
- func (c *ClassifierDeserializer) GetStringForKey(key string) (string, error)
- func (c *ClassifierDeserializer) GetU64ForKey(key string) (uint64, error)
- func (c *ClassifierDeserializer) Prefix(prefix string, suffix string) string
- func (c *ClassifierDeserializer) ReadMetadataAtPrefix(prefix string) (ClassifierMetadataV1, error)
- type ClassifierMetadataV1
- type ClassifierSerializer
- func (c *ClassifierSerializer) Close() error
- func (c *ClassifierSerializer) Prefix(prefix string, suffix string) string
- func (c *ClassifierSerializer) WriteAttributeForKey(key string, a Attribute) error
- func (c *ClassifierSerializer) WriteAttributesForKey(key string, attrs []Attribute) error
- func (c *ClassifierSerializer) WriteBytesForKey(key string, b []byte) error
- func (c *ClassifierSerializer) WriteInstancesForKey(key string, g FixedDataGrid, includeData bool) error
- func (c *ClassifierSerializer) WriteJSONForKey(key string, v interface{}) error
- func (c *ClassifierSerializer) WriteMetadataAtPrefix(prefix string, metadata ClassifierMetadataV1) error
- func (c *ClassifierSerializer) WriteU64ForKey(key string, v uint64) error
- type DataBaseColumns
- type DataGrid
- type DenseInstances
- func CopyDenseInstances(template *DenseInstances, templateAttrs []Attribute) *DenseInstances
- func DeserializeInstances(f io.ReadSeeker) (ret *DenseInstances, err error)
- func DeserializeInstancesFromTarReader(tr *FunctionalTarReader, prefix string) (ret *DenseInstances, err error)
- func NewDenseCopy(of FixedDataGrid) *DenseInstances
- func NewDenseInstances() *DenseInstances
- func NewStructuralCopy(of FixedDataGrid) *DenseInstances
- func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInstances, err error)
- func ParseCSVToInstancesFromReader(r io.ReadSeeker, hasHeaders bool) (instances *DenseInstances, err error)
- func ParseCSVToInstancesWithAttributeGroups(filepath string, attrGroups, classAttrGroups map[string]string, ...) (instances *DenseInstances, err error)
- func ParseCSVToInstancesWithAttributeGroupsFromReader(r io.ReadSeeker, attrGroups, classAttrGroups map[string]string, ...) (instances *DenseInstances, err error)
- func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error)
- func ParseCSVToTemplatedInstancesFromReader(r io.ReadSeeker, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error)
- func ParseDenseARFFToInstances(filepath string) (ret *DenseInstances, err error)
- func ParseSQLToInstances(db *sqlx.DB, query string) (*DenseInstances, error)
- func ParseSQLToInstancesFromStrings(data [][]string, headers []string) (*DenseInstances, error)
- func (inst *DenseInstances) AddAttribute(a Attribute) AttributeSpec
- func (inst *DenseInstances) AddAttributeToAttributeGroup(newAttribute Attribute, ag string) (AttributeSpec, error)
- func (inst *DenseInstances) AddClassAttribute(a Attribute) error
- func (inst *DenseInstances) AllAttributeGroups() map[string]AttributeGroup
- func (inst *DenseInstances) AllAttributes() []Attribute
- func (inst *DenseInstances) AllClassAttributes() []Attribute
- func (inst *DenseInstances) CreateAttributeGroup(name string, size int) (err error)
- func (inst *DenseInstances) Extend(rows int) error
- func (inst *DenseInstances) Get(a AttributeSpec, row int) []byte
- func (inst *DenseInstances) GetAttribute(get Attribute) (AttributeSpec, error)
- func (inst *DenseInstances) GetAttributeGroup(name string) (AttributeGroup, error)
- func (inst *DenseInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
- func (inst *DenseInstances) RemoveClassAttribute(a Attribute) error
- func (inst *DenseInstances) RowString(row int) string
- func (inst *DenseInstances) Set(a AttributeSpec, row int, val []byte)
- func (inst *DenseInstances) Size() (int, int)
- func (inst *DenseInstances) String() string
- type Estimator
- type Filter
- type FilteredAttribute
- type FixedAttributeGroup
- type FixedDataGrid
- func LazyShuffle(from FixedDataGrid) FixedDataGrid
- func LazySort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
- func SampleWithReplacement(from FixedDataGrid, size int) FixedDataGrid
- func Shuffle(from FixedDataGrid) FixedDataGrid
- func Sort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
- type FloatAttribute
- func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) ([]byte, error)
- func (Attr *FloatAttribute) Compatible(other Attribute) bool
- func (Attr *FloatAttribute) Equals(other Attribute) bool
- func (Attr *FloatAttribute) GetFloatFromSysVal(rawVal []byte) float64
- func (Attr *FloatAttribute) GetName() string
- func (Attr *FloatAttribute) GetStringFromSysVal(rawVal []byte) string
- func (Attr *FloatAttribute) GetSysValFromString(rawVal string) []byte
- func (Attr *FloatAttribute) GetType() int
- func (f *FloatAttribute) MarshalJSON() ([]byte, error)
- func (Attr *FloatAttribute) SetName(name string)
- func (Attr *FloatAttribute) String() string
- func (f *FloatAttribute) UnmarshalJSON(data []byte) error
- type FunctionalTarReader
- type GoLearnError
- type InstancesView
- func (v *InstancesView) AddClassAttribute(a Attribute) error
- func (v *InstancesView) AllAttributes() []Attribute
- func (v *InstancesView) AllClassAttributes() []Attribute
- func (v *InstancesView) Get(as AttributeSpec, row int) []byte
- func (v *InstancesView) GetAttribute(a Attribute) (AttributeSpec, error)
- func (v *InstancesView) MapOverRows(as []AttributeSpec, rowFunc func([][]byte, int) (bool, error)) error
- func (v *InstancesView) RemoveClassAttribute(a Attribute) error
- func (v *InstancesView) RowString(row int) string
- func (v *InstancesView) Size() (int, int)
- func (v *InstancesView) String() string
- type LazilyFilteredInstances
- func (l *LazilyFilteredInstances) AddClassAttribute(cls Attribute) error
- func (l *LazilyFilteredInstances) AllAttributes() []Attribute
- func (l *LazilyFilteredInstances) AllClassAttributes() []Attribute
- func (l *LazilyFilteredInstances) Get(as AttributeSpec, row int) []byte
- func (l *LazilyFilteredInstances) GetAttribute(target Attribute) (AttributeSpec, error)
- func (l *LazilyFilteredInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
- func (l *LazilyFilteredInstances) RemoveClassAttribute(cls Attribute) error
- func (l *LazilyFilteredInstances) RowString(row int) string
- func (l *LazilyFilteredInstances) Size() (int, int)
- func (l *LazilyFilteredInstances) String() string
- type Mat64Instances
- func (m *Mat64Instances) AddClassAttribute(a Attribute) error
- func (m *Mat64Instances) AllAttributes() []Attribute
- func (m *Mat64Instances) AllClassAttributes() []Attribute
- func (m *Mat64Instances) Get(as AttributeSpec, row int) []byte
- func (m *Mat64Instances) GetAttribute(a Attribute) (AttributeSpec, error)
- func (m *Mat64Instances) MapOverRows(as []AttributeSpec, f func([][]byte, int) (bool, error)) error
- func (m *Mat64Instances) RemoveClassAttribute(a Attribute) error
- func (m *Mat64Instances) RowString(row int) string
- func (m *Mat64Instances) Size() (int, int)
- func (m *Mat64Instances) String() string
- type Model
- type Predictor
- type SortDirection
- type UpdatableDataGrid
Constants ¶
const ( // CategoricalType is for Attributes which represent values distinctly. CategoricalType = iota // Float64Type should be replaced with a FractionalNumeric type [DEPRECATED]. Float64Type BinaryType )
const (
SerializationFormatVersion = "golearn 1.0"
)
Variables ¶
Logger is the default logger for the entire golearn package. It writes to stdout and has no prefix and no flags.
Functions ¶
func CheckStrictlyCompatible ¶
func CheckStrictlyCompatible(s1 FixedDataGrid, s2 FixedDataGrid) bool
CheckStrictlyCompatible checks whether two DenseInstances have AttributeGroups with the same Attributes, in the same order, enabling optimisations.
func ConvertAllRowsToMat64 ¶
func ConvertAllRowsToMat64(attrs []Attribute, f FixedDataGrid) ([]*mat.Dense, error)
ConvertAllRowsToMat64 takes a list of Attributes and returns a vector of all rows in a mat.Dense format.
func ConvertRowToMat64 ¶
ConvertRowToMat64 takes a list of Attributes, a FixedDataGrid and a row number, and returns the float values of that row in a mat.Dense format.
func DecomposeOnAttributeValues ¶
func DecomposeOnAttributeValues(inst FixedDataGrid, at Attribute) map[string]FixedDataGrid
DecomposeOnAttributeValues divides the instance set depending on the value of a given Attribute, constructs child instances, and returns them in a map keyed on the string value of that Attribute.
IMPORTANT: calls panic() if the AttributeSpec of at cannot be determined.
func DecomposeOnNumericAttributeThreshold ¶
func DecomposeOnNumericAttributeThreshold(inst FixedDataGrid, at Attribute, val float64) map[string]FixedDataGrid
DecomposeOnNumericAttributeThreshold divides the instance set depending on the value of a given numeric Attribute, constructs child instances, and returns them in a map keyed on whether that row had a higher value than the threshold or not.
IMPORTANT: calls panic() if the AttributeSpec of at cannot be determined, or if the Attribute is not numeric.
func DescribeError ¶
func FormatError ¶
func GetClass ¶
GetClass is a shortcut for returning the string value of the current class on a given row.
IMPORTANT: GetClass will panic if the number of ClassAttributes is set to anything other than one.
func GetClassDistribution ¶
func GetClassDistribution(inst FixedDataGrid) map[string]int
GetClassDistribution returns a map containing the count of each class type (indexed by the class' string representation).
func GetClassDistributionAfterSplit ¶
func GetClassDistributionAfterSplit(inst FixedDataGrid, at Attribute) map[string]map[string]int
GetClassDistributionAfterSplit returns the class distribution after a speculative split on a given Attribute.
func GetClassDistributionAfterThreshold ¶
func GetClassDistributionAfterThreshold(inst FixedDataGrid, at Attribute, val float64) map[string]map[string]int
GetClassDistributionAfterThreshold returns the class distribution after a speculative split on a given Attribute using a threshold.
func GetClassDistributionByBinaryFloatValue ¶
func GetClassDistributionByBinaryFloatValue(inst FixedDataGrid) []int
GetClassDistributionByBinaryFloatValue returns the count of each row which has a float value close to 0.0 or 1.0.
func GetClassDistributionByCategoricalValue ¶
func GetClassDistributionByCategoricalValue(inst FixedDataGrid) []int
GetClassDistributionByCategoricalValue returns a vector containing the count of each class vector (indexed by the class' system integer representation).
func InstancesAreEqual ¶
func InstancesAreEqual(inst, other FixedDataGrid) bool
InstancesAreEqual checks whether a given Instance set is exactly the same as another (i.e. has the same size and values).
func InstancesTrainTestSplit ¶
func InstancesTrainTestSplit(src FixedDataGrid, prop float64) (FixedDataGrid, FixedDataGrid)
InstancesTrainTestSplit takes a given Instances (src) and a train-test fraction (prop) and returns an array of two new Instances, one containing approximately that fraction and the other containing what's left.
IMPORTANT: this function is only meaningful when prop is between 0.0 and 1.0. Using any other values may result in odd behaviour.
func MarshalAttribute ¶
MarshalAttribute converts an Attribute to a JSON map.
func PackFloatToBytes ¶
PackFloatToBytes returns an 8-byte slice containing the byte values of a float64.
func PackFloatToBytesInline ¶
PackFloatToBytesInline fills ret with the byte values of the float64 argument. ret must be at least 8 bytes in size.
func PackU64ToBytes ¶
PackU64ToBytes allocates a return value of appropriate length and fills it with the values of val.
func PackU64ToBytesInline ¶
PackU64ToBytesInline fills ret with the byte values of val. Ret must have length at least 8.
func ParseARFFGetRows ¶
ParseARFFGetRows returns the number of data rows in an ARFF file.
func ParseCSVBuildInstancesFromReader ¶
func ParseCSVBuildInstancesFromReader(r io.ReadSeeker, attrs []Attribute, hasHeader bool, u UpdatableDataGrid) (err error)
ParseCSVBuildInstancesFromReader updates an UpdatableDataGrid from an io.ReadSeeker.
func ParseCSVEstimateFilePrecision ¶
ParseCSVEstimateFilePrecision determines the maximum number of digits occurring anywhere after the decimal point within the file.
func ParseCSVEstimateFilePrecisionFromReader ¶
func ParseCSVEstimateFilePrecisionFromReader(r io.ReadSeeker) (int, error)
ParseCSVEstimateFilePrecisionFromReader determines the maximum number of digits occurring anywhere after the decimal point within the reader.
func ParseCSVGetRows ¶
ParseCSVGetRows returns the number of rows in a given file.
func ParseCSVGetRowsFromReader ¶
func ParseCSVGetRowsFromReader(r io.ReadSeeker) (int, error)
ParseCSVGetRowsFromReader returns the number of rows in a given reader.
func ParseCSVSniffAttributeNames ¶
ParseCSVSniffAttributeNames returns a slice containing the top row of a given CSV file, or placeholders if hasHeaders is false.
func ParseCSVSniffAttributeNamesFromReader ¶
func ParseCSVSniffAttributeNamesFromReader(r io.ReadSeeker, hasHeaders bool) []string
ParseCSVSniffAttributeNamesFromReader returns a slice containing the top row of a given reader with CSV-contents, or placeholders if hasHeaders is false.
func ParseDenseARFFBuildInstancesFromReader ¶
func ParseDenseARFFBuildInstancesFromReader(r io.Reader, attrs []Attribute, u UpdatableDataGrid) (err error)
ParseDenseARFFBuildInstancesFromReader updates an UpdatableDataGrid from an io.Reader.
func ParseMatchAttributes ¶
func ParseMatchAttributes(attrs, templateAttrs []Attribute)
ParseMatchAttributes tries to match the set of Attributes read from one file with those read from another, and writes the matching Attributes back to the original set.
func ParseSQLBuildInstancesFromData ¶
func ParseSQLBuildInstancesFromData(data [][]string, headers []string, attrs []Attribute, u UpdatableDataGrid) (err error)
func SaveEstimatorToGob ¶
SaveEstimatorToGob serialises an estimator to a provided filepath, in gob format. See http://golang.org/pkg/encoding/gob for further details.
func SerializeAttribute ¶
func SerializeInstances ¶
func SerializeInstances(inst FixedDataGrid, f io.Writer) error
SerializeInstances stores a FixedDataGrid into an efficient format to the given io.Writer stream.
func SerializeInstancesToCSV ¶
func SerializeInstancesToCSV(inst FixedDataGrid, path string) error
SerializeInstancesToCSV converts a FixedDataGrid into a CSV file format.
func SerializeInstancesToCSVStream ¶
func SerializeInstancesToCSVStream(inst FixedDataGrid, f io.Writer) error
SerializeInstancesToCSVStream outputs a FixedDataGrid into a CSV file format, via the io.Writer stream.
func SerializeInstancesToDenseARFF ¶
func SerializeInstancesToDenseARFF(inst FixedDataGrid, path, relation string) error
SerializeInstancesToDenseARFF writes the given FixedDataGrid to a densely-formatted ARFF file.
func SerializeInstancesToDenseARFFWithAttributes ¶
func SerializeInstancesToDenseARFFWithAttributes(inst FixedDataGrid, rawAttrs []Attribute, path, relation string) error
SerializeInstancesToDenseARFFWithAttributes writes the given FixedDataGrid to a densely-formatted ARFF file with the header Attributes in the order given.
func SerializeInstancesToFile ¶
func SerializeInstancesToFile(inst FixedDataGrid, path string) error
func SerializeInstancesToTarWriter ¶
func SerializeInstancesToTarWriter(inst FixedDataGrid, tw *tar.Writer, prefix string, includeData bool) error
SerializeInstancesToTarWriter stores a FixedDataGrid into an efficient form given a tar.Writer.
func SetClass ¶
func SetClass(at UpdatableDataGrid, row int, class string)
SetClass is a shortcut for updating the given class of a row.
IMPORTANT: SetClass will panic if the number of class Attributes is anything other than one.
func SetLoggerOut ¶
SetLoggerOut creates a new base logger for the entire golearn package using the given out instead of the default, os.Stdout. The other log options are set to the default, i.e. no prefix and no flags.
func Silent ¶
func Silent()
Silent turns off logging throughout the golearn package by setting the logger to write to /dev/null.
func UnpackBytesToFloat ¶
UnpackBytesToFloat converts a given byte slice into an equivalent float64.
func UnpackBytesToU64 ¶
UnpackBytesToU64 converts a given byte slice into a uint64 value.
Types ¶
type Attribute ¶
type Attribute interface { json.Unmarshaler json.Marshaler // Returns the general characterstics of this Attribute . // to avoid the overhead of casting GetType() int // Returns the human-readable name of this Attribute. GetName() string // Sets the human-readable name of this Attribute. SetName(string) // Gets a human-readable overview of this Attribute for debugging. String() string // Converts a value given from a human-readable string into a system // representation. For example, a CategoricalAttribute with values // ["iris-setosa", "iris-virginica"] would return the float64 // representation of 0 when given "iris-setosa". GetSysValFromString(string) []byte // Converts a given value from a system representation into a human // representation. For example, a CategoricalAttribute with values // ["iris-setosa", "iris-viriginica"] might return "iris-setosa" // when given 0.0 as the argument. GetStringFromSysVal([]byte) string // Tests for equality with another Attribute. Other Attributes are // considered equal if: // * They have the same type (i.e. FloatAttribute <> CategoricalAttribute) // * They have the same name // * If applicable, they have the same categorical values (though not // necessarily in the same order). Equals(Attribute) bool // Tests whether two Attributes can be represented in the same pond // i.e. they're the same size, and their byte order makes them meaningful // when considered together Compatible(Attribute) bool }
Attributes disambiguate columns of the feature matrix and declare their types.
func AttributeDifference ¶
AttributeDifference returns the difference between two Attribute slices: i.e. all the values in a1 which do not occur in a2.
IMPORTANT: result is ordered the same as a1.
IMPORTANT: result only contains values from a1.
func AttributeDifferenceReferences ¶
AttributeDifferenceReferences returns the difference between two Attribute slices: i.e. all the values in a1 which do not occur in a2.
IMPORTANT: result is not guaranteed to be ordered.
IMPORTANT: done using pointers for speed, use AttributeDifference if the Attributes originate from different DataGrids.
func AttributeIntersect ¶
AttributeIntersect returns the intersection of two Attribute slices.
IMPORTANT: result is ordered in order of the first []Attribute argument.
IMPORTANT: result contains only Attributes from a1.
func AttributeIntersectReferences ¶
AttributeIntersectReferences returns the intersection of two Attribute slices.
IMPORTANT: result is not guaranteed to be ordered.
IMPORTANT: done using pointers for speed, use AttributeIntersect if the Attributes originate from different DataGrids.
func CheckCompatible ¶
func CheckCompatible(s1 FixedDataGrid, s2 FixedDataGrid) []Attribute
CheckCompatible checks whether two DataGrids have the same Attributes and if they do, it returns them.
func DeserializeAttribute ¶
func DeserializeAttributes ¶
DeserializeAttributes constructs a slice of Attributes from the given serialized data.
func GetAttributeByName ¶
func GetAttributeByName(inst FixedDataGrid, name string) Attribute
GetAttributeByName returns an Attribute matching a given name. Returns nil if one doesn't exist.
func NonClassAttributes ¶
NonClassAttributes returns all Attributes which aren't designated as a class Attribute.
func NonClassFloatAttributes ¶
NonClassFloatAttributes returns all FloatAttributes which aren't designated as a class Attribute.
func ParseARFFGetAttributes ¶
ParseARFFGetAttributes returns the set of Attributes represented in this ARFF file.
func ParseCSVGetAttributes ¶
ParseCSVGetAttributes returns an ordered slice of appropriately typed and named Attributes.
func ParseCSVGetAttributesFromReader ¶
func ParseCSVGetAttributesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute
ParseCSVGetAttributesFromReader returns an ordered slice of appropriately typed and named Attributes.
func ParseCSVSniffAttributeTypes ¶
ParseCSVSniffAttributeTypes returns a slice of appropriately-typed Attributes.
The type of a given attribute is determined by looking at the first data row of the CSV.
func ParseCSVSniffAttributeTypesFromReader ¶
func ParseCSVSniffAttributeTypesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute
ParseCSVSniffAttributeTypesFromReader returns a slice of appropriately-typed Attributes.
The type of a given attribute is determined by looking at the first data row of the CSV.
func ReplaceDeserializedAttributeWithVersionFromInstances ¶
func ReplaceDeserializedAttributeWithVersionFromInstances(deserialized Attribute, matchingWith FixedDataGrid) (Attribute, error)
ReplaceDeserializedAttributeWithVersionFromInstances takes an independently deserialized Attribute and matches it if possible with one from a candidate FixedDataGrid.
func ReplaceDeserializedAttributesWithVersionsFromInstances ¶
func ReplaceDeserializedAttributesWithVersionsFromInstances(deserialized []Attribute, matchingWith FixedDataGrid) ([]Attribute, error)
ReplaceDeserializedAttributesWithVersionsFromInstances takes some independently loaded Attributes and matches them up with a candidate FixedDataGrid.
type AttributeGroup ¶
type AttributeGroup interface { // Adds a new Attribute AddAttribute(Attribute) error // Returns all Attributes Attributes() []Attribute // Gets the size of each row in bytes (rounded up) RowSizeInBytes() int // Gets a reference to underlying memory Storage() []byte // Returns a human-readable summary String() string // contains filtered or unexported methods }
AttributeGroups store related sequences of system values in memory for the DenseInstances structure.
type AttributeSpec ¶
type AttributeSpec struct {
// contains filtered or unexported fields
}
AttributeSpec is a pointer to a particular Attribute within a particular Instance structure and encodes position and storage information associated with that Attribute.
func ResolveAllAttributes ¶
func ResolveAllAttributes(d DataGrid) []AttributeSpec
ResolveAllAttributes returns every AttributeSpec
func ResolveAttributes ¶
func ResolveAttributes(d DataGrid, attrs []Attribute) []AttributeSpec
ResolveAttributes returns AttributeSpecs describing all of the Attributes.
func (*AttributeSpec) GetAttribute ¶
func (a *AttributeSpec) GetAttribute() Attribute
GetAttribute returns the Attribute which this AttributeSpec points to.
func (*AttributeSpec) String ¶
func (a *AttributeSpec) String() string
String returns a human-readable description of this AttributeSpec.
type BaseClassifier ¶
type BaseClassifier struct {
TrainingData *DataGrid
}
BaseClassifier stores options common to every classifier.
type BaseEstimator ¶
type BinaryAttribute ¶
type BinaryAttribute struct {
Name string
}
BinaryAttributes can only represent 1 or 0.
func NewBinaryAttribute ¶
func NewBinaryAttribute(name string) *BinaryAttribute
NewBinaryAttribute creates a BinaryAttribute with the given name
func (*BinaryAttribute) Compatible ¶
func (b *BinaryAttribute) Compatible(other Attribute) bool
Compatible checks whether this Attribute can be represented in the same pond as another.
func (*BinaryAttribute) Equals ¶
func (b *BinaryAttribute) Equals(other Attribute) bool
Equals checks for equality with another BinaryAttribute.
func (*BinaryAttribute) GetName ¶
func (b *BinaryAttribute) GetName() string
GetName returns the name of this Attribute.
func (*BinaryAttribute) GetStringFromSysVal ¶
func (b *BinaryAttribute) GetStringFromSysVal(val []byte) string
GetStringFromSysVal returns either 1 or 0.
func (*BinaryAttribute) GetSysValFromString ¶
func (b *BinaryAttribute) GetSysValFromString(userVal string) []byte
GetSysValFromString returns either 1 or 0 in a single byte.
func (*BinaryAttribute) GetType ¶
func (b *BinaryAttribute) GetType() int
GetType returns BinaryType.
func (*BinaryAttribute) MarshalJSON ¶
func (b *BinaryAttribute) MarshalJSON() ([]byte, error)
MarshalJSON returns a JSON version of this BinaryAttribute for serialisation.
func (*BinaryAttribute) SetName ¶
func (b *BinaryAttribute) SetName(name string)
SetName sets the name of this Attribute.
func (*BinaryAttribute) String ¶
func (b *BinaryAttribute) String() string
String returns a human-readable representation.
func (*BinaryAttribute) UnmarshalJSON ¶
func (b *BinaryAttribute) UnmarshalJSON(data []byte) error
UnmarshalJSON unpacks a BinaryAttribute from serialisation. Usually, there's nothing to deserialize.
type BinaryAttributeGroup ¶
type BinaryAttributeGroup struct {
// contains filtered or unexported fields
}
BinaryAttributeGroups contain only BinaryAttributes. They compact each Attribute to a bit for better storage.
func (*BinaryAttributeGroup) AddAttribute ¶
func (b *BinaryAttributeGroup) AddAttribute(a Attribute) error
AddAttribute adds an Attribute to this BinaryAttributeGroup
func (*BinaryAttributeGroup) Attributes ¶
func (b *BinaryAttributeGroup) Attributes() []Attribute
Attributes returns a slice of Attributes in this BinaryAttributeGroup.
func (*BinaryAttributeGroup) RowSizeInBytes ¶
func (b *BinaryAttributeGroup) RowSizeInBytes() int
RowSizeInBytes returns the size of each row in bytes (rounded up to nearest byte).
func (*BinaryAttributeGroup) Storage ¶
func (b *BinaryAttributeGroup) Storage() []byte
Storage returns a reference to the underlying storage.
IMPORTANT: don't modify
func (*BinaryAttributeGroup) String ¶
func (b *BinaryAttributeGroup) String() string
String returns a human-readable summary.
type CategoricalAttribute ¶
type CategoricalAttribute struct { Name string // contains filtered or unexported fields }
CategoricalAttribute is an Attribute implementation which stores discrete string values - useful for representing classes.
func NewCategoricalAttribute ¶
func NewCategoricalAttribute() *CategoricalAttribute
NewCategoricalAttribute creates a blank CategoricalAttribute.
func (*CategoricalAttribute) Compatible ¶
func (Attr *CategoricalAttribute) Compatible(other Attribute) bool
Compatible checks that this CategoricalAttribute has the same values as another, in the same order.
func (*CategoricalAttribute) Equals ¶
func (Attr *CategoricalAttribute) Equals(other Attribute) bool
Equals checks equality against another Attribute.
Two CategoricalAttributes are considered equal if they contain the same values and have the same name. Otherwise, this function returns false.
func (*CategoricalAttribute) GetName ¶
func (Attr *CategoricalAttribute) GetName() string
GetName returns the human-readable name assigned to this attribute.
func (*CategoricalAttribute) GetStringFromSysVal ¶
func (Attr *CategoricalAttribute) GetStringFromSysVal(rawVal []byte) string
GetStringFromSysVal returns a human-readable value from the given system-representation value val.
IMPORTANT: This function calls panic() if the value is greater than the length of the array. TODO: Return a user-configurable default instead.
func (*CategoricalAttribute) GetSysVal ¶
func (Attr *CategoricalAttribute) GetSysVal(userVal string) []byte
GetSysVal returns the system representation of userVal as an index into the Values slice. If the userVal can't be found, it returns nothing.
func (*CategoricalAttribute) GetSysValFromString ¶
func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) []byte
GetSysValFromString returns the system representation of rawVal as an index into the Values slice. If rawVal is not inside the Values slice, it is appended.
IMPORTANT: If no system representation yet exists, this function adds it. If you need to determine whether rawVal exists: use GetSysVal and check for a zero-length return value.
Example: if the CategoricalAttribute contains the values ["iris-setosa", "iris-virginica"] and "iris-versicolor" is provided as the argument, the Values slice becomes ["iris-setosa", "iris-virginica", "iris-versicolor"] and 2.00 is returned as the system representation.
func (*CategoricalAttribute) GetType ¶
func (Attr *CategoricalAttribute) GetType() int
GetType returns CategoricalType to avoid casting overhead.
func (*CategoricalAttribute) GetUsrVal ¶
func (Attr *CategoricalAttribute) GetUsrVal(sysVal []byte) string
GetUsrVal returns a human-readable representation of the given sysVal.
IMPORTANT: this function doesn't check the boundaries of the array.
func (*CategoricalAttribute) GetValues ¶
func (Attr *CategoricalAttribute) GetValues() []string
GetValues returns all the values currently defined
func (*CategoricalAttribute) MarshalJSON ¶
func (Attr *CategoricalAttribute) MarshalJSON() ([]byte, error)
MarshalJSON returns a JSON version of this Attribute.
func (*CategoricalAttribute) SetName ¶
func (Attr *CategoricalAttribute) SetName(name string)
SetName sets the human-readable name on this attribute.
func (*CategoricalAttribute) String ¶
func (Attr *CategoricalAttribute) String() string
String returns a human-readable summary of this Attribute.
Returns a string containing the list of human-readable values this CategoricalAttribute can take.
func (*CategoricalAttribute) UnmarshalJSON ¶
func (Attr *CategoricalAttribute) UnmarshalJSON(data []byte) error
UnmarshalJSON reads a JSON version of this Attribute.
type Classifier ¶
type Classifier interface { // Takes a set of Instances, copies the class Attribute // and constructs a new set of Instances of equivalent // length with only the class Attribute and fills it in // with predictions. Predict(FixedDataGrid) (FixedDataGrid, error) // Takes a set of instances and updates the Classifier's // internal structures to enable prediction Fit(FixedDataGrid) error // Why not make every classifier return a nice-looking string? String() string // Save the classifier to a file Save(string) error // Read recreates the classifier from a file Load(string) error // Retrieves the metadata associated with this classifier // (required for Ensembles) GetMetadata() ClassifierMetadataV1 // Used when something is saved as part of an ensemble SaveWithPrefix(*ClassifierSerializer, string) error LoadWithPrefix(*ClassifierDeserializer, string) error }
Classifier implementations predict categorical class labels.
type ClassifierDeserializer ¶
type ClassifierDeserializer struct { Metadata *ClassifierMetadataV1 // contains filtered or unexported fields }
ClassifierDeserializer attaches helper functions useful for reading classifiers. (UNSTABLE).
func ReadSerializedClassifierStub ¶
func ReadSerializedClassifierStub(filePath string) (*ClassifierDeserializer, error)
ReadSerializedClassifierStub is the counterpart of CreateSerializedClassifierStub. It's used inside SaveableClassifiers to read information from a previously saved model file.
func (*ClassifierDeserializer) Close ¶
func (c *ClassifierDeserializer) Close()
Close cleans up everything.
func (*ClassifierDeserializer) GetAttributeForKey ¶
func (c *ClassifierDeserializer) GetAttributeForKey(key string) (Attribute, error)
GetAttributeForKey returns an Attribute stored at a given key
func (*ClassifierDeserializer) GetAttributesForKey ¶
func (c *ClassifierDeserializer) GetAttributesForKey(key string) ([]Attribute, error)
GetAttributesForKey returns an Attribute list stored at a given key
func (*ClassifierDeserializer) GetBytesForKey ¶
func (c *ClassifierDeserializer) GetBytesForKey(key string) ([]byte, error)
GetBytesForKey returns the bytes at a given location in the output.
func (*ClassifierDeserializer) GetInstancesForKey ¶
func (c *ClassifierDeserializer) GetInstancesForKey(key string) (FixedDataGrid, error)
GetInstancesForKey deserializes some instances stored in a classifier output file
func (*ClassifierDeserializer) GetJSONForKey ¶
func (c *ClassifierDeserializer) GetJSONForKey(key string, v interface{}) error
GetJSONForKey deserializes a JSON key in the output file.
func (*ClassifierDeserializer) GetStringForKey ¶
func (c *ClassifierDeserializer) GetStringForKey(key string) (string, error)
func (*ClassifierDeserializer) GetU64ForKey ¶
func (c *ClassifierDeserializer) GetU64ForKey(key string) (uint64, error)
GetU64ForKey returns a uint64 stored at a given key.
func (*ClassifierDeserializer) Prefix ¶
func (c *ClassifierDeserializer) Prefix(prefix string, suffix string) string
Prefix outputs a string in the right format for TAR
func (*ClassifierDeserializer) ReadMetadataAtPrefix ¶
func (c *ClassifierDeserializer) ReadMetadataAtPrefix(prefix string) (ClassifierMetadataV1, error)
ReadMetadataAtPrefix reads the METADATA file after prefix. If an error is returned, the first value is undefined.
type ClassifierMetadataV1 ¶
type ClassifierMetadataV1 struct { // FormatVersion should always be 1 for this structure FormatVersion int `json:"format_version"` // Uses the classifier name (provided by the classifier) ClassifierName string `json:"classifier"` // ClassifierVersion is also provided by the classifier // and checks whether this version of GoLearn can read what's // been written. ClassifierVersion string `json"classifier_version"` // This is a custom metadata field, provided by the classifier ClassifierMetadata map[string]interface{} `json:"classifier_metadata"` }
ClassifierMetadataV1 is what gets written into METADATA in a classification file format.
type ClassifierSerializer ¶
type ClassifierSerializer struct {
// contains filtered or unexported fields
}
ClassifierSerializer is an object used by SaveableClassifiers.
func CreateSerializedClassifierStub ¶
func CreateSerializedClassifierStub(filePath string, metadata ClassifierMetadataV1) (*ClassifierSerializer, error)
CreateSerializedClassifierStub generates a file to serialize into and writes the METADATA header.
func (*ClassifierSerializer) Close ¶
func (c *ClassifierSerializer) Close() error
Close finalizes the Classifier serialization session.
func (*ClassifierSerializer) Prefix ¶
func (c *ClassifierSerializer) Prefix(prefix string, suffix string) string
Prefix outputs a string in the right format for TAR
func (*ClassifierSerializer) WriteAttributeForKey ¶
func (c *ClassifierSerializer) WriteAttributeForKey(key string, a Attribute) error
WriteAttributeForKey creates a new entry in the file containing a serialized representation of Attribute
func (*ClassifierSerializer) WriteAttributesForKey ¶
func (c *ClassifierSerializer) WriteAttributesForKey(key string, attrs []Attribute) error
WriteAttributesForKey does the same as WriteAttributeForKey, just with more than one Attribute.
func (*ClassifierSerializer) WriteBytesForKey ¶
func (c *ClassifierSerializer) WriteBytesForKey(key string, b []byte) error
WriteBytesForKey creates a new entry in the serializer file with some user-defined bytes.
func (*ClassifierSerializer) WriteInstancesForKey ¶
func (c *ClassifierSerializer) WriteInstancesForKey(key string, g FixedDataGrid, includeData bool) error
WriteInstancesForKey creates a new entry in the file containing some training instances.
func (*ClassifierSerializer) WriteJSONForKey ¶
func (c *ClassifierSerializer) WriteJSONForKey(key string, v interface{}) error
WriteJSONForKey creates a new entry in the file with an interface serialized as JSON.
func (*ClassifierSerializer) WriteMetadataAtPrefix ¶
func (c *ClassifierSerializer) WriteMetadataAtPrefix(prefix string, metadata ClassifierMetadataV1) error
WriteMetadataAtPrefix outputs a METADATA entry in the right place
func (*ClassifierSerializer) WriteU64ForKey ¶
func (c *ClassifierSerializer) WriteU64ForKey(key string, v uint64) error
WriteU64ForKey creates a new entry in the serializer file with the bytes of a uint64
type DataBaseColumns ¶
type DataBaseColumns []*sql.ColumnType
func (DataBaseColumns) NullType ¶
func (colTypes DataBaseColumns) NullType(i int) string
type DataGrid ¶
type DataGrid interface { // Retrieves a given Attribute's specification GetAttribute(Attribute) (AttributeSpec, error) // Retrieves details of every Attribute AllAttributes() []Attribute // Marks an Attribute as a class Attribute AddClassAttribute(Attribute) error // Unmarks an Attribute as a class Attribute RemoveClassAttribute(Attribute) error // Returns details of all class Attributes AllClassAttributes() []Attribute // Gets the bytes at a given position or nil Get(AttributeSpec, int) []byte // Convenience function for iteration. MapOverRows([]AttributeSpec, func([][]byte, int) (bool, error)) error }
DataGrid implementations represent data addressable by rows and columns.
type DenseInstances ¶
type DenseInstances struct {
// contains filtered or unexported fields
}
DenseInstances stores each Attribute value explicitly in a large grid.
func CopyDenseInstances ¶
func CopyDenseInstances(template *DenseInstances, templateAttrs []Attribute) *DenseInstances
CopyDenseInstances returns a new DenseInstances with identical structure (layout, Attributes) to the original.
func DeserializeInstances ¶
func DeserializeInstances(f io.ReadSeeker) (ret *DenseInstances, err error)
DeserializeInstances returns a DenseInstances using a given io.ReadSeeker.
func DeserializeInstancesFromTarReader ¶
func DeserializeInstancesFromTarReader(tr *FunctionalTarReader, prefix string) (ret *DenseInstances, err error)
DeserializeInstancesFromTarReader returns DenseInstances from a FunctionalTarReader with the name prefix.
func NewDenseCopy ¶
func NewDenseCopy(of FixedDataGrid) *DenseInstances
NewDenseCopy generates a new DenseInstances set from an existing FixedDataGrid.
func NewDenseInstances ¶
func NewDenseInstances() *DenseInstances
NewDenseInstances generates a new DenseInstances set with an anonymous EDF mapping and default settings.
func NewStructuralCopy ¶
func NewStructuralCopy(of FixedDataGrid) *DenseInstances
NewStructuralCopy generates an empty DenseInstances with the same layout as an existing FixedDataGrid, but with no data.
func ParseCSVToInstances ¶
func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInstances, err error)
ParseCSVToInstances reads the CSV file given by filepath and returns the read Instances.
func ParseCSVToInstancesFromReader ¶
func ParseCSVToInstancesFromReader(r io.ReadSeeker, hasHeaders bool) (instances *DenseInstances, err error)
ParseCSVToInstancesFromReader reads the reader containing CSV and returns the read Instances.
func ParseCSVToInstancesWithAttributeGroups ¶
func ParseCSVToInstancesWithAttributeGroups(filepath string, attrGroups, classAttrGroups map[string]string, attrOverrides map[int]Attribute, hasHeaders bool) (instances *DenseInstances, err error)
ParseCSVToInstancesWithAttributeGroups reads the CSV file given by filepath, and returns the read DenseInstances, but also makes sure to group any Attributes specified in the first argument and also any class Attributes specified in the second
func ParseCSVToInstancesWithAttributeGroupsFromReader ¶
func ParseCSVToInstancesWithAttributeGroupsFromReader(r io.ReadSeeker, attrGroups, classAttrGroups map[string]string, attrOverrides map[int]Attribute, hasHeaders bool) (instances *DenseInstances, err error)
ParseCSVToInstancesWithAttributeGroupsFromReader reads CSV from the given reader, and returns the read DenseInstances, but also makes sure to group any Attributes specified in the first argument and also any class Attributes specified in the second.
func ParseCSVToTemplatedInstances ¶
func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error)
ParseCSVToTemplatedInstances reads the CSV file given by filepath and returns the read Instances, using another already read DenseInstances as a template.
func ParseCSVToTemplatedInstancesFromReader ¶
func ParseCSVToTemplatedInstancesFromReader(r io.ReadSeeker, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error)
ParseCSVToTemplatedInstancesFromReader reads the reader containing CSV and returns the read Instances, using another already read DenseInstances as a template.
func ParseDenseARFFToInstances ¶
func ParseDenseARFFToInstances(filepath string) (ret *DenseInstances, err error)
ParseDenseARFFToInstances parses the dense ARFF File into a FixedDataGrid
func ParseSQLToInstances ¶
func ParseSQLToInstances(db *sqlx.DB, query string) (*DenseInstances, error)
func ParseSQLToInstancesFromStrings ¶
func ParseSQLToInstancesFromStrings(data [][]string, headers []string) (*DenseInstances, error)
func (*DenseInstances) AddAttribute ¶
func (inst *DenseInstances) AddAttribute(a Attribute) AttributeSpec
AddAttribute adds an Attribute to this set of DenseInstances. Creates a default AttributeGroup for it if a suitable one doesn't exist. Returns an AttributeSpec for subsequent Set() calls.
IMPORTANT: will panic if storage has been allocated via Extend.
func (*DenseInstances) AddAttributeToAttributeGroup ¶
func (inst *DenseInstances) AddAttributeToAttributeGroup(newAttribute Attribute, ag string) (AttributeSpec, error)
AddAttributeToAttributeGroup adds an Attribute to a given ag
func (*DenseInstances) AddClassAttribute ¶
func (inst *DenseInstances) AddClassAttribute(a Attribute) error
AddClassAttribute sets an Attribute to be a class Attribute.
func (*DenseInstances) AllAttributeGroups ¶
func (inst *DenseInstances) AllAttributeGroups() map[string]AttributeGroup
AllAttributeGroups returns a copy of the available AttributeGroups
func (*DenseInstances) AllAttributes ¶
func (inst *DenseInstances) AllAttributes() []Attribute
AllAttributes returns a slice of all Attributes.
func (*DenseInstances) AllClassAttributes ¶
func (inst *DenseInstances) AllClassAttributes() []Attribute
AllClassAttributes returns a slice of Attributes which have been designated class Attributes.
func (*DenseInstances) CreateAttributeGroup ¶
func (inst *DenseInstances) CreateAttributeGroup(name string, size int) (err error)
CreateAttributeGroup adds a new AttributeGroup to this set of instances with a given name. If the size is 0, a bit-ag is added; if the size is not 0, then the size of each ag attribute is set to that number of bytes.
func (*DenseInstances) Extend ¶
func (inst *DenseInstances) Extend(rows int) error
Extend extends this set of Instances to store rows additional rows. It's recommended to set rows to something quite large.
IMPORTANT: panics if the allocation fails
func (*DenseInstances) Get ¶
func (inst *DenseInstances) Get(a AttributeSpec, row int) []byte
Get gets a particular Attribute (given as an AttributeSpec) on a particular row. AttributeSpecs can be obtained using GetAttribute() or AddAttribute()
func (*DenseInstances) GetAttribute ¶
func (inst *DenseInstances) GetAttribute(get Attribute) (AttributeSpec, error)
GetAttribute returns the AttributeSpec for an Attribute equal to the argument.
TODO: Write a function to pre-compute this once we've allocated TODO: Write a utility function which retrieves all AttributeSpecs for a given instance set.
func (*DenseInstances) GetAttributeGroup ¶
func (inst *DenseInstances) GetAttributeGroup(name string) (AttributeGroup, error)
GetAttributeGroup returns a reference to an AttributeGroup of a given name.
func (*DenseInstances) MapOverRows ¶
func (inst *DenseInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
MapOverRows passes each row map into a function. First argument is a list of AttributeSpec in the order they're needed in for the function. The second is the function to call on each row.
func (*DenseInstances) RemoveClassAttribute ¶
func (inst *DenseInstances) RemoveClassAttribute(a Attribute) error
RemoveClassAttribute removes an Attribute from the set of class Attributes.
func (*DenseInstances) RowString ¶
func (inst *DenseInstances) RowString(row int) string
RowString returns a string representation of a given row.
func (*DenseInstances) Set ¶
func (inst *DenseInstances) Set(a AttributeSpec, row int, val []byte)
Set sets a particular Attribute (given as an AttributeSpec) on a particular row to a particular value.
AttributeSpecs can be obtained using GetAttribute() or AddAttribute().
IMPORTANT: Will panic() if the AttributeSpec isn't valid
IMPORTANT: Will panic() if the row is too large
IMPORTANT: Will panic() if the val is not the right length
func (*DenseInstances) Size ¶
func (inst *DenseInstances) Size() (int, int)
Size returns the number of Attributes as the first return value and the maximum allocated row as the second value.
func (*DenseInstances) String ¶
func (inst *DenseInstances) String() string
String returns a human-readable summary of this dataset.
type Estimator ¶
type Estimator interface {
Fit()
}
An Estimator is an object that can ingest some data and train on it.
type Filter ¶
type Filter interface { // Adds an Attribute to the filter AddAttribute(Attribute) error // Allows mapping old to new Attributes GetAttributesAfterFiltering() []FilteredAttribute // Gets a string for printing String() string // Accepts an old Attribute, the new one and returns a sequence Transform(Attribute, Attribute, []byte) []byte // Builds the filter Train() error }
Filters transform the byte sequences stored in DataGrid implementations.
type FilteredAttribute ¶
FilteredAttributes represent a mapping from the output generated by a filter to the original value.
type FixedAttributeGroup ¶
type FixedAttributeGroup struct {
// contains filtered or unexported fields
}
FixedAttributeGroups contain a particular number of rows of a particular number of Attributes, all of a given type.
func (*FixedAttributeGroup) AddAttribute ¶
func (f *FixedAttributeGroup) AddAttribute(a Attribute) error
AddAttribute adds an attribute to this FixedAttributeGroup
func (*FixedAttributeGroup) Attributes ¶
func (f *FixedAttributeGroup) Attributes() []Attribute
Attributes returns a slice of Attributes in this FixedAttributeGroup
func (*FixedAttributeGroup) RowSizeInBytes ¶
func (f *FixedAttributeGroup) RowSizeInBytes() int
RowSizeInBytes returns the size of each row in bytes
func (*FixedAttributeGroup) Storage ¶
func (f *FixedAttributeGroup) Storage() []byte
Storage returns a byte slice which can be used to access the memory in this pond.
func (*FixedAttributeGroup) String ¶
func (f *FixedAttributeGroup) String() string
String gets a human-readable summary
type FixedDataGrid ¶
type FixedDataGrid interface { DataGrid // Returns a string representation of a given row RowString(int) string // Returns the number of Attributes and rows currently allocated Size() (int, int) }
FixedDataGrid implementations have a size known in advance and implement all of the functionality offered by DataGrid implementations.
func LazyShuffle ¶
func LazyShuffle(from FixedDataGrid) FixedDataGrid
LazyShuffle randomizes the row order without re-ordering the rows via an InstancesView.
func LazySort ¶
func LazySort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
LazySort also does a sort, but returns an InstancesView and doesn't actually reorder the rows, just makes it look like they've been reordered. See also: Sort.
func SampleWithReplacement ¶
func SampleWithReplacement(from FixedDataGrid, size int) FixedDataGrid
SampleWithReplacement returns a new FixedDataGrid containing an equal number of random rows drawn from the original FixedDataGrid
IMPORTANT: There's a high chance of seeing duplicate rows whenever size is close to the row count.
func Shuffle ¶
func Shuffle(from FixedDataGrid) FixedDataGrid
Shuffle randomizes the row order either in place (if DenseInstances) or using LazyShuffle.
func Sort ¶
func Sort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error)
Sort does a radix sort of DenseInstances, using SortDirection direction (Ascending or Descending) with attrs as a slice of Attribute indices that you want to sort by.
IMPORTANT: Radix sort is not stable, so ordering outside the attributes used for sorting is arbitrary.
type FloatAttribute ¶
FloatAttribute is an implementation which stores floating point representations of numbers.
func NewFloatAttribute ¶
func NewFloatAttribute(name string) *FloatAttribute
NewFloatAttribute returns a new FloatAttribute with a default precision of 2 decimal places
func (*FloatAttribute) CheckSysValFromString ¶
func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) ([]byte, error)
CheckSysValFromString confirms whether a given rawVal can be converted into a valid system representation. If it can't, the returned value is nil.
func (*FloatAttribute) Compatible ¶
func (Attr *FloatAttribute) Compatible(other Attribute) bool
Compatible checks whether this FloatAttribute can be ponded with another Attribute (checks if they're both FloatAttributes)
func (*FloatAttribute) Equals ¶
func (Attr *FloatAttribute) Equals(other Attribute) bool
Equals tests a FloatAttribute for equality with another Attribute.
Returns false if the other Attribute has a different name or if the other Attribute is not a FloatAttribute.
func (*FloatAttribute) GetFloatFromSysVal ¶
func (Attr *FloatAttribute) GetFloatFromSysVal(rawVal []byte) float64
GetFloatFromSysVal converts a given system value to a float
func (*FloatAttribute) GetName ¶
func (Attr *FloatAttribute) GetName() string
GetName returns this FloatAttribute's human-readable name.
func (*FloatAttribute) GetStringFromSysVal ¶
func (Attr *FloatAttribute) GetStringFromSysVal(rawVal []byte) string
GetStringFromSysVal converts a given system value to a string with two decimal places of precision.
func (*FloatAttribute) GetSysValFromString ¶
func (Attr *FloatAttribute) GetSysValFromString(rawVal string) []byte
GetSysValFromString parses the given rawVal string to a float64 and returns it.
float64 happens to be a 1-to-1 mapping to the system representation. IMPORTANT: This function panic()s if rawVal is not a valid float. Use CheckSysValFromString to confirm.
func (*FloatAttribute) GetType ¶
func (Attr *FloatAttribute) GetType() int
GetType returns Float64Type.
func (*FloatAttribute) MarshalJSON ¶
func (f *FloatAttribute) MarshalJSON() ([]byte, error)
MarshalJSON returns a JSON representation of this Attribute for serialisation.
func (*FloatAttribute) SetName ¶
func (Attr *FloatAttribute) SetName(name string)
SetName sets this FloatAttribute's human-readable name.
func (*FloatAttribute) String ¶
func (Attr *FloatAttribute) String() string
String returns a human-readable summary of this Attribute. e.g. "FloatAttribute(Sepal Width)"
func (*FloatAttribute) UnmarshalJSON ¶
func (f *FloatAttribute) UnmarshalJSON(data []byte) error
UnmarshalJSON reads a JSON representation of this Attribute.
type FunctionalTarReader ¶
FunctionalTarReader allows you to read anything in a tar file in any order, rather than just sequentially.
func NewFunctionalTarReader ¶
func NewFunctionalTarReader(regenFunc func() *tar.Reader) *FunctionalTarReader
NewFunctionalTarReader creates a new FunctionalTarReader using a function that it can call to get a tar.Reader at the beginning of the file.
func (*FunctionalTarReader) GetNamedFile ¶
func (f *FunctionalTarReader) GetNamedFile(name string) ([]byte, error)
GetNamedFile returns a file named a given thing from the tar file. If there's more than one entry, the most recent is returned.
type GoLearnError ¶
func (*GoLearnError) Error ¶
func (g *GoLearnError) Error() string
type InstancesView ¶
type InstancesView struct {
// contains filtered or unexported fields
}
InstancesViews hide or re-order Attributes and rows from a given DataGrid to make it appear that they've been deleted.
func NewInstancesViewFromAttrs ¶
func NewInstancesViewFromAttrs(src FixedDataGrid, attrs []Attribute) *InstancesView
NewInstancesViewFromAttrs creates a new InstancesView from a source FixedDataGrid and a slice of Attributes.
Only the Attributes specified will appear in this InstancesView.
func NewInstancesViewFromRows ¶
func NewInstancesViewFromRows(src FixedDataGrid, rows map[int]int) *InstancesView
NewInstancesViewFromRows creates a new InstancesView from a source FixedDataGrid and row -> row mapping. The key of the rows map is the row as it exists within this mapping: for example an entry like 5 -> 1 means that row 1 in src will appear at row 5 in the InstancesView.
Rows are not masked in this implementation, meaning that all rows which are left unspecified appear as normal.
func NewInstancesViewFromVisible ¶
func NewInstancesViewFromVisible(src FixedDataGrid, rows []int, attrs []Attribute) *InstancesView
NewInstancesViewFromVisible creates a new InstancesView from a source FixedDataGrid, a slice of row numbers and a slice of Attributes.
Only the rows specified will appear in this InstancesView, and they will appear in the same order they appear within the rows array.
Only the Attributes specified will appear in this InstancesView. Retrieving Attribute specifications from this InstancesView will maintain their order.
func (*InstancesView) AddClassAttribute ¶
func (v *InstancesView) AddClassAttribute(a Attribute) error
AddClassAttribute adds the given Attribute to the set of defined class Attributes, if it hasn't been filtered.
func (*InstancesView) AllAttributes ¶
func (v *InstancesView) AllAttributes() []Attribute
AllAttributes returns every Attribute which hasn't been filtered.
func (*InstancesView) AllClassAttributes ¶
func (v *InstancesView) AllClassAttributes() []Attribute
AllClassAttributes returns all the Attributes currently defined as being class Attributes.
func (*InstancesView) Get ¶
func (v *InstancesView) Get(as AttributeSpec, row int) []byte
Get returns a sequence of bytes stored under a given Attribute on a given row.
IMPORTANT: The AttributeSpec is unverified, meaning it's possible to return values from Attributes filtered by this InstancesView if the underlying AttributeSpec is known.
func (*InstancesView) GetAttribute ¶
func (v *InstancesView) GetAttribute(a Attribute) (AttributeSpec, error)
GetAttribute returns an Attribute specification matching an Attribute if it has not been filtered.
The AttributeSpecs returned are the same as those returned by the source FixedDataGrid.
func (*InstancesView) MapOverRows ¶
func (v *InstancesView) MapOverRows(as []AttributeSpec, rowFunc func([][]byte, int) (bool, error)) error
MapOverRows, see DenseInstances.MapOverRows.
IMPORTANT: MapOverRows is not guaranteed to be ordered, but this one especially so.
func (*InstancesView) RemoveClassAttribute ¶
func (v *InstancesView) RemoveClassAttribute(a Attribute) error
RemoveClassAttribute removes the given Attribute from the set of class Attributes.
func (*InstancesView) RowString ¶
func (v *InstancesView) RowString(row int) string
RowString returns a string representation of a given row.
func (*InstancesView) Size ¶
func (v *InstancesView) Size() (int, int)
Size returns the number of Attributes and rows this InstancesView contains.
func (*InstancesView) String ¶
func (v *InstancesView) String() string
String returns a human-readable summary of this InstancesView.
type LazilyFilteredInstances ¶
type LazilyFilteredInstances struct {
// contains filtered or unexported fields
}
LazilyFilteredInstances map a Filter over an underlying FixedDataGrid and are a memory-efficient way of applying them.
func NewLazilyFilteredInstances ¶
func NewLazilyFilteredInstances(src FixedDataGrid, f Filter) *LazilyFilteredInstances
NewLazilyFilteredInstances returns a new FixedDataGrid after applying the given Filter to the Attributes it includes. Unfiltered Attributes are passed through without modification.
func (*LazilyFilteredInstances) AddClassAttribute ¶
func (l *LazilyFilteredInstances) AddClassAttribute(cls Attribute) error
AddClassAttribute adds a given Attribute (either before or after filtering) to the set of defined class Attributes.
func (*LazilyFilteredInstances) AllAttributes ¶
func (l *LazilyFilteredInstances) AllAttributes() []Attribute
AllAttributes returns every Attribute defined in the source datagrid, in addition to the revised Attributes created by the filter.
func (*LazilyFilteredInstances) AllClassAttributes ¶
func (l *LazilyFilteredInstances) AllClassAttributes() []Attribute
AllClassAttributes returns details of all Attributes currently specified as being class Attributes.
If applicable, the Attributes returned are those after modification by the Filter.
func (*LazilyFilteredInstances) Get ¶
func (l *LazilyFilteredInstances) Get(as AttributeSpec, row int) []byte
Get returns a transformed byte slice stored at a given AttributeSpec and row.
func (*LazilyFilteredInstances) GetAttribute ¶
func (l *LazilyFilteredInstances) GetAttribute(target Attribute) (AttributeSpec, error)
GetAttribute returns an AttributeSpec for a given Attribute.
func (*LazilyFilteredInstances) MapOverRows ¶
func (l *LazilyFilteredInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error
MapOverRows maps an iteration mapFunc over the bytes contained in the source FixedDataGrid, after modification by the filter.
func (*LazilyFilteredInstances) RemoveClassAttribute ¶
func (l *LazilyFilteredInstances) RemoveClassAttribute(cls Attribute) error
RemoveClassAttribute removes a given Attribute (either before or after filtering) from the set of defined class Attributes.
func (*LazilyFilteredInstances) RowString ¶
func (l *LazilyFilteredInstances) RowString(row int) string
RowString returns a string representation of a given row after filtering.
func (*LazilyFilteredInstances) Size ¶
func (l *LazilyFilteredInstances) Size() (int, int)
Size returns the number of Attributes and rows of the underlying FixedDataGrid.
func (*LazilyFilteredInstances) String ¶
func (l *LazilyFilteredInstances) String() string
String returns a human-readable summary of this FixedDataGrid after filtering.
type Mat64Instances ¶
func InstancesFromMat64 ¶
func InstancesFromMat64(rows, cols int, data *mat.Dense) *Mat64Instances
InstancesFromMat64 returns a new Mat64Instances from a literal provided.
func (*Mat64Instances) AddClassAttribute ¶
func (m *Mat64Instances) AddClassAttribute(a Attribute) error
AddClassAttribute adds an attribute to the class set.
func (*Mat64Instances) AllAttributes ¶
func (m *Mat64Instances) AllAttributes() []Attribute
AllAttributes returns every defined Attribute.
func (*Mat64Instances) AllClassAttributes ¶
func (m *Mat64Instances) AllClassAttributes() []Attribute
AllClassAttributes returns every class attribute.
func (*Mat64Instances) Get ¶
func (m *Mat64Instances) Get(as AttributeSpec, row int) []byte
Get returns the bytes at a given position.
func (*Mat64Instances) GetAttribute ¶
func (m *Mat64Instances) GetAttribute(a Attribute) (AttributeSpec, error)
GetAttribute returns an AttributeSpec from an Attribute field.
func (*Mat64Instances) MapOverRows ¶
func (m *Mat64Instances) MapOverRows(as []AttributeSpec, f func([][]byte, int) (bool, error)) error
MapOverRows is a convenience function for iteration.
func (*Mat64Instances) RemoveClassAttribute ¶
func (m *Mat64Instances) RemoveClassAttribute(a Attribute) error
RemoveClassAttribute removes an attribute from the class set.
func (*Mat64Instances) RowString ¶
func (m *Mat64Instances) RowString(row int) string
RowString should print the values of a row. TODO: make this implementation more complete.
func (*Mat64Instances) Size ¶
func (m *Mat64Instances) Size() (int, int)
Size returns the number of Attributes, then the number of rows.
func (*Mat64Instances) String ¶
func (m *Mat64Instances) String() string
String returns a human-readable summary of this dataset.
type Model ¶
type Model interface {
Score()
}
A Model is a supervised learning object that is capable of scoring its accuracy against a test set.
type Predictor ¶
type Predictor interface {
Predict()
}
A Predictor is an object that provides predictions.
type SortDirection ¶
type SortDirection int
SortDirection specifies sorting direction...
const (
	// Descending says that Instances should be sorted high to low...
	Descending SortDirection = 1
	// Ascending states that Instances should be sorted low to high...
	Ascending SortDirection = 2
)
type UpdatableDataGrid ¶
type UpdatableDataGrid interface {
	FixedDataGrid
	// Sets a given Attribute and row to a byte sequence.
	Set(AttributeSpec, int, []byte)
	// Adds an Attribute to the grid.
	AddAttribute(Attribute) AttributeSpec
	// Allocates additional room to hold a number of rows
	Extend(int) error
}
UpdatableDataGrid implementations can be changed in addition to implementing all of the functionality offered by FixedDataGrid implementations.
func GeneratePredictionVector ¶
func GeneratePredictionVector(from FixedDataGrid) UpdatableDataGrid
GeneratePredictionVector selects the class Attributes from a given FixedDataGrid and returns something which can hold the predictions.
Source Files ¶
- arff.go
- attributes.go
- bag.go
- binary.go
- categorical.go
- classifier.go
- conversion.go
- csv.go
- data.go
- dense.go
- domain.go
- error.go
- filewrapper.go
- filtered.go
- filters.go
- fixed.go
- float.go
- group.go
- logger.go
- mat.go
- serialize.go
- serialize_attributes.go
- serialize_instances.go
- sort.go
- spec.go
- sql.go
- util.go
- util_attributes.go
- util_instances.go
- view.go