seafan

package module

v0.2.41 Latest Latest Go to latest Published: Mar 16, 2024 License: Apache-2.0 Imports: 24 Imported by: 2

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/invertedv/seafan

Links

Open Source Insights

README ¶

Seafan

Package seafan is a set of tools for building DNN models. The build engine is gorgonia.

Seafan features:

A data pipeline based on chutils to access files and ClickHouse tables.
- Point-and-shoot specification of the data
- Simple specification of one-hot features
- Functions of fields in the pipeline can be calculated from, and optionally added back, to the pipeline using the built-in expression parser (see Expr2Tree).
A wrapper around gorgonia that meshes to the pipeline.
- Simple specification of models, including embeddings
- A fit method with optional early stopping
- Callbacks during model fit
- Saving and loading models
Model diagnostics for categorical targets.
- KS plots
- Decile plots
- Marginal effects plots
Utilities.
- Plotting wrapper for plotly for xy plots.
- Numeric struct for (x,y) data and plotting and descriptive statistics.

Documentation ¶

Overview ¶

Package seafan is a set of tools for building DNN models. The build engine is gorgonia (https://pkg.go.dev/gorgonia.org/gorgonia).

Seafan features:

- A data pipeline based on chutils (https://github.com/invertedv/chutils) to access files and ClickHouse tables.

Point-and-shoot specification of the data
Simple specification of one-hot features

- A wrapper around gorgonia that meshes to the pipeline.

Simple specification of models, including embeddings
A fit method with optional early stopping and callbacks
Saving and loading models

- Model diagnostics for categorical targets.

KS plots
Decile plots

- Utilities.

Plotting wrapper for plotly (https://github.com/MetalBlueberry/go-plotly) for xy plots.
Numeric struct for (x,y) data and plotting and descriptive statistics.

Index ¶

Variables
func AddFitted(pipeIn Pipeline, nnFile string, target []int, name string, fts FTypes, ...) error
func Coalesce(vals []float64, nCat int, trg []int, binary, logodds bool, sl Slicer) ([]float64, error)
func CrossEntropy(model *NNModel) (cost *G.Node)
func Decile(xyIn *XY, plt *utilities.PlotDef) error
func EvalSFunction(node *OpNode) error
func Evaluate(curNode *OpNode, pipe Pipeline) error
func Expr2Tree(curNode *OpNode) error
func GetNode(ns G.Nodes, name string) *G.Node
func KS(xy *XY, plt *utilities.PlotDef) (ks float64, notTarget *Desc, target *Desc, err error)
func LeakyReluAct(n *G.Node, alpha float64) *G.Node
func LinearAct(n *G.Node) *G.Node
func Loop(loopVar string, start, end int, inner []*OpNode, assign []string, ...) error
func Marginal(nnFile string, feat string, target []int, pipe Pipeline, pd *utilities.PlotDef, ...) error
func PipeToCSV(pipe Pipeline, outFile string, sep, eol, quote rune) error
func PipeToSQL(pipe Pipeline, table string, after int, conn *chutils.Connect) error
func R2(y, yhat []float64) float64
func RMS(model *NNModel) (cost *G.Node)
func ReluAct(n *G.Node) *G.Node
func SegPlot(pipe Pipeline, obs, fit, seg string, plt *utilities.PlotDef, ...) error
func SigmoidAct(n *G.Node) *G.Node
func SoftMaxAct(n *G.Node) *G.Node
func SoftRMS(model *NNModel) (cost *G.Node)
func Strip(s string) (left, inner string, err error)
func UnNormalize(vals []float64, ft *FType) (unNorm []float64)
func Unique(xs []any) []any
func Wrapper(e error, text string) error
type Activation
- func StrAct(s string) (*Activation, float64)
- func (i Activation) String() string
type Args
- func MakeArgs(s string) (keyval Args, err error)
- func (kv Args) Get(key string, kind reflect.Kind) (val any)
type ChData
- func NewChData(name string, opts ...Opts) *ChData
- func (ch *ChData) AppendRows(gd *GData, fTypes FTypes) (pipeOut Pipeline, err error)
- func (ch *ChData) AppendRowsRaw(gd *GData) error
- func (ch *ChData) Batch(inputs G.Nodes) bool
- func (ch *ChData) BatchSize() int
- func (ch *ChData) Cols(field string) int
- func (ch *ChData) Describe(field string, topK int) string
- func (ch *ChData) Drop(field string) error
- func (ch *ChData) Epoch(setTo int) int
- func (ch *ChData) FieldCount() int
- func (ch *ChData) FieldList() []string
- func (ch *ChData) GData() *GData
- func (ch *ChData) Get(field string) *GDatum
- func (ch *ChData) GetFType(field string) *FType
- func (ch *ChData) GetFTypes() FTypes
- func (ch *ChData) GetKeepRaw() bool
- func (ch *ChData) Init() (err error)
- func (ch *ChData) InitOld() (err error)
- func (ch *ChData) IsCat(field string) bool
- func (ch *ChData) IsCts(field string) bool
- func (ch *ChData) IsNormalized(field string) bool
- func (ch *ChData) IsSorted() bool
- func (ch *ChData) Join(right Pipeline, onField string, joinType JoinType) (result Pipeline, err error)
- func (ch *ChData) Keep(fields []string) error
- func (ch *ChData) Name() string
- func (ch *ChData) ReInit(ftypes *FTypes) (pipeOut Pipeline, err error)
- func (ch *ChData) Row(take int) (newPipe Pipeline, err error)
- func (ch *ChData) Rows() int
- func (ch *ChData) SaveFTypes(fileName string) error
- func (ch *ChData) Shuffle()
- func (ch *ChData) Slice(sl Slicer) (Pipeline, error)
- func (ch *ChData) Sort(field string, ascending bool) error
- func (ch *ChData) SortField() string
- func (ch *ChData) String() string
- func (ch *ChData) Subset(rows []int) (newPipe Pipeline, err error)
- func (ch *ChData) Where(field string, equalTo []any) (newPipe Pipeline, err error)
type CostFunc
type DOLayer
- func DropOutParse(s string) (*DOLayer, error)
type Desc
- func Assess(xy *XY, cutoff float64) (n int, precision, recall, accuracy float64, obs, fit *Desc, err error)
- func NewDesc(u []float64, name string) (*Desc, error)
- func (d *Desc) Populate(x []float64, noSort bool, sl Slicer)
- func (d *Desc) String() string
type FCLayer
- func FCParse(s string) (fc *FCLayer, err error)
type FParam
type FRole
- func (i FRole) String() string
type FType
- func (ft *FType) String() string
type FTypes
- func LoadFTypes(fileName string) (fts FTypes, err error)
- func (fts FTypes) DropFields(dropFields ...string) FTypes
- func (fts FTypes) Get(name string) *FType
- func (fts FTypes) Save(fileName string) (err error)
type Fit
- func NewFit(nn *NNModel, epochs int, p Pipeline, opts ...FitOpts) *Fit
- func (ft *Fit) BestEpoch() int
- func (ft *Fit) Do() (err error)
- func (ft *Fit) InCosts() *XY
- func (ft *Fit) NNModel() *NNModel
- func (ft *Fit) OutCosts() *XY
- func (ft *Fit) OutFile() string
type FitOpts
- func WithL2Reg(penalty float64) FitOpts
- func WithLearnRate(lrStart, lrEnd float64) FitOpts
- func WithOutFile(fileName string) FitOpts
- func WithShuffle(interval int) FitOpts
- func WithValidation(p Pipeline, wait int) FitOpts
type FuncSpec
type GData
- func NewGData() *GData
- func (gd *GData) AddRaw(data [][]any, fields []string, fts FTypes, keepRaw bool) error
- func (gd *GData) AppendC(raw *Raw, name string, normalize bool, fp *FParam, keepRaw bool) error
- func (gd *GData) AppendD(raw *Raw, name string, fp *FParam, keepRaw bool) error
- func (gd *GData) AppendField(newData *Raw, name string, fRole FRole, keepRaw bool) error
- func (gd *GData) AppendRows(gdApp *GData, fTypes FTypes) (gdOut *GData, err error)
- func (gd *GData) AppendRowsRaw(gdApp *GData) error
- func (gd *GData) Back2Raw() (rawData []*Raw, nCol int, fields []string, err error)
- func (gd *GData) Close() error
- func (gd *GData) Copy() (gdOut *GData, err error)
- func (gd *GData) CountLines() (numLines int, err error)
- func (gd *GData) Drop(field string) error
- func (gd *GData) FieldCount() int
- func (gd *GData) FieldList() []string
- func (gd *GData) Get(name string) *GDatum
- func (gd *GData) GetData() []*GDatum
- func (gd *GData) GetFType(field string) *FType
- func (gd *GData) GetFTypes() FTypes
- func (gd *GData) GetRaw(field string) (*Raw, error)
- func (gd *GData) IsSorted() bool
- func (gd *GData) Join(right *GData, onField string, joinType JoinType) (result *GData, err error)
- func (gd *GData) Keep(fields []string) error
- func (gd *GData) Len() int
- func (gd *GData) Less(i, j int) bool
- func (gd *GData) MakeOneHot(from, name string) error
- func (gd *GData) ReInit(fTypes *FTypes) (gdOut *GData, err error)
- func (gd *GData) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)
- func (gd *GData) Reset() error
- func (gd *GData) Row(take int) (gdNew *GData, err error)
- func (gd *GData) Rows() int
- func (gd *GData) Seek(lineNo int) error
- func (gd *GData) Shuffle()
- func (gd *GData) Slice(sl Slicer) (*GData, error)
- func (gd *GData) Sort(field string, ascending bool) error
- func (gd *GData) SortField() string
- func (gd *GData) Subset(keepRows []int) (gdOut *GData, err error)
- func (gd *GData) Swap(i, j int)
- func (gd *GData) TableSpec() *chutils.TableDef
- func (gd *GData) UpdateFts(newFts FTypes) (*GData, error)
- func (gd *GData) Where(field string, equalTo []any) (gdOut *GData, err error)
type GDatum
- func (g *GDatum) Describe(topK int) string
- func (g *GDatum) String() string
type JoinType
- func (i JoinType) String() string
type Layer
- func (i Layer) String() string
type Levels
- func ByCounts(data *Raw, sl Slicer) Levels
- func ByPtr(data *Raw) Levels
- func (l Levels) FindValue(val int32) any
- func (l Levels) Sort(byName, ascend bool) (key []any, val []int32)
- func (l Levels) String() string
- func (l Levels) TopK(topNum int, byName, ascend bool) string
type ModSpec
- func LoadModSpec(fileName string) (ms ModSpec, err error)
- func (m ModSpec) Check() error
- func (m ModSpec) DropOut(loc int) *DOLayer
- func (m ModSpec) FC(loc int) *FCLayer
- func (m ModSpec) Inputs(p Pipeline) (FTypes, error)
- func (m ModSpec) LType(i int) (*Layer, error)
- func (m ModSpec) Save(fileName string) (err error)
- func (m ModSpec) Target(p Pipeline) (*FType, error)
- func (m ModSpec) TargetName() string
type NNModel
- func LoadNN(fileRoot string, p Pipeline, build bool) (nn *NNModel, err error)
- func NewNNModel(modSpec ModSpec, pipe Pipeline, build bool, nnOpts ...NNOpts) (*NNModel, error)
- func PredictNN(fileRoot string, pipe Pipeline, build bool, opts ...NNOpts) (nn *NNModel, err error)
- func PredictNNwFts(fileRoot string, pipe Pipeline, build bool, fts FTypes, opts ...NNOpts) (nn *NNModel, err error)
- func (m *NNModel) Cost() *G.Node
- func (m *NNModel) CostFlt() float64
- func (m *NNModel) CostFn() CostFunc
- func (m *NNModel) Features() G.Nodes
- func (m *NNModel) FitSlice() []float64
- func (m *NNModel) Fitted() G.Result
- func (m *NNModel) Fwd()
- func (m *NNModel) G() *G.ExprGraph
- func (m *NNModel) InputFT() FTypes
- func (m *NNModel) Inputs() G.Nodes
- func (m *NNModel) ModSpec() ModSpec
- func (m *NNModel) Name() string
- func (m *NNModel) Obs() *G.Node
- func (m *NNModel) ObsSlice() []float64
- func (m *NNModel) Opts() []NNOpts
- func (m *NNModel) OutputCols() int
- func (m *NNModel) Params() G.Nodes
- func (m *NNModel) Save(fileRoot string) (err error)
- func (m *NNModel) String() string
type NNOpts
- func WithCostFn(cf CostFunc) NNOpts
- func WithName(name string) NNOpts
type OpNode
- func CopyNode(src *OpNode) (dest *OpNode)
type Opts
- func WithBatchSize(bsize int) Opts
- func WithCallBack(cb Opts) Opts
- func WithCats(names ...string) Opts
- func WithCycle(cycle bool) Opts
- func WithFtypes(fts FTypes) Opts
- func WithKeepRaw(keepRaw bool) Opts
- func WithNormalized(names ...string) Opts
- func WithOneHot(name, from string) Opts
- func WithReader(rdr any) Opts
type Pipeline
- func AddToPipe(rootNode *OpNode, fieldName string, pipe Pipeline) (outPipe Pipeline, err error)
- func Append(pipe1, pipe2 Pipeline) (Pipeline, error)
- func CSVToPipe(csvFile string, fts FTypes, keepRaw bool) (pipe Pipeline, err error)
- func SQLToPipe(sql string, fts FTypes, keepRaw bool, conn *chutils.Connect) (pipe Pipeline, err error)
- func VecFromAny(data [][]any, fields []string, ftypes FTypes) (pipe Pipeline, err error)
type Raw
- func AllocRaw(n int, kind reflect.Kind) *Raw
- func NewRaw(x []any, sl Slicer) *Raw
- func NewRawCast(x any, sl Slicer) *Raw
- func (r *Raw) CumeAfter(aggType string) (*Raw, error)
- func (r *Raw) CumeBefore(aggType string) (*Raw, error)
- func (r *Raw) Exp() (*Raw, error)
- func (r *Raw) Index(indices *Raw) (*Raw, error)
- func (r *Raw) IsNumeric() bool
- func (r *Raw) Lag(missing any) (*Raw, error)
- func (r *Raw) Len() int
- func (r *Raw) Less(i, j int) bool
- func (r *Raw) Log() (*Raw, error)
- func (r *Raw) Max() (*Raw, error)
- func (r *Raw) Mean() (*Raw, error)
- func (r *Raw) Min() (*Raw, error)
- func (r *Raw) Pow(exponent *Raw) (*Raw, error)
- func (r *Raw) Product() (*Raw, error)
- func (r *Raw) Std() (*Raw, error)
- func (r *Raw) Sum() (*Raw, error)
- func (r *Raw) Swap(i, j int)
type SeaError
- func (seaErr SeaError) Error() string
type Slice
- func NewSlice(feat string, minCnt int, pipe Pipeline, restrict []any) (*Slice, error)
- func (s *Slice) Index() int32
- func (s *Slice) Iter() bool
- func (s *Slice) MakeSlicer() Slicer
- func (s *Slice) Title() string
- func (s *Slice) Value() any
type Slicer
- func SlicerAnd(s1, s2 Slicer) Slicer
- func SlicerOr(s1, s2 Slicer) Slicer
type Summary
type VecData
- func NewVecData(name string, data *GData, opts ...Opts) *VecData
- func (vec *VecData) AppendRows(gd *GData, fTypes FTypes) (pipeOut Pipeline, err error)
- func (vec *VecData) AppendRowsRaw(gd *GData) error
- func (vec *VecData) Batch(inputs G.Nodes) bool
- func (vec *VecData) BatchSize() int
- func (vec *VecData) Cols(field string) int
- func (vec *VecData) Describe(field string, topK int) string
- func (vec *VecData) Drop(field string) error
- func (vec *VecData) Epoch(setTo int) int
- func (vec *VecData) FieldCount() int
- func (vec *VecData) FieldList() []string
- func (vec *VecData) GData() *GData
- func (vec *VecData) Get(field string) *GDatum
- func (vec *VecData) GetFType(field string) *FType
- func (vec *VecData) GetFTypes() FTypes
- func (vec *VecData) GetKeepRaw() bool
- func (vec *VecData) Init() error
- func (vec *VecData) IsCat(field string) bool
- func (vec *VecData) IsCts(field string) bool
- func (vec *VecData) IsNormalized(field string) bool
- func (vec *VecData) IsSorted() bool
- func (vec *VecData) Join(right Pipeline, onField string, joinType JoinType) (result Pipeline, err error)
- func (vec *VecData) Keep(fields []string) error
- func (vec *VecData) Name() string
- func (vec *VecData) ReInit(ftypes *FTypes) (pipeOut Pipeline, err error)
- func (vec *VecData) Row(take int) (newPipe Pipeline, err error)
- func (vec *VecData) Rows() int
- func (vec *VecData) SaveFTypes(fileName string) error
- func (vec *VecData) Shuffle()
- func (vec *VecData) Slice(sl Slicer) (Pipeline, error)
- func (vec *VecData) Sort(field string, ascending bool) error
- func (vec *VecData) SortField() string
- func (vec *VecData) String() string
- func (vec *VecData) Subset(rows []int) (newPipe Pipeline, err error)
- func (vec *VecData) Where(field string, equalTo []any) (newPipe Pipeline, err error)
type XY
- func NewXY(x, y []float64) (*XY, error)
- func (p *XY) Interp(xNew []float64) (*XY, error)
- func (p *XY) Len() int
- func (p *XY) Less(i, j int) bool
- func (p *XY) Plot(pd *utilities.PlotDef, scatter bool) error
- func (p *XY) Sort() error
- func (p *XY) String() string
- func (p *XY) Swap(i, j int)

Constants ¶

This section is empty.

Variables ¶

View Source

var (
	// FunctionsStr lists the functions that parser supports, the number and types of arguments, type of return
	//go:embed strings/functions.txt
	FunctionsStr string

	// Functions is a slice that describes all supported functions/operations
	Functions []FuncSpec

	Height = 1200.0
	Width  = 1200.0
)

View Source

var Browser = "firefox"

Browser is the browser to use for plotting.

View Source

var Verbose = true

Verbose controls amount of printing.

Functions ¶

func AddFitted ¶

func AddFitted(pipeIn Pipeline, nnFile string, target []int, name string, fts FTypes, logodds bool, obsFit *FType) error

AddFitted addes fitted values to a Pipeline. The features can be re-normalized/re-mapped to align pipeIn with the model build pipeIn -- input Pipeline to run the model on nnFile -- root directory of NNModel target -- target columns of the model output to coalesce name -- name of fitted value in Pipeline fts -- options FTypes to use for normalizing pipeIn

func Coalesce ¶

func Coalesce(vals []float64, nCat int, trg []int, binary, logodds bool, sl Slicer) ([]float64, error)

Coalesce combines columns of either a one-hot feature or a softmax output. In the case of a feature, it returns 1 if any of the target columns is 1. In the case of a softmax output, it sums the entries.

func CrossEntropy ¶

func CrossEntropy(model *NNModel) (cost *G.Node)

CrossEntropy cost function

func Decile ¶

func Decile(xyIn *XY, plt *utilities.PlotDef) error

Decile generates a decile plot based on xy

XY        values to base the plot on.
plt       PlotDef plot options.  If plt is nil an error is generated.

The deciles are created based on the values of xy.X

func EvalSFunction ¶ added in v0.1.1

func EvalSFunction(node *OpNode) error

EvalSFunction evaluates a summary function. A summary function returns a single value.

func Evaluate ¶ added in v0.1.0

func Evaluate(curNode *OpNode, pipe Pipeline) error

Evaluate evaluates an expression parsed by Expr2Tree. The user calls Evaluate with the top node as returned by Expr2Tree To add a field to a pipeline:

Create the *OpNode tree to evaluate the expression using Expr2Tree
Populate the values from a Pipeline using Evaluate.
Add the values to the Pipeline using AddToPipe

Note, you can access the values after Evaluate without adding the field to the Pipeline from the *Raw item of the root node.

Example ¶

This example shows how to print a result

Verbose = false

// builds a Pipline with two fields:
//    c = 1,2,3,4
//    D = 5,-5,3,6
pipe := buildPipe([]string{"1,2,3,4", "'a', 'b', 'c', 'd'"}, []string{"f", "s"})

field := &OpNode{Expression: "print(c, 0)"}
if e := Expr2Tree(field); e != nil {
	panic(e)
}

if e := Evaluate(field, pipe); e != nil {
	panic(e)
}

Output:

c
0: 1
1: 2
2: 3
3: 4

Example (DateAdd) ¶

Simple date arithmetic is possible. The function dateAdd(d,m) adds m months to d. The data is: row, newField1, newField2, newField3, date 0,row0,.1,.2, 3/1/2023 2,row2,2.1,3.2, 4/1/2023 3,row3,3.1,4.2, 5/1/2023 4,row4,4.1,5.2, 6/1/2023 1,row1,1.1,2.2, 7/1/2023 100,row100,4100.0,5200.0, 8/1/2020

Verbose = false
var (
	outPipe Pipeline
	err     error
)

data := os.Getenv("data")
pipe, e := CSVToPipe(data+"/pipeTest2.csv", nil, false)
if e != nil {
	panic(e)
}

root := &OpNode{Expression: "dateAdd(date,row)"}

if err = Expr2Tree(root); err != nil {
	panic(err)
}

if err = Evaluate(root, pipe); err != nil {
	panic(err)
}

if outPipe, err = AddToPipe(root, "nextMonth", pipe); err != nil {
	panic(err)
}

fmt.Println("date + row months")

raw, e := outPipe.GData().GetRaw("nextMonth")
if e != nil {
	panic(e)
}

fmt.Println(raw.Data)

Output:

date + row months
[2023-03-01 00:00:00 +0000 UTC 2023-06-01 00:00:00 +0000 UTC 2023-08-01 00:00:00 +0000 UTC 2023-10-01 00:00:00 +0000 UTC 2023-08-01 00:00:00 +0000 UTC 2028-12-01 00:00:00 +0000 UTC]

Example (If) ¶

var (
	outPipe Pipeline
	err     error
)

Verbose = false

data := os.Getenv("data")
pipe, e := CSVToPipe(data+"/pipeTest2.csv", nil, false)
if e != nil {
	panic(e)
}

root := &OpNode{Expression: "if(date=='3/1/2023',1,0)"}

if err = Expr2Tree(root); err != nil {
	panic(err)
}

if err = Evaluate(root, pipe); err != nil {
	panic(err)
}

if outPipe, err = AddToPipe(root, "march2023", pipe); err != nil {
	panic(err)
}

fmt.Println(pipe.Get("march2023").Data.([]float64))
root = &OpNode{Expression: "if(date>'3/1/2023',1,0)"}

if err = Expr2Tree(root); err != nil {
	panic(err)
}

if err = Evaluate(root, pipe); err != nil {
	panic(err)
}

if outPipe, err = AddToPipe(root, "afterMarch2023", pipe); err != nil {
	panic(err)
}
fmt.Println(outPipe.Get("afterMarch2023").Data.([]float64))

Output:

[1 0 0 0 0 0]
[0 1 1 1 1 0]

func Expr2Tree ¶ added in v0.1.0

func Expr2Tree(curNode *OpNode) error

Expr2Tree builds the OpNode tree that is a binary tree representation of an expression. The process to add a field to a Pipeline is:

Create the *OpNode tree using Expr2Tree to evaluate the expression
Populate the values from a Pipeline using Evaluate.
Add the values to the Pipeline using AddToPipe.

Note, you can access the values after Evaluate without adding the field to the Pipeline from the Raw field of the root node.

The expression can include:

arithmetic operators: +, -, *, /
exponentation: ^
functions
logicals: &&, ||. These evaluate to 0 or 1.
if statements: if(condition, value if true, value if false). The true value is applied if the condition evaluates to a positive value.
parentheses

func GetNode ¶

func GetNode(ns G.Nodes, name string) *G.Node

GetNode returns a node by name from a G.Nodes

func KS ¶

func KS(xy *XY, plt *utilities.PlotDef) (ks float64, notTarget *Desc, target *Desc, err error)

KS finds the KS of a softmax model that is reduced to a binary outcome.

xy        XY struct where x is fitted value and y is the binary observed value
plt       PlotDef plot options.  If plt is nil, no plot is produced.

The ks statistic is returned as are Desc descriptions of the model for the two groups. Returns

ks          KS statistic
notTarget  Desc struct of fitted values of the non-target outcomes
target     Desc struct of fitted values of target outcomes

Target: html plot file and/or plot in browser.

func LeakyReluAct ¶

func LeakyReluAct(n *G.Node, alpha float64) *G.Node

LeakyReluAct is leaky relu activation

func LinearAct ¶

func LinearAct(n *G.Node) *G.Node

LinearAct is a no-op. It is the default ModSpec default activation.

func Loop ¶ added in v0.1.2

func Loop(loopVar string, start, end int, inner []*OpNode, assign []string, pipe Pipeline) error

Loop enables looping in parse. The ops in inner are run for each iteration.

inner - is a slice of *OpNode expressions to run in the loop and then assign to "assign" in the pipeline
loopVar - the name of the loop variable. This may be used in the "inner" expressions. It is not added to the pipeline.
loopVar takes on values from start to end.

Example ¶

In this example, we calculate four expressions and return them to the pipeline. The field c is added to itself each iteration. The field r stores the loop field. On return, it will have the last value of the loop.

The fields are evaulated in order, starting with the 0 element of inner.

Verbose = false

// builds a Pipline with two fields:
//    c = 1,2,3,4
//    D = 5,-5,3,6
pipe := buildPipe([]string{"1,2,3,4", "5,-5,3,6"}, []string{"f", "f"})
// we'll add two fields to the pipeline: the sum=c+d and max=max(c,d)

// start by parsing the expressions.
field1, result1 := &OpNode{Expression: "c+c"}, "c"
field2, result2 := &OpNode{Expression: "indx"}, "r" // indx will be the name of the looping field.
field3, result3 := &OpNode{Expression: "D*c"}, "s"
field4, result4 := &OpNode{Expression: "s-1"}, "t"

inner := []*OpNode{field1, field2, field3, field4}
assign := []string{result1, result2, result3, result4}

for ind := 0; ind < len(assign); ind++ {
	if e := Expr2Tree(inner[ind]); e != nil {
		panic(e)
	}
}

if e := Loop("indx", 1, 3, inner, assign, pipe); e != nil {
	panic(e)
}

for ind := 0; ind < len(assign); ind++ {
	fmt.Println(assign[ind])
	fmt.Println(pipe.Get(assign[ind]).Data.([]float64))
}

Output:

c
[4 8 12 16]
r
[2 2 2 2]
s
[20 -40 36 96]
t
[19 -41 35 95]

func Marginal ¶

func Marginal(nnFile string, feat string, target []int, pipe Pipeline, pd *utilities.PlotDef, obsFtype *FType) error

Marginal produces a set of plots to aid in understanding the effect of a feature. The plot takes the model output and creates six segments based on the quantiles of the model output: (<.1, .1-.25, .25-.5, .5-.75, .75-.9, .9-1).

For each segment, the feature being analyzed various across its range within the quartile (continuous) its values (discrete). The bottom row shows the distribution of the feature within the quartile range.

func PipeToCSV ¶ added in v0.1.6

func PipeToCSV(pipe Pipeline, outFile string, sep, eol, quote rune) error

PipeToCSV saves the pipe as a CSV

func PipeToSQL ¶ added in v0.1.6

func PipeToSQL(pipe Pipeline, table string, after int, conn *chutils.Connect) error

PipeToSQL creates "table" and saves the pipe data to it.

func R2 ¶ added in v0.0.30

func R2(y, yhat []float64) float64

R2 returns the model r-square. Returns -1 if an error.

func RMS ¶

func RMS(model *NNModel) (cost *G.Node)

RMS cost function

func ReluAct ¶

func ReluAct(n *G.Node) *G.Node

ReluAct is relu activation

func SegPlot ¶ added in v0.0.29

func SegPlot(pipe Pipeline, obs, fit, seg string, plt *utilities.PlotDef, minVal, maxVal *float64) error

SegPlot generates a decile plot of the fields y and fit in pipe. The segments are based on the values of the field seg. If seg is continuous, the segments are based on quantiles: 0-.1, .1-.25, .25-.5, .5-.75, .9-1

	obs       observed field (y-axis) name
	fit       fitted field (x-axis) name
    seg       segmenting field name
	plt       PlotDef plot options.  If plt is nil an error is generated.

func SigmoidAct ¶

func SigmoidAct(n *G.Node) *G.Node

SigmoidAct is sigmoid activation

func SoftMaxAct ¶

func SoftMaxAct(n *G.Node) *G.Node

SoftMaxAct implements softmax activation functin

func SoftRMS ¶

func SoftRMS(model *NNModel) (cost *G.Node)

func Strip ¶

func Strip(s string) (left, inner string, err error)

Strip is a utility that takes a string of the form "Func(args)" and returns "Func" and "args"

func UnNormalize ¶ added in v0.0.29

func UnNormalize(vals []float64, ft *FType) (unNorm []float64)

UnNormalize un-normalizes a slice, if need be

func Unique ¶

func Unique(xs []any) []any

Unique returns a slice of the unique values of xs

func Wrapper ¶

func Wrapper(e error, text string) error

Types ¶

type Activation ¶

type Activation int

Activation types

const (
	Linear Activation = 0 + iota
	Relu
	LeakyRelu
	Sigmoid
	SoftMax
)

func StrAct ¶

func StrAct(s string) (*Activation, float64)

StrAct takes a string and returns corresponding Activation and any parameter. Nil if fails.

func (Activation) String ¶

func (i Activation) String() string

type Args ¶

type Args map[string]string

Args map holds layer arguments in key/val style

func MakeArgs ¶

func MakeArgs(s string) (keyval Args, err error)

MakeArgs takes an argument string of the form "arg1:val1, arg2:val2, ...." and returns entries in key/val format

func (Args) Get ¶

func (kv Args) Get(key string, kind reflect.Kind) (val any)

Get returns a val from Args coercing to type kind. Nil if fails.

type ChData ¶

type ChData struct {
	// contains filtered or unexported fields
}

ChData provides a Pipeline interface into text files (delimited, fixed length) and ClickHouse.

func NewChData ¶

func NewChData(name string, opts ...Opts) *ChData

func (*ChData) AppendRows ¶ added in v0.2.8

func (ch *ChData) AppendRows(gd *GData, fTypes FTypes) (pipeOut Pipeline, err error)

AppendRows appends rows to the existing GData and then re-initializes each GDatum, using the fTypes, if provided.

func (*ChData) AppendRowsRaw ¶ added in v0.2.8

func (ch *ChData) AppendRowsRaw(gd *GData) error

AppendRowsRaw simply appends rows, in place, to the existing GData. Only the *Raw data is updated. The .Data field is set to nil.

func (*ChData) Batch ¶

func (ch *ChData) Batch(inputs G.Nodes) bool

Batch loads a batch into Inputs. It returns false if the epoch is done. If cycle is true, it will start at the beginning on the next call. If cycle is false, it will call Init() at the next call to Batch()

Example ¶

dataPath := os.Getenv("data") // path to data directory
fileName := dataPath + "/test1.csv"
f, e := os.Open(fileName)

if e != nil {
	panic(e)
}
// set up chutils file reader
rdr := file.NewReader(fileName, ',', '\n', 0, 0, 1, 0, f, 0)
e = rdr.Init("", chutils.MergeTree)

if e != nil {
	panic(e)
}

// determine data types
e = rdr.TableSpec().Impute(rdr, 0, .99)

if e != nil {
	panic(e)
}

bSize := 100
ch := NewChData("Test ch Pipeline",
	WithBatchSize(bSize),
	WithReader(rdr),
	WithNormalized("x1"))
// create a graph & node to illustrate Batch()
g := G.NewGraph()
node := G.NewTensor(g, G.Float64, 2, G.WithName("x1"), G.WithShape(bSize, 1), G.WithInit(G.Zeroes()))

var sumX = 0.0
n := 0
// run through batchs and verify counts and mean of x1 is zero
for ch.Batch(G.Nodes{node}) {
	n += bSize
	x := node.Value().Data().([]float64)
	for _, xv := range x {
		sumX += xv
	}
}

mean := sumX / float64(n)

fmt.Printf("mean of x1: %0.2f", math.Abs(mean))

Output:

rows read:  8500
mean of x1: 0.00

Example (Example2) ¶

// We can normalize fields by values we supply rather than the values in the epoch.
dataPath := os.Getenv("data") // path to data directory
fileName := dataPath + "/test1.csv"
f, e := os.Open(fileName)

if e != nil {
	panic(e)
}

// set up chutils file reader
rdr := file.NewReader(fileName, ',', '\n', 0, 0, 1, 0, f, 0)
e = rdr.Init("", chutils.MergeTree)

if e != nil {
	panic(e)
}

// determine data types
e = rdr.TableSpec().Impute(rdr, 0, .99)

if e != nil {
	panic(e)
}

bSize := 100
// Let's normalize x1 with location=41 and scale=1
ft := &FType{
	Name:       "x1",
	Role:       0,
	Cats:       0,
	EmbCols:    0,
	Normalized: true,
	From:       "",
	FP:         &FParam{Location: 40, Scale: 1},
}
ch := NewChData("Test ch Pipeline",
	WithBatchSize(bSize),
	WithReader(rdr))

WithFtypes(FTypes{ft})(ch)

// create a graph & node to illustrate Batch()
g := G.NewGraph()
node := G.NewTensor(g, G.Float64, 2, G.WithName("x1"), G.WithShape(bSize, 1), G.WithInit(G.Zeroes()))

sumX := 0.0
n := 0
// run through batchs and verify counts and mean of x1 is zero
for ch.Batch(G.Nodes{node}) {
	n += bSize
	x := node.Value().Data().([]float64)
	for _, xv := range x {
		sumX += xv
	}
}

mean := sumX / float64(n)

fmt.Printf("mean of x1: %0.2f", math.Abs(mean))

Output:

rows read:  8500
mean of x1: 39.50

func (*ChData) BatchSize ¶

func (ch *ChData) BatchSize() int

BatchSize returns Pipeline batch size. Use WithBatchSize to set this.

func (*ChData) Cols ¶

func (ch *ChData) Cols(field string) int

Cols returns the # of columns in the field

func (*ChData) Describe ¶

func (ch *ChData) Describe(field string, topK int) string

Describe describes a field. If the field has role FRCat, the top k values (by frequency) are returned.

func (*ChData) Drop ¶ added in v0.2.7

func (ch *ChData) Drop(field string) error

Drop drops the listed field from the pipeline

func (*ChData) Epoch ¶

func (ch *ChData) Epoch(setTo int) int

Epoch sets the epoch to setTo if setTo >=0. Returns epoch #.

func (*ChData) FieldCount ¶ added in v0.2.7

func (ch *ChData) FieldCount() int

FieldCount returns the number of fields in the pipeline

func (*ChData) FieldList ¶

func (ch *ChData) FieldList() []string

FieldList returns a slice of field names in the Pipeline

func (*ChData) GData ¶

func (ch *ChData) GData() *GData

GData returns the Pipelines' GData

func (*ChData) Get ¶

func (ch *ChData) Get(field string) *GDatum

Get returns a fields's GDatum

func (*ChData) GetFType ¶

func (ch *ChData) GetFType(field string) *FType

GetFType returns the field's FType

func (*ChData) GetFTypes ¶

func (ch *ChData) GetFTypes() FTypes

GetFTypes returns FTypes for ch Pipeline.

func (*ChData) GetKeepRaw ¶ added in v0.2.0

func (ch *ChData) GetKeepRaw() bool

GetKeepRaw returns true if *Raw data is retained

func (*ChData) Init ¶

func (ch *ChData) Init() (err error)

Init initializes the Pipeline.

Example ¶

dataPath := os.Getenv("data") // path to data directory
fileName := dataPath + "/test1.csv"
f, e := os.Open(fileName)

if e != nil {
	panic(e)
}

// set up chutils file reader
rdr := file.NewReader(fileName, ',', '\n', 0, 0, 1, 0, f, 0)
e = rdr.Init("", chutils.MergeTree)
if e != nil {
	panic(e)
}

// determine data types
e = rdr.TableSpec().Impute(rdr, 0, .99)

if e != nil {
	panic(e)
}

bSize := 100
ch := NewChData("Test ch Pipeline", WithBatchSize(bSize),
	WithReader(rdr), WithCycle(true),
	WithCats("y", "y1", "y2", "x4"),
	WithOneHot("yoh", "y"),
	WithOneHot("y1oh", "y1"),
	WithOneHot("x4oh", "x4"),
	WithNormalized("x1", "x2", "x3"),
	WithOneHot("y2oh", "y2"))
// initialize pipeline
e = ch.Init()

if e != nil {
	panic(e)
}

Output:

rows read:  8500

func (*ChData) InitOld ¶ added in v0.2.41

func (ch *ChData) InitOld() (err error)

Init initializes the Pipeline.

func (*ChData) IsCat ¶

func (ch *ChData) IsCat(field string) bool

IsCat returns true if field has role FRCat.

func (*ChData) IsCts ¶

func (ch *ChData) IsCts(field string) bool

IsCts returns true if the field has role FRCts.

func (*ChData) IsNormalized ¶

func (ch *ChData) IsNormalized(field string) bool

IsNormalized returns true if the field is normalized.

func (*ChData) IsSorted ¶

func (ch *ChData) IsSorted() bool

IsSorted returns true if the data has been sorted.

func (*ChData) Join ¶ added in v0.2.9

func (ch *ChData) Join(right Pipeline, onField string, joinType JoinType) (result Pipeline, err error)

func (*ChData) Keep ¶ added in v0.2.7

func (ch *ChData) Keep(fields []string) error

Keep keeps only the listed fields in the pipeline

func (*ChData) Name ¶

func (ch *ChData) Name() string

Name returns Pipeline name

func (*ChData) ReInit ¶ added in v0.2.8

func (ch *ChData) ReInit(ftypes *FTypes) (pipeOut Pipeline, err error)

ReInit re-initializes the Data field from Raw for each GDatum. If ftypes is not nil, these values are used, otherwise the FParam values are re-derived from the data. A new pipeline is returned.

func (*ChData) Row ¶ added in v0.2.7

func (ch *ChData) Row(take int) (newPipe Pipeline, err error)

Row creates a new pipeline with only the row, take

func (*ChData) Rows ¶

func (ch *ChData) Rows() int

Rows is # of rows of data in the Pipeline

func (*ChData) SaveFTypes ¶

func (ch *ChData) SaveFTypes(fileName string) error

SaveFTypes saves the FTypes for the Pipeline.

Example ¶

// Field Types (FTypes) can be saved once they're created.  This preserves key information like
//  - The field role
//  - Location and Scale used in normalization
//  - Mapping of discrete fields
//  - Construction of one-hot fields
dataPath := os.Getenv("data") // path to data directory
fileName := dataPath + "/test1.csv"
f, e := os.Open(fileName)

if e != nil {
	panic(e)
}

// set up chutils file reader
rdr := file.NewReader(fileName, ',', '\n', 0, 0, 1, 0, f, 0)
e = rdr.Init("", chutils.MergeTree)

if e != nil {
	panic(e)
}

// determine data types
e = rdr.TableSpec().Impute(rdr, 0, .99)

if e != nil {
	panic(e)
}

bSize := 100
ch := NewChData("Test ch Pipeline", WithBatchSize(bSize),
	WithReader(rdr), WithCycle(true),
	WithCats("y", "y1", "y2", "x4"),
	WithOneHot("yoh", "y"),
	WithOneHot("y1oh", "y1"),
	WithOneHot("x4oh", "x4"),
	WithNormalized("x1", "x2", "x3"),
	WithOneHot("y2oh", "y2"))
// initialize pipeline
e = ch.Init()

if e != nil {
	panic(e)
}

outFile := os.TempDir() + "/seafan.json"

if e = ch.SaveFTypes(outFile); e != nil {
	panic(e)
}

saveFTypes, e := LoadFTypes(outFile)

if e != nil {
	panic(e)
}

ch1 := NewChData("Saved FTypes", WithReader(rdr), WithBatchSize(bSize),
	WithFtypes(saveFTypes))

if e := ch1.Init(); e != nil {
	panic(e)
}

fmt.Printf("Role of field y1oh: %s", ch.GetFType("y1oh").Role)

Output:

rows read:  8500
rows read:  8500
Role of field y1oh: FROneHot

func (*ChData) Shuffle ¶

func (ch *ChData) Shuffle()

Shuffle shuffles the data

func (*ChData) Slice ¶

func (ch *ChData) Slice(sl Slicer) (Pipeline, error)

Slice returns a VecData Pipeline sliced according to sl

func (*ChData) Sort ¶

func (ch *ChData) Sort(field string, ascending bool) error

Sort sorts the data

func (*ChData) SortField ¶

func (ch *ChData) SortField() string

SortField returns the field the data is sorted on.

func (*ChData) String ¶

func (ch *ChData) String() string

func (*ChData) Subset ¶ added in v0.2.7

func (ch *ChData) Subset(rows []int) (newPipe Pipeline, err error)

Subset creates a new pipeline with only the rows, rows

func (*ChData) Where ¶ added in v0.2.7

func (ch *ChData) Where(field string, equalTo []any) (newPipe Pipeline, err error)

Where creates a new pipeline with rows where field is in equalTo. The comparison uses the *Raw data.

type CostFunc ¶

type CostFunc func(model *NNModel) *G.Node

CostFunc function prototype for cost functions

type DOLayer ¶

type DOLayer struct {
	//	position int     // insert dropout after layer AfterLayer
	DropProb float64 // dropout probability
}

DOLayer specifies a dropout layer. It occurs in the graph after dense layer AfterLayer (the input layer is layer 0).

func DropOutParse ¶

func DropOutParse(s string) (*DOLayer, error)

DropOutParse parses the arguments to a drop out layer

type Desc ¶

type Desc struct {
	Name string    // Name is the name of feature we are describing
	N    int       // N is the number of observations
	U    []float64 // U is the slice of locations at which to find the quantile
	Q    []float64 // Q is the slice of empirical quantiles
	Mean float64   // Mean is the average of the data
	Std  float64   // standard deviation
}

Desc contains descriptive information of a float64 slice

func Assess ¶

func Assess(xy *XY, cutoff float64) (n int, precision, recall, accuracy float64, obs, fit *Desc, err error)

Assess returns a selection of statistics of the fit

func NewDesc ¶

func NewDesc(u []float64, name string) (*Desc, error)

NewDesc creates a pointer to a new Desc struct instance with error checking.

u is a slice of values at which to find quantiles. If nil, a standard set is used.
name is the name of the feature (for printing)(

func (*Desc) Populate ¶

func (d *Desc) Populate(x []float64, noSort bool, sl Slicer)

Populate calculates the descriptive statistics based on x. The slice is not sorted if noSort

func (*Desc) String ¶

func (d *Desc) String() string

type FCLayer ¶

type FCLayer struct {
	Size    int
	Bias    bool
	Act     Activation
	ActParm float64
}

FCLayer has details of a fully connected layer

func FCParse ¶

func FCParse(s string) (fc *FCLayer, err error)

FCParse parses the arguments to an FC layer

type FParam ¶

type FParam struct {
	Location float64 `json:"location"` // location parameter for *Cts
	Scale    float64 `json:"scale"`    // scale parameter for *Cts
	Default  any     `json:"default"`  // default level for *Dscrt
	Lvl      Levels  `json:"lvl"`      // map of values to int32 category for *Dscrt
}

FParam -- field parameters -- is summary data about a field. These values may not be derived from the current data but are applied to the current data.

type FRole ¶

type FRole int

FRole is the role a feature plays

const (
	FRCts FRole = 0 + iota
	FRCat
	FROneHot
	FREmbed
	FREither
)

func (FRole) String ¶

func (i FRole) String() string

type FType ¶

type FType struct {
	Name       string
	Role       FRole
	Cats       int
	EmbCols    int
	Normalized bool
	From       string
	FP         *FParam
}

FType represents a single field. It holds key information about the feature: its role, dimensions, summary info.

func (*FType) String ¶

func (ft *FType) String() string

type FTypes ¶

type FTypes []*FType

func LoadFTypes ¶

func LoadFTypes(fileName string) (fts FTypes, err error)

LoadFTypes loads a file created by the FTypes Save method

func (FTypes) DropFields ¶

func (fts FTypes) DropFields(dropFields ...string) FTypes

DropFields will drop fields from the FTypes

func (FTypes) Get ¶

func (fts FTypes) Get(name string) *FType

Get returns the *FType of name

func (FTypes) Save ¶

func (fts FTypes) Save(fileName string) (err error)

Save saves FTypes to a json file--fileName

type Fit ¶

type Fit struct {
	// contains filtered or unexported fields
}

Fit struct for fitting a NNModel

func NewFit ¶

func NewFit(nn *NNModel, epochs int, p Pipeline, opts ...FitOpts) *Fit

NewFit creates a new *Fit.

func (*Fit) BestEpoch ¶

func (ft *Fit) BestEpoch() int

BestEpoch returns the epoch of the best cost (validation or in-sample--whichever is specified)

func (*Fit) Do ¶

func (ft *Fit) Do() (err error)

Do is the fitting loop. Upon completion ft.nn will have the best model.

Example ¶

Verbose = false
bSize := 100
// generate a Pipeline of type *ChData that reads test.csv in the data directory
pipe := chPipe(bSize, "test1.csv")
// generate model: target and features.  Target yoh is one-hot with 2 levels
mod := ModSpec{
	"Input(x1+x2+x3+x4)",
	"FC(size:3, activation:relu)",
	"DropOut(.1)",
	"FC(size:2, activation:softmax)",
	"Target(yoh)",
}
// model is straight-forward with no hidden layers or dropouts.
nn, e := NewNNModel(mod, pipe, true, WithCostFn(CrossEntropy))

if e != nil {
	panic(e)
}

epochs := 150
ft := NewFit(nn, epochs, pipe)
e = ft.Do()

if e != nil {
	panic(e)
}
// Plot the in-sample cost in a browser (default: firefox)
e = ft.InCosts().Plot(&utilities.PlotDef{Title: "In-Sample Cost Curve", Height: 1200, Width: 1200,
	Show: true, XTitle: "epoch", YTitle: "Cost"}, true)

if e != nil {
	panic(e)
}

Output:

Example (Example2) ¶

// This example demonstrates how to use a validation sample for early stopping
Verbose = false
bSize := 100
// generate a Pipeline of type *ChData that reads test.csv in the data directory
mPipe := chPipe(bSize, "test1.csv")
vPipe := chPipe(1000, "testVal.csv")

// generate model: target and features.  Target yoh is one-hot with 2 levels
mod := ModSpec{
	"Input(x1+x2+x3+x4)",
	"FC(size:3, activation:relu)",
	"DropOut(.1)",
	"FC(size:2, activation:softmax)",
	"Target(yoh)",
}
nn, e := NewNNModel(mod, mPipe, true, WithCostFn(CrossEntropy))

if e != nil {
	panic(e)
}

epochs := 150
ft := NewFit(nn, epochs, mPipe)
WithValidation(vPipe, 10)(ft)
e = ft.Do()

if e != nil {
	panic(e)
}
// Plot the in-sample cost in a browser (default: firefox)
e = ft.InCosts().Plot(&utilities.PlotDef{Title: "In-Sample Cost Curve", Height: 1200, Width: 1200,
	Show: true, XTitle: "epoch", YTitle: "Cost"}, true)

if e != nil {
	panic(e)
}

e = ft.OutCosts().Plot(&utilities.PlotDef{Title: "Validation Sample Cost Curve", Height: 1200, Width: 1200,
	Show: true, XTitle: "epoch", YTitle: "Cost"}, true)

if e != nil {
	panic(e)
}

Output:

func (*Fit) InCosts ¶

func (ft *Fit) InCosts() *XY

InCosts returns XY: X=epoch, Y=In-sample cost

func (*Fit) NNModel ¶ added in v0.0.24

func (ft *Fit) NNModel() *NNModel

NNModel returns model

func (*Fit) OutCosts ¶

func (ft *Fit) OutCosts() *XY

OutCosts returns XY: X=epoch, Y=validation cost

func (*Fit) OutFile ¶

func (ft *Fit) OutFile() string

OutFile returns the output file name

type FitOpts ¶

type FitOpts func(*Fit)

FitOpts functions add options

func WithL2Reg ¶

func WithL2Reg(penalty float64) FitOpts

WithL2Reg adds L2 regularization

func WithLearnRate ¶

func WithLearnRate(lrStart, lrEnd float64) FitOpts

WithLearnRate sets a learning rate function that declines linearly across the epochs.

func WithOutFile ¶

func WithOutFile(fileName string) FitOpts

WithOutFile specifies the file root name to save the best model.

func WithShuffle ¶

func WithShuffle(interval int) FitOpts

WithShuffle shuffles after interval epochs Default is 0 (don't shuffle ever)

func WithValidation ¶

func WithValidation(p Pipeline, wait int) FitOpts

WithValidation adds a validation Pipeline for early stopping. The fit is stopped when the validation cost does not improve for wait epochs.

type FuncSpec ¶ added in v0.2.0

type FuncSpec struct {
	Name   string         // The name of the function/operation.
	Return reflect.Kind   // The type of the return.  This will either be float64 or any.
	Args   []reflect.Kind // The types of the inputs to the function.
	Level  rune           // 'S' if the function is summary-level (1 element) or 'R' if it is row-level.
}

FuncSpec stores the details about a function call.

type GData ¶

type GData struct {
	// contains filtered or unexported fields
}

func NewGData ¶

func NewGData() *GData

NewGData returns a new instance of GData

func (*GData) AddRaw ¶ added in v0.2.9

func (gd *GData) AddRaw(data [][]any, fields []string, fts FTypes, keepRaw bool) error

AddRaw adds a number of fields in []any format to *GData. The fts are only used to determine the Role.

func (*GData) AppendC ¶

func (gd *GData) AppendC(raw *Raw, name string, normalize bool, fp *FParam, keepRaw bool) error

AppendC appends a continuous feature

func (*GData) AppendD ¶

func (gd *GData) AppendD(raw *Raw, name string, fp *FParam, keepRaw bool) error

AppendD appends a discrete feature

func (*GData) AppendField ¶ added in v0.0.29

func (gd *GData) AppendField(newData *Raw, name string, fRole FRole, keepRaw bool) error

AppendField adds a field to gd

func (*GData) AppendRows ¶ added in v0.2.8

func (gd *GData) AppendRows(gdApp *GData, fTypes FTypes) (gdOut *GData, err error)

AppendRows appends rows to the existing GData and then re-initializes each GDatum, using the fTypes, if provided.

func (*GData) AppendRowsRaw ¶ added in v0.2.8

func (gd *GData) AppendRowsRaw(gdApp *GData) error

AppendRowsRaw simply appends rows, in place, to the existing GData. Only the *Raw data is updated. The .Data field is set to nil.

func (*GData) Back2Raw ¶ added in v0.1.7

func (gd *GData) Back2Raw() (rawData []*Raw, nCol int, fields []string, err error)

Back2Raw converts the entire GData back to its raw state

func (*GData) Close ¶ added in v0.0.27

func (gd *GData) Close() error

func (*GData) Copy ¶ added in v0.2.18

func (gd *GData) Copy() (gdOut *GData, err error)

Copy makes an independent copy of gd

func (*GData) CountLines ¶ added in v0.0.27

func (gd *GData) CountLines() (numLines int, err error)

func (*GData) Drop ¶ added in v0.0.15

func (gd *GData) Drop(field string) error

Drop drops a field from *GData

func (*GData) FieldCount ¶

func (gd *GData) FieldCount() int

FieldCount returns the number of fields in GData

func (*GData) FieldList ¶

func (gd *GData) FieldList() []string

FieldList returns the names of the fields in GData

func (*GData) Get ¶

func (gd *GData) Get(name string) *GDatum

Get returns a single feature from GData

func (*GData) GetData ¶ added in v0.1.7

func (gd *GData) GetData() []*GDatum

GetData returns the slice of *GDatums

func (*GData) GetFType ¶ added in v0.2.9

func (gd *GData) GetFType(field string) *FType

GetFType returns the *FType of field. Returns

func (*GData) GetFTypes ¶ added in v0.2.9

func (gd *GData) GetFTypes() FTypes

GetFTypes returns a slice of *FType corresponding to GData.data

func (*GData) GetRaw ¶

func (gd *GData) GetRaw(field string) (*Raw, error)

GetRaw returns the raw data for the field.

func (*GData) IsSorted ¶

func (gd *GData) IsSorted() bool

IsSorted returns true if GData has been sorted by SortField

func (*GData) Join ¶ added in v0.2.9

func (gd *GData) Join(right *GData, onField string, joinType JoinType) (result *GData, err error)

Join joins two *GData on onField. Both *GData are sorted by onField, though the result may not be in sort order for Outer and Right joins. If a field value is missing, the FType.FParam.Default value is filled in. If that value is nil, the following are used:

int,float : 0
string ""
time.Time: 1/1/1970

The field being joined on must have the same name in both *GData. Fields in the left *GData have priority -- if there are duplicate fields, the field in "right" are omitted.

The resulting *GData has only *Raw fields populated. To populate the .data fields, use ReInit. FROneHot and FREmbed fields are left behind -- they'll need to be recreated after the join.

Example ¶

This example shows how to join two *Gdata structs.

// Build the first GData
gdLeft := NewGData()

field0 := []any{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0}
if e := gdLeft.AppendC(NewRaw(field0, nil), "field0", false, nil, true); e != nil {
	panic(e)
}

field1 := []any{"r", "s", "b", "l", "c", "s", "a"}
if e := gdLeft.AppendD(NewRaw(field1, nil), "field1", nil, true); e != nil {
	panic(e)
}

field2 := []any{"A", "B", "C", "D", "E", "F", "G"}

// value to use for field2 if gdLeft doesn't contribute to an output row
fp := &FParam{Default: "XX"}
if e := gdLeft.AppendD(NewRaw(field2, nil), "field2", fp, true); e != nil {
	panic(e)
}

// Build the second GData
gdRight := NewGData()
field3 := []any{100.0, 200.0, 300.0, 400.0, 500.0}
if e := gdRight.AppendC(NewRaw(field3, nil), "field3", false, nil, true); e != nil {
	panic(e)
}

field1 = []any{"a", "b", "c", "k", "a"}
if e := gdRight.AppendD(NewRaw(field1, nil), "field1", nil, true); e != nil {
	panic(e)
}

// do an outer join on field1
gdJoin, err := gdLeft.Join(gdRight, "field1", Outer)
if err != nil {
	panic(err)
}

for _, fld := range gdJoin.FieldList() {
	x, err := gdJoin.GetRaw(fld)
	if err != nil {
		panic(err)
	}

	fmt.Println(fld)
	fmt.Println(x.Data)
}

Output:

field0
[6 6 2 4 3 0 1 5 0]
field2
[G G C E D A B F XX]
field3
[100 500 200 300 0 0 0 0 400]
field1
[a a b c l r s s k]

func (*GData) Keep ¶ added in v0.2.2

func (gd *GData) Keep(fields []string) error

Keep drops all fields not in "fields"

func (*GData) Len ¶

func (gd *GData) Len() int

func (*GData) Less ¶

func (gd *GData) Less(i, j int) bool

func (*GData) MakeOneHot ¶

func (gd *GData) MakeOneHot(from, name string) error

MakeOneHot creates & appends a one hot feature from a discrete feature

func (*GData) ReInit ¶ added in v0.2.8

func (gd *GData) ReInit(fTypes *FTypes) (gdOut *GData, err error)

ReInit re-initializes the Data field from Raw for each GDatum. If ftypes is not nil, these values are used, otherwise the FParam values are re-derived from the data.

func (*GData) Read ¶ added in v0.0.27

func (gd *GData) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)

Read reads row(s) in the format of chutils. Note: valids are all chutils.Valid. Invoking Read for the first time causes it to recreate the raw data of existing fields -- so the memory requirement will go up.

func (*GData) Reset ¶ added in v0.0.27

func (gd *GData) Reset() error

func (*GData) Row ¶ added in v0.2.7

func (gd *GData) Row(take int) (gdNew *GData, err error)

func (*GData) Rows ¶

func (gd *GData) Rows() int

Rows returns # of obserations in each element of GData

func (*GData) Seek ¶ added in v0.0.27

func (gd *GData) Seek(lineNo int) error

func (*GData) Shuffle ¶

func (gd *GData) Shuffle()

Shuffle shuffles the GData fields as a unit

func (*GData) Slice ¶

func (gd *GData) Slice(sl Slicer) (*GData, error)

Slice creates a new GData sliced according to sl

func (*GData) Sort ¶

func (gd *GData) Sort(field string, ascending bool) error

Sort sorts the GData on field. Calling Sort.Sort directly will cause a panic. Sorting a OneHot or Embedded field sorts on the underlying Categorical field

func (*GData) SortField ¶

func (gd *GData) SortField() string

SortField returns the field the GData is sorted on

func (*GData) Subset ¶ added in v0.2.7

func (gd *GData) Subset(keepRows []int) (gdOut *GData, err error)

Subset subsets the pipeline to the rows in keepRows

func (*GData) Swap ¶

func (gd *GData) Swap(i, j int)

func (*GData) TableSpec ¶ added in v0.0.27

func (gd *GData) TableSpec() *chutils.TableDef

func (*GData) UpdateFts ¶ added in v0.0.10

func (gd *GData) UpdateFts(newFts FTypes) (*GData, error)

UpdateFts produces a new *GData using the given FTypes. The return only has those fields contained in newFts

func (*GData) Where ¶ added in v0.2.7

func (gd *GData) Where(field string, equalTo []any) (gdOut *GData, err error)

type GDatum ¶

type GDatum struct {
	FT      *FType  // FT stores the details of the field: it's role, # categories, mappings
	Summary Summary // Summary of the Data (e.g. distribution)
	Data    any     // Data. This will be either []float64 (FRCts, FROneHot, FREmbed) or []int32 (FRCat)
	Raw     *Raw
}

func (*GDatum) Describe ¶

func (g *GDatum) Describe(topK int) string

Describe returns summary statistics. topK is # of values to return for discrete fields

func (*GDatum) String ¶

func (g *GDatum) String() string

type JoinType ¶ added in v0.2.9

type JoinType int

JoinType is the method to use in joining two GData structs

const (
	Inner JoinType = 0 + iota
	Left
	Right
	Outer
)

func (JoinType) String ¶ added in v0.2.9

func (i JoinType) String() string

type Layer ¶

type Layer int

Layer types

const (
	Input Layer = 0 + iota
	FC
	DropOut
	Target
)

func (Layer) String ¶

func (i Layer) String() string

type Levels ¶

type Levels map[any]int32

Levels is a map from underlying values if a discrete tensor to int32 values

func ByCounts ¶

func ByCounts(data *Raw, sl Slicer) Levels

ByCounts builds a Levels map with the distribution of data

func ByPtr ¶

func ByPtr(data *Raw) Levels

ByPtr returns a mapping of values of data to []int32 for modeling. The values of data are sorted, so the smallest will have a mapped value of 0.

func (Levels) FindValue ¶

func (l Levels) FindValue(val int32) any

FindValue returns key that maps to val

func (Levels) Sort ¶

func (l Levels) Sort(byName, ascend bool) (key []any, val []int32)

Sort sorts Levels, returns sorted map as key, val slices

func (Levels) String ¶ added in v0.2.36

func (l Levels) String() string

func (Levels) TopK ¶

func (l Levels) TopK(topNum int, byName, ascend bool) string

TopK returns the top k values either by name or by counts, ascending or descending

type ModSpec ¶

type ModSpec []string

ModSpec holds layers--each slice element is a layer

func LoadModSpec ¶

func LoadModSpec(fileName string) (ms ModSpec, err error)

LoadModSpec loads a ModSpec from file

func (ModSpec) Check ¶

func (m ModSpec) Check() error

Check checks that the layer name is valid

func (ModSpec) DropOut ¶

func (m ModSpec) DropOut(loc int) *DOLayer

DropOut returns the *DoLayer for layer i, if it is of type DropOut. Returns nil o.w.

func (ModSpec) FC ¶

func (m ModSpec) FC(loc int) *FCLayer

FC returns the *FCLayer for layer i, if it is of type FC. Returns nil o.w.

func (ModSpec) Inputs ¶

func (m ModSpec) Inputs(p Pipeline) (FTypes, error)

Inputs returns the FTypes of the input features

func (ModSpec) LType ¶

func (m ModSpec) LType(i int) (*Layer, error)

LType returns the layer type of layer i

func (ModSpec) Save ¶

func (m ModSpec) Save(fileName string) (err error)

Save ModSpec

func (ModSpec) Target ¶

func (m ModSpec) Target(p Pipeline) (*FType, error)

Target returns the *FType of the target

func (ModSpec) TargetName ¶ added in v0.0.29

func (m ModSpec) TargetName() string

type NNModel ¶

type NNModel struct {
	// contains filtered or unexported fields
}

NNModel structure

func LoadNN ¶

func LoadNN(fileRoot string, p Pipeline, build bool) (nn *NNModel, err error)

LoadNN restores a previously saved NNModel. fileRoot is the root name of the save file. p is the Pipeline with the field specs. if build is true, DropOut layers are included.

func NewNNModel ¶

func NewNNModel(modSpec ModSpec, pipe Pipeline, build bool, nnOpts ...NNOpts) (*NNModel, error)

NewNNModel creates a new NN model. Specs for fields in modSpec are pulled from pipe. if build is true, DropOut layers are included.

func PredictNN ¶

func PredictNN(fileRoot string, pipe Pipeline, build bool, opts ...NNOpts) (nn *NNModel, err error)

PredictNN reads in a NNModel from a file and populates it with a batch from p. Methods such as FitSlice and ObsSlice are immediately available.

Example ¶

// This example demonstrates fitting a regression model and predicting on new data
Verbose = false
bSize := 100
// generate a Pipeline of type *ChData that reads test.csv in the data directory
mPipe := chPipe(bSize, "test1.csv")
vPipe := chPipe(1000, "testVal.csv")

// This model is OLS
mod := ModSpec{
	"Input(x1+x2+x3+x4)",
	"FC(size:1)",
	"Target(ycts)",
}
// model is straight-forward with no hidden layers or dropouts.
nn, e := NewNNModel(mod, mPipe, true, WithCostFn(RMS))

if e != nil {
	panic(e)
}

epochs := 150
ft := NewFit(nn, epochs, mPipe)
e = ft.Do()

if e != nil {
	panic(e)
}

sf := os.TempDir() + "/nnTest"
e = nn.Save(sf)

if e != nil {
	panic(e)
}

pred, e := PredictNN(sf, vPipe, false)

if e != nil {
	panic(e)
}

fmt.Printf("out-of-sample correlation: %0.2f\n", stat.Correlation(pred.FitSlice(), pred.ObsSlice(), nil))

_ = os.Remove(sf + "P.nn")

if e != nil {
	panic(e)
}

_ = os.Remove(sf + "S.nn")

Output:

out-of-sample correlation: 0.84

func PredictNNwFts ¶ added in v0.0.11

func PredictNNwFts(fileRoot string, pipe Pipeline, build bool, fts FTypes, opts ...NNOpts) (nn *NNModel, err error)

PredictNNwFts creates a new Pipeline that updates the input pipe to have the FTypes specified by fts. For instance, if one has normalized a continuous input, the normalization factor used in the NN must be the same as its build values. One should save the FTypes from the model build pass them here.

func (*NNModel) Cost ¶

func (m *NNModel) Cost() *G.Node

Cost returns cost node

func (*NNModel) CostFlt ¶

func (m *NNModel) CostFlt() float64

CostFlt returns the value of the cost node

func (*NNModel) CostFn ¶

func (m *NNModel) CostFn() CostFunc

CostFn returns cost function

func (*NNModel) Features ¶

func (m *NNModel) Features() G.Nodes

Features returns the model input features (continuous+embedded)

func (*NNModel) FitSlice ¶

func (m *NNModel) FitSlice() []float64

FitSlice returns fitted values as a slice

func (*NNModel) Fitted ¶

func (m *NNModel) Fitted() G.Result

Fitted returns fitted values as a G.Result

func (*NNModel) Fwd ¶

func (m *NNModel) Fwd()

Fwd builds forward pass

func (*NNModel) G ¶

func (m *NNModel) G() *G.ExprGraph

G returns model graph

func (*NNModel) InputFT ¶

func (m *NNModel) InputFT() FTypes

func (*NNModel) Inputs ¶

func (m *NNModel) Inputs() G.Nodes

Inputs returns input (continuous+embedded+observed) Inputs

func (*NNModel) ModSpec ¶ added in v0.0.24

func (m *NNModel) ModSpec() ModSpec

ModSpec returns the ModSpec for the model

func (*NNModel) Name ¶

func (m *NNModel) Name() string

Name returns model name

func (*NNModel) Obs ¶

func (m *NNModel) Obs() *G.Node

Obs returns the target value as a node

func (*NNModel) ObsSlice ¶

func (m *NNModel) ObsSlice() []float64

ObsSlice returns target values as a slice

func (*NNModel) Opts ¶ added in v0.0.24

func (m *NNModel) Opts() []NNOpts

Opts returns user-input With options

func (*NNModel) OutputCols ¶ added in v0.0.9

func (m *NNModel) OutputCols() int

OutputCols returns the number of columns in the output

func (*NNModel) Params ¶

func (m *NNModel) Params() G.Nodes

Params retursn the model parameter nodes (weights, biases, embeddings)

func (*NNModel) Save ¶

func (m *NNModel) Save(fileRoot string) (err error)

Save saves a model to disk. Two files are created: <fileRoot>S.nn for the ModSpec and <fileRoot>P.nn form the parameters.

func (*NNModel) String ¶

func (m *NNModel) String() string

type NNOpts ¶

type NNOpts func(model1 *NNModel)

NNOpts -- NNModel options

func WithCostFn ¶

func WithCostFn(cf CostFunc) NNOpts

WithCostFn adds a cost function

func WithName ¶

func WithName(name string) NNOpts

WithName adds a name to the NNModel

type OpNode ¶ added in v0.1.0

type OpNode struct {
	Expression string    // expression this node implements
	Raw        *Raw      // node Value
	Func       *FuncSpec // details of the function required to evaulate this node.
	Role       FRole     // FRole to use when adding this node to a Pipeline
	Neg        bool      // negate result when populating Value
	Inputs     []*OpNode // Inputs to node calculation
	// contains filtered or unexported fields
}

OpNode is a single node of an expression. The input expression is successively broken into simpler expressions. Each leaf is devoid of expressions--they are only values. Hence, leaves have no Inputs.

operations: If the expression at a node is an operation, it will be broken into two subexpressions. The subexpressions are determined by scanning from the left using the order of precedence (+,-,*,/), respecting parentheses. The two subexpressions create two new nodes in Inputs.

Comparison operations with fields of type FRCat are permitted if the underlying data is type string or date. // Strings and dates are enclosed in a single quote ('). Date formats supported are: CCYYMMDD and MM/DD/CCYY.

Functions: If the expression is a function, each argument is assigned to an Input (in order). Functions have at least one input (argument). Two types of functions are supported: those that operate at the row level and those that operate at the summary level. A row-level function will create a slice that has as many elements as the Pipeline. A summary-level function, such as "mean", will have a single element.

Available row-level functions are:

exp(<expr>)
log(<expr>)
lag(<expr>,<missing>), where <missing> is used for the first element.
abs(<expr>) absolute value
if(<test>, <true>, <false>), where the value <yes> is used if <condition> is greater than 0 and <false> o.w.
row(<expr>) row number in pipeline. Row starts as 0 and is continuous.
countAfter(<expr>), countBefore(<expr>) is the number of rows after (before) the current row.
cumeAfter(<expr>), cumeBefore(<expr>,<missing>) is the cumulative sum of <expr> after (before) the current row (included)
prodAfter(<expr>), prodBefore(<expr>,<missing>) is the cumulative product of <expr> after (before) the current row (included) and <missing> is used for the last (first) element.
index(<expr>,<index>) returns <expr> in the order of <index>
cat(<expr>) converts <expr> to a categorical field. Only applicable to continuous fields.
toDate(<expr>) converts a string field to a date
toString(<expr>) converts <expr> to string
toFloatSP(<expr>) converts <expr> to float32
toFloatDP(<expr>) converts <expr> to float64
toInt(<expr>) converts <expr> to int. Same as cat().
dateAdd(<date>,<months>) adds <months> to the date, <date>
toLastDayOfMonth(<date>) moves the date to the last day of the month
toFirstDayOfMonth(<date>) moves the date to the first day of the month
year(<date>) returns the year
month(<date>) returns the month (1-12)
day(<date>) returns the day of the month (1-lastDayOfMonth)
dateDiff(<data1>,<date2>,unit) returns date1-date2 units can be 'hour', 'day', 'month' or 'year'
nowDate() returns current date
nowTime() returns current time as a string
substr(<string>,<start>,<length>) substring
strPos(<string>,<target>) first position of <target> in <string>. -1 if does not occur.
strCount(<string>,<target>) number of times <target> occurs in <string>
strLen(<string>) length of string
trunc(<expr>) truncate to int
exist(x,y) if x exists, returns x. If x does not exist, returns y.

The values in <...> can be any expression. The functions prodAfter, prodBefore, cumAfter,cumBefore, countAfter, countBefore do NOT include the current row.

Available summary-level functions are:

mean(<expr>)
count(<expr>)
sum(<expr>)
max(<expr>)
min(<expr>)
sse(<y>,<yhat>) returns the sum of squared error of y-yhat
mad(<y>,<yhat>) returns the sum of the absolute value of y-yhat
r2(<y>,<yhat>) returns the r-square of estimating y with yhat
npv(<discount rate>, <cash flows>). Find the NPV of the cash flows at discount rate. If disount rate is a slice, then the ith month's cashflows are discounted for i months at the ith discount rate.
irr(<cost>,<cash flows>). Find the IRR of an initial outlay of <cost> (a positive value!), yielding cash flows (The first cash flow gets discounted one period). irr returns 0 if there's no solution.
print(<expr>,<rows>) print <rows> of the <expr>. If <rows>=0, print entire slice.
printIf(<expr>,<rows>,<cond>) if condition evaluates to a value > 0, execute print(<expr>,<rows>)
histogram(<x>,<color>, <normalization>). Creates a histogram. normalization is one of: percent, count, density
plotLine(<x>,<markerType>, <color>)
plotXY(<x>,<y>,<markerType>, <color>)
setPlotDim(<width>,<height>), <width>, <height> are in pixels
render(<file>,<title>,<x label>,<y label>)
newPlot()

Comparisons

==, !=, >,>=, <, <=

Logical operators are supported:

&& for "and"
|| for "or"

Logical operators resolve to 0 or 1.

func CopyNode ¶ added in v0.1.7

func CopyNode(src *OpNode) (dest *OpNode)

CopyNode copies an *OpNode tree (with no shared addresses)

type Opts ¶

type Opts func(c Pipeline)

Opts function sets an option to a Pipeline

func WithBatchSize ¶

func WithBatchSize(bsize int) Opts

WithBatchSize sets the batch size for the pipeline

func WithCallBack ¶

func WithCallBack(cb Opts) Opts

WithCallBack sets a callback function.

Example ¶

// This example shows how to create a callback during the fitting phase (fit.Do).
// The callback is called at the end of each epoch.  The callback below loads a new dataset after
// epoch 100.

Verbose = false
bSize := 100
// generate a Pipeline of type *ChData that reads test.csv in the data directory
mPipe := chPipe(bSize, "test1.csv")
// This callback function replaces the initial dataset with newData.csv after epoch 2500
cb := func(c Pipeline) {
	switch d := c.(type) {
	case *ChData:
		if d.Epoch(-1) == 100 {
			dataPath := os.Getenv("data") // path to data directory
			fileName := dataPath + "/testVal.csv"
			f, e := os.Open(fileName)
			if e != nil {
				panic(e)
			}
			rdrx := file.NewReader(fileName, ',', '\n', 0, 0, 1, 0, f, 0)
			if e := rdrx.Init("", chutils.MergeTree); e != nil {
				panic(e)
			}
			if e := rdrx.TableSpec().Impute(rdrx, 0, .99); e != nil {
				panic(e)
			}
			rows, _ := rdrx.CountLines()
			fmt.Println("New data at end of epoch ", d.Epoch(-1))
			fmt.Println("Number of rows ", rows)
			WithReader(rdrx)(d)
		}
	}
}

WithCallBack(cb)(mPipe)

// This model is OLS
mod := ModSpec{
	"Input(x1+x2+x3+x4)",
	"FC(size:1)",
	"Target(ycts)",
}
// model is straight-forward with no hidden layers or dropouts.
nn, e := NewNNModel(mod, mPipe, true, WithCostFn(RMS))

if e != nil {
	panic(e)
}

epochs := 150
ft := NewFit(nn, epochs, mPipe)
e = ft.Do()

if e != nil {
	panic(e)
}

Output:

New data at end of epoch  100
Number of rows  1000

func WithCats ¶

func WithCats(names ...string) Opts

WithCats specifies a list of categorical features.

func WithCycle ¶

func WithCycle(cycle bool) Opts

WithCycle sets the cycle bool. If false, the intent is for the Pipeline to generate a new data set is generated for each epoch.

func WithFtypes ¶

func WithFtypes(fts FTypes) Opts

WithFtypes sets the FTypes of the Pipeline. The feature is used to override the default levels.

func WithKeepRaw ¶ added in v0.2.0

func WithKeepRaw(keepRaw bool) Opts

WithKeepRaw sets bool whether to keep the *Raw data in the pipeline.

func WithNormalized ¶

func WithNormalized(names ...string) Opts

WithNormalized sets the features to be normalized.

func WithOneHot ¶

func WithOneHot(name, from string) Opts

WithOneHot adds a one-hot field "name" based of field "from"

Example ¶

// This example shows a model that incorporates a feature (x4) as one-hot and an embedding
Verbose = false
bSize := 100
// generate a Pipeline of type *ChData that reads test.csv in the data directory
pipe := chPipe(bSize, "test1.csv")
// The feature x4 takes on values 0,1,2,...19.  chPipe treats this a continuous feature.
// Let's override that and re-initialize the pipeline.
WithCats("x4")(pipe)
WithOneHot("x4oh", "x4")(pipe)

if e := pipe.Init(); e != nil {
	panic(e)
}
mod := ModSpec{
	"Input(x1+x2+x3+x4oh)",
	"FC(size:2, activation:softmax)",
	"Target(yoh)",
}
//
fmt.Println("x4 as one-hot")
nn, e := NewNNModel(mod, pipe, true)
if e != nil {
	panic(e)
}
fmt.Println(nn)
fmt.Println("x4 as embedding")
mod = ModSpec{
	"Input(x1+x2+x3+E(x4oh,3))",
	"FC(size:2, activation:softmax)",
	"Target(yoh)",
}
nn, e = NewNNModel(mod, pipe, true)
if e != nil {
	panic(e)
}

fmt.Println(nn)

Output:

x4 as one-hot

Inputs
Field x1
	continuous

Field x2
	continuous

Field x3
	continuous

Field x4oh
	one-hot
	derived from feature x4
	length 20

Target
Field yoh
	one-hot
	derived from feature y
	length 2

Model Structure
Input(x1+x2+x3+x4oh)
FC(size:2, activation:softmax)
Target(yoh)

Batch size: 100
24 FC parameters
0 Embedding parameters

x4 as embedding

Inputs
Field x1
	continuous

Field x2
	continuous

Field x3
	continuous

Field x4oh
	embedding
	derived from feature x4
	length 20
	embedding dimension of 3

Target
Field yoh
	one-hot
	derived from feature y
	length 2

Model Structure
Input(x1+x2+x3+E(x4oh,3))
FC(size:2, activation:softmax)
Target(yoh)

Batch size: 100
7 FC parameters
60 Embedding parameters

Example (Example2) ¶

// This example incorporates a drop out layer
Verbose = false
bSize := 100
// generate a Pipeline of type *ChData that reads test.csv in the data directory
pipe := chPipe(bSize, "test1.csv")
// generate model: target and features.  Target yoh is one-hot with 2 levels
mod := ModSpec{
	"Input(x1+x2+x3+x4)",
	"FC(size:3, activation:relu)",
	"DropOut(.1)",
	"FC(size:2, activation:softmax)",
	"Target(yoh)",
}

nn, e := NewNNModel(mod, pipe, true,
	WithCostFn(CrossEntropy),
	WithName("Example With Dropouts"))

if e != nil {
	panic(e)
}
fmt.Println(nn)

Output:

Example With Dropouts
Inputs
Field x1
	continuous

Field x2
	continuous

Field x3
	continuous

Field x4
	continuous

Target
Field yoh
	one-hot
	derived from feature y
	length 2

Model Structure
Input(x1+x2+x3+x4)
FC(size:3, activation:relu)
DropOut(.1)
FC(size:2, activation:softmax)
Target(yoh)

Cost function: CrossEntropy

Batch size: 100
19 FC parameters
0 Embedding parameters

func WithReader ¶

func WithReader(rdr any) Opts

WithReader adds a reader.

type Pipeline ¶

type Pipeline interface {
	Init() error                                                              // initialize the pipeline
	Rows() int                                                                // # of observations in the pipeline (size of the epoch)
	Batch(inputs G.Nodes) bool                                                // puts the next batch in the input nodes
	Epoch(setTo int) int                                                      // manage epoch count
	IsNormalized(field string) bool                                           // true if feature is normalized
	IsCat(field string) bool                                                  // true if feature is one-hot encoded
	Cols(field string) int                                                    // # of columns in the feature
	IsCts(field string) bool                                                  // true if the feature is continuous
	GetFType(field string) *FType                                             // Get FType for the feature
	GetFTypes() FTypes                                                        // Get Ftypes for pipeline
	BatchSize() int                                                           // batch size
	FieldList() []string                                                      // fields available
	FieldCount() int                                                          // number of fields in the pipeline
	GData() *GData                                                            // return underlying GData
	Get(field string) *GDatum                                                 // return data for field
	GetKeepRaw() bool                                                         // returns whether raw data is kept
	Join(right Pipeline, onField string, joinType JoinType) (Pipeline, error) // joins two pipelines
	Slice(sl Slicer) (Pipeline, error)                                        // slice the pipeline
	Shuffle()                                                                 // shuffle data
	Describe(field string, topK int) string                                   // describes a field
	Subset(rows []int) (newPipe Pipeline, err error)                          // subsets pipeline to rows
	Where(field string, equalTo []any) (Pipeline, error)                      // subset pipeline to where field=equalTo
	Keep(fields []string) error                                               // keep on fields in the pipeline
	Drop(field string) error                                                  // drop field from the pipeline
	AppendRows(gd *GData, fTypes FTypes) (Pipeline, error)                    // appends gd to pipeline
	AppendRowsRaw(gd *GData) error                                            // appends gd ONLY to *Raw data
	ReInit(ftypes *FTypes) (Pipeline, error)                                  // reinitialized pipeline from *Raw data
}

The Pipeline interface specifies the methods required to be a data Pipeline. The Pipeline is the middleware between the data and the fitting routines.

func AddToPipe ¶ added in v0.1.0

func AddToPipe(rootNode *OpNode, fieldName string, pipe Pipeline) (outPipe Pipeline, err error)

AddToPipe adds the Value slice in rootNode to pipe. The field will have name fieldName. To do this:

Create the *OpNode tree to evaluate the expression using Expr2Tree
Populate the values from a Pipeline using Evaluate.
Add the values to the Pipeline using AddToPipe

Notes:

AddToPipe can be within a CallBack to populate each new call to the database with the calculated fields.
You can access the values after Evaluate without adding the field to the Pipeline from the Value element of the root node.

Example ¶

We'll add two fields to the pipeline: the sum=c+D and max=max(c,D)

var (
	outPipe Pipeline
	err     error
)

Verbose = false

// builds a Pipline with two fields:
//    c = 1,2
//    D = 3,-4
pipe := buildPipe([]string{"1,2", "3,-4"}, []string{"f", "f"})
// we'll add two fields to the pipeline: the sum=c+d and max=max(c,d)

// start by parsing the expressions.
field1 := &OpNode{Expression: "c+D"}
if e := Expr2Tree(field1); e != nil {
	panic(e)
}

field2 := &OpNode{Expression: "if(c>D,c,D)"}
if e := Expr2Tree(field2); e != nil {
	panic(e)
}
// field1 and field2 nodes now have the structure of the expressions

// evaluate these on pipe
if err = Evaluate(field1, pipe); err != nil {
	panic(err)
}

if err = Evaluate(field2, pipe); err != nil {
	panic(err)
}

// now add them to pipe
if outPipe, err = AddToPipe(field1, "sum", pipe); err != nil {
	panic(err)
}

if outPipe, err = AddToPipe(field2, "max", outPipe); err != nil {
	panic(err)
}

// see what we got
field1Val := outPipe.Get("sum")
fmt.Println(field1Val.Data.([]float64))

field2Val := outPipe.Get("max")
fmt.Println(field2Val.Data.([]float64))

Output:

[4 -2]
[3 2]

func Append ¶ added in v0.2.5

func Append(pipe1, pipe2 Pipeline) (Pipeline, error)

Append appends pipe2 to the bottom of pipe1. pipe2 must have all the fields of pipe1 but may have extra, which are not in the returned pipe

Example ¶

This example shows how to append one pipeline to another

Verbose = false

data := os.Getenv("data")
pipe1, e := CSVToPipe(data+"/pipeTest1.csv", nil, false)
if e != nil {
	panic(e)
}

pipe2, e := CSVToPipe(data+"/pipeTest4.csv", nil, false)
if e != nil {
	panic(e)
}

pipeOut, e := Append(pipe1, pipe2)
if e != nil {
	panic(e)
}

fmt.Println("pipe1 rows: ", pipe1.Rows())
fmt.Println("pipe2 rows: ", pipe2.Rows())
fmt.Println("appended pipe rows: ", pipeOut.Rows())
fmt.Println("# of fields: ", len(pipeOut.FieldList()))
fmt.Println("Field3: ", pipeOut.Get("Field3").Raw.Data)

Output:

pipe1 rows:  7
pipe2 rows:  2
appended pipe rows:  9
# of fields:  3
Field3:  [3 2.2 1.9 10.1 12.99 100 1001.4 -1 -2]

func CSVToPipe ¶ added in v0.1.6

func CSVToPipe(csvFile string, fts FTypes, keepRaw bool) (pipe Pipeline, err error)

CSVToPipe creates a pipe from a CSV file Optional fts specifies the FTypes, usually to match an existing pipeline.

Example ¶

Create a Pipeline from a CSV and force a specific FType. The values of the field "row" are integers: 1,2,3,4,5,6,7 If we just load the CSV, row will be treated as float64 (continuous). The field ft instructs the code to treat it as categorical.

Verbose = false

// row takes on values 1,2,3,...  If we do nothing, the pipe will convert these to float64.
// Specifying the role as FRCat will cause "row" to be treated as categorical.
ft := &FType{
	Name:       "row",
	Role:       FRCat,
	Cats:       0,
	EmbCols:    0,
	Normalized: false,
	From:       "",
	FP:         nil,
}

data := os.Getenv("data") + "/pipeTest1.csv"
pipe, e := CSVToPipe(data, FTypes{ft}, false)
if e != nil {
	panic(e)
}

fmt.Println("# Rows: ", pipe.Rows())
mapped := pipe.Get("row").Data.([]int32)
fmt.Println(mapped)
// categorical values are mapped to int32.
fmt.Println("\nmap for field row:")
rowMap := pipe.GetFType("row").FP.Lvl
// the raw values in pipeTest1.csv run from 1 to 7
for raw := int64(1); raw < int64(len(mapped))+1; raw++ {
	fmt.Printf("raw: %v, mapped: %v\n", raw, rowMap[any(raw)])
}

Output:

# Rows:  7
[0 1 2 3 4 5 6]

map for field row:
raw: 1, mapped: 0
raw: 2, mapped: 1
raw: 3, mapped: 2
raw: 4, mapped: 3
raw: 5, mapped: 4
raw: 6, mapped: 5
raw: 7, mapped: 6

func SQLToPipe ¶ added in v0.1.6

func SQLToPipe(sql string, fts FTypes, keepRaw bool, conn *chutils.Connect) (pipe Pipeline, err error)

SQLToPipe creates a pipe from the query sql Optional fts specifies the FTypes, usually to match an existing pipeline.

func VecFromAny ¶ added in v0.2.6

func VecFromAny(data [][]any, fields []string, ftypes FTypes) (pipe Pipeline, err error)

VecFromAny builds a pipeline for a slice of vectors ([]any). The first dimension is the field.

type Raw ¶

type Raw struct {
	Kind reflect.Kind // type of elements of Data
	Data []any
}

Raw holds a raw slice of type Kind

func AllocRaw ¶

func AllocRaw(n int, kind reflect.Kind) *Raw

AllocRaw creates an empty slice of type kind and len n

func NewRaw ¶

func NewRaw(x []any, sl Slicer) *Raw

NewRaw creates a new raw slice from x. This assumes all elements of x are the same Kind

func NewRawCast ¶

func NewRawCast(x any, sl Slicer) *Raw

func (*Raw) CumeAfter ¶ added in v0.2.0

func (r *Raw) CumeAfter(aggType string) (*Raw, error)

CumeAfter cumulates the data after the current row, for each row.

AggType can take on the following values:
- "sum"  Cumulative sums are taken.
- "product" Cumulative products are taken.
- "count" Counts for rows are taken.

For "sum" and "product", the value "missing" is used for the last row.

func (*Raw) CumeBefore ¶ added in v0.2.0

func (r *Raw) CumeBefore(aggType string) (*Raw, error)

CumeBefore cumulates the data before the current row, for each row.

AggType can take on the following values:
- "sum"  Cumulative sums are taken.
- "product" Cumulative products are taken.
- "count", "row" Counts for rows are taken.

For "sum" and "product", the value "missing" is used for the first row.

func (*Raw) Exp ¶ added in v0.2.0

func (r *Raw) Exp() (*Raw, error)

Exp returns e to the Raw

func (*Raw) Index ¶ added in v0.2.0

func (r *Raw) Index(indices *Raw) (*Raw, error)

Index returns data that is *Raw at the indices "indices"

func (*Raw) IsNumeric ¶ added in v0.2.0

func (r *Raw) IsNumeric() bool

IsNumeric returns true if the underlying type is numeric

func (*Raw) Lag ¶ added in v0.2.0

func (r *Raw) Lag(missing any) (*Raw, error)

Lag returns r lagged by 1. The first element is set to "missing".

func (*Raw) Len ¶

func (r *Raw) Len() int

func (*Raw) Less ¶

func (r *Raw) Less(i, j int) bool

func (*Raw) Log ¶ added in v0.2.0

func (r *Raw) Log() (*Raw, error)

Log takes the natural log of Raw

func (*Raw) Max ¶ added in v0.2.0

func (r *Raw) Max() (*Raw, error)

Max returns max

func (*Raw) Mean ¶ added in v0.2.0

func (r *Raw) Mean() (*Raw, error)

Mean finds the average

func (*Raw) Min ¶ added in v0.2.0

func (r *Raw) Min() (*Raw, error)

Min returns min

func (*Raw) Pow ¶ added in v0.2.0

func (r *Raw) Pow(exponent *Raw) (*Raw, error)

Pow returns Raw^exponent

func (*Raw) Product ¶ added in v0.2.0

func (r *Raw) Product() (*Raw, error)

Product returns the product of the elements

func (*Raw) Std ¶ added in v0.2.0

func (r *Raw) Std() (*Raw, error)

Std finds the sample standard deviation

func (*Raw) Sum ¶ added in v0.2.0

func (r *Raw) Sum() (*Raw, error)

Sum sums elements

func (*Raw) Swap ¶

func (r *Raw) Swap(i, j int)

type SeaError ¶

type SeaError int

const (
	ErrPipe SeaError = 0 + iota
	ErrData
	ErrFields
	ErrGData
	ErrChData
	ErrModSpec
	ErrNNModel
	ErrDiags
	ErrVecData
)

func (SeaError) Error ¶

func (seaErr SeaError) Error() string

type Slice ¶

type Slice struct {
	// contains filtered or unexported fields
}

Slice implements generating Slicer functions for a feature. These are used to slice through the values of a discrete feature. For continuous features, it slices by quartile.

func NewSlice ¶

func NewSlice(feat string, minCnt int, pipe Pipeline, restrict []any) (*Slice, error)

NewSlice makes a new Slice based on feat in Pipeline pipe. minCnt is the minimum # of obs a slice must have to be used. Restrict is a slice of values to restrict Iter to.

func (*Slice) Index ¶

func (s *Slice) Index() int32

Index returns the mapped value of the current value

func (*Slice) Iter ¶

func (s *Slice) Iter() bool

Iter iterates through the levels (ranges) of the feature. Returns false when done.

Example ¶

// An example of slicing through the data to generate diagnostics on subsets.
// The code here will generate a decile plot for each of the 20 levels of x4.
Verbose = false
bSize := 100
// generate a Pipeline of type *ChData that reads test.csv in the data directory
pipe := chPipe(bSize, "test1.csv")
// The feature x4 takes on values 0,1,2,...19.  chPipe treats this a continuous feature.
// Let's override that and re-initialize the pipeline.

WithCats("x4")(pipe)
WithOneHot("x4oh", "x4")(pipe)

if e := pipe.Init(); e != nil {
	panic(e)
}

mod := ModSpec{
	"Input(x1+x2+x3+x4oh)",
	"FC(size:2, activation:softmax)",
	"Target(yoh)",
}
nn, e := NewNNModel(mod, pipe, true)

if e != nil {
	panic(e)
}
WithCostFn(CrossEntropy)(nn)

ft := NewFit(nn, 100, pipe)

if e = ft.Do(); e != nil {
	panic(e)
}

sf := os.TempDir() + "/nnTest"
e = nn.Save(sf)

if e != nil {
	panic(e)
}

WithBatchSize(8500)(pipe)

pred, e := PredictNN(sf, pipe, false)
if e != nil {
	panic(e)
}

if e = AddFitted(pipe, sf, []int{1}, "fit", nil, false, nil); e != nil {
	panic(e)
}

_ = os.Remove(sf + "P.nn")
_ = os.Remove(sf + "S.nn")

s, e := NewSlice("x4", 0, pipe, nil)
if e != nil {
	panic(e)
}

fit, e := Coalesce(pred.FitSlice(), 2, []int{1}, false, false, nil)
if e != nil {
	panic(e)
}
desc, e := NewDesc(nil, "Descriptive Statistics")

for s.Iter() {
	slicer := s.MakeSlicer()
	if e != nil {
		panic(e)
	}
	desc.Populate(fit, true, slicer)
	fmt.Printf("Slice x4=%v has %d observations\n", s.Value(), desc.N)
}

Output:

Slice x4=0 has 391 observations
Slice x4=1 has 408 observations
Slice x4=2 has 436 observations
Slice x4=3 has 428 observations
Slice x4=4 has 417 observations
Slice x4=5 has 472 observations
Slice x4=6 has 424 observations
Slice x4=7 has 455 observations
Slice x4=8 has 431 observations
Slice x4=9 has 442 observations
Slice x4=10 has 411 observations
Slice x4=11 has 413 observations
Slice x4=12 has 433 observations
Slice x4=13 has 416 observations
Slice x4=14 has 434 observations
Slice x4=15 has 367 observations
Slice x4=16 has 437 observations
Slice x4=17 has 433 observations
Slice x4=18 has 429 observations
Slice x4=19 has 423 observations

func (*Slice) MakeSlicer ¶

func (s *Slice) MakeSlicer() Slicer

MakeSlicer makes a Slicer function for the current value (discrete) or range (continuous) of the feature. Continuous features are sliced at the lower quartile, median and upper quartile, producing 4 slices.

func (*Slice) Title ¶

func (s *Slice) Title() string

Title retrieves the auto-generated title

func (*Slice) Value ¶

func (s *Slice) Value() any

Value returns the level of a discrete feature we're working on

type Slicer ¶

type Slicer func(row int) bool

Slicer is an optional function that returns true if the row is to be used in calculations. This is used to subset the diagnostics to specific values.

func SlicerAnd ¶

func SlicerAnd(s1, s2 Slicer) Slicer

SlicerAnd creates a Slicer that is s1 && s2

func SlicerOr ¶

func SlicerOr(s1, s2 Slicer) Slicer

SlicerOr creates a Slicer that is s1 || s2

type Summary ¶

type Summary struct {
	NRows  int    // size of the data
	DistrC *Desc  // summary of continuous field
	DistrD Levels // summary of discrete field
}

Summary has descriptive statistics of a field using its current data.

type VecData ¶

type VecData struct {
	// contains filtered or unexported fields
}

func NewVecData ¶

func NewVecData(name string, data *GData, opts ...Opts) *VecData

func (*VecData) AppendRows ¶ added in v0.2.8

func (vec *VecData) AppendRows(gd *GData, fTypes FTypes) (pipeOut Pipeline, err error)

AppendRows appends rows to the existing GData and then re-initializes each GDatum, using the fTypes, if provided.

func (*VecData) AppendRowsRaw ¶ added in v0.2.8

func (vec *VecData) AppendRowsRaw(gd *GData) error

AppendRowsRaw simply appends rows, in place, to the existing GData. Only the *Raw data is updated. The .Data field is set to nil.

func (*VecData) Batch ¶

func (vec *VecData) Batch(inputs G.Nodes) bool

func (*VecData) BatchSize ¶

func (vec *VecData) BatchSize() int

BatchSize returns Pipeline batch size

func (*VecData) Cols ¶

func (vec *VecData) Cols(field string) int

Cols returns the # of columns in the field

func (*VecData) Describe ¶

func (vec *VecData) Describe(field string, topK int) string

Describe describes a field. If the field has role FRCat, the top k values (by frequency) are returned.

func (*VecData) Drop ¶ added in v0.2.7

func (vec *VecData) Drop(field string) error

Drop drops the listed field from the pipeline

func (*VecData) Epoch ¶

func (vec *VecData) Epoch(setTo int) int

Epoch sets the epoch to setTo if setTo >=0 and returns epoch #.

func (*VecData) FieldCount ¶ added in v0.2.7

func (vec *VecData) FieldCount() int

FieldCount returns the number of fields in the pipeline

func (*VecData) FieldList ¶

func (vec *VecData) FieldList() []string

FieldList returns a slice of field names in the Pipeline

func (*VecData) GData ¶

func (vec *VecData) GData() *GData

GData returns the Pipelines' GData

func (*VecData) Get ¶

func (vec *VecData) Get(field string) *GDatum

Get returns a fields's GDatum

func (*VecData) GetFType ¶

func (vec *VecData) GetFType(field string) *FType

GetFType returns the fields FType

func (*VecData) GetFTypes ¶

func (vec *VecData) GetFTypes() FTypes

GetFTypes returns FTypes for vec Pipeline.

func (*VecData) GetKeepRaw ¶ added in v0.2.0

func (vec *VecData) GetKeepRaw() bool

func (*VecData) Init ¶

func (vec *VecData) Init() error

func (*VecData) IsCat ¶

func (vec *VecData) IsCat(field string) bool

IsCat returns true if field has role FRCat.

func (*VecData) IsCts ¶

func (vec *VecData) IsCts(field string) bool

IsCts returns true if the field has role FRCts.

func (*VecData) IsNormalized ¶

func (vec *VecData) IsNormalized(field string) bool

IsNormalized returns true if the field is normalized.

func (*VecData) IsSorted ¶

func (vec *VecData) IsSorted() bool

IsSorted returns true if the data has been sorted.

func (*VecData) Join ¶ added in v0.2.9

func (vec *VecData) Join(right Pipeline, onField string, joinType JoinType) (result Pipeline, err error)

func (*VecData) Keep ¶ added in v0.2.7

func (vec *VecData) Keep(fields []string) error

Keep keeps only the listed fields in the pipeline

func (*VecData) Name ¶

func (vec *VecData) Name() string

Name returns Pipeline name

func (*VecData) ReInit ¶ added in v0.2.8

func (vec *VecData) ReInit(ftypes *FTypes) (pipeOut Pipeline, err error)

ReInit re-initializes the Data field from Raw for each GDatum. If ftypes is not nil, these values are used, otherwise the FParam values are re-derived from the data. A new pipeline is returned.

func (*VecData) Row ¶ added in v0.2.7

func (vec *VecData) Row(take int) (newPipe Pipeline, err error)

Row creates a new pipeline with only the row, take

func (*VecData) Rows ¶

func (vec *VecData) Rows() int

Rows is # of rows of data in the Pipeline

func (*VecData) SaveFTypes ¶

func (vec *VecData) SaveFTypes(fileName string) error

SaveFTypes saves the FTypes for the Pipeline.

func (*VecData) Shuffle ¶

func (vec *VecData) Shuffle()

Shuffle shuffles the data.

func (*VecData) Slice ¶

func (vec *VecData) Slice(sl Slicer) (Pipeline, error)

func (*VecData) Sort ¶

func (vec *VecData) Sort(field string, ascending bool) error

Sort sorts the data on "field".

func (*VecData) SortField ¶

func (vec *VecData) SortField() string

SortField returns the name of the sort field.

func (*VecData) String ¶

func (vec *VecData) String() string

func (*VecData) Subset ¶ added in v0.2.7

func (vec *VecData) Subset(rows []int) (newPipe Pipeline, err error)

Subset creates a new pipeline with only the rows, rows

func (*VecData) Where ¶ added in v0.2.7

func (vec *VecData) Where(field string, equalTo []any) (newPipe Pipeline, err error)

Where creates a new pipeline with rows where field is in equalTo. The comparison uses the *Raw data.

type XY ¶

type XY struct {
	X []float64
	Y []float64
}

XY struct holds (x,y) pairs as distinct slices

func NewXY ¶

func NewXY(x, y []float64) (*XY, error)

NewXY creates a pointer to a new XY with error checking

func (*XY) Interp ¶

func (p *XY) Interp(xNew []float64) (*XY, error)

Interp linearly interpolates XY at the points xNew.

func (*XY) Len ¶

func (p *XY) Len() int

func (*XY) Less ¶

func (p *XY) Less(i, j int) bool

func (*XY) Plot ¶

func (p *XY) Plot(pd *utilities.PlotDef, scatter bool) error

Plot produces an XY Plotly plot

func (*XY) Sort ¶

func (p *XY) Sort() error

Sort sorts with error checking

func (*XY) String ¶

func (p *XY) String() string

func (*XY) Swap ¶

func (p *XY) Swap(i, j int)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

README ¶

Seafan

Documentation ¶

Overview ¶

Index ¶

Examples ¶

Constants ¶

Variables ¶

Functions ¶

func AddFitted ¶

func Coalesce ¶

func CrossEntropy ¶

func Decile ¶

func EvalSFunction ¶ added in v0.1.1

func Evaluate ¶ added in v0.1.0

func Expr2Tree ¶ added in v0.1.0

func GetNode ¶

func KS ¶

func LeakyReluAct ¶

func LinearAct ¶

func Loop ¶ added in v0.1.2

func Marginal ¶

func PipeToCSV ¶ added in v0.1.6

func PipeToSQL ¶ added in v0.1.6

func R2 ¶ added in v0.0.30

func RMS ¶

func ReluAct ¶

func SegPlot ¶ added in v0.0.29

func SigmoidAct ¶

func SoftMaxAct ¶

func SoftRMS ¶

func Strip ¶

func UnNormalize ¶ added in v0.0.29

func Unique ¶

func Wrapper ¶

Types ¶

type Activation ¶

func StrAct ¶

func (Activation) String ¶

type Args ¶

func MakeArgs ¶

func (Args) Get ¶

type ChData ¶

func NewChData ¶

func (*ChData) AppendRows ¶ added in v0.2.8

func (*ChData) AppendRowsRaw ¶ added in v0.2.8

func (*ChData) Batch ¶

func (*ChData) BatchSize ¶

func (*ChData) Cols ¶

func (*ChData) Describe ¶

func (*ChData) Drop ¶ added in v0.2.7

func (*ChData) Epoch ¶

func (*ChData) FieldCount ¶ added in v0.2.7

func (*ChData) FieldList ¶

func (*ChData) GData ¶

func (*ChData) Get ¶

func (*ChData) GetFType ¶

func (*ChData) GetFTypes ¶

func (*ChData) GetKeepRaw ¶ added in v0.2.0

func (*ChData) Init ¶

func (*ChData) InitOld ¶ added in v0.2.41

func (*ChData) IsCat ¶

func (*ChData) IsCts ¶

func (*ChData) IsNormalized ¶

func (*ChData) IsSorted ¶

func (*ChData) Join ¶ added in v0.2.9

func (*ChData) Keep ¶ added in v0.2.7

func (*ChData) Name ¶

func (*ChData) ReInit ¶ added in v0.2.8

func (*ChData) Row ¶ added in v0.2.7

func (*ChData) Rows ¶

func (*ChData) SaveFTypes ¶

func (*ChData) Shuffle ¶

func (*ChData) Slice ¶

func (*ChData) Sort ¶

func (*ChData) SortField ¶

func (*ChData) String ¶

func (*ChData) Subset ¶ added in v0.2.7

func (*ChData) Where ¶ added in v0.2.7

type CostFunc ¶