vft

package module

v0.0.0-...-cfa6996 Latest Latest Go to latest Published: Jan 23, 2024 License: MIT Imports: 19 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/glycerine/slurpdf

Links

Open Source Insights

README ¶

slurpdf: a dataframe package for Go (golang) with parallel multi-core read-in.

This is a data-frame package for Go (golang). It features the ability to "slurp" in a large .csv file from disk into a dataframe. It uses all cores available to parse those doubles quickly.

See slurpdf_test.go for this test code, which gives example use:

		fn := "data/test001.csv"
		d := NewSlurpDataFrameNoStrings()
		t0 := time.Now()
		err := d.Slurp(fn)
		panicOn(err)
		vv("slurped in fn '%v' in %v", fn, time.Since(t0))

		nr := d.Nrow // Number of cases in full database
		nc := d.Ncol // Number of columns (X variables and Y target)
		vv("we see nc = %v, nr= %v", nc, nr)

		// illustrate how to use the testing framework
		if nc != 4 {
			panic("expected nc == 4")
		}
		if nr != 5 {
			panic("expected nr == 5")
		}

		// expected
		eh := []string{"x1", "x2", "x3", "y"}
		em := [][]float64{
			[]float64{1.49152459063627, 2.49152289572389, 1.67045378357525, 1},
			[]float64{0.391160302666239, 1.39115885339298, 3.55649293629879, 1},
			[]float64{0.434211270774665, 1.43420995498692, 3.30302332417163, 0},
			[]float64{0.136364617767486, 1.13636348140514, 8.33327222273116, 0},
			[]float64{1.136364617767486, -3.13636348140514, -9.273116, 0},
		}
		_ = em
		cv.So(d.Header, cv.ShouldResemble, strings.Join(eh, ","))
		cv.So(d.Colnames, cv.ShouldResemble, eh)
		for i := range em {
			cv.So(d.MatFullRow(i), cv.ShouldResemble, em[i])
		}

		// ExtractCols
		xi0 := 3
		xi1 := 5
		wcol := []int{1, 3}
		n, nvar, xx, cn := d.ExtractCols(xi0, xi1, wcol)
		cv.So(n == 2, cv.ShouldBeTrue)
		cv.So(nvar == 2, cv.ShouldBeTrue)
		cv.So(cn, cv.ShouldResemble, []string{"x2", "y"})
		cv.So(xx[0], cv.ShouldResemble, em[3][1])
		cv.So(xx[1], cv.ShouldResemble, em[3][3])
		cv.So(xx[2], cv.ShouldResemble, em[4][1])
		cv.So(xx[3], cv.ShouldResemble, em[4][3])

		// ExtractXXYY
		xi0 = 4
		xi1 = 5  // just the last row
		xj0 := 2 // just the 3rd col
		xj1 := 3
		yj := 3 // and the target
		n, nvar, xx, yy, colnames, targetname := d.ExtractXXYY(xi0, xi1, xj0, xj1, yj)
		cv.So(n == 1, cv.ShouldBeTrue)
		cv.So(nvar == 1, cv.ShouldBeTrue)
		cv.So(colnames, cv.ShouldResemble, []string{"x3"})
		cv.So(targetname, cv.ShouldResemble, "y")
		cv.So(xx, cv.ShouldResemble, em[xi0][xj0:xj1])
		cv.So(yy, cv.ShouldResemble, em[xi0][yj:(yj+1)])

License: MIT

Documentation ¶

Index ¶

Constants
Variables
func AlwaysPrintf(format string, a ...interface{})
func Caller(upStack int) string
func CountLines(fd *os.File) (nline int, mmap []byte)
func CsvShowMain()
func DirExists(name string) bool
func FileExists(name string) bool
func FileLine(depth int) string
func FileSize(name string) (int64, error)
func MemoryMapFile(fd *os.File) (mmap []byte)
func PP(format string, a ...interface{})
func Printf(format string, a ...interface{}) (n int, err error)
func SumSliceFloat64(x []float64) (tot float64)
func SumSliceInt(x []int) (tot int)
func TSPrintf(format string, a ...interface{})
func VV(format string, a ...interface{})
type BoolMatrix
- func NewBoolMatrix(nrow, ncol int) (m *BoolMatrix)
- func NewBoolMatrixColMajor(nrow, ncol int) (m *BoolMatrix)
- func NewBoolMatrixColVec(nrow, ncol int) (m *BoolMatrix)
- func (m *BoolMatrix) AddRow(rowlabel string) (i int)
- func (m *BoolMatrix) At(i, j int) bool
- func (m *BoolMatrix) Cbind(m2 *BoolMatrix)
- func (m *BoolMatrix) Clone() (clone *BoolMatrix)
- func (m *BoolMatrix) CmetaDisplay() (feaDisplay []string)
- func (m *BoolMatrix) Col(j int) (res []bool)
- func (m *BoolMatrix) DeleteCols(wcol []int)
- func (m *BoolMatrix) ExtractFeatAsMatrix(factors []FeatMeta) (r *BoolMatrix)
- func (m *BoolMatrix) ExtractRowsColsAsMatrix(rowbeg, rowendx int, wcol []int) (r *BoolMatrix)
- func (m *BoolMatrix) FillColMajor(slc []bool, makeCopy bool)
- func (m *BoolMatrix) FillRowMajor(slc []bool, makeCopy bool)
- func (m *BoolMatrix) GetRowIter(begrow, endxrow, chunk int) (r *BoolRowIter)
- func (m *BoolMatrix) NewRowColIter(factors []FeatMeta, begrow, length, chunk int, name string) (rci *BoolRowColIter)
- func (m *BoolMatrix) ReformatToColumnMajor()
- func (m *BoolMatrix) ReformatToRowMajor()
- func (m *BoolMatrix) ReformatToSliceOfColVec()
- func (m *BoolMatrix) Reshape(newNrow, newNcol int)
- func (m *BoolMatrix) Row(i int) (res []bool)
- func (m *BoolMatrix) RowChunk(beg, endx int) (r *BoolMatrix)
- func (m *BoolMatrix) Set(i, j int, v bool)
- func (m *BoolMatrix) String() (r string)
- func (m *BoolMatrix) Transpose()
- func (m *BoolMatrix) WriteCol(j int, writeme []bool)
- func (m *BoolMatrix) WriteRow(i int, writeme []bool)
type BoolRowColIter
- func (rci *BoolRowColIter) FetchAdv() (r *BoolMatrix, done bool)
- func (rci *BoolRowColIter) FetchAdv1() (r *BoolMatrix)
type BoolRowIter
- func NewBoolRowIter(m *BoolMatrix, beg, length, chunk int) *BoolRowIter
- func (ri *BoolRowIter) Adv() (done bool)
- func (ri *BoolRowIter) Fetch() (r *BoolMatrix, done bool)
- func (ri *BoolRowIter) FetchAdv() (r *BoolMatrix, done bool)
- func (ri *BoolRowIter) FetchAdv1() (r *BoolMatrix)
- func (ri *BoolRowIter) FetchAdvBX() (beg, endx int, done bool)
- func (ri *BoolRowIter) FetchBX() (beg, endx int, done bool)
- func (ri *BoolRowIter) FetchBegEndx() (beg, endx int, done bool)
type ColVecBool
type ColVecFloat64
type ColVecInt
type ColumnKind
type CsvLoader2
- func NewCsvLoader2(path string) (*CsvLoader2, error)
- func (s *CsvLoader2) Close() error
- func (s *CsvLoader2) ReadOne() ([]string, error)
type FeatMeta
- func NewFeatMeta() *FeatMeta
- func (f *FeatMeta) String() (r string)
type LexCodeSlice
- func (p LexCodeSlice) Len() int
- func (p LexCodeSlice) Less(i, j int) bool
- func (p LexCodeSlice) String() (r string)
- func (p LexCodeSlice) Swap(i, j int)
type MatrixFloat64
- func NewMatrixColMajorFloat64(nrow, ncol int) (m *MatrixFloat64)
- func NewMatrixColVecFloat64(nrow, ncol int) (m *MatrixFloat64)
- func NewMatrixFloat64(nrow, ncol int) (m *MatrixFloat64)
- func (m *MatrixFloat64) Add(i, j int, v float64)
- func (m *MatrixFloat64) AddRow(rowlabel string) (i int)
- func (m *MatrixFloat64) At(i, j int) float64
- func (m *MatrixFloat64) Cbind(m2 *MatrixFloat64)
- func (m *MatrixFloat64) Clone() (clone *MatrixFloat64)
- func (m *MatrixFloat64) CmetaDisplay() (feaDisplay []string)
- func (m *MatrixFloat64) Col(j int) (res []float64)
- func (m *MatrixFloat64) DeleteCols(wcol []int)
- func (m *MatrixFloat64) ExtractFeatAsMatrix(factors []FeatMeta) (r *MatrixFloat64)
- func (m *MatrixFloat64) ExtractRowsColsAsMatrix(rowbeg, rowendx int, wcol []int) (r *MatrixFloat64)
- func (m *MatrixFloat64) FillColMajor(slc []float64, makeCopy bool)
- func (m *MatrixFloat64) FillRowMajor(slc []float64, makeCopy bool)
- func (m *MatrixFloat64) GetRowIter(begrow, endxrow, chunk int) (r *RowIterFloat64)
- func (m *MatrixFloat64) NewRowColIter(factors []FeatMeta, begrow, length, chunk int, name string) (rci *RowColIterFloat64)
- func (m *MatrixFloat64) ReformatToColumnMajor()
- func (m *MatrixFloat64) ReformatToRowMajor()
- func (m *MatrixFloat64) ReformatToSliceOfColVec()
- func (m *MatrixFloat64) Reshape(newNrow, newNcol int)
- func (m *MatrixFloat64) Row(i int) (res []float64)
- func (m *MatrixFloat64) RowChunk(beg, endx int) (r *MatrixFloat64)
- func (m *MatrixFloat64) RowInto(i int, fillme []float64) (res []float64)
- func (m *MatrixFloat64) Set(i, j int, v float64)
- func (m *MatrixFloat64) String() (r string)
- func (m *MatrixFloat64) SumAll() (tot float64)
- func (m *MatrixFloat64) Transpose()
- func (m *MatrixFloat64) WriteCol(j int, writeme []float64)
- func (m *MatrixFloat64) WriteRow(i int, writeme []float64)
type MatrixInt
- func NewMatrixColMajorInt(nrow, ncol int) (m *MatrixInt)
- func NewMatrixColVecInt(nrow, ncol int) (m *MatrixInt)
- func NewMatrixInt(nrow, ncol int) (m *MatrixInt)
- func (m *MatrixInt) Add(i, j int, v int)
- func (m *MatrixInt) AddRow(rowlabel string) (i int)
- func (m *MatrixInt) At(i, j int) int
- func (m *MatrixInt) Cbind(m2 *MatrixInt)
- func (m *MatrixInt) Clone() (clone *MatrixInt)
- func (m *MatrixInt) CmetaDisplay() (feaDisplay []string)
- func (m *MatrixInt) Col(j int) (res []int)
- func (m *MatrixInt) DeleteCols(wcol []int)
- func (m *MatrixInt) ExtractFeatAsMatrix(factors []FeatMeta) (r *MatrixInt)
- func (m *MatrixInt) ExtractRowsColsAsMatrix(rowbeg, rowendx int, wcol []int) (r *MatrixInt)
- func (m *MatrixInt) FillColMajor(slc []int, makeCopy bool)
- func (m *MatrixInt) FillRowMajor(slc []int, makeCopy bool)
- func (m *MatrixInt) GetRowIter(begrow, endxrow, chunk int) (r *RowIterInt)
- func (m *MatrixInt) NewRowColIter(factors []FeatMeta, begrow, length, chunk int, name string) (rci *RowColIterInt)
- func (m *MatrixInt) ReformatToColumnMajor()
- func (m *MatrixInt) ReformatToRowMajor()
- func (m *MatrixInt) ReformatToSliceOfColVec()
- func (m *MatrixInt) Reshape(newNrow, newNcol int)
- func (m *MatrixInt) Row(i int) (res []int)
- func (m *MatrixInt) RowChunk(beg, endx int) (r *MatrixInt)
- func (m *MatrixInt) RowInto(i int, fillme []int) (res []int)
- func (m *MatrixInt) Set(i, j int, v int)
- func (m *MatrixInt) String() (r string)
- func (m *MatrixInt) SumAll() (tot int)
- func (m *MatrixInt) Transpose()
- func (m *MatrixInt) WriteCol(j int, writeme []int)
- func (m *MatrixInt) WriteRow(i int, writeme []int)
type RowColIterFloat64
- func (rci *RowColIterFloat64) FetchAdv() (r *MatrixFloat64, done bool)
- func (rci *RowColIterFloat64) FetchAdv1() (r *MatrixFloat64)
type RowColIterInt
- func (rci *RowColIterInt) FetchAdv() (r *MatrixInt, done bool)
- func (rci *RowColIterInt) FetchAdv1() (r *MatrixInt)
type RowIterFloat64
- func NewRowIterFloat64(m *MatrixFloat64, beg, length, chunk int) *RowIterFloat64
- func (ri *RowIterFloat64) Adv() (done bool)
- func (ri *RowIterFloat64) Fetch() (r *MatrixFloat64, done bool)
- func (ri *RowIterFloat64) FetchAdv() (r *MatrixFloat64, done bool)
- func (ri *RowIterFloat64) FetchAdv1() (r *MatrixFloat64)
- func (ri *RowIterFloat64) FetchAdvBX() (beg, endx int, done bool)
- func (ri *RowIterFloat64) FetchBX() (beg, endx int, done bool)
- func (ri *RowIterFloat64) FetchBegEndx() (beg, endx int, done bool)
type RowIterInt
- func NewRowIterInt(m *MatrixInt, beg, length, chunk int) *RowIterInt
- func (ri *RowIterInt) Adv() (done bool)
- func (ri *RowIterInt) Fetch() (r *MatrixInt, done bool)
- func (ri *RowIterInt) FetchAdv() (r *MatrixInt, done bool)
- func (ri *RowIterInt) FetchAdv1() (r *MatrixInt)
- func (ri *RowIterInt) FetchAdvBX() (beg, endx int, done bool)
- func (ri *RowIterInt) FetchBX() (beg, endx int, done bool)
- func (ri *RowIterInt) FetchBegEndx() (beg, endx int, done bool)
type SlurpDataFrame
- func NewSlurpDataFrameNoStrings() *SlurpDataFrame
- func NewSlurpDataFrameTwoStrings() *SlurpDataFrame
- func (df *SlurpDataFrame) Disgorge(path string) (err error)
- func (sdf *SlurpDataFrame) ExtractCols(xi0, xi1 int, wcol []int) (n, nvar int, xx []float64, colnames []string)
- func (sdf *SlurpDataFrame) ExtractXXYY(xi0, xi1, xj0, xj1, yj int) (n, nvar int, xx, yy []float64, colnames []string, targetname string)
- func (df *SlurpDataFrame) FindTm(tm time.Time, si time.Duration) (rowi int, err error)
- func (df *SlurpDataFrame) MatFullRow(irow int) []float64
- func (df *SlurpDataFrame) MatPartRow(irow, leftCount int) []float64
- func (df *SlurpDataFrame) MatrixAt(irow, jcol int) float64
- func (df *SlurpDataFrame) ReadGzipped(path string) (err error)
- func (df *SlurpDataFrame) Row(i int) (tm time.Time, dat []float64)
- func (df *SlurpDataFrame) RowSlice(i int, nSpan int) (rowslice []float64)
- func (df *SlurpDataFrame) Slurp(path string) (err error)

Constants ¶

View Source

const MISSINGLEVEL string = "."

MISSINGLEVEL matches the reference implementation for a missing factor level; always level == 0.

View Source

const RFC3339MsecTz0 = "2006-01-02T15:04:05.000Z07:00"

Variables ¶

View Source

var Chicago *time.Location

View Source

var ErrNoData = fmt.Errorf("no data")

View Source

var ErrNoHeader = fmt.Errorf("no header")

View Source

var ErrRowNotFound = fmt.Errorf("row not found")

View Source

var GTZ *time.Location

View Source

var OurStdout io.Writer = os.Stdout

so we can multi write easily, use our own printf

View Source

var VerboseVerbose bool = false

for tons of debug output

Functions ¶

func AlwaysPrintf ¶

func AlwaysPrintf(format string, a ...interface{})

func Caller ¶

func Caller(upStack int) string

func CountLines ¶

func CountLines(fd *os.File) (nline int, mmap []byte)

memory mapped counting of newlines: very fast even on a single core, because it uses bytes.Count().

func CsvShowMain ¶

func CsvShowMain()

func DirExists ¶

func DirExists(name string) bool

func FileExists ¶

func FileExists(name string) bool

func FileLine ¶

func FileLine(depth int) string

func FileSize ¶

func FileSize(name string) (int64, error)

func MemoryMapFile ¶

func MemoryMapFile(fd *os.File) (mmap []byte)

func PP ¶

func PP(format string, a ...interface{})

func Printf ¶

func Printf(format string, a ...interface{}) (n int, err error)

Printf formats according to a format specifier and writes to standard output. It returns the number of bytes written and any write error encountered.

func SumSliceFloat64 ¶

func SumSliceFloat64(x []float64) (tot float64)

SumSliceFloat64 returns the sum of all the elements in x.

func SumSliceInt ¶

func SumSliceInt(x []int) (tot int)

SumSliceInt returns the sum of all the elements in x.

func VV ¶

func VV(format string, a ...interface{})

Types ¶

type BoolMatrix ¶

type BoolMatrix struct {
	Nrow int
	Ncol int

	Colnames []string
	Rownames []string

	IsColMajor bool // row major by default
	Dat        []bool

	// For zero-copy extraction of a subset of
	// columns that can be Fetched by chunked
	// rows, we also implement an
	// alterntaive representation that can
	// be layered atop, when ReformatToSliceOfColVec()
	// has been called and it has set IsSliceOfColVec to true.
	//
	// We can refer to columns via a slice
	// of ColVec. If IsSliceOfColVec is true,
	// then IsColMajor will be true too for sure;
	// but its implementations will be overriden.
	//
	IsSliceOfColVec bool
	ColVec          []*ColVecBool

	// track metadata by column/row; and don't share
	// with pointers, use values here, so each Matrix
	// gets its own copy, and we can update Colj without
	// impacting the origin Matrix.
	Cmeta []FeatMeta

	// Row meta data is mostly if we Transpose and then
	// Transpose back, we retain the column meta data.
	Rmeta []FeatMeta
}

BoolMatrix is a matrix of bool. Since bool is not Addable, we cannot use Matrix[T].

func NewBoolMatrix ¶

func NewBoolMatrix(nrow, ncol int) (m *BoolMatrix)

NewBoolMatrix allocates room for nrow * ncol elements.

func NewBoolMatrixColMajor ¶

func NewBoolMatrixColMajor(nrow, ncol int) (m *BoolMatrix)

NewBoolMatrixColMajor returns a column-major matrix.

func NewBoolMatrixColVec ¶

func NewBoolMatrixColVec(nrow, ncol int) (m *BoolMatrix)

NewBoolMatrixColVec allocates room for nrow * ncol elements in a IsSliceOfColVec format.

func (*BoolMatrix) AddRow ¶

func (m *BoolMatrix) AddRow(rowlabel string) (i int)

AddRow extends the matrix by one row and returns the index to the new row. The new row is all 0. This can be pretty fast if m is row major; and can be pretty slow if not. Pass empty string for rowlabel if not using them.

func (*BoolMatrix) At ¶

func (m *BoolMatrix) At(i, j int) bool

At reads out the [i,j]-th element.

func (*BoolMatrix) Cbind ¶

func (m *BoolMatrix) Cbind(m2 *BoolMatrix)

Cbind will append the columns of m2 on to the right side of m, updating m in-place.

The resulting Matrix m will have m.IsColMajor:true AND m.IsSliceOfColVec:true.

In some cases, e.g. if both m and m2 started as IsColMajor:true, then no .Dat will be copied and m will simply point to m2's data. Beware of this aliasing. If you change m2 after a Cbind, then those changes to m2 may show up also in m. For safety, do not write to m2 after Cbind()-ing it to m. Instead, if need be, write through m to the appended columns.

func (*BoolMatrix) Clone ¶

func (m *BoolMatrix) Clone() (clone *BoolMatrix)

Clone returns a fresh copy of m, with no shared state.

func (*BoolMatrix) CmetaDisplay ¶

func (m *BoolMatrix) CmetaDisplay() (feaDisplay []string)

CmetaDisplay returns just the essentials of m.Cmeta for diagnostics

func (*BoolMatrix) Col ¶

func (m *BoolMatrix) Col(j int) (res []bool)

Col will return the underlying slice from .Dat of column j if the matrix is in column-major order; otherwise it will return a coalesced copy and changing res will have no impact on .Dat.

func (*BoolMatrix) DeleteCols ¶

func (m *BoolMatrix) DeleteCols(wcol []int)

DeleteCols deletes from m the 0-based column numbers listed in wcol.

func (*BoolMatrix) ExtractFeatAsMatrix ¶

func (m *BoolMatrix) ExtractFeatAsMatrix(factors []FeatMeta) (r *BoolMatrix)

ExtractFeatAsMatrix returns a sub-Matrix of m that has all rows but only the columns associated with factors.

func (*BoolMatrix) ExtractRowsColsAsMatrix ¶

func (m *BoolMatrix) ExtractRowsColsAsMatrix(rowbeg, rowendx int, wcol []int) (r *BoolMatrix)

ExtractRowsColsAsMatrix creates a submatrix of the requested rows and columns. This is zero copy if m.IsSliceOfColVec is true.

func (*BoolMatrix) FillColMajor ¶

func (m *BoolMatrix) FillColMajor(slc []bool, makeCopy bool)

FillColMajor copies slc into Dat, and sets IsColMajor to true. If makeCopy then we'll make our own copy of slc; otherwise just point to it.

func (*BoolMatrix) FillRowMajor ¶

func (m *BoolMatrix) FillRowMajor(slc []bool, makeCopy bool)

FillRowMajor copies slc into Dat, and sets IsColMajor to false. If makeCopy then we'll make our own copy of slc; otherwise just point to it.

func (*BoolMatrix) GetRowIter ¶

func (m *BoolMatrix) GetRowIter(begrow, endxrow, chunk int) (r *BoolRowIter)

GetRowIter returns an iterator that will read [beg, endxrow) rows of m, by requesting Rowset()s of chunk rows at a time. The endxrow parameter allows us to read fewer than m.Nrow elements all in, if desired.

Single column vectors are supported so that Matrix can be used to chunk out simple vectors too. The only current restriction is that we will return *all* the columns in our rowset, so to omit columns you may need to DeleteCols to adjust the shape of m beforehand; say to remove any target column, for example. Or just use a RowColIter instead.

func (*BoolMatrix) NewRowColIter ¶

func (m *BoolMatrix) NewRowColIter(factors []FeatMeta, begrow, length, chunk int, name string) (rci *BoolRowColIter)

NewRowColIter specifies the columns to fetch via the factors slice.

func (*BoolMatrix) ReformatToColumnMajor ¶

func (m *BoolMatrix) ReformatToColumnMajor()

ReformatToColumnMajor will actually re-write the data in .Dat, if need be, to be column major: to have each column's data adjacent so advancing the index of .Dat by 1 goes to the next row; or to the top of the next column if at the last row.

Be aware that the Row() fetches from m will be slower; but reading a whole column will be faster of course.

This is a no-op if the Matrix already has IsColMajor true.

func (*BoolMatrix) ReformatToRowMajor ¶

func (m *BoolMatrix) ReformatToRowMajor()

ReformatToRowMajor will actually re-write the data in .Dat, if need be, to be row major: to have each row's data adjacent so advancing the index of .Dat by 1 goes to the next column; or to the beginning of the next row if at the last column. This is a no-op if the Matrix already has IsColMajor false.

func (*BoolMatrix) ReformatToSliceOfColVec ¶

func (m *BoolMatrix) ReformatToSliceOfColVec()

ReformatToSliceOfColVec will set IsSliceOfColVec to true after reformatting the data internally to use the SliceOfColVec representation. To do so we'll call ReformatToColumnMajor() which will involve a copy if the Matrix starts out row-major.

func (*BoolMatrix) Reshape ¶

func (m *BoolMatrix) Reshape(newNrow, newNcol int)

Reshape does not change Dat, but re-assigns Nrow = newNrow and Ncol = newNcol. It also discards m.Colnames and m.Rownames. It will reinitialize Cmeta to be newNcol long; but that loses all Cmeta[i].Names and any other meta information that they contained. So avoid Reshape unless you can re-create any needed Cmeta information. Reshape will panic if m.IsSliceOfColVec is true.

func (*BoolMatrix) Row ¶

func (m *BoolMatrix) Row(i int) (res []bool)

Row will return the underlying slice from .Dat of row i if the matrix is in row-major order; otherwise it will return a coalesced copy and changing res will have no impact on .Dat.

In other words, it will try and do as little work as possible to return a readable copy of the data. But if you need to write into it... make sure that you have a row-major matrix; or use WriteRow to write it back at the end. (And comment out the panic that warns about this.)

func (*BoolMatrix) RowChunk ¶

func (m *BoolMatrix) RowChunk(beg, endx int) (r *BoolMatrix)

RowChunk is like Row, but returns multiple rows in row-major form. All columns are returned.

func (*BoolMatrix) Set ¶

func (m *BoolMatrix) Set(i, j int, v bool)

Set v as the value for [i,j]-th element.

func (*BoolMatrix) String ¶

func (m *BoolMatrix) String() (r string)

String satisfies the common Stringer interface. It provides a view of the contents of the Matrix m.

func (*BoolMatrix) Transpose ¶

func (m *BoolMatrix) Transpose()

Transpose flips the Matrix without changing Dat. It turns m into its transpose efficiently. Only meta data describing how to access the rows and columns is adjusted, and this is very quick. Transpose is not allowed for IsSliceOfColVec:true Matrixes and we will panic.

func (*BoolMatrix) WriteCol ¶

func (m *BoolMatrix) WriteCol(j int, writeme []bool)

WriteCol will replace column j with writeme, which must have length m.Nrow.

func (*BoolMatrix) WriteRow ¶

func (m *BoolMatrix) WriteRow(i int, writeme []bool)

WriteRow will replace row i with writeme, which must have length m.Ncol.

type BoolRowColIter ¶

type BoolRowColIter struct {
	// contains filtered or unexported fields
}

BoolRowColIter is a BoolMatrix iterator that returns, upon Fetch, chunks of rows in a sub-matrix of the specified columns (factors).

func (*BoolRowColIter) FetchAdv ¶

func (rci *BoolRowColIter) FetchAdv() (r *BoolMatrix, done bool)

FetchAdv is the basic iterator operation. Returns a submatrix r that has a subset of columns and a chunk of contiguous rows from m.

func (*BoolRowColIter) FetchAdv1 ¶

func (rci *BoolRowColIter) FetchAdv1() (r *BoolMatrix)

FetchAdv1 is the basic iterator operation but without the done return value. The returned r will be nil when there are no more rows to return. See also FetchAdv() which FetchAdv1() calls internally.

type BoolRowIter ¶

type BoolRowIter struct {
	// contains filtered or unexported fields
}

BoolRowIter refers to a row-slice of a BoolMatrix; see the GetRowIter() method on the Matrix below.

func NewBoolRowIter ¶

func NewBoolRowIter(m *BoolMatrix, beg, length, chunk int) *BoolRowIter

NewBoolRowIter makes a row iterator. See also the method GetRowIter on Matrix.

func (*BoolRowIter) Adv ¶

func (ri *BoolRowIter) Adv() (done bool)

Adv advances the row iterator

func (*BoolRowIter) Fetch ¶

func (ri *BoolRowIter) Fetch() (r *BoolMatrix, done bool)

Fetch returns the current row set, without advancing

func (*BoolRowIter) FetchAdv ¶

func (ri *BoolRowIter) FetchAdv() (r *BoolMatrix, done bool)

FetchAdv returns the current row set and then advances, so the next Fetch or FetchAdv will read starting with the beg row.

func (*BoolRowIter) FetchAdv1 ¶

func (ri *BoolRowIter) FetchAdv1() (r *BoolMatrix)

FetchAdv1 just returns nil if done, without a separate done flag. Otherwise identical to FetchAdv() which it calls.

func (*BoolRowIter) FetchAdvBX ¶

func (ri *BoolRowIter) FetchAdvBX() (beg, endx int, done bool)

FetchAdvBX does FetchBX() and then advances the iterator to the next chunk.

Specifically, FetchAdvBX returns the current chunk of rows, pointed to by the [beg, endx) return values, and then advances the iterator to the next chunk of rows to be read.

The length of the returned range is always endx - beg; so [0, 0) is an empty range. The size of the range will be ri.chunk unless there are insufficient elements left before hitting the endxrow point.

The returned range is empty iff done is returned true; so always check done first. See also FetchBX to get a row range without advancing the iterator.

If done is returned true, then beg and endx are undefined and should be ignored.

func (*BoolRowIter) FetchBX ¶

func (ri *BoolRowIter) FetchBX() (beg, endx int, done bool)

FetchBX returns the current chunk of rows, pointed to by the [beg, endx) returned values. The returned range is empty iff done is returned true; so always check done first.

Concretely, the length of the returned range is always endx - beg; so [0, 0) is an empty range. The size of the range will be ri.chunk unless there are insufficient elements left before hitting the endxrow point.

See also FetchAdvBX to read the current chunk and then advance to the next.

If done is returned true, then beg and endx are undefined and should be ignored.

func (*BoolRowIter) FetchBegEndx ¶

func (ri *BoolRowIter) FetchBegEndx() (beg, endx int, done bool)

FetchBegEndx just supplies the beg and endx row index that Fetch would return. This can be used to coordinate/compare with other row iterators or the VectorSlicer.

type ColVecBool ¶

type ColVecBool struct {
	Dat []bool
}

type ColVecFloat64 ¶

type ColVecFloat64 struct {
	Dat []float64
}

ColVecFloat64 is a column vector; a Matrix has an alternative internal data structure as a list of ColVec, just like an R dataframe. This allows for zero copy ExtractRowsColsAsMatrix.

type ColVecInt ¶

type ColVecInt struct {
	Dat []int
}

ColVecInt is a column vector; a Matrix has an alternative internal data structure as a list of ColVec, just like an R dataframe. This allows for zero copy ExtractRowsColsAsMatrix.

type ColumnKind ¶

type ColumnKind int

const (
	FACTOR    ColumnKind = 1
	NUMERIC   ColumnKind = 2
	TIMESTAMP ColumnKind = 3
	SYMBOL    ColumnKind = 4
)

type CsvLoader2 ¶

type CsvLoader2 struct {
	Path   string
	File   *os.File
	Gz     *gzip.Reader
	Csv    *csv.Reader
	Header []string
}

func NewCsvLoader2 ¶

func NewCsvLoader2(path string) (*CsvLoader2, error)

NewCsvLoader2 detects a .gz suffix and reads using gunzip. If path is "-" we read from stdin.

func (*CsvLoader2) Close ¶

func (s *CsvLoader2) Close() error

func (*CsvLoader2) ReadOne ¶

func (s *CsvLoader2) ReadOne() ([]string, error)

type FeatMeta ¶

type FeatMeta struct {
	Name        string
	Colj        int
	IsFactor    bool
	Cuts        []float64
	Levels      []string
	LevelCount  int
	Rightclosed bool
	IsOrdinal   bool

	FactorMap    map[string]int
	InvFactorMap map[int]string

	MyMat any // our underlying matrix
}

FeatMeta gives Feature Metadata for each column, especially details about factors and their levels, cuts, and IsOrdinal

func NewFeatMeta ¶

func NewFeatMeta() *FeatMeta

NewFeatMeta creates a new feature metadata struct, used to describe a feature (factor, covariate, independent variable) that typically resides in each of the columns of our Matrix. The New function is necessary since the internal maps must be allocated.

func (*FeatMeta) String ¶

func (f *FeatMeta) String() (r string)

type LexCodeSlice ¶

type LexCodeSlice []lexcode

LexCodeSlice facilitates sorting by factor name lexically

func (LexCodeSlice) Len ¶

func (p LexCodeSlice) Len() int

func (LexCodeSlice) Less ¶

func (p LexCodeSlice) Less(i, j int) bool

func (LexCodeSlice) String ¶

func (p LexCodeSlice) String() (r string)

func (LexCodeSlice) Swap ¶

func (p LexCodeSlice) Swap(i, j int)

type MatrixFloat64 ¶

type MatrixFloat64 struct {
	Nrow int
	Ncol int

	Colnames []string
	Rownames []string

	IsColMajor bool // row major by default
	Dat        []float64

	// For zero-copy extraction of a subset of
	// columns that can be Fetched by chunked
	// rows, we also implement an
	// alterntaive representation that can
	// be layered atop, when ReformatToSliceOfColVec()
	// has been called and it has set IsSliceOfColVec to true.
	//
	// We can refer to columns via a slice
	// of ColVec. If IsSliceOfColVec is true,
	// then IsColMajor will be true too for sure;
	// but its implementations will be overriden.
	//
	IsSliceOfColVec bool
	ColVec          []*ColVecFloat64

	// track metadata by column/row; and don't share
	// with pointers, use values here, so each Matrix
	// gets its own copy, and we can update Colj without
	// impacting the origin Matrix.
	Cmeta []FeatMeta

	// Row meta data is mostly if we Transpose and then
	// Transpose back, we retain the column meta data.
	Rmeta []FeatMeta
	// contains filtered or unexported fields
}

MatrixFloat64 stores a rectangular matrix whose entries are float64.

func NewMatrixColMajorFloat64 ¶

func NewMatrixColMajorFloat64(nrow, ncol int) (m *MatrixFloat64)

NewMatrixColMajorFloat64 returns a column-major matrix.

func NewMatrixColVecFloat64 ¶

func NewMatrixColVecFloat64(nrow, ncol int) (m *MatrixFloat64)

NewMatrixColVecFloat64 allocates room for nrow * ncol elements in a IsSliceOfColVec format.

func NewMatrixFloat64 ¶

func NewMatrixFloat64(nrow, ncol int) (m *MatrixFloat64)

NewMatrixFloat64 allocates room for nrow * ncol elements. It defaults to row-major.

func (*MatrixFloat64) Add ¶

func (m *MatrixFloat64) Add(i, j int, v float64)

Add v to the [i,j] element of the Matrix.

func (*MatrixFloat64) AddRow ¶

func (m *MatrixFloat64) AddRow(rowlabel string) (i int)

AddRow extends the matrix by one row and returns the index to the new row. The new row is all 0. This can be pretty fast if m is row major; and can be pretty slow if not. Pass empty string for rowlabel if not using them.

func (*MatrixFloat64) At ¶

func (m *MatrixFloat64) At(i, j int) float64

At reads out the [i,j]-th element.

func (*MatrixFloat64) Cbind ¶

func (m *MatrixFloat64) Cbind(m2 *MatrixFloat64)

Cbind will append the columns of m2 on to the right side of m, updating m in-place.

The resulting Matrix m will have m.IsColMajor:true AND m.IsSliceOfColVec:true.

In some cases, e.g. if both m and m2 started as IsColMajor:true, then no .Dat will be copied and m will simply point to m2's data. Beware of this aliasing. If you change m2 after a Cbind, then those changes to m2 may (or may not) show up also in m. For safety, discard references to m2 after Cbind()-ing it to m. Instead, if need be, read/write through m to the appended columns.

func (*MatrixFloat64) Clone ¶

func (m *MatrixFloat64) Clone() (clone *MatrixFloat64)

Clone returns a fresh copy of m, with no shared state.

func (*MatrixFloat64) CmetaDisplay ¶

func (m *MatrixFloat64) CmetaDisplay() (feaDisplay []string)

CmetaDisplay returns just the essentials of m.Cmeta for diagnostics

func (*MatrixFloat64) Col ¶

func (m *MatrixFloat64) Col(j int) (res []float64)

Col will return the underlying slice from .Dat of column j if the matrix is in column-major order; otherwise it will return a coalesced copy and changing res will have no impact on .Dat.

func (*MatrixFloat64) DeleteCols ¶

func (m *MatrixFloat64) DeleteCols(wcol []int)

DeleteCols deletes from m the 0-based column numbers listed in wcol.

func (*MatrixFloat64) ExtractFeatAsMatrix ¶

func (m *MatrixFloat64) ExtractFeatAsMatrix(factors []FeatMeta) (r *MatrixFloat64)

ExtractFeatAsMatrix returns a sub-Matrix of m that has all rows but only the columns associated with factors.

func (*MatrixFloat64) ExtractRowsColsAsMatrix ¶

func (m *MatrixFloat64) ExtractRowsColsAsMatrix(rowbeg, rowendx int, wcol []int) (r *MatrixFloat64)

ExtractRowsColsAsMatrix creates a submatrix of the requested rows and columns. This is zero copy if m.IsSliceOfColVec is true. If len(wcol) == 0, we will give all the columns.

func (*MatrixFloat64) FillColMajor ¶

func (m *MatrixFloat64) FillColMajor(slc []float64, makeCopy bool)

FillColMajor copies slc into Dat, and sets IsColMajor to true. If makeCopy then we'll make our own copy of slc; otherwise just point to it.

func (*MatrixFloat64) FillRowMajor ¶

func (m *MatrixFloat64) FillRowMajor(slc []float64, makeCopy bool)

FillRowMajor copies slc into Dat, and sets IsColMajor to false. If makeCopy then we'll make our own copy of slc; otherwise just point to it.

func (*MatrixFloat64) GetRowIter ¶

func (m *MatrixFloat64) GetRowIter(begrow, endxrow, chunk int) (r *RowIterFloat64)

GetRowIter returns an iterator that will read [beg, endxrow) rows of m, by requesting Rowset()s of chunk rows at a time. The endxrow parameter allows us to read fewer than m.Nrow elements all in, if desired.

Single column vectors are supported so that Matrix can be used to chunk out simple vectors too. The only current restriction is that we will return *all* the columns in our rowset, so to omit columns you may need to DeleteCols to adjust the shape of m beforehand; say to remove any target column, for example. Or just use a RowColIter instead.

func (*MatrixFloat64) NewRowColIter ¶

func (m *MatrixFloat64) NewRowColIter(factors []FeatMeta, begrow, length, chunk int, name string) (rci *RowColIterFloat64)

NewRowColIter specifies the columns to fetch via the factors slice.

func (*MatrixFloat64) ReformatToColumnMajor ¶

func (m *MatrixFloat64) ReformatToColumnMajor()

ReformatToColumnMajor will actually re-write the data in .Dat, if need be, to be column major: to have each column's data adjacent so advancing the index of .Dat by 1 goes to the next row; or to the top of the next column if at the last row.

Be aware that the Row() fetches from m will be slower; but reading a whole column will be faster of course.

This is a no-op if the Matrix already has IsColMajor true.

func (*MatrixFloat64) ReformatToRowMajor ¶

func (m *MatrixFloat64) ReformatToRowMajor()

ReformatToRowMajor will actually re-write the data in .Dat, if need be, to be row major: to have each row's data adjacent so advancing the index of .Dat by 1 goes to the next column; or to the beginning of the next row if at the last column. This is a no-op if the Matrix already has IsColMajor false.

func (*MatrixFloat64) ReformatToSliceOfColVec ¶

func (m *MatrixFloat64) ReformatToSliceOfColVec()

ReformatToSliceOfColVec will set IsSliceOfColVec to true after reformatting the data internally to use the SliceOfColVec representation. To do so we'll call ReformatToColumnMajor() which will involve a copy if the Matrix starts out row-major.

func (*MatrixFloat64) Reshape ¶

func (m *MatrixFloat64) Reshape(newNrow, newNcol int)

Reshape does not change Dat, but re-assigns Nrow = newNrow and Ncol = newNcol. It also discards m.Colnames and m.Rownames. It will reinitialize Cmeta to be newNcol long; but that loses all Cmeta[i].Names and any other meta information that they contained. So avoid Reshape unless you can re-create any needed Cmeta information. Reshape will panic if m.IsSliceOfColVec is true.

func (*MatrixFloat64) Row ¶

func (m *MatrixFloat64) Row(i int) (res []float64)

Row will return the underlying slice from .Dat of row i if the matrix is in row-major order; otherwise it will return a coalesced copy and changing res will have no impact on .Dat.

In other words, it will try and do as little work as possible to return a readable copy of the data. But if you need to write into it... make sure that you have a row-major matrix; or use WriteRow to write it back at the end. (And comment out the panic that warns about this.)

func (*MatrixFloat64) RowChunk ¶

func (m *MatrixFloat64) RowChunk(beg, endx int) (r *MatrixFloat64)

RowChunk is like Row, but returns multiple rows in row-major form. All columns are returned.

func (*MatrixFloat64) RowInto ¶

func (m *MatrixFloat64) RowInto(i int, fillme []float64) (res []float64)

RowInto allows less allocation, compared to Row(), by having the caller provide a fillme slice as a working buffer, which we may fill in and return a slice of as the returned res.

This is really only useful when m.IsColMajor is true.

For a column-major Matrix, we will return the answer in res, using fillme to fill in the answer. For row-major, res will point to .Dat rather than fillme, to avoid allocation.

The caller should just use res to be agnostic to the Matrix internal format. The provided fillme must be at least m.Ncol long, else we will panic.

Rather than require the caller to provide fillme, we will just keep it inside Matrix in rowbuffer now. Row() will now automatically use RowInto with rowbuffer if m is column major.

func (*MatrixFloat64) Set ¶

func (m *MatrixFloat64) Set(i, j int, v float64)

Set v as the value for [i,j]-th element.

func (*MatrixFloat64) String ¶

func (m *MatrixFloat64) String() (r string)

String satisfies the common Stringer interface. It provides a view of the contents of the Matrix m.

func (*MatrixFloat64) SumAll ¶

func (m *MatrixFloat64) SumAll() (tot float64)

SumAll returns the sum of all elements in m.

func (*MatrixFloat64) Transpose ¶

func (m *MatrixFloat64) Transpose()

Transpose flips the Matrix without changing Dat. It turns m into its transpose efficiently. Only meta data describing how to access the rows and columns is adjusted, and this is very quick. Transpose is not allowed for IsSliceOfColVec:true Matrixes and we will panic.

func (*MatrixFloat64) WriteCol ¶

func (m *MatrixFloat64) WriteCol(j int, writeme []float64)

WriteCol will replace column j with writeme, which must have length m.Nrow.

func (*MatrixFloat64) WriteRow ¶

func (m *MatrixFloat64) WriteRow(i int, writeme []float64)

WriteRow will replace row i with writeme, which must have length m.Ncol.

type MatrixInt ¶

type MatrixInt struct {
	Nrow int
	Ncol int

	Colnames []string
	Rownames []string

	IsColMajor bool // row major by default
	Dat        []int

	// For zero-copy extraction of a subset of
	// columns that can be Fetched by chunked
	// rows, we also implement an
	// alternative representation that can
	// be layered atop, when ReformatToSliceOfColVec()
	// has been called and it has set IsSliceOfColVec to true.
	//
	// We can refer to columns via a slice
	// of ColVec. If IsSliceOfColVec is true,
	// then IsColMajor will be true too for sure;
	// but its implementations will be overridden.
	//
	IsSliceOfColVec bool
	ColVec          []*ColVecInt

	// track metadata by column/row; and don't share
	// with pointers, use values here, so each Matrix
	// gets its own copy, and we can update Colj without
	// impacting the origin Matrix.
	Cmeta []FeatMeta

	// Row meta data is mostly if we Transpose and then
	// Transpose back, we retain the column meta data.
	Rmeta []FeatMeta
	// contains filtered or unexported fields
}

MatrixInt is used for example for matrices of factors that have been turned into integers.

func NewMatrixColMajorInt ¶

func NewMatrixColMajorInt(nrow, ncol int) (m *MatrixInt)

NewMatrixColMajorInt returns a column-major matrix.

func NewMatrixColVecInt ¶

func NewMatrixColVecInt(nrow, ncol int) (m *MatrixInt)

NewMatrixColVecInt allocates room for nrow * ncol elements in a IsSliceOfColVec format.

func NewMatrixInt ¶

func NewMatrixInt(nrow, ncol int) (m *MatrixInt)

NewMatrixInt allocates room for nrow * ncol elements. It defaults to row-major.

func (*MatrixInt) Add ¶

func (m *MatrixInt) Add(i, j int, v int)

Add v to the [i,j] element of the Matrix.

func (*MatrixInt) AddRow ¶

func (m *MatrixInt) AddRow(rowlabel string) (i int)

AddRow extends the matrix by one row and returns the index to the new row. The new row is all 0. This can be pretty fast if m is row major; and can be pretty slow if not. Pass empty string for rowlabel if not using them.

func (*MatrixInt) At ¶

func (m *MatrixInt) At(i, j int) int

At reads out the [i,j]-th element.

func (*MatrixInt) Cbind ¶

func (m *MatrixInt) Cbind(m2 *MatrixInt)

Cbind will append the columns of m2 on to the right side of m, updating m in-place.

The resulting Matrix m will have m.IsColMajor:true AND m.IsSliceOfColVec:true.

In some cases, e.g. if both m and m2 started as IsColMajor:true, then no .Dat will be copied and m will simply point to m2's data. Beware of this aliasing. If you change m2 after a Cbind, then those changes to m2 may (or may not) show up also in m. For safety, discard references to m2 after Cbind()-ing it to m. Instead, if need be, read/write through m to the appended columns.

func (*MatrixInt) Clone ¶

func (m *MatrixInt) Clone() (clone *MatrixInt)

Clone returns a fresh copy of m, with no shared state.

func (*MatrixInt) CmetaDisplay ¶

func (m *MatrixInt) CmetaDisplay() (feaDisplay []string)

CmetaDisplay returns just the essentials of m.Cmeta for diagnostics

func (*MatrixInt) Col ¶

func (m *MatrixInt) Col(j int) (res []int)

Col will return the underlying slice from .Dat of column j if the matrix is in column-major order; otherwise it will return a coalesced copy and changing res will have no impact on .Dat.

func (*MatrixInt) DeleteCols ¶

func (m *MatrixInt) DeleteCols(wcol []int)

DeleteCols deletes from m the 0-based column numbers listed in wcol.

func (*MatrixInt) ExtractFeatAsMatrix ¶

func (m *MatrixInt) ExtractFeatAsMatrix(factors []FeatMeta) (r *MatrixInt)

ExtractFeatAsMatrix returns a sub-Matrix of m that has all rows but only the columns associated with factors.

func (*MatrixInt) ExtractRowsColsAsMatrix ¶

func (m *MatrixInt) ExtractRowsColsAsMatrix(rowbeg, rowendx int, wcol []int) (r *MatrixInt)

ExtractRowsColsAsMatrix creates a submatrix of the requested rows and columns. This is zero copy if m.IsSliceOfColVec is true.

func (*MatrixInt) FillColMajor ¶

func (m *MatrixInt) FillColMajor(slc []int, makeCopy bool)

FillColMajor copies slc into Dat, and sets IsColMajor to true. If makeCopy then we'll make our own copy of slc; otherwise just point to it.

func (*MatrixInt) FillRowMajor ¶

func (m *MatrixInt) FillRowMajor(slc []int, makeCopy bool)

FillRowMajor copies slc into Dat, and sets IsColMajor to false. If makeCopy then we'll make our own copy of slc; otherwise just point to it.

func (*MatrixInt) GetRowIter ¶

func (m *MatrixInt) GetRowIter(begrow, endxrow, chunk int) (r *RowIterInt)

GetRowIter returns an iterator that will read [beg, endxrow) rows of m, by requesting Rowset()s of chunk rows at a time. The endxrow parameter allows us to read fewer than m.Nrow elements all in, if desired.

Single column vectors are supported so that Matrix can be used to chunk out simple vectors too. The only current restriction is that we will return *all* the columns in our rowset, so to omit columns you may need to DeleteCols to adjust the shape of m beforehand; say to remove any target column, for example. Or just use a RowColIter instead.

func (*MatrixInt) NewRowColIter ¶

func (m *MatrixInt) NewRowColIter(factors []FeatMeta, begrow, length, chunk int, name string) (rci *RowColIterInt)

NewRowColIter specifies the columns to fetch via the factors slice.

func (*MatrixInt) ReformatToColumnMajor ¶

func (m *MatrixInt) ReformatToColumnMajor()

ReformatToColumnMajor will actually re-write the data in .Dat, if need be, to be column major: to have each column's data adjacent so advancing the index of .Dat by 1 goes to the next row; or to the top of the next column if at the last row.

Be aware that the Row() fetches from m will be slower; but reading a whole column will be faster of course.

This is a no-op if the Matrix already has IsColMajor true.

func (*MatrixInt) ReformatToRowMajor ¶

func (m *MatrixInt) ReformatToRowMajor()

ReformatToRowMajor will actually re-write the data in .Dat, if need be, to be row major: to have each row's data adjacent so advancing the index of .Dat by 1 goes to the next column; or to the beginning of the next row if at the last column. This is a no-op if the Matrix already has IsColMajor false.

func (*MatrixInt) ReformatToSliceOfColVec ¶

func (m *MatrixInt) ReformatToSliceOfColVec()

ReformatToSliceOfColVec will set IsSliceOfColVec to true after reformatting the data internally to use the SliceOfColVec representation. To do so we'll call ReformatToColumnMajor() which will involve a copy if the Matrix starts out row-major.

func (*MatrixInt) Reshape ¶

func (m *MatrixInt) Reshape(newNrow, newNcol int)

Reshape does not change Dat, but re-assigns Nrow = newNrow and Ncol = newNcol. It also discards m.Colnames and m.Rownames. It will reinitialize Cmeta to be newNcol long; but that loses all Cmeta[i].Names and any other meta information that they contained. So avoid Reshape unless you can re-create any needed Cmeta information. Reshape will panic if m.IsSliceOfColVec is true.

func (*MatrixInt) Row ¶

func (m *MatrixInt) Row(i int) (res []int)

Row will return the underlying slice from .Dat of row i if the matrix is in row-major order; otherwise it will return a coalesced copy and changing res will have no impact on .Dat.

In other words, it will try and do as little work as possible to return a readable copy of the data. But if you need to write into it... make sure that you have a row-major matrix; or use WriteRow to write it back at the end. (And comment out the panic that warns about this.)

func (*MatrixInt) RowChunk ¶

func (m *MatrixInt) RowChunk(beg, endx int) (r *MatrixInt)

RowChunk is like Row, but returns multiple rows in row-major form. All columns are returned.

func (*MatrixInt) RowInto ¶

func (m *MatrixInt) RowInto(i int, fillme []int) (res []int)

RowInto allows less allocation, compared to Row(), by having the caller provide a fillme slice as a working buffer, which we may fill in and return a slice of as the returned res.

This is really only useful when m.IsColMajor is true.

For a column-major Matrix, we will return the answer in res, using fillme to fill in the answer. For row-major, res will point to .Dat rather than fillme, to avoid allocation.

The caller should just use res to be agnostic to the Matrix internal format. The provided fillme must be at least m.Ncol long, else we will panic.

Rather than require the caller to provide fillme, we will just keep it inside Matrix in rowbuffer now. Row() will now automatically use RowInto with rowbuffer if m is column major.

func (*MatrixInt) Set ¶

func (m *MatrixInt) Set(i, j int, v int)

Set v as the value for [i,j]-th element.

func (*MatrixInt) String ¶

func (m *MatrixInt) String() (r string)

String satisfies the common Stringer interface. It provides a view of the contents of the Matrix m.

func (*MatrixInt) SumAll ¶

func (m *MatrixInt) SumAll() (tot int)

SumAll returns the sum of all elements in m.

func (*MatrixInt) Transpose ¶

func (m *MatrixInt) Transpose()

Transpose flips the Matrix without changing Dat. It turns m into its transpose efficiently. Only meta data describing how to access the rows and columns is adjusted, and this is very quick. Transpose is not allowed for IsSliceOfColVec:true Matrixes and we will panic.

func (*MatrixInt) WriteCol ¶

func (m *MatrixInt) WriteCol(j int, writeme []int)

WriteCol will replace column j with writeme, which must have length m.Nrow.

func (*MatrixInt) WriteRow ¶

func (m *MatrixInt) WriteRow(i int, writeme []int)

WriteRow will replace row i with writeme, which must have length m.Ncol.

type RowColIterFloat64 ¶

type RowColIterFloat64 struct {
	// contains filtered or unexported fields
}

RowColIterFloat64 is a Matrix iterator that returns, upon Fetch, chunks of rows in a sub-matrix of the specified columns (factors).

func (*RowColIterFloat64) FetchAdv ¶

func (rci *RowColIterFloat64) FetchAdv() (r *MatrixFloat64, done bool)

FetchAdv is the basic iterator operation. Returns a submatrix r that has a subset of columns and a chunk of contiguous rows from m.

func (*RowColIterFloat64) FetchAdv1 ¶

func (rci *RowColIterFloat64) FetchAdv1() (r *MatrixFloat64)

FetchAdv1 is the basic iterator operation but without the done return value. The returned r will be nil when there are no more rows to return. See also FetchAdv() which FetchAdv1() calls internally.

type RowColIterInt ¶

type RowColIterInt struct {
	// contains filtered or unexported fields
}

RowColIterInt is a Matrix iterator that returns, upon Fetch, chunks of rows in a sub-matrix of the specified columns (factors).

func (*RowColIterInt) FetchAdv ¶

func (rci *RowColIterInt) FetchAdv() (r *MatrixInt, done bool)

FetchAdv is the basic iterator operation. Returns a submatrix r that has a subset of columns and a chunk of contiguous rows from m.

func (*RowColIterInt) FetchAdv1 ¶

func (rci *RowColIterInt) FetchAdv1() (r *MatrixInt)

FetchAdv1 is the basic iterator operation but without the done return value. The returned r will be nil when there are no more rows to return. See also FetchAdv() which FetchAdv1() calls internally.

type RowIterFloat64 ¶

type RowIterFloat64 struct {
	// contains filtered or unexported fields
}

RowIter refers to a row-slice of a Matrix; see the GetRowIter() method on the Matrix below.

func NewRowIterFloat64 ¶

func NewRowIterFloat64(m *MatrixFloat64, beg, length, chunk int) *RowIterFloat64

NewRowIterFloat64 makes a row iterator. See also the method GetRowIter on Matrix.

func (*RowIterFloat64) Adv ¶

func (ri *RowIterFloat64) Adv() (done bool)

Adv advances the row iterator

func (*RowIterFloat64) Fetch ¶

func (ri *RowIterFloat64) Fetch() (r *MatrixFloat64, done bool)

Fetch returns the current row set, without advancing

func (*RowIterFloat64) FetchAdv ¶

func (ri *RowIterFloat64) FetchAdv() (r *MatrixFloat64, done bool)

FetchAdv returns the current row set and then advances the iterator, so the next Fetch or FetchAdv will read starting with the new beg row.

func (*RowIterFloat64) FetchAdv1 ¶

func (ri *RowIterFloat64) FetchAdv1() (r *MatrixFloat64)

FetchAdv1 just returns nil if done, without a separate done flag. Otherwise identical to FetchAdv() which it calls.

func (*RowIterFloat64) FetchAdvBX ¶

func (ri *RowIterFloat64) FetchAdvBX() (beg, endx int, done bool)

FetchAdvBX does FetchBX() and then advances the iterator to the next chunk.

Specifically, FetchAdvBX returns the current chunk of rows, pointed to by the [beg, endx) return values, and then advances the iterator to the next chunk of rows to be read.

The length of the returned range is always endx - beg; so [0, 0) is an empty range. The size of the range will be ri.chunk unless there are insufficient elements left before hitting the endxrow point.

The returned range is empty iff done is returned true; so always check done first. See also FetchBX to get a row range without advancing the iterator.

If done is returned true, then beg and endx are undefined and should be ignored.

func (*RowIterFloat64) FetchBX ¶

func (ri *RowIterFloat64) FetchBX() (beg, endx int, done bool)

FetchBX returns the current chunk of rows, pointed to by the [beg, endx) returned values. The returned range is empty iff done is returned true; so always check done first.

Concretely, the length of the returned range is always endx - beg; so [0, 0) is an empty range. The size of the range will be ri.chunk unless there are insufficient elements left before hitting the endxrow point.

See also FetchAdvBX to read the current chunk and then advance to the next.

If done is returned true, then beg and endx are undefined and should be ignored.

func (*RowIterFloat64) FetchBegEndx ¶

func (ri *RowIterFloat64) FetchBegEndx() (beg, endx int, done bool)

FetchBegEndx just supplies the beg and endx row index that Fetch would return. This can be used to coordinate/compare with other row iterators or the VectorSlicer.

type RowIterInt ¶

type RowIterInt struct {
	// contains filtered or unexported fields
}

RowIter refers to a row-slice of a Matrix; see the GetRowIter() method on the Matrix below.

func NewRowIterInt ¶

func NewRowIterInt(m *MatrixInt, beg, length, chunk int) *RowIterInt

NewRowIterInt makes a row iterator. See also the method GetRowIter on Matrix.

func (*RowIterInt) Adv ¶

func (ri *RowIterInt) Adv() (done bool)

Adv advances the row iterator

func (*RowIterInt) Fetch ¶

func (ri *RowIterInt) Fetch() (r *MatrixInt, done bool)

Fetch returns the current row set, without advancing

func (*RowIterInt) FetchAdv ¶

func (ri *RowIterInt) FetchAdv() (r *MatrixInt, done bool)

FetchAdv returns the current row set and then advances the iterator, so the next Fetch or FetchAdv will read starting with the new beg row.

func (*RowIterInt) FetchAdv1 ¶

func (ri *RowIterInt) FetchAdv1() (r *MatrixInt)

FetchAdv1 just returns nil if done, without a separate done flag. Otherwise identical to FetchAdv() which it calls.

func (*RowIterInt) FetchAdvBX ¶

func (ri *RowIterInt) FetchAdvBX() (beg, endx int, done bool)

FetchAdvBX does FetchBX() and then advances the iterator to the next chunk.

Specifically, FetchAdvBX returns the current chunk of rows, pointed to by the [beg, endx) return values, and then advances the iterator to the next chunk of rows to be read.

The length of the returned range is always endx - beg; so [0, 0) is an empty range. The size of the range will be ri.chunk unless there are insufficient elements left before hitting the endxrow point.

The returned range is empty iff done is returned true; so always check done first. See also FetchBX to get a row range without advancing the iterator.

If done is returned true, then beg and endx are undefined and should be ignored.

func (*RowIterInt) FetchBX ¶

func (ri *RowIterInt) FetchBX() (beg, endx int, done bool)

FetchBX returns the current chunk of rows, pointed to by the [beg, endx) returned values. The returned range is empty iff done is returned true; so always check done first.

Concretely, the length of the returned range is always endx - beg; so [0, 0) is an empty range. The size of the range will be ri.chunk unless there are insufficient elements left before hitting the endxrow point.

See also FetchAdvBX to read the current chunk and then advance to the next.

If done is returned true, then beg and endx are undefined and should be ignored.

func (*RowIterInt) FetchBegEndx ¶

func (ri *RowIterInt) FetchBegEndx() (beg, endx int, done bool)

FetchBegEndx just supplies the beg and endx row index that Fetch would return. This can be used to coordinate/compare with other row iterators or the VectorSlicer.

type SlurpDataFrame ¶

type SlurpDataFrame struct {

	// nheader = number of fields in the header; nCol will have 2 less for the matrix,
	// since the matrix lacks the first 2 fields which are strings.
	Nheader int

	// the full header, as a single string. Fields separated by commas.
	Header string

	// the header broken out into fields.
	// includes tm,sym as the first two, so is 2 more than nCol, typically;
	// assuming they were present in the original header.
	Colnames []string

	// matching exactly the columns of Matrix, Ncol long
	MatrixColnames []string

	// the numeric, float64 data.
	Matrix []float64

	// number of numeric data columns in matrix (not counting tm,sym)
	Ncol int

	// number of rows (not counting the header)
	Nrow int

	// Just the symbol (2nd column), from the first row.
	// They are probably all the same anyway.
	Sym string

	// the timestamps on the rows
	Tm []time.Time

	Frompath string

	// if the 2 string columns are missing
	Missing2strings bool

	// Instead of being all numeric features, instead we have
	// two parts, numeric features in NumericMat, and factors
	// in FactorMat, and if they were originally interlaced, they
	// are separated out into their own kind of matrix now.
	HasFactors bool
	Kindvec    []ColumnKind

	NumericMat *MatrixFloat64

	// Because we do not know how many factors we will need, and
	// because the initial converts all real features to factors,
	// we will initially deploy the FactorMat with a full int (64-bit integer)
	// worth of factor room. Later, perhaps, this can be reduced to uint16 or uint8,
	// but that requires domain knowledge of the features at hand
	// on a case-by-case basis. For now we give ourselves a fighting
	// chance of handling any real-value feature with the full
	// generality of int numbered factors.
	FactorMat *MatrixInt
	// contains filtered or unexported fields
}

SlurpDataFrame handles two types of data frames: those with all float64, and those with string columns. String columns are encoded into an integer factor matrix (see FactorMat).

The all float64 reading takes in a comma-separated-value (csv) file that has a special structure. After the header, the first two columns are expected to contain strings (a timestamp string and a symbol string, typically); and then all of the rest of the columns must be float64 values.

Since most of the work is parsing the float64, we try to do that in parallel and using large blocks of contiguous memory to allow the CPU caches and pipelining to be effective. We memory map the file to effect this.

When a .gz file path is supplied, this cannot be memory mapped; so we read it using the csv libraries, which can be slower.

func NewSlurpDataFrameNoStrings ¶

func NewSlurpDataFrameNoStrings() *SlurpDataFrame

func NewSlurpDataFrameTwoStrings ¶

func NewSlurpDataFrameTwoStrings() *SlurpDataFrame

func (*SlurpDataFrame) Disgorge ¶

func (df *SlurpDataFrame) Disgorge(path string) (err error)

Disgorge writes the matrix/data-frame back to disk. As you might guess, this is really slow. It is useful, however, to show that we parsed the original correctly, and can reconstruct it precisely if need be.

func (*SlurpDataFrame) ExtractCols ¶

func (sdf *SlurpDataFrame) ExtractCols(xi0, xi1 int, wcol []int) (n, nvar int, xx []float64, colnames []string)

ExtractCols extracts the wcol columns from sdf. The rows are from [xi0:xi1). See also ExtractXXYY if the X cols desired are a contiguous range.

xi0 : row index of first X data xi1 : the excluded endx index of a row that is just after the last included row

n returns the number of rows back in xx; nvar returns the number of variables back in xx; xx is the matrix (or vector if nvar == 1) of data extracted from sdf.

////

func (*SlurpDataFrame) ExtractXXYY ¶

func (sdf *SlurpDataFrame) ExtractXXYY(xi0, xi1, xj0, xj1, yj int) (n, nvar int, xx, yy []float64, colnames []string, targetname string)

ExtractXXYY extracts a contiguous X range and one Y variable from sdf. See also ExtractCols if the X cols desired are not a contiguous range. The Y and X conventions are that of regression in statistics. Y is the target (or targets) to be predicted, X is the set of independent predictors.

xi0 : row index of first X data xi1 : endx row to use (excluded)

xj0 : first X column to use xj1 : endx X column to use (excluded)

yj : target Y column to use, just another column index into the same data frame (targets at the end)

sdf : the data frame to grab the X and Y data from

Note: we need to copy the X and Y data anyway, generally.

///

func (*SlurpDataFrame) FindTm ¶

func (df *SlurpDataFrame) FindTm(tm time.Time, si time.Duration) (rowi int, err error)

FindTm locates the row at or prior to tm. Can return -1 if tm is before us, or -2 if tm is after us. Checks within 1 minute or <= 2*si too, near the first and last, since that is a common case where the actual sample row will be close but maybe not exactly at the boundaries.

func (*SlurpDataFrame) MatFullRow ¶

func (df *SlurpDataFrame) MatFullRow(irow int) []float64

func (*SlurpDataFrame) MatPartRow ¶

func (df *SlurpDataFrame) MatPartRow(irow, leftCount int) []float64

MatPartRow returns the first leftCount elements of irow; useful to pick out just the training data if it is all on the left side of the matrix.

func (*SlurpDataFrame) MatrixAt ¶

func (df *SlurpDataFrame) MatrixAt(irow, jcol int) float64

MatrixAt gets an element from the matrix, ignoring the first 2 string columns if they exist. irow and jcol are 0 based.

func (*SlurpDataFrame) ReadGzipped ¶

func (df *SlurpDataFrame) ReadGzipped(path string) (err error)

ReadGzipped is used when we have a compressed csv file we cannot directly memory map.

func (*SlurpDataFrame) Row ¶

func (df *SlurpDataFrame) Row(i int) (tm time.Time, dat []float64)

func (*SlurpDataFrame) RowSlice ¶

func (df *SlurpDataFrame) RowSlice(i int, nSpan int) (rowslice []float64)

RowSlice can use nSpan to request just a subset of columns. nSpan must be >= 1, else the rowslice returned will be empty. The returned rowslice will be nSpan in length, being the row i sliced to [0:nSpan]

func (*SlurpDataFrame) Slurp ¶

func (df *SlurpDataFrame) Slurp(path string) (err error)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

README ¶

slurpdf: a dataframe package for Go (golang) with parallel multi-core read-in.

Documentation ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func AlwaysPrintf ¶

func Caller ¶

func CountLines ¶

func CsvShowMain ¶

func DirExists ¶

func FileExists ¶

func FileLine ¶

func FileSize ¶

func MemoryMapFile ¶

func PP ¶

func Printf ¶

func SumSliceFloat64 ¶

func SumSliceInt ¶

func TSPrintf ¶

func VV ¶

Types ¶

type BoolMatrix ¶

func NewBoolMatrix ¶

func NewBoolMatrixColMajor ¶

func NewBoolMatrixColVec ¶

func (*BoolMatrix) AddRow ¶

func (*BoolMatrix) At ¶

func (*BoolMatrix) Cbind ¶

func (*BoolMatrix) Clone ¶

func (*BoolMatrix) CmetaDisplay ¶

func (*BoolMatrix) Col ¶

func (*BoolMatrix) DeleteCols ¶

func (*BoolMatrix) ExtractFeatAsMatrix ¶

func (*BoolMatrix) ExtractRowsColsAsMatrix ¶

func (*BoolMatrix) FillColMajor ¶

func (*BoolMatrix) FillRowMajor ¶

func (*BoolMatrix) GetRowIter ¶

func (*BoolMatrix) NewRowColIter ¶

func (*BoolMatrix) ReformatToColumnMajor ¶

func (*BoolMatrix) ReformatToRowMajor ¶

func (*BoolMatrix) ReformatToSliceOfColVec ¶

func (*BoolMatrix) Reshape ¶

func (*BoolMatrix) Row ¶

func (*BoolMatrix) RowChunk ¶

func (*BoolMatrix) Set ¶

func (*BoolMatrix) String ¶

func (*BoolMatrix) Transpose ¶

func (*BoolMatrix) WriteCol ¶

func (*BoolMatrix) WriteRow ¶

type BoolRowColIter ¶

func (*BoolRowColIter) FetchAdv ¶

func (*BoolRowColIter) FetchAdv1 ¶

type BoolRowIter ¶

func NewBoolRowIter ¶

func (*BoolRowIter) Adv ¶

func (*BoolRowIter) Fetch ¶

func (*BoolRowIter) FetchAdv ¶

func (*BoolRowIter) FetchAdv1 ¶

func (*BoolRowIter) FetchAdvBX ¶

func (*BoolRowIter) FetchBX ¶

func (*BoolRowIter) FetchBegEndx ¶

type ColVecBool ¶

type ColVecFloat64 ¶

type ColVecInt ¶

type ColumnKind ¶

type CsvLoader2 ¶

func NewCsvLoader2 ¶

func (*CsvLoader2) Close ¶

func (*CsvLoader2) ReadOne ¶

type FeatMeta ¶

func NewFeatMeta ¶

func (*FeatMeta) String ¶

type LexCodeSlice ¶

func (LexCodeSlice) Len ¶

func (LexCodeSlice) Less ¶

func (LexCodeSlice) String ¶

func (LexCodeSlice) Swap ¶

type MatrixFloat64 ¶