Documentation ¶
Overview ¶
steps to add a new function: 1. add an implementation here and add it to FuncProj 2. specify its return types (return_types.go) 3. test both implementations above
Index ¶
- Constants
- Variables
- func ChunksEqual(c1 Chunk, c2 Chunk) bool
- func DatesEqual(a, b date) bool
- func DatesGreaterThan(a, b date) bool
- func DatesGreaterThanEqual(a, b date) bool
- func DatesLessThan(a, b date) bool
- func DatesLessThanEqual(a, b date) bool
- func DatesNotEqual(a, b date) bool
- func DatetimesEqual(a, b datetime) bool
- func DatetimesGreaterThan(a, b datetime) bool
- func DatetimesGreaterThanEqual(a, b datetime) bool
- func DatetimesLessThan(a, b datetime) bool
- func DatetimesLessThanEqual(a, b datetime) bool
- func DatetimesNotEqual(a, b datetime) bool
- func NewAggregator(function string, distinct bool) (func(...Dtype) (*AggState, error), error)
- type AggState
- type Chunk
- func Deserialize(r io.Reader, Dtype Dtype) (Chunk, error)
- func EvalAdd(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalAnd(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalDivide(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalEq(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalGt(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalGte(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalLt(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalLte(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalMultiply(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalNeq(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalNot(c Chunk) (Chunk, error)
- func EvalOr(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalSubtract(c1 Chunk, c2 Chunk) (Chunk, error)
- func NewChunkFromSchema(schema Schema) Chunk
- func NewChunkLiteralAuto(s string, length int) (Chunk, error)
- func NewChunkLiteralTyped(s string, dtype Dtype, length int) (Chunk, error)
- type ChunkBools
- func (rc *ChunkBools) AddValue(s string) error
- func (rc *ChunkBools) AddValues(vals []string) error
- func (rc *ChunkBools) Append(tc Chunk) error
- func (bc *ChunkBools) Base() *baseChunk
- func (rc *ChunkBools) Clone() Chunk
- func (rc *ChunkBools) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkBools) Dtype() Dtype
- func (rc *ChunkBools) Hash(position int, hashes []uint64)
- func (rc *ChunkBools) JSONLiteral(n int) (string, bool)
- func (bc *ChunkBools) Len() int
- func (bc *ChunkBools) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkBools) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkBools) Truths() *bitmap.Bitmap
- func (rc *ChunkBools) WriteTo(w io.Writer) (int64, error)
- type ChunkDates
- func (rc *ChunkDates) AddValue(s string) error
- func (rc *ChunkDates) AddValues(vals []string) error
- func (rc *ChunkDates) Append(tc Chunk) error
- func (bc *ChunkDates) Base() *baseChunk
- func (rc *ChunkDates) Clone() Chunk
- func (rc *ChunkDates) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkDates) Dtype() Dtype
- func (rc *ChunkDates) Hash(position int, hashes []uint64)
- func (rc *ChunkDates) JSONLiteral(n int) (string, bool)
- func (bc *ChunkDates) Len() int
- func (bc *ChunkDates) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkDates) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkDates) WriteTo(w io.Writer) (int64, error)
- type ChunkDatetimes
- func (rc *ChunkDatetimes) AddValue(s string) error
- func (rc *ChunkDatetimes) AddValues(vals []string) error
- func (rc *ChunkDatetimes) Append(tc Chunk) error
- func (bc *ChunkDatetimes) Base() *baseChunk
- func (rc *ChunkDatetimes) Clone() Chunk
- func (rc *ChunkDatetimes) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkDatetimes) Dtype() Dtype
- func (rc *ChunkDatetimes) Hash(position int, hashes []uint64)
- func (rc *ChunkDatetimes) JSONLiteral(n int) (string, bool)
- func (bc *ChunkDatetimes) Len() int
- func (bc *ChunkDatetimes) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkDatetimes) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkDatetimes) WriteTo(w io.Writer) (int64, error)
- type ChunkFloats
- func (rc *ChunkFloats) AddValue(s string) error
- func (rc *ChunkFloats) AddValues(vals []string) error
- func (rc *ChunkFloats) Append(tc Chunk) error
- func (bc *ChunkFloats) Base() *baseChunk
- func (rc *ChunkFloats) Clone() Chunk
- func (rc *ChunkFloats) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkFloats) Dtype() Dtype
- func (rc *ChunkFloats) Hash(position int, hashes []uint64)
- func (rc *ChunkFloats) JSONLiteral(n int) (string, bool)
- func (bc *ChunkFloats) Len() int
- func (bc *ChunkFloats) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkFloats) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkFloats) WriteTo(w io.Writer) (int64, error)
- type ChunkInts
- func (rc *ChunkInts) AddValue(s string) error
- func (rc *ChunkInts) AddValues(vals []string) error
- func (rc *ChunkInts) Append(tc Chunk) error
- func (bc *ChunkInts) Base() *baseChunk
- func (rc *ChunkInts) Clone() Chunk
- func (rc *ChunkInts) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkInts) Dtype() Dtype
- func (rc *ChunkInts) Hash(position int, hashes []uint64)
- func (rc *ChunkInts) JSONLiteral(n int) (string, bool)
- func (bc *ChunkInts) Len() int
- func (bc *ChunkInts) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkInts) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkInts) WriteTo(w io.Writer) (int64, error)
- type ChunkNulls
- func (rc *ChunkNulls) AddValue(s string) error
- func (rc *ChunkNulls) AddValues(vals []string) error
- func (rc *ChunkNulls) Append(tc Chunk) error
- func (bc *ChunkNulls) Base() *baseChunk
- func (rc *ChunkNulls) Clone() Chunk
- func (rc *ChunkNulls) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkNulls) Dtype() Dtype
- func (rc *ChunkNulls) Hash(position int, hashes []uint64)
- func (rc *ChunkNulls) JSONLiteral(n int) (string, bool)
- func (bc *ChunkNulls) Len() int
- func (bc *ChunkNulls) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkNulls) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkNulls) WriteTo(w io.Writer) (int64, error)
- type ChunkStrings
- func (rc *ChunkStrings) AddValue(s string) error
- func (rc *ChunkStrings) AddValues(vals []string) error
- func (rc *ChunkStrings) Append(tc Chunk) error
- func (bc *ChunkStrings) Base() *baseChunk
- func (rc *ChunkStrings) Clone() Chunk
- func (rc *ChunkStrings) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkStrings) Dtype() Dtype
- func (rc *ChunkStrings) Hash(position int, hashes []uint64)
- func (rc *ChunkStrings) JSONLiteral(n int) (string, bool)
- func (bc *ChunkStrings) Len() int
- func (bc *ChunkStrings) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkStrings) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkStrings) WriteTo(w io.Writer) (int64, error)
- type Dtype
- type Schema
- type TableSchema
- type TypeGuesser
Constants ¶
const ALL_ONES = uint64(1<<64 - 1)
const ALL_ZEROS = uint64(0)
const DATETIME_BYTE_SIZE = 8
const DATE_BYTE_SIZE = 4
Variables ¶
var FuncProj = map[string]func(...Chunk) (Chunk, error){ "now": evalNow, "version": evalVersion, "nullif": evalNullIf, "coalesce": evalCoalesce, "round": evalRound, "sin": numFunc(math.Sin), "cos": numFunc(math.Cos), "tan": numFunc(math.Tan), "asin": numFunc(math.Asin), "acos": numFunc(math.Acos), "atan": numFunc(math.Atan), "sinh": numFunc(math.Sinh), "cosh": numFunc(math.Cosh), "tanh": numFunc(math.Tanh), "sqrt": numFunc(math.Sqrt), "exp": numFunc(math.Exp), "exp2": numFunc(math.Exp2), "log": numFunc(math.Log), "log2": numFunc(math.Log2), "log10": numFunc(math.Log10), "trim": stringFunc(strings.TrimSpace), "lower": stringFunc(strings.ToLower), "upper": stringFunc(strings.ToUpper), "left": evalLeft, "split_part": evalSplitPart, }
TODO: this will be hard to cover properly, so let's make sure we test everything explicitly. ARCH: we're not treating literals any differently, but since they share the same backing store
as non-literals, we're okay... is that okay?
Functions ¶
func ChunksEqual ¶
ChunksEqual compares two chunks, even if they contain []float64 data. Consider making this lenient enough to compare only the relevant bits in ChunkBools.
func DatesEqual ¶
func DatesEqual(a, b date) bool
func DatesGreaterThan ¶
func DatesGreaterThan(a, b date) bool
func DatesGreaterThanEqual ¶
func DatesGreaterThanEqual(a, b date) bool
func DatesLessThan ¶
func DatesLessThan(a, b date) bool
func DatesLessThanEqual ¶
func DatesLessThanEqual(a, b date) bool
func DatesNotEqual ¶
func DatesNotEqual(a, b date) bool
func DatetimesEqual ¶
func DatetimesEqual(a, b datetime) bool
func DatetimesGreaterThan ¶
func DatetimesGreaterThan(a, b datetime) bool
func DatetimesGreaterThanEqual ¶
func DatetimesGreaterThanEqual(a, b datetime) bool
func DatetimesLessThan ¶
func DatetimesLessThan(a, b datetime) bool
func DatetimesLessThanEqual ¶
func DatetimesLessThanEqual(a, b datetime) bool
func DatetimesNotEqual ¶
func DatetimesNotEqual(a, b datetime) bool
func NewAggregator ¶
NewAggregator implements a constructor for various aggregating functions. We got inspired by Postgres' functions https://www.postgresql.org/docs/12/functions-aggregate.html
- not implemented: xml/json functions (don't have the data types), array_agg (no arrays), every (just an alias), bit_and/bit_or (doesn't seem useful for us)
- implemented: min, max, sum, avg, count
- planned: bool_and, bool_or, string_agg
- thinking: countDistinct, sketch-based approxCountDistinct
ARCH: function string -> uint8 const? dtypes are types of inputs - rename? TODO: check for function existence OPTIM: the switch(function) could be hoisted outside the closure (would work as a function existence validator)
Types ¶
type Chunk ¶
type Chunk interface { Dtype() Dtype AddValue(string) error AddValues([]string) error // consider merging AddValues and AddValue (using varargs) WriteTo(io.Writer) (int64, error) Prune(*bitmap.Bitmap) Chunk Append(Chunk) error Hash(int, []uint64) Clone() Chunk JSONLiteral(int) (string, bool) // the bool stands for 'ok' (not null) Compare(bool, bool, int, int) int // contains filtered or unexported methods }
Chunk defines a part of a column - constant type, stored contiguously
func Deserialize ¶
Deserialize reads a chunk from a reader. This shouldn't really accept a Dtype - at this point we're requiring it, because we don't serialize Dtypes into the binary representation - but that's just because we always have the schema at hand... but will we always have it? Shouldn't the files be readable as standalone files? OPTIM: shouldn't we deserialize based on a byte slice instead? We already have it, so we're just duplicating it using a byte buffer. OPTIM: we may be able to safely cast these byte slices in the future - see https://github.com/golang/go/issues/19367
func EvalDivide ¶
different return type for ints! should we perhaps cast to make this more systematic? check for division by zero (gives +- infty, which will break json?)
func NewChunkFromSchema ¶
NewChunkFromSchema creates a new Chunk based on the column schema provided
func NewChunkLiteralAuto ¶
NewChunkLiteralAuto creates a chunk that only contains a single value in the whole chunk. It's useful in e.g. 'foo > 1', where we can convert the '1' to a whole chunk. OPTIM: we're using single-value slices, should we perhaps have a value specific for each literal to avoid working with slices (stack allocation etc.)?
type ChunkBools ¶
type ChunkBools struct {
// contains filtered or unexported fields
}
ChunkBools defines a backing struct for a chunk of boolean values
func NewChunkBoolsFromBitmap ¶
func NewChunkBoolsFromBitmap(bm *bitmap.Bitmap) *ChunkBools
NewChunkBoolsFromBitmap creates a new bool chunk, but in doing so, doesn't clone the incoming bitmap, it uses it as is - the caller might want to clone it if it aims to mutate it in the future
func NewChunkLiteralBools ¶
func NewChunkLiteralBools(value bool, length int) *ChunkBools
func (*ChunkBools) AddValue ¶
func (rc *ChunkBools) AddValue(s string) error
AddValue takes in a string representation of a value and converts it into a value suited for this chunk
func (*ChunkBools) AddValues ¶
func (rc *ChunkBools) AddValues(vals []string) error
AddValues is a helper method, it just calls AddValue repeatedly
func (*ChunkBools) Append ¶
func (rc *ChunkBools) Append(tc Chunk) error
Append adds a chunk of the same type at the end of this one (in place update)
func (*ChunkBools) Clone ¶
func (rc *ChunkBools) Clone() Chunk
func (*ChunkBools) Hash ¶
func (rc *ChunkBools) Hash(position int, hashes []uint64)
Hash hashes this chunk's values into a provided container
func (*ChunkBools) JSONLiteral ¶
func (rc *ChunkBools) JSONLiteral(n int) (string, bool)
func (*ChunkBools) Len ¶
func (bc *ChunkBools) Len() int
ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?
func (*ChunkBools) Nullify ¶
ARCH: Nullify does NOT switch the data values to be nulls/empty as well
func (*ChunkBools) Prune ¶
func (rc *ChunkBools) Prune(bm *bitmap.Bitmap) Chunk
Prune filters this chunk and only preserves values for which the bitmap is set
func (*ChunkBools) Truths ¶
func (rc *ChunkBools) Truths() *bitmap.Bitmap
Truths returns only true values in this boolean column's bitmap - remove those that are null - we use this for filtering, when we're interested in non-null true values (to select given rows)
type ChunkDates ¶
type ChunkDates struct {
// contains filtered or unexported fields
}
ChunkDates defines a backing struct for a chunk of date values
func NewChunkLiteralDates ¶
func NewChunkLiteralDates(value date, length int) *ChunkDates
func (*ChunkDates) AddValue ¶
func (rc *ChunkDates) AddValue(s string) error
func (*ChunkDates) AddValues ¶
func (rc *ChunkDates) AddValues(vals []string) error
AddValues is a helper method, it just calls AddValue repeatedly
func (*ChunkDates) Append ¶
func (rc *ChunkDates) Append(tc Chunk) error
Append adds a chunk of the same type at the end of this one (in place update)
func (*ChunkDates) Clone ¶
func (rc *ChunkDates) Clone() Chunk
func (*ChunkDates) Hash ¶
func (rc *ChunkDates) Hash(position int, hashes []uint64)
func (*ChunkDates) JSONLiteral ¶
func (rc *ChunkDates) JSONLiteral(n int) (string, bool)
func (*ChunkDates) Len ¶
func (bc *ChunkDates) Len() int
ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?
func (*ChunkDates) Nullify ¶
ARCH: Nullify does NOT switch the data values to be nulls/empty as well
type ChunkDatetimes ¶
type ChunkDatetimes struct {
// contains filtered or unexported fields
}
func NewChunkLiteralDatetimes ¶ added in v0.1.3
func NewChunkLiteralDatetimes(value datetime, length int) *ChunkDatetimes
func (*ChunkDatetimes) AddValue ¶
func (rc *ChunkDatetimes) AddValue(s string) error
func (*ChunkDatetimes) AddValues ¶
func (rc *ChunkDatetimes) AddValues(vals []string) error
AddValues is a helper method, it just calls AddValue repeatedly
func (*ChunkDatetimes) Append ¶
func (rc *ChunkDatetimes) Append(tc Chunk) error
Append adds a chunk of the same type at the end of this one (in place update)
func (*ChunkDatetimes) Clone ¶
func (rc *ChunkDatetimes) Clone() Chunk
func (*ChunkDatetimes) Compare ¶
func (rc *ChunkDatetimes) Compare(asc, nullsFirst bool, i, j int) int
func (*ChunkDatetimes) Dtype ¶
func (rc *ChunkDatetimes) Dtype() Dtype
Dtype returns the type of this chunk
func (*ChunkDatetimes) Hash ¶
func (rc *ChunkDatetimes) Hash(position int, hashes []uint64)
func (*ChunkDatetimes) JSONLiteral ¶
func (rc *ChunkDatetimes) JSONLiteral(n int) (string, bool)
func (*ChunkDatetimes) Len ¶
func (bc *ChunkDatetimes) Len() int
ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?
func (*ChunkDatetimes) Nullify ¶
ARCH: Nullify does NOT switch the data values to be nulls/empty as well
type ChunkFloats ¶
type ChunkFloats struct {
// contains filtered or unexported fields
}
ChunkFloats defines a backing struct for a chunk of floating point values
func NewChunkFloatsFromSlice ¶
func NewChunkFloatsFromSlice(data []float64, nulls *bitmap.Bitmap) *ChunkFloats
func NewChunkLiteralFloats ¶
func NewChunkLiteralFloats(value float64, length int) *ChunkFloats
func (*ChunkFloats) AddValue ¶
func (rc *ChunkFloats) AddValue(s string) error
AddValue takes in a string representation of a value and converts it into a value suited for this chunk. Let's really consider adding standard nulls here, it will probably make our lives a lot easier.
func (*ChunkFloats) AddValues ¶
func (rc *ChunkFloats) AddValues(vals []string) error
AddValues is a helper method, it just calls AddValue repeatedly
func (*ChunkFloats) Append ¶
func (rc *ChunkFloats) Append(tc Chunk) error
Append adds a chunk of the same type at the end of this one (in place update)
func (*ChunkFloats) Clone ¶
func (rc *ChunkFloats) Clone() Chunk
func (*ChunkFloats) Dtype ¶
func (rc *ChunkFloats) Dtype() Dtype
Dtype returns the type of this chunk
func (*ChunkFloats) Hash ¶
func (rc *ChunkFloats) Hash(position int, hashes []uint64)
Hash hashes this chunk's values into a provided container. OPTIM/TODO(next): do we need a fnv hasher for ints/floats/dates? We can just take the uint64 representation of these values... or not?
func (*ChunkFloats) JSONLiteral ¶
func (rc *ChunkFloats) JSONLiteral(n int) (string, bool)
func (*ChunkFloats) Len ¶
func (bc *ChunkFloats) Len() int
ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?
func (*ChunkFloats) Nullify ¶
ARCH: Nullify does NOT switch the data values to be nulls/empty as well
type ChunkInts ¶
type ChunkInts struct {
// contains filtered or unexported fields
}
ChunkInts defines a backing struct for a chunk of integer values
func NewChunkIntsFromSlice ¶
the next few functions could use some generics
func NewChunkLiteralInts ¶
func (*ChunkInts) AddValue ¶
AddValue takes in a string representation of a value and converts it into a value suited for this chunk
func (*ChunkInts) Append ¶
Append adds a chunk of the same type at the end of this one (in place update)
func (*ChunkInts) Compare ¶
ARCH: this could be made entirely generic by allowing an interface `nthValue(int) T` to genericise v1/v2
EXCEPT for bools :-( (not comparable)
func (*ChunkInts) Hash ¶
Hash hashes this chunk's values into a provided container. OPTIM: maphash might be faster than fnv? Test it and if it is so, implement everywhere, but be careful about the seed (needs to be the same for all chunks). Careful about maphash: "The hash value of a given byte sequence is consistent within a single process, but will be different in different processes." Oh and I rebenchmarked maphash and fnv and found maphash to be much slower (despite no allocs). Also, check this https://github.com/segmentio/fasthash/ (via https://segment.com/blog/allocation-efficiency-in-high-performance-go-services/) - they reimplement fnv using stack allocation only
- we tested it and got a 90% speedup (no allocs, shorter code) - so let's consider it, it's in the fasthash branch
func (*ChunkInts) Len ¶
func (bc *ChunkInts) Len() int
ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?
type ChunkNulls ¶
type ChunkNulls struct {
// contains filtered or unexported fields
}
ChunkNulls defines a backing struct for a chunk of null values. Since it's all nulls, we only need to know how many there are.
func (*ChunkNulls) AddValue ¶
func (rc *ChunkNulls) AddValue(s string) error
AddValue takes in a string representation of a value and converts it into a value suited for this chunk
func (*ChunkNulls) AddValues ¶
func (rc *ChunkNulls) AddValues(vals []string) error
AddValues is a helper method, it just calls AddValue repeatedly
func (*ChunkNulls) Append ¶
func (rc *ChunkNulls) Append(tc Chunk) error
Append adds a chunk of the same type at the end of this one (in place update)
func (*ChunkNulls) Clone ¶
func (rc *ChunkNulls) Clone() Chunk
func (*ChunkNulls) Hash ¶
func (rc *ChunkNulls) Hash(position int, hashes []uint64)
Hash hashes this chunk's values into a provided container
func (*ChunkNulls) JSONLiteral ¶
func (rc *ChunkNulls) JSONLiteral(n int) (string, bool)
func (*ChunkNulls) Len ¶
func (bc *ChunkNulls) Len() int
ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?
func (*ChunkNulls) Nullify ¶
ARCH: Nullify does NOT switch the data values to be nulls/empty as well
type ChunkStrings ¶
type ChunkStrings struct {
// contains filtered or unexported fields
}
ChunkStrings defines a backing struct for a chunk of string values
func NewChunkLiteralStrings ¶
func NewChunkLiteralStrings(value string, length int) *ChunkStrings
func (*ChunkStrings) AddValue ¶
func (rc *ChunkStrings) AddValue(s string) error
AddValue takes in a string representation of a value and converts it into a value suited for this chunk
func (*ChunkStrings) AddValues ¶
func (rc *ChunkStrings) AddValues(vals []string) error
AddValues is a helper method, it just calls AddValue repeatedly
func (*ChunkStrings) Append ¶
func (rc *ChunkStrings) Append(tc Chunk) error
Append adds a chunk of the same type at the end of this one (in place update)
func (*ChunkStrings) Clone ¶
func (rc *ChunkStrings) Clone() Chunk
func (*ChunkStrings) Dtype ¶
func (rc *ChunkStrings) Dtype() Dtype
Dtype returns the type of this chunk
func (*ChunkStrings) Hash ¶
func (rc *ChunkStrings) Hash(position int, hashes []uint64)
Hash hashes this chunk's values into a provided container
func (*ChunkStrings) JSONLiteral ¶
func (rc *ChunkStrings) JSONLiteral(n int) (string, bool)
func (*ChunkStrings) Len ¶
func (bc *ChunkStrings) Len() int
ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?
func (*ChunkStrings) Nullify ¶
ARCH: Nullify does NOT switch the data values to be nulls/empty as well
type Dtype ¶
type Dtype uint8
Dtype denotes the data type of a given object (e.g. int or string)
const ( DtypeInvalid Dtype = iota DtypeNull DtypeString DtypeInt DtypeFloat DtypeBool DtypeDate DtypeDatetime // more to be added DtypeMax )
individual dtypes defined as a sequence
func (Dtype) MarshalJSON ¶
MarshalJSON returns the JSON representation of a dtype (stringified + json string). We want Dtypes to be marshaled within Schema correctly; without this they'd be returned as an integer (even with ",string" tags).
func (*Dtype) UnmarshalJSON ¶
UnmarshalJSON deserialises a given dtype from a JSON value
type Schema ¶
type Schema struct { Name string `json:"name"` Dtype Dtype `json:"dtype"` Nullable bool `json:"nullable"` }
Schema defines all the necessary properties of a column
type TableSchema ¶
type TableSchema []Schema
TableSchema is a collection of column schemas
func (*TableSchema) LocateColumn ¶
func (schema *TableSchema) LocateColumn(s string) (int, Schema, error)
LocateColumn returns a column within a schema - its position and definition; error is triggered if this column is not found or the schema is nil
func (*TableSchema) LocateColumnCaseInsensitive ¶
func (schema *TableSchema) LocateColumnCaseInsensitive(s string) (int, Schema, error)
LocateColumnCaseInsensitive works just like LocateColumn, but it ignores casing ARCH: we could have used strings.EqualFold, but a) we have one static input (s), so we can
amortise the case lowering, b) the extra correctness in EqualFold is irrelevant here, because of our column naming restrictions
type TypeGuesser ¶
type TypeGuesser struct {
// contains filtered or unexported fields
}
TypeGuesser contains state necessary for inferring types from a stream of strings
func (*TypeGuesser) AddValue ¶
func (tg *TypeGuesser) AddValue(s string)
AddValue feeds a new value to a type guesser
func (*TypeGuesser) InferredType ¶
func (tg *TypeGuesser) InferredType() Schema
InferredType returns the best guess of a type for a given stream of strings