column

package

v0.1.3 Latest Latest Go to latest Published: Sep 10, 2021 License: Apache-2.0 Imports: 13 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/kokes/smda

Links

Open Source Insights

Documentation ¶

Overview ¶

Steps to add a new function: 1. add an implementation here and register it in FuncProj; 2. specify its return types (return_types.go); 3. test both of the above.

Index ¶

Constants
Variables
func ChunksEqual(c1 Chunk, c2 Chunk) bool
func DatesEqual(a, b date) bool
func DatesGreaterThan(a, b date) bool
func DatesGreaterThanEqual(a, b date) bool
func DatesLessThan(a, b date) bool
func DatesLessThanEqual(a, b date) bool
func DatesNotEqual(a, b date) bool
func DatetimesEqual(a, b datetime) bool
func DatetimesGreaterThan(a, b datetime) bool
func DatetimesGreaterThanEqual(a, b datetime) bool
func DatetimesLessThan(a, b datetime) bool
func DatetimesLessThanEqual(a, b datetime) bool
func DatetimesNotEqual(a, b datetime) bool
func NewAggregator(function string, distinct bool) (func(...Dtype) (*AggState, error), error)
type AggState
type Chunk
- func Deserialize(r io.Reader, Dtype Dtype) (Chunk, error)
- func EvalAdd(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalAnd(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalDivide(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalEq(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalGt(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalGte(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalLt(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalLte(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalMultiply(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalNeq(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalNot(c Chunk) (Chunk, error)
- func EvalOr(c1 Chunk, c2 Chunk) (Chunk, error)
- func EvalSubtract(c1 Chunk, c2 Chunk) (Chunk, error)
- func NewChunkFromSchema(schema Schema) Chunk
- func NewChunkLiteralAuto(s string, length int) (Chunk, error)
- func NewChunkLiteralTyped(s string, dtype Dtype, length int) (Chunk, error)
type ChunkBools
- func NewChunkBoolsFromBitmap(bm *bitmap.Bitmap) *ChunkBools
- func NewChunkLiteralBools(value bool, length int) *ChunkBools
- func (rc *ChunkBools) AddValue(s string) error
- func (rc *ChunkBools) AddValues(vals []string) error
- func (rc *ChunkBools) Append(tc Chunk) error
- func (bc *ChunkBools) Base() *baseChunk
- func (rc *ChunkBools) Clone() Chunk
- func (rc *ChunkBools) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkBools) Dtype() Dtype
- func (rc *ChunkBools) Hash(position int, hashes []uint64)
- func (rc *ChunkBools) JSONLiteral(n int) (string, bool)
- func (bc *ChunkBools) Len() int
- func (bc *ChunkBools) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkBools) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkBools) Truths() *bitmap.Bitmap
- func (rc *ChunkBools) WriteTo(w io.Writer) (int64, error)
type ChunkDates
- func NewChunkLiteralDates(value date, length int) *ChunkDates
- func (rc *ChunkDates) AddValue(s string) error
- func (rc *ChunkDates) AddValues(vals []string) error
- func (rc *ChunkDates) Append(tc Chunk) error
- func (bc *ChunkDates) Base() *baseChunk
- func (rc *ChunkDates) Clone() Chunk
- func (rc *ChunkDates) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkDates) Dtype() Dtype
- func (rc *ChunkDates) Hash(position int, hashes []uint64)
- func (rc *ChunkDates) JSONLiteral(n int) (string, bool)
- func (bc *ChunkDates) Len() int
- func (bc *ChunkDates) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkDates) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkDates) WriteTo(w io.Writer) (int64, error)
type ChunkDatetimes
- func NewChunkLiteralDatetimes(value datetime, length int) *ChunkDatetimes
- func (rc *ChunkDatetimes) AddValue(s string) error
- func (rc *ChunkDatetimes) AddValues(vals []string) error
- func (rc *ChunkDatetimes) Append(tc Chunk) error
- func (bc *ChunkDatetimes) Base() *baseChunk
- func (rc *ChunkDatetimes) Clone() Chunk
- func (rc *ChunkDatetimes) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkDatetimes) Dtype() Dtype
- func (rc *ChunkDatetimes) Hash(position int, hashes []uint64)
- func (rc *ChunkDatetimes) JSONLiteral(n int) (string, bool)
- func (bc *ChunkDatetimes) Len() int
- func (bc *ChunkDatetimes) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkDatetimes) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkDatetimes) WriteTo(w io.Writer) (int64, error)
type ChunkFloats
- func NewChunkFloatsFromSlice(data []float64, nulls *bitmap.Bitmap) *ChunkFloats
- func NewChunkLiteralFloats(value float64, length int) *ChunkFloats
- func (rc *ChunkFloats) AddValue(s string) error
- func (rc *ChunkFloats) AddValues(vals []string) error
- func (rc *ChunkFloats) Append(tc Chunk) error
- func (bc *ChunkFloats) Base() *baseChunk
- func (rc *ChunkFloats) Clone() Chunk
- func (rc *ChunkFloats) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkFloats) Dtype() Dtype
- func (rc *ChunkFloats) Hash(position int, hashes []uint64)
- func (rc *ChunkFloats) JSONLiteral(n int) (string, bool)
- func (bc *ChunkFloats) Len() int
- func (bc *ChunkFloats) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkFloats) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkFloats) WriteTo(w io.Writer) (int64, error)
type ChunkInts
- func NewChunkIntsFromSlice(data []int64, nulls *bitmap.Bitmap) *ChunkInts
- func NewChunkLiteralInts(value int64, length int) *ChunkInts
- func (rc *ChunkInts) AddValue(s string) error
- func (rc *ChunkInts) AddValues(vals []string) error
- func (rc *ChunkInts) Append(tc Chunk) error
- func (bc *ChunkInts) Base() *baseChunk
- func (rc *ChunkInts) Clone() Chunk
- func (rc *ChunkInts) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkInts) Dtype() Dtype
- func (rc *ChunkInts) Hash(position int, hashes []uint64)
- func (rc *ChunkInts) JSONLiteral(n int) (string, bool)
- func (bc *ChunkInts) Len() int
- func (bc *ChunkInts) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkInts) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkInts) WriteTo(w io.Writer) (int64, error)
type ChunkNulls
- func (rc *ChunkNulls) AddValue(s string) error
- func (rc *ChunkNulls) AddValues(vals []string) error
- func (rc *ChunkNulls) Append(tc Chunk) error
- func (bc *ChunkNulls) Base() *baseChunk
- func (rc *ChunkNulls) Clone() Chunk
- func (rc *ChunkNulls) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkNulls) Dtype() Dtype
- func (rc *ChunkNulls) Hash(position int, hashes []uint64)
- func (rc *ChunkNulls) JSONLiteral(n int) (string, bool)
- func (bc *ChunkNulls) Len() int
- func (bc *ChunkNulls) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkNulls) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkNulls) WriteTo(w io.Writer) (int64, error)
type ChunkStrings
- func NewChunkLiteralStrings(value string, length int) *ChunkStrings
- func (rc *ChunkStrings) AddValue(s string) error
- func (rc *ChunkStrings) AddValues(vals []string) error
- func (rc *ChunkStrings) Append(tc Chunk) error
- func (bc *ChunkStrings) Base() *baseChunk
- func (rc *ChunkStrings) Clone() Chunk
- func (rc *ChunkStrings) Compare(asc, nullsFirst bool, i, j int) int
- func (rc *ChunkStrings) Dtype() Dtype
- func (rc *ChunkStrings) Hash(position int, hashes []uint64)
- func (rc *ChunkStrings) JSONLiteral(n int) (string, bool)
- func (bc *ChunkStrings) Len() int
- func (bc *ChunkStrings) Nullify(bm *bitmap.Bitmap)
- func (rc *ChunkStrings) Prune(bm *bitmap.Bitmap) Chunk
- func (rc *ChunkStrings) WriteTo(w io.Writer) (int64, error)
type Dtype
- func (dt Dtype) MarshalJSON() ([]byte, error)
- func (dt Dtype) String() string
- func (dt *Dtype) UnmarshalJSON(data []byte) error
type Schema
type TableSchema
- func (schema *TableSchema) LocateColumn(s string) (int, Schema, error)
- func (schema *TableSchema) LocateColumnCaseInsensitive(s string) (int, Schema, error)
type TypeGuesser
- func NewTypeGuesser() *TypeGuesser
- func (tg *TypeGuesser) AddValue(s string)
- func (tg *TypeGuesser) InferredType() Schema

Constants ¶

View Source

const ALL_ONES = uint64(1<<64 - 1)

View Source

const ALL_ZEROS = uint64(0)

View Source

const DATETIME_BYTE_SIZE = 8

View Source

const DATE_BYTE_SIZE = 4

Variables ¶

View Source

var FuncProj = map[string]func(...Chunk) (Chunk, error){
	"now":      evalNow,
	"version":  evalVersion,
	"nullif":   evalNullIf,
	"coalesce": evalCoalesce,
	"round":    evalRound,
	"sin":      numFunc(math.Sin),
	"cos":      numFunc(math.Cos),
	"tan":      numFunc(math.Tan),
	"asin":     numFunc(math.Asin),
	"acos":     numFunc(math.Acos),
	"atan":     numFunc(math.Atan),
	"sinh":     numFunc(math.Sinh),
	"cosh":     numFunc(math.Cosh),
	"tanh":     numFunc(math.Tanh),
	"sqrt":     numFunc(math.Sqrt),
	"exp":      numFunc(math.Exp),
	"exp2":     numFunc(math.Exp2),
	"log":      numFunc(math.Log),
	"log2":     numFunc(math.Log2),
	"log10":    numFunc(math.Log10),

	"trim":       stringFunc(strings.TrimSpace),
	"lower":      stringFunc(strings.ToLower),
	"upper":      stringFunc(strings.ToUpper),
	"left":       evalLeft,
	"split_part": evalSplitPart,
}

TODO: this will be hard to cover properly, so let's make sure we test everything explicitly. ARCH: we're not treating literals any differently, but since they share the same backing store as non-literals, we're okay... is that okay?

Functions ¶

func ChunksEqual ¶

func ChunksEqual(c1 Chunk, c2 Chunk) bool

ChunksEqual compares two chunks, even if they contain []float64 data. Consider making this lenient enough to compare only the relevant bits in ChunkBools.

func DatesEqual ¶

func DatesEqual(a, b date) bool

func DatesGreaterThan ¶

func DatesGreaterThan(a, b date) bool

func DatesGreaterThanEqual ¶

func DatesGreaterThanEqual(a, b date) bool

func DatesLessThan ¶

func DatesLessThan(a, b date) bool

func DatesLessThanEqual ¶

func DatesLessThanEqual(a, b date) bool

func DatesNotEqual ¶

func DatesNotEqual(a, b date) bool

func DatetimesEqual ¶

func DatetimesEqual(a, b datetime) bool

func DatetimesGreaterThan ¶

func DatetimesGreaterThan(a, b datetime) bool

func DatetimesGreaterThanEqual ¶

func DatetimesGreaterThanEqual(a, b datetime) bool

func DatetimesLessThan ¶

func DatetimesLessThan(a, b datetime) bool

func DatetimesLessThanEqual ¶

func DatetimesLessThanEqual(a, b datetime) bool

func DatetimesNotEqual ¶

func DatetimesNotEqual(a, b datetime) bool

func NewAggregator ¶

func NewAggregator(function string, distinct bool) (func(...Dtype) (*AggState, error), error)

NewAggregator implements a constructor for various aggregating functions. We got inspired by Postgres' functions https://www.postgresql.org/docs/12/functions-aggregate.html

not implemented: xml/json functions (don't have the data types), array_agg (no arrays), every (just an alias), bit_and/bit_or (doesn't seem useful for us)
implemented: min, max, sum, avg, count
planned: bool_and, bool_or, string_agg
thinking: countDistinct, sketch-based approxCountDistinct

ARCH: function string -> uint8 const? ARCH: dtypes are types of inputs - rename? TODO: check for function existence. OPTIM: the switch(function) could be hoisted outside the closure (would work as a function existence validator).

Types ¶

type AggState ¶

type AggState struct {
	AddChunk func(buckets []uint64, ndistinct int, data Chunk)
	Resolve  func() (Chunk, error)
	// contains filtered or unexported fields
}

type Chunk ¶

type Chunk interface {
	Dtype() Dtype
	AddValue(string) error
	AddValues([]string) error // consider merging AddValues and AddValue (using varargs)
	WriteTo(io.Writer) (int64, error)
	Prune(*bitmap.Bitmap) Chunk
	Append(Chunk) error
	Hash(int, []uint64)
	Clone() Chunk
	JSONLiteral(int) (string, bool) // the bool stands for 'ok' (not null)
	Compare(bool, bool, int, int) int
	// contains filtered or unexported methods
}

Chunk defines a part of a column - constant type, stored contiguously

func Deserialize ¶

func Deserialize(r io.Reader, Dtype Dtype) (Chunk, error)

Deserialize reads a chunk from a reader. This shouldn't really accept a Dtype - at this point we're requiring it, because we don't serialize Dtypes into the binary representation - but that's just because we always have the schema at hand... but will we always have it? Shouldn't the files be readable as standalone files? OPTIM: shouldn't we deserialize based on a byte slice instead? We already have it, so we're just duplicating it using a byte buffer. OPTIM: we may be able to safely cast these byte slices in the future - see https://github.com/golang/go/issues/19367

func EvalAdd ¶

func EvalAdd(c1 Chunk, c2 Chunk) (Chunk, error)

a solid case for generics?

func EvalAnd ¶

func EvalAnd(c1 Chunk, c2 Chunk) (Chunk, error)

EvalAnd produces a bitwise operation on two bool chunks

func EvalDivide ¶

func EvalDivide(c1 Chunk, c2 Chunk) (Chunk, error)

Different return type for ints! Should we perhaps cast to make this more systematic? Check for division by zero (gives +/- infinity, which will break JSON?).

func EvalEq ¶

func EvalEq(c1 Chunk, c2 Chunk) (Chunk, error)

EvalEq compares values from two different chunks

func EvalGt ¶

func EvalGt(c1 Chunk, c2 Chunk) (Chunk, error)

EvalGt checks if values in c1 are greater than in c2

func EvalGte ¶

func EvalGte(c1 Chunk, c2 Chunk) (Chunk, error)

EvalGte checks if values in c1 are greater than or equal to those in c2

func EvalLt ¶

func EvalLt(c1 Chunk, c2 Chunk) (Chunk, error)

EvalLt checks if values in c1 are lower than in c2

func EvalLte ¶

func EvalLte(c1 Chunk, c2 Chunk) (Chunk, error)

EvalLte checks if values in c1 are lower than or equal to those in c2

func EvalMultiply ¶

func EvalMultiply(c1 Chunk, c2 Chunk) (Chunk, error)

func EvalNeq ¶

func EvalNeq(c1 Chunk, c2 Chunk) (Chunk, error)

EvalNeq compares values from two different chunks for inequality

func EvalNot ¶

func EvalNot(c Chunk) (Chunk, error)

func EvalOr ¶

func EvalOr(c1 Chunk, c2 Chunk) (Chunk, error)

EvalOr produces a bitwise operation on two bool chunks

func EvalSubtract ¶

func EvalSubtract(c1 Chunk, c2 Chunk) (Chunk, error)

func NewChunkFromSchema ¶

func NewChunkFromSchema(schema Schema) Chunk

NewChunkFromSchema creates a new Chunk based on the column schema provided.

func NewChunkLiteralAuto ¶

func NewChunkLiteralAuto(s string, length int) (Chunk, error)

NewChunkLiteralAuto creates a chunk that only contains a single value in the whole chunk. It's useful in e.g. 'foo > 1', where we can convert the '1' to a whole chunk. OPTIM: we're using single-value slices, should we perhaps have a value specific for each literal to avoid working with slices (stack allocation etc.)?

func NewChunkLiteralTyped ¶

func NewChunkLiteralTyped(s string, dtype Dtype, length int) (Chunk, error)

type ChunkBools ¶

type ChunkBools struct {
	// contains filtered or unexported fields
}

ChunkBools defines a backing struct for a chunk of boolean values

func NewChunkBoolsFromBitmap ¶

func NewChunkBoolsFromBitmap(bm *bitmap.Bitmap) *ChunkBools

NewChunkBoolsFromBitmap creates a new bool chunk, but in doing so, doesn't clone the incoming bitmap, it uses it as is - the caller might want to clone it if it aims to mutate it in the future.

func NewChunkLiteralBools ¶

func NewChunkLiteralBools(value bool, length int) *ChunkBools

func (*ChunkBools) AddValue ¶

func (rc *ChunkBools) AddValue(s string) error

AddValue takes in a string representation of a value and converts it into a value suited for this chunk

func (*ChunkBools) AddValues ¶

func (rc *ChunkBools) AddValues(vals []string) error

AddValues is a helper method, it just calls AddValue repeatedly

func (*ChunkBools) Append ¶

func (rc *ChunkBools) Append(tc Chunk) error

Append adds a chunk of the same type at the end of this one (in place update)

func (*ChunkBools) Base ¶

func (bc *ChunkBools) Base() *baseChunk

func (*ChunkBools) Clone ¶

func (rc *ChunkBools) Clone() Chunk

func (*ChunkBools) Compare ¶

func (rc *ChunkBools) Compare(asc, nullsFirst bool, i, j int) int

func (*ChunkBools) Dtype ¶

func (rc *ChunkBools) Dtype() Dtype

Dtype returns the type of this chunk

func (*ChunkBools) Hash ¶

func (rc *ChunkBools) Hash(position int, hashes []uint64)

Hash hashes this chunk's values into a provided container

func (*ChunkBools) JSONLiteral ¶

func (rc *ChunkBools) JSONLiteral(n int) (string, bool)

func (*ChunkBools) Len ¶

func (bc *ChunkBools) Len() int

ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?

func (*ChunkBools) Nullify ¶

func (bc *ChunkBools) Nullify(bm *bitmap.Bitmap)

ARCH: Nullify does NOT switch the data values to be nulls/empty as well

func (*ChunkBools) Prune ¶

func (rc *ChunkBools) Prune(bm *bitmap.Bitmap) Chunk

Prune filters this chunk and only preserves values for which the bitmap is set

func (*ChunkBools) Truths ¶

func (rc *ChunkBools) Truths() *bitmap.Bitmap

Truths returns only true values in this boolean column's bitmap - remove those that are null - we use this for filtering, when we're interested in non-null true values (to select given rows)

func (*ChunkBools) WriteTo ¶

func (rc *ChunkBools) WriteTo(w io.Writer) (int64, error)

WriteTo converts a chunk into its binary representation

type ChunkDates ¶

type ChunkDates struct {
	// contains filtered or unexported fields
}

ChunkDates defines a backing struct for a chunk of date values

func NewChunkLiteralDates ¶

func NewChunkLiteralDates(value date, length int) *ChunkDates

func (*ChunkDates) AddValue ¶

func (rc *ChunkDates) AddValue(s string) error

func (*ChunkDates) AddValues ¶

func (rc *ChunkDates) AddValues(vals []string) error

AddValues is a helper method, it just calls AddValue repeatedly

func (*ChunkDates) Append ¶

func (rc *ChunkDates) Append(tc Chunk) error

Append adds a chunk of the same type at the end of this one (in place update)

func (*ChunkDates) Base ¶

func (bc *ChunkDates) Base() *baseChunk

func (*ChunkDates) Clone ¶

func (rc *ChunkDates) Clone() Chunk

func (*ChunkDates) Compare ¶

func (rc *ChunkDates) Compare(asc, nullsFirst bool, i, j int) int

func (*ChunkDates) Dtype ¶

func (rc *ChunkDates) Dtype() Dtype

Dtype returns the type of this chunk

func (*ChunkDates) Hash ¶

func (rc *ChunkDates) Hash(position int, hashes []uint64)

func (*ChunkDates) JSONLiteral ¶

func (rc *ChunkDates) JSONLiteral(n int) (string, bool)

func (*ChunkDates) Len ¶

func (bc *ChunkDates) Len() int

ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?

func (*ChunkDates) Nullify ¶

func (bc *ChunkDates) Nullify(bm *bitmap.Bitmap)

ARCH: Nullify does NOT switch the data values to be nulls/empty as well

func (*ChunkDates) Prune ¶

func (rc *ChunkDates) Prune(bm *bitmap.Bitmap) Chunk

Prune filters this chunk and only preserves values for which the bitmap is set

func (*ChunkDates) WriteTo ¶

func (rc *ChunkDates) WriteTo(w io.Writer) (int64, error)

WriteTo converts a chunk into its binary representation

type ChunkDatetimes ¶

type ChunkDatetimes struct {
	// contains filtered or unexported fields
}

func NewChunkLiteralDatetimes ¶ added in v0.1.3

func NewChunkLiteralDatetimes(value datetime, length int) *ChunkDatetimes

func (*ChunkDatetimes) AddValue ¶

func (rc *ChunkDatetimes) AddValue(s string) error

func (*ChunkDatetimes) AddValues ¶

func (rc *ChunkDatetimes) AddValues(vals []string) error

AddValues is a helper method, it just calls AddValue repeatedly

func (*ChunkDatetimes) Append ¶

func (rc *ChunkDatetimes) Append(tc Chunk) error

Append adds a chunk of the same type at the end of this one (in place update)

func (*ChunkDatetimes) Base ¶

func (bc *ChunkDatetimes) Base() *baseChunk

func (*ChunkDatetimes) Clone ¶

func (rc *ChunkDatetimes) Clone() Chunk

func (*ChunkDatetimes) Compare ¶

func (rc *ChunkDatetimes) Compare(asc, nullsFirst bool, i, j int) int

func (*ChunkDatetimes) Dtype ¶

func (rc *ChunkDatetimes) Dtype() Dtype

Dtype returns the type of this chunk

func (*ChunkDatetimes) Hash ¶

func (rc *ChunkDatetimes) Hash(position int, hashes []uint64)

func (*ChunkDatetimes) JSONLiteral ¶

func (rc *ChunkDatetimes) JSONLiteral(n int) (string, bool)

func (*ChunkDatetimes) Len ¶

func (bc *ChunkDatetimes) Len() int

ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?

func (*ChunkDatetimes) Nullify ¶

func (bc *ChunkDatetimes) Nullify(bm *bitmap.Bitmap)

ARCH: Nullify does NOT switch the data values to be nulls/empty as well

func (*ChunkDatetimes) Prune ¶

func (rc *ChunkDatetimes) Prune(bm *bitmap.Bitmap) Chunk

Prune filters this chunk and only preserves values for which the bitmap is set

func (*ChunkDatetimes) WriteTo ¶

func (rc *ChunkDatetimes) WriteTo(w io.Writer) (int64, error)

WriteTo converts a chunk into its binary representation

type ChunkFloats ¶

type ChunkFloats struct {
	// contains filtered or unexported fields
}

ChunkFloats defines a backing struct for a chunk of floating point values

func NewChunkFloatsFromSlice ¶

func NewChunkFloatsFromSlice(data []float64, nulls *bitmap.Bitmap) *ChunkFloats

func NewChunkLiteralFloats ¶

func NewChunkLiteralFloats(value float64, length int) *ChunkFloats

func (*ChunkFloats) AddValue ¶

func (rc *ChunkFloats) AddValue(s string) error

AddValue takes in a string representation of a value and converts it into a value suited for this chunk. Let's really consider adding standard nulls here, it will probably make our lives a lot easier.

func (*ChunkFloats) AddValues ¶

func (rc *ChunkFloats) AddValues(vals []string) error

AddValues is a helper method, it just calls AddValue repeatedly

func (*ChunkFloats) Append ¶

func (rc *ChunkFloats) Append(tc Chunk) error

Append adds a chunk of the same type at the end of this one (in place update)

func (*ChunkFloats) Base ¶

func (bc *ChunkFloats) Base() *baseChunk

func (*ChunkFloats) Clone ¶

func (rc *ChunkFloats) Clone() Chunk

func (*ChunkFloats) Compare ¶

func (rc *ChunkFloats) Compare(asc, nullsFirst bool, i, j int) int

func (*ChunkFloats) Dtype ¶

func (rc *ChunkFloats) Dtype() Dtype

Dtype returns the type of this chunk

func (*ChunkFloats) Hash ¶

func (rc *ChunkFloats) Hash(position int, hashes []uint64)

Hash hashes this chunk's values into a provided container. OPTIM/TODO(next): do we need a fnv hasher for ints/floats/dates? We can just take the uint64 representation of these values... or not?

func (*ChunkFloats) JSONLiteral ¶

func (rc *ChunkFloats) JSONLiteral(n int) (string, bool)

func (*ChunkFloats) Len ¶

func (bc *ChunkFloats) Len() int

ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?

func (*ChunkFloats) Nullify ¶

func (bc *ChunkFloats) Nullify(bm *bitmap.Bitmap)

ARCH: Nullify does NOT switch the data values to be nulls/empty as well

func (*ChunkFloats) Prune ¶

func (rc *ChunkFloats) Prune(bm *bitmap.Bitmap) Chunk

Prune filters this chunk and only preserves values for which the bitmap is set

func (*ChunkFloats) WriteTo ¶

func (rc *ChunkFloats) WriteTo(w io.Writer) (int64, error)

WriteTo converts a chunk into its binary representation

type ChunkInts ¶

type ChunkInts struct {
	// contains filtered or unexported fields
}

ChunkInts defines a backing struct for a chunk of integer values

func NewChunkIntsFromSlice ¶

func NewChunkIntsFromSlice(data []int64, nulls *bitmap.Bitmap) *ChunkInts

the next few functions could use some generics

func NewChunkLiteralInts ¶

func NewChunkLiteralInts(value int64, length int) *ChunkInts

func (*ChunkInts) AddValue ¶

func (rc *ChunkInts) AddValue(s string) error

AddValue takes in a string representation of a value and converts it into a value suited for this chunk

func (*ChunkInts) AddValues ¶

func (rc *ChunkInts) AddValues(vals []string) error

AddValues is a helper method, it just calls AddValue repeatedly

func (*ChunkInts) Append ¶

func (rc *ChunkInts) Append(tc Chunk) error

Append adds a chunk of the same type at the end of this one (in place update)

func (*ChunkInts) Base ¶

func (bc *ChunkInts) Base() *baseChunk

func (*ChunkInts) Clone ¶

func (rc *ChunkInts) Clone() Chunk

func (*ChunkInts) Compare ¶

func (rc *ChunkInts) Compare(asc, nullsFirst bool, i, j int) int

ARCH: this could be made entirely generic by allowing an interface `nthValue(int) T` to genericise v1/v2

EXCEPT for bools :-( (not comparable)

func (*ChunkInts) Dtype ¶

func (rc *ChunkInts) Dtype() Dtype

Dtype returns the type of this chunk

func (*ChunkInts) Hash ¶

func (rc *ChunkInts) Hash(position int, hashes []uint64)

Hash hashes this chunk's values into a provided container. OPTIM: maphash might be faster than fnv? Test it and if it is so, implement everywhere, but be careful about the seed (needs to be the same for all chunks). Careful about maphash: "The hash value of a given byte sequence is consistent within a single process, but will be different in different processes." Oh, and I rebenchmarked maphash and fnv and found maphash to be much slower (despite no allocs). Also, check this https://github.com/segmentio/fasthash/ (via https://segment.com/blog/allocation-efficiency-in-high-performance-go-services/) - they reimplement fnv using stack allocation only.

we tested it and got a 90% speedup (no allocs, shorter code) - so let's consider it, it's in the fasthash branch

func (*ChunkInts) JSONLiteral ¶

func (rc *ChunkInts) JSONLiteral(n int) (string, bool)

func (*ChunkInts) Len ¶

func (bc *ChunkInts) Len() int

ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?

func (*ChunkInts) Nullify ¶

func (bc *ChunkInts) Nullify(bm *bitmap.Bitmap)

ARCH: Nullify does NOT switch the data values to be nulls/empty as well

func (*ChunkInts) Prune ¶

func (rc *ChunkInts) Prune(bm *bitmap.Bitmap) Chunk

Prune filters this chunk and only preserves values for which the bitmap is set

func (*ChunkInts) WriteTo ¶

func (rc *ChunkInts) WriteTo(w io.Writer) (int64, error)

WriteTo converts a chunk into its binary representation

type ChunkNulls ¶

type ChunkNulls struct {
	// contains filtered or unexported fields
}

ChunkNulls defines a backing struct for a chunk of null values. Since it's all nulls, we only need to know how many there are.

func (*ChunkNulls) AddValue ¶

func (rc *ChunkNulls) AddValue(s string) error

AddValue takes in a string representation of a value and converts it into a value suited for this chunk

func (*ChunkNulls) AddValues ¶

func (rc *ChunkNulls) AddValues(vals []string) error

AddValues is a helper method, it just calls AddValue repeatedly

func (*ChunkNulls) Append ¶

func (rc *ChunkNulls) Append(tc Chunk) error

Append adds a chunk of the same type at the end of this one (in place update)

func (*ChunkNulls) Base ¶

func (bc *ChunkNulls) Base() *baseChunk

func (*ChunkNulls) Clone ¶

func (rc *ChunkNulls) Clone() Chunk

func (*ChunkNulls) Compare ¶

func (rc *ChunkNulls) Compare(asc, nullsFirst bool, i, j int) int

func (*ChunkNulls) Dtype ¶

func (rc *ChunkNulls) Dtype() Dtype

Dtype returns the type of this chunk

func (*ChunkNulls) Hash ¶

func (rc *ChunkNulls) Hash(position int, hashes []uint64)

Hash hashes this chunk's values into a provided container

func (*ChunkNulls) JSONLiteral ¶

func (rc *ChunkNulls) JSONLiteral(n int) (string, bool)

func (*ChunkNulls) Len ¶

func (bc *ChunkNulls) Len() int

ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?

func (*ChunkNulls) Nullify ¶

func (bc *ChunkNulls) Nullify(bm *bitmap.Bitmap)

ARCH: Nullify does NOT switch the data values to be nulls/empty as well

func (*ChunkNulls) Prune ¶

func (rc *ChunkNulls) Prune(bm *bitmap.Bitmap) Chunk

Prune filters this chunk and only preserves values for which the bitmap is set

func (*ChunkNulls) WriteTo ¶

func (rc *ChunkNulls) WriteTo(w io.Writer) (int64, error)

WriteTo converts a chunk into its binary representation

type ChunkStrings ¶

type ChunkStrings struct {
	// contains filtered or unexported fields
}

ChunkStrings defines a backing struct for a chunk of string values

func NewChunkLiteralStrings ¶

func NewChunkLiteralStrings(value string, length int) *ChunkStrings

func (*ChunkStrings) AddValue ¶

func (rc *ChunkStrings) AddValue(s string) error

AddValue takes in a string representation of a value and converts it into a value suited for this chunk

func (*ChunkStrings) AddValues ¶

func (rc *ChunkStrings) AddValues(vals []string) error

AddValues is a helper method, it just calls AddValue repeatedly

func (*ChunkStrings) Append ¶

func (rc *ChunkStrings) Append(tc Chunk) error

Append adds a chunk of the same type at the end of this one (in place update)

func (*ChunkStrings) Base ¶

func (bc *ChunkStrings) Base() *baseChunk

func (*ChunkStrings) Clone ¶

func (rc *ChunkStrings) Clone() Chunk

func (*ChunkStrings) Compare ¶

func (rc *ChunkStrings) Compare(asc, nullsFirst bool, i, j int) int

func (*ChunkStrings) Dtype ¶

func (rc *ChunkStrings) Dtype() Dtype

Dtype returns the type of this chunk

func (*ChunkStrings) Hash ¶

func (rc *ChunkStrings) Hash(position int, hashes []uint64)

Hash hashes this chunk's values into a provided container

func (*ChunkStrings) JSONLiteral ¶

func (rc *ChunkStrings) JSONLiteral(n int) (string, bool)

func (*ChunkStrings) Len ¶

func (bc *ChunkStrings) Len() int

ARCH: we sometimes use this, sometimes we access the struct field directly... perhaps remove this?

func (*ChunkStrings) Nullify ¶

func (bc *ChunkStrings) Nullify(bm *bitmap.Bitmap)

ARCH: Nullify does NOT switch the data values to be nulls/empty as well

func (*ChunkStrings) Prune ¶

func (rc *ChunkStrings) Prune(bm *bitmap.Bitmap) Chunk

Prune filters this chunk and only preserves values for which the bitmap is set

func (*ChunkStrings) WriteTo ¶

func (rc *ChunkStrings) WriteTo(w io.Writer) (int64, error)

WriteTo converts a chunk into its binary representation

type Dtype ¶

type Dtype uint8

Dtype denotes the data type of a given object (e.g. int or string)

const (
	DtypeInvalid Dtype = iota
	DtypeNull
	DtypeString
	DtypeInt
	DtypeFloat
	DtypeBool
	DtypeDate
	DtypeDatetime
	// more to be added
	DtypeMax
)

individual dtypes defined as a sequence

func (Dtype) MarshalJSON ¶

func (dt Dtype) MarshalJSON() ([]byte, error)

MarshalJSON returns the JSON representation of a dtype (stringified + json string). We want Dtypes to be marshaled within Schema correctly; without this they'd be returned as an integer (even with ",string" tags).

func (Dtype) String ¶

func (dt Dtype) String() string

func (*Dtype) UnmarshalJSON ¶

func (dt *Dtype) UnmarshalJSON(data []byte) error

UnmarshalJSON deserialises a given dtype from a JSON value

type Schema ¶

type Schema struct {
	Name     string `json:"name"`
	Dtype    Dtype  `json:"dtype"`
	Nullable bool   `json:"nullable"`
}

Schema defines all the necessary properties of a column

type TableSchema ¶

type TableSchema []Schema

TableSchema is a collection of column schemas

func (*TableSchema) LocateColumn ¶

func (schema *TableSchema) LocateColumn(s string) (int, Schema, error)

LocateColumn returns a column within a schema - its position and definition; error is triggered if this column is not found or the schema is nil

func (*TableSchema) LocateColumnCaseInsensitive ¶

func (schema *TableSchema) LocateColumnCaseInsensitive(s string) (int, Schema, error)

LocateColumnCaseInsensitive works just like LocateColumn, but it ignores casing. ARCH: we could have used strings.EqualFold, but a) we have one static input (s), so we can amortise the case lowering, b) the extra correctness in EqualFold is irrelevant here, because of our column naming restrictions.

type TypeGuesser ¶

type TypeGuesser struct {
	// contains filtered or unexported fields
}

TypeGuesser contains state necessary for inferring types from a stream of strings

func NewTypeGuesser ¶

func NewTypeGuesser() *TypeGuesser

NewTypeGuesser creates a new type guesser

func (*TypeGuesser) AddValue ¶

func (tg *TypeGuesser) AddValue(s string)

AddValue feeds a new value to a type guesser

func (*TypeGuesser) InferredType ¶

func (tg *TypeGuesser) InferredType() Schema

InferredType returns the best guess of a type for a given stream of strings

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL