Documentation ¶
Overview ¶
Package compute is a native-go implementation of an Acero-like arrow compute engine. It requires go1.18+.
While consumers of Arrow that are able to use CGO could utilize the C Data API (using the cdata package) and could link against the acero library directly, there are consumers who cannot use CGO. This is an attempt to provide for those users, and in general create a native-go arrow compute engine.
The overwhelming majority of things in this package require go1.18 as it utilizes generics. The files in this package and its sub-packages are all excluded from being built by go versions lower than 1.18 so that the larger Arrow module itself is still compatible with go1.17.
Everything in this package should be considered Experimental for now.
Example (CustomFunction) ¶
This example demonstrates how to register a custom scalar function.
package main import ( "context" "fmt" "log" "github.com/joechenrh/arrow-go/v18/arrow" "github.com/joechenrh/arrow-go/v18/arrow/array" "github.com/joechenrh/arrow-go/v18/arrow/compute" "github.com/joechenrh/arrow-go/v18/arrow/compute/exec" "github.com/joechenrh/arrow-go/v18/arrow/memory" ) func main() { pool := memory.NewGoAllocator() ctx := context.Background() execCtx := compute.DefaultExecCtx() ctx = compute.SetExecCtx(ctx, execCtx) add42 := compute.NewScalarFunction("add_42", compute.Arity{ NArgs: 1, }, compute.FunctionDoc{ Summary: "Returns the input values plus 42", ArgNames: []string{"input"}, }) if err := add42.AddNewKernel( []exec.InputType{ // We accept a single argument (array) of Int8 type. { Kind: exec.InputExact, Type: arrow.PrimitiveTypes.Int8, }, }, // We'll return a single Int8 array. exec.NewOutputType(arrow.PrimitiveTypes.Int8), func(ctx *exec.KernelCtx, span *exec.ExecSpan, result *exec.ExecResult) error { // The second buffer contains the values. Both for the input and the output arrays. for i, x := range span.Values[0].Array.Buffers[1].Buf { result.Buffers[1].Buf[i] = x + 42 } return nil }, nil, ); err != nil { log.Fatal(err) } execCtx.Registry.AddFunction(add42, true) inputArrayBuilder := array.NewInt8Builder(pool) for i := 0; i < 16; i++ { inputArrayBuilder.Append(int8(i)) } inputArray := inputArrayBuilder.NewArray() outputArrayDatum, err := compute.CallFunction( compute.SetExecCtx(context.Background(), execCtx), "add_42", nil, &compute.ArrayDatum{Value: inputArray.Data()}, ) if err != nil { log.Fatal(err) } fmt.Println(array.NewInt8Data(outputArrayDatum.(*compute.ArrayDatum).Value).Int8Values()) }
Output: [42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57]
Index ¶
- Constants
- Variables
- func CanCast(from, to arrow.DataType) bool
- func CastArray(ctx context.Context, val arrow.Array, opts *CastOptions) (arrow.Array, error)
- func CastFromExtension(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error
- func CastList[SrcOffsetT, DestOffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error
- func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error
- func CastToType(ctx context.Context, val arrow.Array, toType arrow.DataType) (arrow.Array, error)
- func DatumIsValue(d Datum) bool
- func ExecSpanFromBatch(batch *ExecBatch) *exec.ExecSpan
- func FilterArray(ctx context.Context, values, filter arrow.Array, options FilterOptions) (arrow.Array, error)
- func FilterRecordBatch(ctx context.Context, batch arrow.Record, filter arrow.Array, ...) (arrow.Record, error)
- func FilterTable(ctx context.Context, tbl arrow.Table, filter Datum, opts *FilterOptions) (arrow.Table, error)
- func RegisterScalarArithmetic(reg FunctionRegistry)
- func RegisterScalarBoolean(reg FunctionRegistry)
- func RegisterScalarCast(reg FunctionRegistry)
- func RegisterScalarComparisons(reg FunctionRegistry)
- func RegisterVectorHash(reg FunctionRegistry)
- func RegisterVectorRunEndFuncs(reg FunctionRegistry)
- func RegisterVectorSelection(reg FunctionRegistry)
- func RunEndDecodeArray(ctx context.Context, input arrow.Array) (arrow.Array, error)
- func RunEndEncodeArray(ctx context.Context, opts RunEndEncodeOptions, input arrow.Array) (arrow.Array, error)
- func SerializeExpr(expr Expression, mem memory.Allocator) (*memory.Buffer, error)
- func SerializeOptions(opts FunctionOptions, mem memory.Allocator) (*memory.Buffer, error)
- func SetExecCtx(ctx context.Context, e ExecCtx) context.Context
- func TakeArray(ctx context.Context, values, indices arrow.Array) (arrow.Array, error)
- func TakeArrayOpts(ctx context.Context, values, indices arrow.Array, opts TakeOptions) (arrow.Array, error)
- func UniqueArray(ctx context.Context, values arrow.Array) (arrow.Array, error)
- type ArithmeticOptions
- type Arity
- type ArrayDatum
- func (d *ArrayDatum) Chunks() []arrow.Array
- func (d *ArrayDatum) Equals(other Datum) bool
- func (ArrayDatum) Kind() DatumKind
- func (d *ArrayDatum) Len() int64
- func (d *ArrayDatum) MakeArray() arrow.Array
- func (d *ArrayDatum) NullN() int64
- func (d *ArrayDatum) Release()
- func (d *ArrayDatum) String() string
- func (d *ArrayDatum) ToScalar() (scalar.Scalar, error)
- func (d *ArrayDatum) Type() arrow.DataType
- type ArrayLikeDatum
- type Call (deprecated)
- func (c *Call) Equals(other Expression) bool
- func (c *Call) FieldRef() *FieldRef
- func (c *Call) Hash() uint64
- func (c *Call) IsBound() bool
- func (c *Call) IsNullLiteral() bool
- func (c *Call) IsSatisfiable() bool
- func (c *Call) IsScalarExpr() bool
- func (c *Call) Release()
- func (c *Call) String() string
- func (c *Call) Type() arrow.DataType
- type CastOptions
- type ChunkedDatum
- func (d *ChunkedDatum) Chunks() []arrow.Array
- func (d *ChunkedDatum) Equals(other Datum) bool
- func (ChunkedDatum) Kind() DatumKind
- func (d *ChunkedDatum) Len() int64
- func (d *ChunkedDatum) NullN() int64
- func (d *ChunkedDatum) Release()
- func (d *ChunkedDatum) String() string
- func (d *ChunkedDatum) Type() arrow.DataType
- type Datum
- func AbsoluteValue(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error)
- func Acos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Add(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Asin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Atan(ctx context.Context, arg Datum) (Datum, error)
- func Atan2(ctx context.Context, x, y Datum) (Datum, error)
- func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error)
- func CastDatum(ctx context.Context, val Datum, opts *CastOptions) (Datum, error)
- func Cos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Divide(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Filter(ctx context.Context, values, filter Datum, options FilterOptions) (Datum, error)
- func Ln(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Log10(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Log1p(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Log2(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Logb(ctx context.Context, opts ArithmeticOptions, x, base Datum) (Datum, error)
- func Multiply(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Negate(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error)
- func NewDatum(value interface{}) Datum
- func NewDatumWithoutOwning(value interface{}) Datum
- func Power(ctx context.Context, opts ArithmeticOptions, base, exp Datum) (Datum, error)
- func Round(ctx context.Context, opts RoundOptions, arg Datum) (Datum, error)
- func RoundToMultiple(ctx context.Context, opts RoundToMultipleOptions, arg Datum) (Datum, error)
- func RunEndDecode(ctx context.Context, arg Datum) (Datum, error)
- func RunEndEncode(ctx context.Context, opts RunEndEncodeOptions, arg Datum) (Datum, error)
- func ShiftLeft(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error)
- func ShiftRight(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error)
- func Sign(ctx context.Context, input Datum) (Datum, error)
- func Sin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Subtract(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Take(ctx context.Context, opts TakeOptions, values, indices Datum) (Datum, error)
- func Tan(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Unique(ctx context.Context, values Datum) (Datum, error)
- type DatumKind
- type EmptyDatum
- type ExecBatch
- type ExecCtx
- type Expression (deprecated)
- func And(lhs, rhs Expression, ops ...Expression) Expression
- func Cast(ex Expression, dt arrow.DataType) Expression
- func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, error)
- func Equal(lhs, rhs Expression) Expression
- func Greater(lhs, rhs Expression) Expression
- func GreaterEqual(lhs, rhs Expression) Expression
- func IsNull(lhs Expression, nanIsNull bool) Expression
- func IsValid(lhs Expression) Expression
- func Less(lhs, rhs Expression) Expression
- func LessEqual(lhs, rhs Expression) Expression
- func NewCall(name string, args []Expression, opts FunctionOptions) Expression
- func NewFieldRef(field string) Expression
- func NewLiteral(arg interface{}) Expression
- func NewRef(ref FieldRef) Expression
- func Not(expr Expression) Expression
- func NotEqual(lhs, rhs Expression) Expression
- func NullLiteral(dt arrow.DataType) Expression
- func Or(lhs, rhs Expression, ops ...Expression) Expression
- func Project(values []Expression, names []string) Expression
- type FieldPath
- func (f FieldPath) Get(s *arrow.Schema) (*arrow.Field, error)
- func (f FieldPath) GetColumn(batch arrow.Record) (arrow.Array, error)
- func (f FieldPath) GetField(field arrow.Field) (*arrow.Field, error)
- func (f FieldPath) GetFieldFromSlice(fields []arrow.Field) (*arrow.Field, error)
- func (f FieldPath) GetFieldFromType(typ arrow.DataType) (*arrow.Field, error)
- func (f FieldPath) String() string
- type FieldRef
- func (f *FieldRef) Equals(other FieldRef) bool
- func (f *FieldRef) FieldPath() FieldPath
- func (f FieldRef) FindAll(fields []arrow.Field) []FieldPath
- func (f FieldRef) FindAllField(field arrow.Field) []FieldPath
- func (f FieldRef) FindOne(schema *arrow.Schema) (FieldPath, error)
- func (f FieldRef) FindOneOrNone(schema *arrow.Schema) (FieldPath, error)
- func (f FieldRef) FindOneOrNoneRecord(root arrow.Record) (FieldPath, error)
- func (f FieldRef) GetAllColumns(root arrow.Record) ([]arrow.Array, error)
- func (f FieldRef) GetOneColumnOrNone(root arrow.Record) (arrow.Array, error)
- func (f FieldRef) GetOneField(schema *arrow.Schema) (*arrow.Field, error)
- func (f FieldRef) GetOneOrNone(schema *arrow.Schema) (*arrow.Field, error)
- func (f FieldRef) Hash(seed maphash.Seed) uint64
- func (f *FieldRef) IsFieldPath() bool
- func (f *FieldRef) IsName() bool
- func (f *FieldRef) IsNested() bool
- func (f *FieldRef) Name() string
- func (f FieldRef) String() string
- type FilterOptions
- type FuncKind
- type Function
- type FunctionDoc
- type FunctionOptions
- type FunctionOptionsCloneable
- type FunctionOptionsEqual
- type FunctionRegistry
- type KernelExecutor
- type Literal (deprecated)
- func (l *Literal) Equals(other Expression) bool
- func (Literal) FieldRef() *FieldRef
- func (l *Literal) Hash() uint64
- func (l *Literal) IsBound() bool
- func (l *Literal) IsNullLiteral() bool
- func (l *Literal) IsSatisfiable() bool
- func (l *Literal) IsScalarExpr() bool
- func (l *Literal) Release()
- func (l *Literal) String() string
- func (l *Literal) Type() arrow.DataType
- type MakeStructOptions
- type MetaFunction
- func (b *MetaFunction) Arity() Arity
- func (b *MetaFunction) DefaultOptions() FunctionOptions
- func (m *MetaFunction) DispatchBest(...arrow.DataType) (exec.Kernel, error)
- func (m *MetaFunction) DispatchExact(...arrow.DataType) (exec.Kernel, error)
- func (b *MetaFunction) Doc() FunctionDoc
- func (m *MetaFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
- func (b *MetaFunction) Kind() FuncKind
- func (b *MetaFunction) Name() string
- func (MetaFunction) NumKernels() int
- func (b *MetaFunction) Validate() error
- type MetaFunctionImpl
- type NullOptions
- type NullSelectionBehavior
- type Parameter (deprecated)
- func (p *Parameter) Equals(other Expression) bool
- func (p *Parameter) FieldRef() *FieldRef
- func (p *Parameter) Hash() uint64
- func (p *Parameter) IsBound() bool
- func (Parameter) IsNullLiteral() bool
- func (p *Parameter) IsSatisfiable() bool
- func (p *Parameter) IsScalarExpr() bool
- func (p *Parameter) Release()
- func (p *Parameter) String() string
- func (p *Parameter) Type() arrow.DataType
- type RecordDatum
- type RoundMode
- type RoundOptions
- type RoundToMultipleOptions
- type RunEndEncodeOptions
- type ScalarDatum
- func (ScalarDatum) Chunks() []arrow.Array
- func (d *ScalarDatum) Equals(other Datum) bool
- func (ScalarDatum) Kind() DatumKind
- func (ScalarDatum) Len() int64
- func (d *ScalarDatum) NullN() int64
- func (d *ScalarDatum) Release()
- func (d *ScalarDatum) String() string
- func (d *ScalarDatum) ToScalar() (scalar.Scalar, error)
- func (d *ScalarDatum) Type() arrow.DataType
- type ScalarFunction
- func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error
- func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, ...) error
- func (s *ScalarFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error)
- func (s *ScalarFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error)
- func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
- func (fi *ScalarFunction) Kernels() []*KT
- func (fi *ScalarFunction) NumKernels() int
- func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions)
- type SetLookupOptions
- type StrptimeOptions
- type TableDatum
- type TableLikeDatum
- type TakeOptions
- type VectorFunction
- func (f *VectorFunction) AddKernel(kernel exec.VectorKernel) error
- func (f *VectorFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, ...) error
- func (f *VectorFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error)
- func (f *VectorFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error)
- func (f *VectorFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
- func (fi *VectorFunction) Kernels() []*KT
- func (fi *VectorFunction) NumKernels() int
- func (f *VectorFunction) SetDefaultOptions(opts FunctionOptions)
Examples ¶
Constants ¶
const ( // Round to nearest integer less than or equal in magnitude (aka "floor") RoundDown = kernels.RoundDown // Round to nearest integer greater than or equal in magnitude (aka "ceil") RoundUp = kernels.RoundUp // Get integral part without fractional digits (aka "trunc") RoundTowardsZero = kernels.TowardsZero // Round negative values with DOWN and positive values with UP RoundTowardsInfinity = kernels.AwayFromZero // Round ties with DOWN (aka "round half towards negative infinity") RoundHalfDown = kernels.HalfDown // Round ties with UP (aka "round half towards positive infinity") RoundHalfUp = kernels.HalfUp // Round ties with TowardsZero (aka "round half away from infinity") RoundHalfTowardsZero = kernels.HalfTowardsZero // Round ties with AwayFromZero (aka "round half towards infinity") RoundHalfTowardsInfinity = kernels.HalfAwayFromZero // Round ties to nearest even integer RoundHalfToEven = kernels.HalfToEven // Round ties to nearest odd integer RoundHalfToOdd = kernels.HalfToOdd )
const ( SelectionEmitNulls = kernels.EmitNulls SelectionDropNulls = kernels.DropNulls )
const DefaultMaxChunkSize = math.MaxInt64
const UnknownLength int64 = -1
Variables ¶
var ( DefaultRoundOptions = RoundOptions{NDigits: 0, Mode: RoundHalfToEven} DefaultRoundToMultipleOptions = RoundToMultipleOptions{ Multiple: scalar.NewFloat64Scalar(1), Mode: RoundHalfToEven} )
var ( // WithAllocator returns a new context with the provided allocator // embedded into the context. WithAllocator = exec.WithAllocator // GetAllocator retrieves the allocator from the context, or returns // memory.DefaultAllocator if there was no allocator in the provided // context. GetAllocator = exec.GetAllocator )
var ( ErrEmpty = errors.New("cannot traverse empty field path") ErrNoChildren = errors.New("trying to get child of type with no children") ErrIndexRange = errors.New("index out of range") ErrMultipleMatches = errors.New("multiple matches") ErrNoMatch = errors.New("no match") ErrInvalid = errors.New("field ref invalid") )
Functions ¶
func CanCast ¶
CanCast returns true if there is an implementation for casting an array or scalar value from the specified DataType to the other data type.
func CastArray ¶
CastArray is a convenience function for casting an Array to another type. It is equivalent to constructing a Datum for the array and using CallFunction(ctx, "cast", ...).
func CastFromExtension ¶
func CastStruct ¶
func CastToType ¶
CastToType is a convenience function equivalent to calling CastArray(ctx, val, compute.SafeCastOptions(toType))
func DatumIsValue ¶
DatumIsValue returns true if the datum passed is a Scalar, Array or ChunkedArray type (i.e. it contains a specific value, not a group of values).
func ExecSpanFromBatch ¶
ExecSpanFromBatch constructs and returns a new ExecSpan from the values inside of the ExecBatch which could be scalar or arrays.
This is mostly used for tests but is also a convenience method for other cases.
func FilterArray ¶
func FilterArray(ctx context.Context, values, filter arrow.Array, options FilterOptions) (arrow.Array, error)
FilterArray is a convenience method for calling Filter without having to manually construct the intervening Datum objects (they will be created for you internally here).
func FilterRecordBatch ¶
func FilterTable ¶
func RegisterScalarArithmetic ¶
func RegisterScalarArithmetic(reg FunctionRegistry)
func RegisterScalarBoolean ¶
func RegisterScalarBoolean(reg FunctionRegistry)
func RegisterScalarCast ¶
func RegisterScalarCast(reg FunctionRegistry)
func RegisterScalarComparisons ¶
func RegisterScalarComparisons(reg FunctionRegistry)
func RegisterVectorHash ¶
func RegisterVectorHash(reg FunctionRegistry)
func RegisterVectorRunEndFuncs ¶
func RegisterVectorRunEndFuncs(reg FunctionRegistry)
func RegisterVectorSelection ¶
func RegisterVectorSelection(reg FunctionRegistry)
RegisterVectorSelection registers functions that select specific values from arrays such as Take and Filter
func RunEndDecodeArray ¶
func RunEndEncodeArray ¶
func SerializeExpr ¶
SerializeExpr serializes expressions by converting them to Metadata and storing this in the schema of a Record. Embedded arrays and scalars are stored in its columns. Finally, the record is written as an IPC file.
func SerializeOptions ¶
func SetExecCtx ¶
SetExecCtx returns a new child context containing the passed in ExecCtx
func TakeArrayOpts ¶
Types ¶
type ArithmeticOptions ¶
type ArithmeticOptions struct {
NoCheckOverflow bool `compute:"check_overflow"`
}
func (ArithmeticOptions) TypeName ¶
func (ArithmeticOptions) TypeName() string
type Arity ¶
Arity defines the number of required arguments for a function.
Naming conventions are taken from https://en.wikipedia.org/wiki/Arity
type ArrayDatum ¶
ArrayDatum references an array.Data object which can be used to create array instances from if needed.
func (*ArrayDatum) Chunks ¶
func (d *ArrayDatum) Chunks() []arrow.Array
func (*ArrayDatum) Equals ¶
func (d *ArrayDatum) Equals(other Datum) bool
func (ArrayDatum) Kind ¶
func (ArrayDatum) Kind() DatumKind
func (*ArrayDatum) Len ¶
func (d *ArrayDatum) Len() int64
func (*ArrayDatum) MakeArray ¶
func (d *ArrayDatum) MakeArray() arrow.Array
func (*ArrayDatum) NullN ¶
func (d *ArrayDatum) NullN() int64
func (*ArrayDatum) Release ¶
func (d *ArrayDatum) Release()
func (*ArrayDatum) String ¶
func (d *ArrayDatum) String() string
func (*ArrayDatum) Type ¶
func (d *ArrayDatum) Type() arrow.DataType
type ArrayLikeDatum ¶
ArrayLikeDatum is an interface for treating a Datum similarly to an Array, so that it is easy to differentiate between Record/Table/Collection and Scalar, Array/ChunkedArray for ease of use. Chunks will return an empty slice for Scalar, a slice with 1 element for Array, and the slice of chunks for a chunked array.
type Call
deprecated
type Call struct {
// contains filtered or unexported fields
}
Call is a function call with specific arguments which are themselves other expressions. A call can also have options that are specific to the function in question. It must be bound to determine the shape and type.
Deprecated: use substrait-go expression functions instead.
func (*Call) Equals ¶
func (c *Call) Equals(other Expression) bool
func (*Call) IsNullLiteral ¶
func (*Call) IsSatisfiable ¶
func (*Call) IsScalarExpr ¶
type CastOptions ¶
type CastOptions = kernels.CastOptions
func DefaultCastOptions ¶
func DefaultCastOptions(safe bool) *CastOptions
func NewCastOptions ¶
func NewCastOptions(dt arrow.DataType, safe bool) *CastOptions
func SafeCastOptions ¶
func SafeCastOptions(dt arrow.DataType) *CastOptions
func UnsafeCastOptions ¶
func UnsafeCastOptions(dt arrow.DataType) *CastOptions
type ChunkedDatum ¶
ChunkedDatum contains a chunked array for use with expressions and compute.
func (*ChunkedDatum) Chunks ¶
func (d *ChunkedDatum) Chunks() []arrow.Array
func (*ChunkedDatum) Equals ¶
func (d *ChunkedDatum) Equals(other Datum) bool
func (ChunkedDatum) Kind ¶
func (ChunkedDatum) Kind() DatumKind
func (*ChunkedDatum) Len ¶
func (d *ChunkedDatum) Len() int64
func (*ChunkedDatum) NullN ¶
func (d *ChunkedDatum) NullN() int64
func (*ChunkedDatum) Release ¶
func (d *ChunkedDatum) Release()
func (*ChunkedDatum) String ¶
func (d *ChunkedDatum) String() string
func (*ChunkedDatum) Type ¶
func (d *ChunkedDatum) Type() arrow.DataType
type Datum ¶
type Datum interface { fmt.Stringer Kind() DatumKind Len() int64 Equals(Datum) bool Release() // contains filtered or unexported methods }
Datum is a variant interface for wrapping the various Arrow data structures. For now, the various Datum types just hold a Value which is the type they are wrapping, but it might make sense in the future for those types to actually be aliases or embed their types instead. Not sure yet.
func AbsoluteValue ¶
AbsoluteValue returns the AbsoluteValue for each element in the input argument. It accepts either a scalar or an array.
ArithmeticOptions specifies whether or not to check for overflows, performance is faster if not explicitly checking for overflows but will error on an overflow if NoCheckOverflow is false (default).
func Add ¶
Add performs an addition between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is added to each value of the array.
ArithmeticOptions specifies whether or not to check for overflows, performance is faster if not explicitly checking for overflows but will error on an overflow if NoCheckOverflow is false (default).
func CallFunction ¶
func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error)
CallFunction is a one-shot invoker for all types of functions.
It will perform kernel-dispatch, argument checking, iteration of ChunkedArray inputs and wrapping of outputs.
To affect the execution options, you must call SetExecCtx and pass the resulting context in here.
func CastDatum ¶
CastDatum is a convenience function for casting a Datum to another type. It is equivalent to calling CallFunction(ctx, "cast", opts, Datum) and should work for Scalar, Array or ChunkedArray Datums.
func Divide ¶
Divide performs a division between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is used with each value of the array.
ArithmeticOptions specifies whether or not to check for overflows, performance is faster if not explicitly checking for overflows but will error on an overflow if NoCheckOverflow is false (default).
Will error on divide by zero regardless of whether or not checking for overflows.
func Filter ¶
Filter is a wrapper convenience that is equivalent to calling CallFunction(ctx, "filter", &options, values, filter) for filtering an input array (values) by a boolean array (filter). The two inputs must be the same length.
func Multiply ¶
Multiply performs a multiplication between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is multiplied against each value of the array.
ArithmeticOptions specifies whether or not to check for overflows, performance is faster if not explicitly checking for overflows but will error on an overflow if NoCheckOverflow is false (default).
func Negate ¶
Negate returns a result containing the negation of each element in the input argument. It accepts either a scalar or an array.
ArithmeticOptions specifies whether or not to check for overflows, or to throw an error on unsigned types.
func NewDatum ¶
func NewDatum(value interface{}) Datum
NewDatum will construct the appropriate Datum type based on what is passed in as the argument.
- An arrow.Array gets an ArrayDatum
- An array.Chunked gets a ChunkedDatum
- An array.Record gets a RecordDatum
- An array.Table gets a TableDatum
- A scalar.Scalar gets a ScalarDatum
Anything else is passed to scalar.MakeScalar and receives a scalar datum of that appropriate type.
func NewDatumWithoutOwning ¶
func NewDatumWithoutOwning(value interface{}) Datum
func Power ¶
Power returns base**exp for each element in the input arrays. Should work for both Arrays and Scalars
func RoundToMultiple ¶
func RunEndEncode ¶
func ShiftLeft ¶
ShiftLeft only accepts integral types and shifts each element of the first argument to the left by the value of the corresponding element in the second argument.
The value to shift by should be >= 0 and < precision of the type.
func ShiftRight ¶
ShiftRight only accepts integral types and shifts each element of the first argument to the right by the value of the corresponding element in the second argument.
The value to shift by should be >= 0 and < precision of the type.
func Sign ¶
Sign returns -1, 0, or 1 depending on the sign of each element in the input. For x in the input:
- if x > 0: 1
- if x < 0: -1
- if x == 0: 0
func Subtract ¶
Subtract performs a subtraction between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is subtracted from each value of the array.
ArithmeticOptions specifies whether or not to check for overflows, performance is faster if not explicitly checking for overflows but will error on an overflow if NoCheckOverflow is false (default).
type DatumKind ¶
type DatumKind int
DatumKind is an enum used for denoting which kind of type a datum is encapsulating
type EmptyDatum ¶
type EmptyDatum struct{}
EmptyDatum is the null case, a Datum with nothing in it.
func (EmptyDatum) Equals ¶
func (EmptyDatum) Equals(other Datum) bool
func (EmptyDatum) Kind ¶
func (EmptyDatum) Kind() DatumKind
func (EmptyDatum) Len ¶
func (EmptyDatum) Len() int64
func (EmptyDatum) Release ¶
func (EmptyDatum) Release()
func (EmptyDatum) String ¶
func (EmptyDatum) String() string
type ExecBatch ¶
type ExecBatch struct { Values []Datum // Guarantee is a predicate Expression guaranteed to evaluate to true for // all rows in this batch. // Guarantee Expression // Len is the semantic length of this ExecBatch. When the values are // all scalars, the length should be set to 1 for non-aggregate kernels. // Otherwise the length is taken from the array values. Aggregate kernels // can have an ExecBatch formed by projecting just the partition columns // from a batch in which case it would have scalar rows with length > 1 // // If the array values are of length 0, then the length is 0 regardless of // whether any values are Scalar. Len int64 }
ExecBatch is a unit of work for kernel execution. It contains a collection of Array and Scalar values.
ExecBatch is semantically similar to a RecordBatch but for a SQL-style execution context. It represents a collection of records, but constant "columns" are represented by Scalar values rather than having to be converted into arrays with repeated values.
type ExecCtx ¶
type ExecCtx struct { // ChunkSize is the size used when iterating batches for execution // ChunkSize elements will be operated on at a time unless an argument // is a chunkedarray with a chunk that is smaller ChunkSize int64 // PreallocContiguous determines whether preallocating memory for // execution of compute attempts to preallocate a full contiguous // buffer for all of the chunks beforehand. PreallocContiguous bool // Registry allows specifying the Function Registry to utilize // when searching for kernel implementations. Registry FunctionRegistry // ExecChannelSize is the size of the channel used for passing // exec results to the WrapResults function. ExecChannelSize int // NumParallel determines the number of parallel goroutines // allowed for parallel executions. NumParallel int }
ExecCtx holds simple contextual information for execution such as the default ChunkSize for batch iteration, whether or not to ensure contiguous preallocations for kernels that want preallocation, and a reference to the desired function registry to use.
An ExecCtx should be placed into a context.Context by using SetExecCtx, and retrieved from it with GetExecCtx, to pass it along for execution.
func DefaultExecCtx ¶
func DefaultExecCtx() ExecCtx
DefaultExecCtx returns the default exec context which will be used if there is no ExecCtx set into the context for execution.
This can be called to get a copy of the default values which can then be modified to set into a context.
The default exec context uses the following values:
- ChunkSize = DefaultMaxChunkSize (MaxInt64)
- PreallocContiguous = true
- Registry = GetFunctionRegistry()
- ExecChannelSize = 10
- NumParallel = runtime.NumCPU()
func GetExecCtx ¶
GetExecCtx returns an embedded ExecCtx from the provided context. If it does not contain an ExecCtx, then the default one is returned.
type Expression
deprecated
type Expression interface { fmt.Stringer // IsBound returns true if this expression has been bound to a particular // Datum and/or Schema. IsBound() bool // IsScalarExpr returns true if this expression is composed only of scalar // literals, field references and calls to scalar functions. IsScalarExpr() bool // IsNullLiteral returns true if this expression is a literal and entirely // null. IsNullLiteral() bool // IsSatisfiable returns true if this expression could evaluate to true IsSatisfiable() bool // FieldRef returns a pointer to the underlying field reference, or nil if // this expression is not a field reference. FieldRef() *FieldRef // Type returns the datatype this expression will evaluate to. Type() arrow.DataType Hash() uint64 Equals(Expression) bool // Release releases the underlying bound C++ memory that is allocated when // a Bind is performed. Any bound expression should get released to ensure // no memory leaks. Release() }
Expression is an interface for mapping one datum to another. An expression is one of:
- A literal Datum
- A reference to a single (potentially nested) field of an input Datum
- A call to a compute function, with arguments specified by other Expressions
Deprecated: use substrait-go expressions instead.
func And ¶
func And(lhs, rhs Expression, ops ...Expression) Expression
And constructs a tree of calls to and_kleene for boolean And logic taking an arbitrary number of values.
func Cast ¶
func Cast(ex Expression, dt arrow.DataType) Expression
func DeserializeExpr ¶
func Equal ¶
func Equal(lhs, rhs Expression) Expression
Equal is a convenience function for the equal function
func Greater ¶
func Greater(lhs, rhs Expression) Expression
Greater is shorthand for NewCall("greater",....)
func GreaterEqual ¶
func GreaterEqual(lhs, rhs Expression) Expression
GreaterEqual is shorthand for NewCall("greater_equal",....)
func IsNull ¶
func IsNull(lhs Expression, nanIsNull bool) Expression
IsNull creates an expression that returns true if the passed in expression is null, optionally treating NaN as null if desired.
func LessEqual ¶
func LessEqual(lhs, rhs Expression) Expression
LessEqual is shorthand for NewCall("less_equal",....)
func NewCall ¶
func NewCall(name string, args []Expression, opts FunctionOptions) Expression
NewCall constructs an expression that represents a specific function call with the given arguments and options.
func NewFieldRef ¶
func NewFieldRef(field string) Expression
NewFieldRef is shorthand for NewRef(FieldRefName(field))
func NewLiteral ¶
func NewLiteral(arg interface{}) Expression
NewLiteral constructs a new literal expression from any value. It is passed to NewDatum which will construct the appropriate Datum and/or scalar value for the type provided.
func NewRef ¶
func NewRef(ref FieldRef) Expression
NewRef constructs a parameter expression which refers to a specific field
func Not ¶
func Not(expr Expression) Expression
Not creates a call to "invert" for the value specified.
func NullLiteral ¶
func NullLiteral(dt arrow.DataType) Expression
func Or ¶
func Or(lhs, rhs Expression, ops ...Expression) Expression
Or constructs a tree of calls to or_kleene for boolean Or logic taking an arbitrary number of values.
func Project ¶
func Project(values []Expression, names []string) Expression
Project is shorthand for `make_struct` to produce a record batch output from a group of expressions.
type FieldPath ¶
type FieldPath []int
FieldPath represents a path to a nested field using indices of child fields. For example, given the indices {5, 9, 3} the field could be retrieved with: schema.Field(5).Type().(*arrow.StructType).Field(9).Type().(*arrow.StructType).Field(3)
Attempting to retrieve a child field using a FieldPath which is not valid for a given schema will get an error such as an out of range index, or an empty path.
FieldPaths provide for drilling down to potentially nested children for convenience of accepting a slice of fields, a schema or a datatype (which should contain child fields).
A fieldpath can also be used to retrieve a child arrow.Array or column from a record batch.
func (FieldPath) Get ¶
Get retrieves the corresponding nested child field by drilling through the schema's fields as per the field path.
func (FieldPath) GetColumn ¶
GetColumn will return the correct child array by traversing the fieldpath going to the nested arrays of the columns in the record batch.
func (FieldPath) GetFieldFromSlice ¶
GetFieldFromSlice treats the slice as the top layer of fields, so the first value in the field path will index into the slice, and then drill down from there.
func (FieldPath) GetFieldFromType ¶
GetFieldFromType returns the nested field from a datatype by drilling into its child fields.
type FieldRef ¶
type FieldRef struct {
// contains filtered or unexported fields
}
FieldRef is a descriptor of a (potentially nested) field within a schema.
Unlike FieldPath (which is exclusively indices of child fields), FieldRef may reference a field by name. It can be constructed from either a field index, field name, or field path.
Nested fields can be referenced as well, given the schema:
arrow.NewSchema([]arrow.Field{ {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, {Name: "b", Type: arrow.PrimitiveTypes.Int32}, })
the following all indicate the nested field named "n":
FieldRefPath(FieldPath{0, 0}) FieldRefList("a", 0) FieldRefList("a", "n") FieldRefList(0, "n") NewFieldRefFromDotPath(".a[0]")
FieldPaths matching a FieldRef are retrieved with the FindAll* functions. Multiple matches are possible because field names may be duplicated within a schema. For example:
aIsAmbiguous := arrow.NewSchema([]arrow.Field{ {Name: "a", Type: arrow.PrimitiveTypes.Int32}, {Name: "a", Type: arrow.PrimitiveTypes.Float32}, }) matches := FieldRefName("a").FindAll(aIsAmbiguous) assert.Len(matches, 2) assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0))) assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)))
func FieldRefIndex ¶
FieldRefIndex is a convenience function to construct a FieldPath reference of a single index
func FieldRefList ¶
func FieldRefList(elems ...interface{}) FieldRef
FieldRefList takes an arbitrary number of arguments which can be either strings or ints. This will panic if anything other than a string or int is passed in.
func FieldRefPath ¶
FieldRefPath constructs a FieldRef from a given FieldPath
func NewFieldRefFromDotPath ¶
NewFieldRefFromDotPath parses a dot path into a field ref.
dot_path = '.' name
         | '[' digit+ ']'
         | dot_path+
Examples
".alpha" => FieldRefName("alpha")
"[2]" => FieldRefIndex(2)
".beta[3]" => FieldRefList("beta", 3)
"[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7)
".hello world" => FieldRefName("hello world")
`.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`)
Note: when parsing a name, a '\' preceding any other character will be dropped from the resulting name. Therefore, if a name must contain the characters '.', '\', '[' or ']' then they must be escaped with a preceding '\'.
func (*FieldRef) FieldPath ¶
FieldPath returns the fieldpath that this FieldRef uses, otherwise an empty FieldPath if it's not a FieldPath reference
func (FieldRef) FindAll ¶
FindAll returns all the fieldpaths which this FieldRef matches in the given slice of fields.
func (FieldRef) FindAllField ¶
FindAllField returns all the fieldpaths that this FieldRef matches against the type of the given field.
func (FieldRef) FindOne ¶
FindOne returns an error if the field isn't matched or if there are multiple matches; otherwise it returns the path to the single valid match.
func (FieldRef) FindOneOrNone ¶
FindOneOrNone is a convenience helper that will either return 1 fieldpath, or an empty fieldpath, and will return an error if there are multiple matches.
func (FieldRef) FindOneOrNoneRecord ¶
FindOneOrNoneRecord is like FindOneOrNone but for the schema of a record, returning an error only if there are multiple matches.
func (FieldRef) GetAllColumns ¶
GetAllColumns gets all the matching column arrays from the given record that this FieldRef references.
func (FieldRef) GetOneColumnOrNone ¶
GetOneColumnOrNone returns either nil or the referenced array if it can be found, erroring only if there are multiple ambiguous matches.
func (FieldRef) GetOneField ¶
GetOneField will return a pointer to a field or an error if it is not found or if there are multiple matches.
func (FieldRef) GetOneOrNone ¶
GetOneOrNone will return the field if it is found or nil if it is not, and only errors if there are multiple matches.
func (FieldRef) Hash ¶
Hash produces a hash of this field reference and takes in a seed so that it can maintain consistency across multiple places, processes, etc.
func (*FieldRef) IsFieldPath ¶
IsFieldPath returns true if this FieldRef uses a fieldpath
func (*FieldRef) IsNested ¶
IsNested returns true if this FieldRef expects to represent a nested field.
type FilterOptions ¶
type FilterOptions = kernels.FilterOptions
func DefaultFilterOptions ¶
func DefaultFilterOptions() *FilterOptions
type FuncKind ¶
type FuncKind int8
FuncKind is an enum representing the type of a function
const ( // A function that performs scalar data operations on whole arrays // of data. Can generally process Array or Scalar values. The size // of the output will be the same as the size (or broadcasted size, // in the case of mixing Array and Scalar inputs) of the input. FuncScalar FuncKind = iota // Scalar // A function with array input and output whose behavior depends on // the values of the entire arrays passed, rather than the value of // each scalar value. FuncVector // Vector // A function that computes a scalar summary statistic from array input. FuncScalarAgg // ScalarAggregate // A function that computes grouped summary statistics from array // input and an array of group identifiers. FuncHashAgg // HashAggregate // A function that dispatches to other functions and does not contain // its own kernels. FuncMeta // Meta )
type Function ¶
type Function interface { Name() string Kind() FuncKind Arity() Arity Doc() FunctionDoc NumKernels() int Execute(context.Context, FunctionOptions, ...Datum) (Datum, error) DispatchExact(...arrow.DataType) (exec.Kernel, error) DispatchBest(...arrow.DataType) (exec.Kernel, error) DefaultOptions() FunctionOptions Validate() error }
type FunctionDoc ¶
type FunctionDoc struct { // A one-line summary of the function, using a verb. // // For example, "Add two numeric arrays or scalars" Summary string // A detailed description of the function, meant to follow the summary. Description string // Symbolic names (identifiers) for the function arguments. // // Can be used to generate nicer function signatures. ArgNames []string // Name of the options struct type, if any OptionsType string // Whether or not options are required for function execution. // // If false, then either there are no options for this function, // or there is a usable default options value. OptionsRequired bool }
var EmptyFuncDoc FunctionDoc
EmptyFuncDoc is a reusable empty function doc definition for convenience.
type FunctionOptions ¶
type FunctionOptions interface {
TypeName() string
}
FunctionOptions can be any type which has a TypeName function. The fields of the type will be used (via reflection) to determine the information to propagate when serializing to pass to the C++ for execution.
type FunctionOptionsCloneable ¶
type FunctionOptionsCloneable interface {
Clone() FunctionOptions
}
type FunctionOptionsEqual ¶
type FunctionOptionsEqual interface {
Equals(FunctionOptions) bool
}
type FunctionRegistry ¶
type FunctionRegistry interface { CanAddFunction(fn Function, allowOverwrite bool) bool AddFunction(fn Function, allowOverwrite bool) bool CanAddAlias(target, source string) bool AddAlias(target, source string) bool GetFunction(name string) (Function, bool) GetFunctionNames() []string NumFunctions() int // contains filtered or unexported methods }
func GetFunctionRegistry ¶
func GetFunctionRegistry() FunctionRegistry
func NewChildRegistry ¶
func NewChildRegistry(parent FunctionRegistry) FunctionRegistry
func NewRegistry ¶
func NewRegistry() FunctionRegistry
type KernelExecutor ¶
type KernelExecutor interface { // Init must be called *after* the kernel's init method and any // KernelState must be set into the KernelCtx *before* calling // this Init method. This is to facilitate the case where // Init may be expensive and does not need to be called // again for each execution of the kernel. For example, // the same lookup table can be re-used for all scanned batches // in a dataset filter. Init(*exec.KernelCtx, exec.KernelInitArgs) error // Execute the kernel for the provided batch and pass the resulting // Datum values to the provided channel. Execute(context.Context, *ExecBatch, chan<- Datum) error // WrapResults exists for the case where an executor wants to post process // the batches of result datums. Such as creating a ChunkedArray from // multiple output batches or so on. Results from individual batch // executions should be read from the out channel, and WrapResults should // return the final Datum result. WrapResults(ctx context.Context, out <-chan Datum, chunkedArgs bool) Datum // CheckResultType checks the actual result type against the resolved // output type. If the types don't match an error is returned CheckResultType(out Datum) error // Clear resets the state in the executor so that it can be reused. Clear() }
KernelExecutor is the interface for all executors to initialize and call kernel execution functions on batches.
func NewScalarExecutor ¶
func NewScalarExecutor() KernelExecutor
type Literal
deprecated
type Literal struct {
Literal Datum
}
Literal is an expression denoting a literal Datum which could be any value as a scalar, an array, or so on.
Deprecated: use substrait-go expressions Literal instead.
func (*Literal) Equals ¶
func (l *Literal) Equals(other Expression) bool
func (*Literal) IsNullLiteral ¶
func (*Literal) IsSatisfiable ¶
func (*Literal) IsScalarExpr ¶
type MakeStructOptions ¶
type MakeStructOptions struct { FieldNames []string `compute:"field_names"` FieldNullability []bool `compute:"field_nullability"` FieldMetadata []*arrow.Metadata `compute:"field_metadata"` }
func (MakeStructOptions) TypeName ¶
func (MakeStructOptions) TypeName() string
type MetaFunction ¶
type MetaFunction struct {
// contains filtered or unexported fields
}
MetaFunction is a function which dispatches to other functions; the impl must not be nil.
For Array, ChunkedArray and Scalar datums, this may rely on the execution of concrete function types, but this must handle other Datum kinds on its own.
func NewMetaFunction ¶
func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) *MetaFunction
NewMetaFunction constructs a new MetaFunction which will call the provided impl for dispatching with the expected arity.
Will panic if impl is nil.
func (*MetaFunction) DefaultOptions ¶
func (b *MetaFunction) DefaultOptions() FunctionOptions
func (*MetaFunction) DispatchBest ¶
func (*MetaFunction) DispatchExact ¶
func (*MetaFunction) Doc ¶
func (b *MetaFunction) Doc() FunctionDoc
func (*MetaFunction) Execute ¶
func (m *MetaFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
func (MetaFunction) NumKernels ¶
func (MetaFunction) NumKernels() int
type MetaFunctionImpl ¶
MetaFunctionImpl is the signature needed for implementing a MetaFunction which is a function that dispatches to another function instead.
type NullOptions ¶
type NullOptions struct {
NanIsNull bool `compute:"nan_is_null"`
}
func (NullOptions) TypeName ¶
func (NullOptions) TypeName() string
type NullSelectionBehavior ¶
type NullSelectionBehavior = kernels.NullSelectionBehavior
type Parameter
deprecated
type Parameter struct {
// contains filtered or unexported fields
}
Parameter represents a field reference and needs to be bound in order to determine its type and shape.
Deprecated: use substrait-go field references instead.
func (*Parameter) Equals ¶
func (p *Parameter) Equals(other Expression) bool
func (Parameter) IsNullLiteral ¶
func (*Parameter) IsSatisfiable ¶
func (*Parameter) IsScalarExpr ¶
type RecordDatum ¶
RecordDatum contains an array.Record for passing a full record to an expression or to compute.
func (*RecordDatum) Equals ¶
func (r *RecordDatum) Equals(other Datum) bool
func (RecordDatum) Kind ¶
func (RecordDatum) Kind() DatumKind
func (*RecordDatum) Len ¶
func (r *RecordDatum) Len() int64
func (*RecordDatum) Release ¶
func (r *RecordDatum) Release()
func (*RecordDatum) Schema ¶
func (r *RecordDatum) Schema() *arrow.Schema
func (RecordDatum) String ¶
func (RecordDatum) String() string
type RoundOptions ¶
type RoundOptions = kernels.RoundOptions
type RoundToMultipleOptions ¶
type RoundToMultipleOptions = kernels.RoundToMultipleOptions
type RunEndEncodeOptions ¶
type RunEndEncodeOptions = kernels.RunEndEncodeState
type ScalarDatum ¶
ScalarDatum contains a scalar value
func (ScalarDatum) Chunks ¶
func (ScalarDatum) Chunks() []arrow.Array
func (*ScalarDatum) Equals ¶
func (d *ScalarDatum) Equals(other Datum) bool
func (ScalarDatum) Kind ¶
func (ScalarDatum) Kind() DatumKind
func (ScalarDatum) Len ¶
func (ScalarDatum) Len() int64
func (*ScalarDatum) NullN ¶
func (d *ScalarDatum) NullN() int64
func (*ScalarDatum) Release ¶
func (d *ScalarDatum) Release()
func (*ScalarDatum) String ¶
func (d *ScalarDatum) String() string
func (*ScalarDatum) Type ¶
func (d *ScalarDatum) Type() arrow.DataType
type ScalarFunction ¶
type ScalarFunction struct {
// contains filtered or unexported fields
}
A ScalarFunction is a function that executes element-wise operations on arrays or scalars, and therefore whose results generally do not depend on the order of the values in the arguments. Accepts and returns arrays that are all of the same size. These functions roughly correspond to the functions used in most SQL expressions.
func NewScalarFunction ¶
func NewScalarFunction(name string, arity Arity, doc FunctionDoc) *ScalarFunction
NewScalarFunction constructs a new ScalarFunction object with the passed in name, arity and function doc.
func (*ScalarFunction) AddKernel ¶
func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error
AddKernel adds the provided kernel to the list of kernels this function has. A copy of the kernel is added to the slice of kernels, which means that a given kernel object can be created, added and then reused to add other kernels.
func (*ScalarFunction) AddNewKernel ¶
func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error
AddNewKernel constructs a new kernel with the provided signature and execution/init functions and then adds it to the function's list of kernels. This assumes default null handling (intersection of validity bitmaps)
func (*ScalarFunction) DispatchBest ¶
func (*ScalarFunction) DispatchExact ¶
func (*ScalarFunction) Execute ¶
func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
Execute uses the passed in context, function options and arguments to eagerly execute the function using kernel dispatch, batch iteration and memory allocation details as defined by the kernel.
If opts is nil, then the DefaultOptions() will be used.
func (*ScalarFunction) NumKernels ¶
func (fi *ScalarFunction) NumKernels() int
func (*ScalarFunction) SetDefaultOptions ¶
func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions)
type SetLookupOptions ¶
type SetLookupOptions struct { ValueSet Datum `compute:"value_set"` SkipNulls bool `compute:"skip_nulls"` }
func (*SetLookupOptions) Equals ¶
func (s *SetLookupOptions) Equals(other FunctionOptions) bool
func (*SetLookupOptions) FromStructScalar ¶
func (s *SetLookupOptions) FromStructScalar(sc *scalar.Struct) error
func (*SetLookupOptions) Release ¶
func (s *SetLookupOptions) Release()
func (SetLookupOptions) TypeName ¶
func (SetLookupOptions) TypeName() string
type StrptimeOptions ¶
type StrptimeOptions struct { Format string `compute:"format"` Unit arrow.TimeUnit `compute:"unit"` }
func (StrptimeOptions) TypeName ¶
func (StrptimeOptions) TypeName() string
type TableDatum ¶
TableDatum contains a table so that multiple record batches can be worked with together as a single table for being passed to compute and expression handling.
func (*TableDatum) Equals ¶
func (d *TableDatum) Equals(other Datum) bool
func (TableDatum) Kind ¶
func (TableDatum) Kind() DatumKind
func (*TableDatum) Len ¶
func (d *TableDatum) Len() int64
func (*TableDatum) Release ¶
func (d *TableDatum) Release()
func (*TableDatum) Schema ¶
func (d *TableDatum) Schema() *arrow.Schema
func (TableDatum) String ¶
func (TableDatum) String() string
type TableLikeDatum ¶
TableLikeDatum is an interface type for specifying either a RecordBatch or a Table as both contain a schema as opposed to just a single data type.
type TakeOptions ¶
type TakeOptions = kernels.TakeOptions
func DefaultTakeOptions ¶
func DefaultTakeOptions() *TakeOptions
type VectorFunction ¶
type VectorFunction struct {
// contains filtered or unexported fields
}
func NewVectorFunction ¶
func NewVectorFunction(name string, arity Arity, doc FunctionDoc) *VectorFunction
func (*VectorFunction) AddKernel ¶
func (f *VectorFunction) AddKernel(kernel exec.VectorKernel) error
func (*VectorFunction) AddNewKernel ¶
func (f *VectorFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error
func (*VectorFunction) DispatchBest ¶
func (*VectorFunction) DispatchExact ¶
func (*VectorFunction) Execute ¶
func (f *VectorFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
func (*VectorFunction) NumKernels ¶
func (fi *VectorFunction) NumKernels() int
func (*VectorFunction) SetDefaultOptions ¶
func (f *VectorFunction) SetDefaultOptions(opts FunctionOptions)