compute

package

v18.0.0-...-f9949aa Latest Latest Go to latest Published: Dec 16, 2024 License: Apache-2.0, BSD-3-Clause Imports: 34 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/joechenrh/arrow-go

Documentation ¶

Overview ¶

Package compute is a native-go implementation of an Acero-like arrow compute engine. It requires go1.18+.

While consumers of Arrow that are able to use CGO could utilize the C Data API (using the cdata package) and could link against the acero library directly, there are consumers who cannot use CGO. This is an attempt to provide for those users, and in general create a native-go arrow compute engine.

The overwhelming majority of things in this package require go1.18 as it utilizes generics. The files in this package and its sub-packages are all excluded from being built by go versions lower than 1.18 so that the larger Arrow module itself is still compatible with go1.17.

Everything in this package should be considered Experimental for now.

Example (CustomFunction) ¶

This example demonstrates how to register a custom scalar function.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/joechenrh/arrow-go/v18/arrow"
	"github.com/joechenrh/arrow-go/v18/arrow/array"
	"github.com/joechenrh/arrow-go/v18/arrow/compute"
	"github.com/joechenrh/arrow-go/v18/arrow/compute/exec"
	"github.com/joechenrh/arrow-go/v18/arrow/memory"
)

// main registers a custom unary scalar function named "add_42" in the
// registry of the default execution context, invokes it on an Int8 array
// holding the values 0..15, and prints the resulting values.
func main() {
	pool := memory.NewGoAllocator()

	// Attach the execution context to a context once and reuse that context
	// for the call below, so the function is looked up in the same registry
	// it is registered in.
	execCtx := compute.DefaultExecCtx()
	ctx := compute.SetExecCtx(context.Background(), execCtx)

	add42 := compute.NewScalarFunction("add_42", compute.Arity{
		NArgs: 1,
	}, compute.FunctionDoc{
		Summary:  "Returns the input values plus 42",
		ArgNames: []string{"input"},
	})

	if err := add42.AddNewKernel(
		[]exec.InputType{
			// We accept a single argument (array) of Int8 type.
			{
				Kind: exec.InputExact,
				Type: arrow.PrimitiveTypes.Int8,
			},
		},
		// We'll return a single Int8 array.
		exec.NewOutputType(arrow.PrimitiveTypes.Int8),
		// The kernel context is unused; it is named kctx so that it does not
		// shadow the outer ctx.
		func(kctx *exec.KernelCtx, span *exec.ExecSpan, result *exec.ExecResult) error {
			// The second buffer contains the values, both for the input and
			// the output arrays. Int8 values occupy one byte each, so the raw
			// bytes can be processed element by element.
			// NOTE(review): this assumes the output buffer is at least as long
			// as the input buffer and that the input has no offset — confirm
			// before reusing this kernel outside the example.
			for i, x := range span.Values[0].Array.Buffers[1].Buf {
				result.Buffers[1].Buf[i] = x + 42
			}
			return nil
		},
		nil,
	); err != nil {
		log.Fatal(err)
	}
	execCtx.Registry.AddFunction(add42, true)

	inputArrayBuilder := array.NewInt8Builder(pool)
	defer inputArrayBuilder.Release()
	for i := 0; i < 16; i++ {
		inputArrayBuilder.Append(int8(i))
	}
	inputArray := inputArrayBuilder.NewArray()
	defer inputArray.Release()

	outputArrayDatum, err := compute.CallFunction(
		ctx,
		"add_42",
		nil,
		&compute.ArrayDatum{Value: inputArray.Data()},
	)
	if err != nil {
		log.Fatal(err)
	}
	defer outputArrayDatum.Release()

	// Check the concrete datum type instead of panicking on an unexpected
	// result kind.
	outputDatum, ok := outputArrayDatum.(*compute.ArrayDatum)
	if !ok {
		log.Fatalf("unexpected result datum kind: %v", outputArrayDatum.Kind())
	}

	outputArray := array.NewInt8Data(outputDatum.Value)
	defer outputArray.Release()

	fmt.Println(outputArray.Int8Values())
}

Output:

[42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57]

Index ¶

Constants
Variables
func CanCast(from, to arrow.DataType) bool
func CastArray(ctx context.Context, val arrow.Array, opts *CastOptions) (arrow.Array, error)
func CastFromExtension(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error
func CastList[SrcOffsetT, DestOffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error
func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error
func CastToType(ctx context.Context, val arrow.Array, toType arrow.DataType) (arrow.Array, error)
func DatumIsValue(d Datum) bool
func ExecSpanFromBatch(batch *ExecBatch) *exec.ExecSpan
func FilterArray(ctx context.Context, values, filter arrow.Array, options FilterOptions) (arrow.Array, error)
func FilterRecordBatch(ctx context.Context, batch arrow.Record, filter arrow.Array, ...) (arrow.Record, error)
func FilterTable(ctx context.Context, tbl arrow.Table, filter Datum, opts *FilterOptions) (arrow.Table, error)
func RegisterScalarArithmetic(reg FunctionRegistry)
func RegisterScalarBoolean(reg FunctionRegistry)
func RegisterScalarCast(reg FunctionRegistry)
func RegisterScalarComparisons(reg FunctionRegistry)
func RegisterVectorHash(reg FunctionRegistry)
func RegisterVectorRunEndFuncs(reg FunctionRegistry)
func RegisterVectorSelection(reg FunctionRegistry)
func RunEndDecodeArray(ctx context.Context, input arrow.Array) (arrow.Array, error)
func RunEndEncodeArray(ctx context.Context, opts RunEndEncodeOptions, input arrow.Array) (arrow.Array, error)
func SerializeExpr(expr Expression, mem memory.Allocator) (*memory.Buffer, error)
func SerializeOptions(opts FunctionOptions, mem memory.Allocator) (*memory.Buffer, error)
func SetExecCtx(ctx context.Context, e ExecCtx) context.Context
func TakeArray(ctx context.Context, values, indices arrow.Array) (arrow.Array, error)
func TakeArrayOpts(ctx context.Context, values, indices arrow.Array, opts TakeOptions) (arrow.Array, error)
func UniqueArray(ctx context.Context, values arrow.Array) (arrow.Array, error)
type ArithmeticOptions
- func (ArithmeticOptions) TypeName() string
type Arity
- func Binary() Arity
- func Nullary() Arity
- func Ternary() Arity
- func Unary() Arity
- func VarArgs(minArgs int) Arity
type ArrayDatum
- func (d *ArrayDatum) Chunks() []arrow.Array
- func (d *ArrayDatum) Equals(other Datum) bool
- func (ArrayDatum) Kind() DatumKind
- func (d *ArrayDatum) Len() int64
- func (d *ArrayDatum) MakeArray() arrow.Array
- func (d *ArrayDatum) NullN() int64
- func (d *ArrayDatum) Release()
- func (d *ArrayDatum) String() string
- func (d *ArrayDatum) ToScalar() (scalar.Scalar, error)
- func (d *ArrayDatum) Type() arrow.DataType
type ArrayLikeDatum
type Call deprecated
- func (c *Call) Equals(other Expression) bool
- func (c *Call) FieldRef() *FieldRef
- func (c *Call) Hash() uint64
- func (c *Call) IsBound() bool
- func (c *Call) IsNullLiteral() bool
- func (c *Call) IsSatisfiable() bool
- func (c *Call) IsScalarExpr() bool
- func (c *Call) Release()
- func (c *Call) String() string
- func (c *Call) Type() arrow.DataType
type CastOptions
- func DefaultCastOptions(safe bool) *CastOptions
- func NewCastOptions(dt arrow.DataType, safe bool) *CastOptions
- func SafeCastOptions(dt arrow.DataType) *CastOptions
- func UnsafeCastOptions(dt arrow.DataType) *CastOptions
type ChunkedDatum
- func (d *ChunkedDatum) Chunks() []arrow.Array
- func (d *ChunkedDatum) Equals(other Datum) bool
- func (ChunkedDatum) Kind() DatumKind
- func (d *ChunkedDatum) Len() int64
- func (d *ChunkedDatum) NullN() int64
- func (d *ChunkedDatum) Release()
- func (d *ChunkedDatum) String() string
- func (d *ChunkedDatum) Type() arrow.DataType
type Datum
- func AbsoluteValue(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error)
- func Acos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Add(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Asin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Atan(ctx context.Context, arg Datum) (Datum, error)
- func Atan2(ctx context.Context, x, y Datum) (Datum, error)
- func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error)
- func CastDatum(ctx context.Context, val Datum, opts *CastOptions) (Datum, error)
- func Cos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Divide(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Filter(ctx context.Context, values, filter Datum, options FilterOptions) (Datum, error)
- func Ln(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Log10(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Log1p(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Log2(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Logb(ctx context.Context, opts ArithmeticOptions, x, base Datum) (Datum, error)
- func Multiply(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Negate(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error)
- func NewDatum(value interface{}) Datum
- func NewDatumWithoutOwning(value interface{}) Datum
- func Power(ctx context.Context, opts ArithmeticOptions, base, exp Datum) (Datum, error)
- func Round(ctx context.Context, opts RoundOptions, arg Datum) (Datum, error)
- func RoundToMultiple(ctx context.Context, opts RoundToMultipleOptions, arg Datum) (Datum, error)
- func RunEndDecode(ctx context.Context, arg Datum) (Datum, error)
- func RunEndEncode(ctx context.Context, opts RunEndEncodeOptions, arg Datum) (Datum, error)
- func ShiftLeft(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error)
- func ShiftRight(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error)
- func Sign(ctx context.Context, input Datum) (Datum, error)
- func Sin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Subtract(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)
- func Take(ctx context.Context, opts TakeOptions, values, indices Datum) (Datum, error)
- func Tan(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)
- func Unique(ctx context.Context, values Datum) (Datum, error)
type DatumKind
- func (i DatumKind) String() string
type EmptyDatum
- func (EmptyDatum) Equals(other Datum) bool
- func (EmptyDatum) Kind() DatumKind
- func (EmptyDatum) Len() int64
- func (EmptyDatum) Release()
- func (EmptyDatum) String() string
type ExecBatch
- func (e ExecBatch) NumValues() int
type ExecCtx
- func DefaultExecCtx() ExecCtx
- func GetExecCtx(ctx context.Context) ExecCtx
type Expression deprecated
- func And(lhs, rhs Expression, ops ...Expression) Expression
- func Cast(ex Expression, dt arrow.DataType) Expression
- func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, error)
- func Equal(lhs, rhs Expression) Expression
- func Greater(lhs, rhs Expression) Expression
- func GreaterEqual(lhs, rhs Expression) Expression
- func IsNull(lhs Expression, nanIsNull bool) Expression
- func IsValid(lhs Expression) Expression
- func Less(lhs, rhs Expression) Expression
- func LessEqual(lhs, rhs Expression) Expression
- func NewCall(name string, args []Expression, opts FunctionOptions) Expression
- func NewFieldRef(field string) Expression
- func NewLiteral(arg interface{}) Expression
- func NewRef(ref FieldRef) Expression
- func Not(expr Expression) Expression
- func NotEqual(lhs, rhs Expression) Expression
- func NullLiteral(dt arrow.DataType) Expression
- func Or(lhs, rhs Expression, ops ...Expression) Expression
- func Project(values []Expression, names []string) Expression
type FieldPath
- func (f FieldPath) Get(s *arrow.Schema) (*arrow.Field, error)
- func (f FieldPath) GetColumn(batch arrow.Record) (arrow.Array, error)
- func (f FieldPath) GetField(field arrow.Field) (*arrow.Field, error)
- func (f FieldPath) GetFieldFromSlice(fields []arrow.Field) (*arrow.Field, error)
- func (f FieldPath) GetFieldFromType(typ arrow.DataType) (*arrow.Field, error)
- func (f FieldPath) String() string
type FieldRef
- func FieldRefIndex(i int) FieldRef
- func FieldRefList(elems ...interface{}) FieldRef
- func FieldRefName(n string) FieldRef
- func FieldRefPath(p FieldPath) FieldRef
- func NewFieldRefFromDotPath(dotpath string) (out FieldRef, err error)
- func (f *FieldRef) Equals(other FieldRef) bool
- func (f *FieldRef) FieldPath() FieldPath
- func (f FieldRef) FindAll(fields []arrow.Field) []FieldPath
- func (f FieldRef) FindAllField(field arrow.Field) []FieldPath
- func (f FieldRef) FindOne(schema *arrow.Schema) (FieldPath, error)
- func (f FieldRef) FindOneOrNone(schema *arrow.Schema) (FieldPath, error)
- func (f FieldRef) FindOneOrNoneRecord(root arrow.Record) (FieldPath, error)
- func (f FieldRef) GetAllColumns(root arrow.Record) ([]arrow.Array, error)
- func (f FieldRef) GetOneColumnOrNone(root arrow.Record) (arrow.Array, error)
- func (f FieldRef) GetOneField(schema *arrow.Schema) (*arrow.Field, error)
- func (f FieldRef) GetOneOrNone(schema *arrow.Schema) (*arrow.Field, error)
- func (f FieldRef) Hash(seed maphash.Seed) uint64
- func (f *FieldRef) IsFieldPath() bool
- func (f *FieldRef) IsName() bool
- func (f *FieldRef) IsNested() bool
- func (f *FieldRef) Name() string
- func (f FieldRef) String() string
type FilterOptions
- func DefaultFilterOptions() *FilterOptions
type FuncKind
- func (i FuncKind) String() string
type Function
type FunctionDoc
type FunctionOptions
type FunctionOptionsCloneable
type FunctionOptionsEqual
type FunctionRegistry
- func GetFunctionRegistry() FunctionRegistry
- func NewChildRegistry(parent FunctionRegistry) FunctionRegistry
- func NewRegistry() FunctionRegistry
type KernelExecutor
- func NewScalarExecutor() KernelExecutor
type Literal deprecated
- func (l *Literal) Equals(other Expression) bool
- func (Literal) FieldRef() *FieldRef
- func (l *Literal) Hash() uint64
- func (l *Literal) IsBound() bool
- func (l *Literal) IsNullLiteral() bool
- func (l *Literal) IsSatisfiable() bool
- func (l *Literal) IsScalarExpr() bool
- func (l *Literal) Release()
- func (l *Literal) String() string
- func (l *Literal) Type() arrow.DataType
type MakeStructOptions
- func (MakeStructOptions) TypeName() string
type MetaFunction
- func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) *MetaFunction
- func (b *MetaFunction) Arity() Arity
- func (b *MetaFunction) DefaultOptions() FunctionOptions
- func (m *MetaFunction) DispatchBest(...arrow.DataType) (exec.Kernel, error)
- func (m *MetaFunction) DispatchExact(...arrow.DataType) (exec.Kernel, error)
- func (b *MetaFunction) Doc() FunctionDoc
- func (m *MetaFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
- func (b *MetaFunction) Kind() FuncKind
- func (b *MetaFunction) Name() string
- func (MetaFunction) NumKernels() int
- func (b *MetaFunction) Validate() error
type MetaFunctionImpl
type NullOptions
- func (NullOptions) TypeName() string
type NullSelectionBehavior
type Parameter deprecated
- func (p *Parameter) Equals(other Expression) bool
- func (p *Parameter) FieldRef() *FieldRef
- func (p *Parameter) Hash() uint64
- func (p *Parameter) IsBound() bool
- func (Parameter) IsNullLiteral() bool
- func (p *Parameter) IsSatisfiable() bool
- func (p *Parameter) IsScalarExpr() bool
- func (p *Parameter) Release()
- func (p *Parameter) String() string
- func (p *Parameter) Type() arrow.DataType
type RecordDatum
- func (r *RecordDatum) Equals(other Datum) bool
- func (RecordDatum) Kind() DatumKind
- func (r *RecordDatum) Len() int64
- func (r *RecordDatum) Release()
- func (r *RecordDatum) Schema() *arrow.Schema
- func (RecordDatum) String() string
type RoundMode
type RoundOptions
type RoundToMultipleOptions
type RunEndEncodeOptions
type ScalarDatum
- func (ScalarDatum) Chunks() []arrow.Array
- func (d *ScalarDatum) Equals(other Datum) bool
- func (ScalarDatum) Kind() DatumKind
- func (ScalarDatum) Len() int64
- func (d *ScalarDatum) NullN() int64
- func (d *ScalarDatum) Release()
- func (d *ScalarDatum) String() string
- func (d *ScalarDatum) ToScalar() (scalar.Scalar, error)
- func (d *ScalarDatum) Type() arrow.DataType
type ScalarFunction
- func NewScalarFunction(name string, arity Arity, doc FunctionDoc) *ScalarFunction
- func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error
- func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, ...) error
- func (s *ScalarFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error)
- func (s *ScalarFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error)
- func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
- func (fi *ScalarFunction) Kernels() []*KT
- func (fi *ScalarFunction) NumKernels() int
- func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions)
type SetLookupOptions
- func (s *SetLookupOptions) Equals(other FunctionOptions) bool
- func (s *SetLookupOptions) FromStructScalar(sc *scalar.Struct) error
- func (s *SetLookupOptions) Release()
- func (SetLookupOptions) TypeName() string
type StrptimeOptions
- func (StrptimeOptions) TypeName() string
type TableDatum
- func (d *TableDatum) Equals(other Datum) bool
- func (TableDatum) Kind() DatumKind
- func (d *TableDatum) Len() int64
- func (d *TableDatum) Release()
- func (d *TableDatum) Schema() *arrow.Schema
- func (TableDatum) String() string
type TableLikeDatum
type TakeOptions
- func DefaultTakeOptions() *TakeOptions
type VectorFunction
- func NewVectorFunction(name string, arity Arity, doc FunctionDoc) *VectorFunction
- func (f *VectorFunction) AddKernel(kernel exec.VectorKernel) error
- func (f *VectorFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, ...) error
- func (f *VectorFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error)
- func (f *VectorFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error)
- func (f *VectorFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)
- func (fi *VectorFunction) Kernels() []*KT
- func (fi *VectorFunction) NumKernels() int
- func (f *VectorFunction) SetDefaultOptions(opts FunctionOptions)

Examples ¶

Package (CustomFunction)

Constants ¶

View Source

// Rounding modes, re-exported from the kernels package.
// NOTE(review): presumably values of RoundMode, consumed through the Mode
// field of RoundOptions and RoundToMultipleOptions — confirm.
// Some exported names differ from the kernels identifiers they alias
// (e.g. RoundTowardsInfinity is kernels.AwayFromZero).
const (
	// Round to nearest integer less than or equal in magnitude (aka "floor")
	RoundDown = kernels.RoundDown
	// Round to nearest integer greater than or equal in magnitude (aka "ceil")
	RoundUp = kernels.RoundUp
	// Get integral part without fractional digits (aka "trunc")
	RoundTowardsZero = kernels.TowardsZero
	// Round negative values with DOWN and positive values with UP
	RoundTowardsInfinity = kernels.AwayFromZero
	// Round ties with DOWN (aka "round half towards negative infinity")
	RoundHalfDown = kernels.HalfDown
	// Round ties with UP (aka "round half towards positive infinity")
	RoundHalfUp = kernels.HalfUp
	// Round ties with TowardsZero (aka "round half away from infinity")
	RoundHalfTowardsZero = kernels.HalfTowardsZero
	// Round ties with AwayFromZero (aka "round half towards infinity")
	RoundHalfTowardsInfinity = kernels.HalfAwayFromZero
	// Round ties to nearest even integer
	RoundHalfToEven = kernels.HalfToEven
	// Round ties to nearest odd integer
	RoundHalfToOdd = kernels.HalfToOdd
)

View Source

// Null-selection behaviors, re-exported from the kernels package.
// NOTE(review): presumably values of NullSelectionBehavior that control
// whether selection functions emit or drop nulls — confirm against the
// FilterOptions definition.
const (
	SelectionEmitNulls = kernels.EmitNulls
	SelectionDropNulls = kernels.DropNulls
)

View Source

// DefaultMaxChunkSize is math.MaxInt64, the largest int64 value.
// NOTE(review): presumably the default upper bound on chunk length during
// execution, i.e. effectively unlimited — confirm against ExecCtx.
const DefaultMaxChunkSize = math.MaxInt64

View Source

// UnknownLength is the sentinel value -1.
// NOTE(review): presumably reported in place of a length that cannot be
// determined — confirm against the callers that return it.
const UnknownLength int64 = -1

Variables ¶

View Source

// Default option values for the rounding functions.
var (
	// DefaultRoundOptions rounds to zero digits using RoundHalfToEven.
	DefaultRoundOptions           = RoundOptions{NDigits: 0, Mode: RoundHalfToEven}
	// DefaultRoundToMultipleOptions rounds to a multiple of the float64
	// scalar 1 using RoundHalfToEven.
	DefaultRoundToMultipleOptions = RoundToMultipleOptions{
		Multiple: scalar.NewFloat64Scalar(1), Mode: RoundHalfToEven}
)

View Source

// Allocator helpers re-exported from the exec package so that callers of
// this package can use them without importing exec directly.
var (

	// WithAllocator returns a new context with the provided allocator
	// embedded into the context.
	WithAllocator = exec.WithAllocator
	// GetAllocator retrieves the allocator from the context, or returns
	// memory.DefaultAllocator if there was no allocator in the provided
	// context.
	GetAllocator = exec.GetAllocator
)

View Source

// Sentinel errors. Judging by their messages they concern field path
// traversal and field reference resolution.
// NOTE(review): presumably returned by the FieldPath and FieldRef methods —
// confirm which functions return which error.
var (
	ErrEmpty           = errors.New("cannot traverse empty field path")
	ErrNoChildren      = errors.New("trying to get child of type with no children")
	ErrIndexRange      = errors.New("index out of range")
	ErrMultipleMatches = errors.New("multiple matches")
	ErrNoMatch         = errors.New("no match")
	ErrInvalid         = errors.New("field ref invalid")
)

Functions ¶

func CanCast ¶

func CanCast(from, to arrow.DataType) bool

CanCast returns true if there is an implementation for casting an array or scalar value from the specified DataType to the other data type.

func CastArray ¶

func CastArray(ctx context.Context, val arrow.Array, opts *CastOptions) (arrow.Array, error)

CastArray is a convenience function for casting an Array to another type. It is equivalent to constructing a Datum for the array and using CallFunction(ctx, "cast", ...).

func CastFromExtension ¶

func CastFromExtension(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error

func CastList ¶

func CastList[SrcOffsetT, DestOffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error

func CastStruct ¶

func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error

func CastToType ¶

func CastToType(ctx context.Context, val arrow.Array, toType arrow.DataType) (arrow.Array, error)

CastToType is a convenience function equivalent to calling CastArray(ctx, val, compute.SafeCastOptions(toType))

func DatumIsValue ¶

func DatumIsValue(d Datum) bool

DatumIsValue returns true if the datum passed is a Scalar, Array or ChunkedArray type (i.e. it contains a specific value, not a group of values).

func ExecSpanFromBatch ¶

func ExecSpanFromBatch(batch *ExecBatch) *exec.ExecSpan

ExecSpanFromBatch constructs and returns a new ExecSpan from the values inside of the ExecBatch which could be scalar or arrays.

This is mostly used for tests but is also a convenience method for other cases.

func FilterArray ¶

func FilterArray(ctx context.Context, values, filter arrow.Array, options FilterOptions) (arrow.Array, error)

FilterArray is a convenience method for calling Filter without having to manually construct the intervening Datum objects (they will be created for you internally here).

func FilterRecordBatch ¶

func FilterRecordBatch(ctx context.Context, batch arrow.Record, filter arrow.Array, opts *FilterOptions) (arrow.Record, error)

func FilterTable ¶

func FilterTable(ctx context.Context, tbl arrow.Table, filter Datum, opts *FilterOptions) (arrow.Table, error)

func RegisterScalarArithmetic ¶

func RegisterScalarArithmetic(reg FunctionRegistry)

func RegisterScalarBoolean ¶

func RegisterScalarBoolean(reg FunctionRegistry)

func RegisterScalarCast ¶

func RegisterScalarCast(reg FunctionRegistry)

func RegisterScalarComparisons ¶

func RegisterScalarComparisons(reg FunctionRegistry)

func RegisterVectorHash ¶

func RegisterVectorHash(reg FunctionRegistry)

func RegisterVectorRunEndFuncs ¶

func RegisterVectorRunEndFuncs(reg FunctionRegistry)

func RegisterVectorSelection ¶

func RegisterVectorSelection(reg FunctionRegistry)

RegisterVectorSelection registers functions that select specific values from arrays such as Take and Filter

func RunEndDecodeArray ¶

func RunEndDecodeArray(ctx context.Context, input arrow.Array) (arrow.Array, error)

func RunEndEncodeArray ¶

func RunEndEncodeArray(ctx context.Context, opts RunEndEncodeOptions, input arrow.Array) (arrow.Array, error)

func SerializeExpr ¶

func SerializeExpr(expr Expression, mem memory.Allocator) (*memory.Buffer, error)

SerializeExpr serializes expressions by converting them to Metadata and storing this in the schema of a Record. Embedded arrays and scalars are stored in its columns. Finally, the record is written as an IPC file.

func SerializeOptions ¶

func SerializeOptions(opts FunctionOptions, mem memory.Allocator) (*memory.Buffer, error)

func SetExecCtx ¶

func SetExecCtx(ctx context.Context, e ExecCtx) context.Context

SetExecCtx returns a new child context containing the passed in ExecCtx

func TakeArray ¶

func TakeArray(ctx context.Context, values, indices arrow.Array) (arrow.Array, error)

func TakeArrayOpts ¶

func TakeArrayOpts(ctx context.Context, values, indices arrow.Array, opts TakeOptions) (arrow.Array, error)

func UniqueArray ¶

func UniqueArray(ctx context.Context, values arrow.Array) (arrow.Array, error)

Types ¶

type ArithmeticOptions ¶

// ArithmeticOptions is the options type accepted by the arithmetic functions
// of this package (Add, Subtract, Multiply, Divide and others).
type ArithmeticOptions struct {
	// NoCheckOverflow disables overflow checking. When false (the default),
	// the arithmetic functions return an error on overflow; skipping the
	// check is faster.
	// NOTE(review): the struct tag is "check_overflow" although the field is
	// its negation — confirm that option (de)serialization accounts for this.
	NoCheckOverflow bool `compute:"check_overflow"`
}

func (ArithmeticOptions) TypeName ¶

func (ArithmeticOptions) TypeName() string

type Arity ¶

// Arity defines the number of required arguments for a function.
type Arity struct {
	// NArgs is the number of arguments.
	// NOTE(review): presumably the minimum number of arguments when
	// IsVarArgs is set (compare VarArgs(minArgs)) — confirm.
	NArgs     int
	// IsVarArgs marks a function as variadic.
	// NOTE(review): presumably allows more than NArgs arguments — confirm.
	IsVarArgs bool
}

Arity defines the number of required arguments for a function.

Naming conventions are taken from https://en.wikipedia.org/wiki/Arity

func Binary ¶

func Binary() Arity

func Nullary ¶

func Nullary() Arity

func Ternary ¶

func Ternary() Arity

func Unary ¶

func Unary() Arity

func VarArgs ¶

func VarArgs(minArgs int) Arity

type ArrayDatum ¶

// ArrayDatum is a Datum that wraps the data of a single array.
type ArrayDatum struct {
	// Value is the wrapped array data; MakeArray returns an arrow.Array for
	// the datum.
	Value arrow.ArrayData
}

ArrayDatum references an arrow.ArrayData object, from which array instances can be created if needed.

func (*ArrayDatum) Chunks ¶

func (d *ArrayDatum) Chunks() []arrow.Array

func (*ArrayDatum) Equals ¶

func (d *ArrayDatum) Equals(other Datum) bool

func (ArrayDatum) Kind ¶

func (ArrayDatum) Kind() DatumKind

func (*ArrayDatum) Len ¶

func (d *ArrayDatum) Len() int64

func (*ArrayDatum) MakeArray ¶

func (d *ArrayDatum) MakeArray() arrow.Array

func (*ArrayDatum) NullN ¶

func (d *ArrayDatum) NullN() int64

func (*ArrayDatum) Release ¶

func (d *ArrayDatum) Release()

func (*ArrayDatum) String ¶

func (d *ArrayDatum) String() string

func (*ArrayDatum) ToScalar ¶

func (d *ArrayDatum) ToScalar() (scalar.Scalar, error)

func (*ArrayDatum) Type ¶

func (d *ArrayDatum) Type() arrow.DataType

type ArrayLikeDatum ¶

// ArrayLikeDatum is an interface for treating a Datum similarly to an Array,
// which separates Scalar, Array and ChunkedArray datums from Record, Table
// and Collection datums.
type ArrayLikeDatum interface {
	Datum
	// NullN returns a null count as an int64.
	NullN() int64
	// Type returns the arrow.DataType of the datum.
	Type() arrow.DataType
	// Chunks returns an empty slice for a Scalar, a slice with one element
	// for an Array, and the slice of chunks for a chunked array.
	Chunks() []arrow.Array
}

ArrayLikeDatum is an interface for treating a Datum similarly to an Array, so that it is easy to differentiate between Record/Table/Collection and Scalar, Array/ChunkedArray for ease of use. Chunks will return an empty slice for Scalar, a slice with 1 element for Array, and the slice of chunks for a chunked array.

type Call deprecated

// Call is a function call with specific arguments which are themselves other
// expressions. A call can also have options that are specific to the function
// in question. It must be bound to determine the shape and type.
//
// Deprecated: use substrait-go expression functions instead.
type Call struct {
	// contains filtered or unexported fields
}

Call is a function call with specific arguments which are themselves other expressions. A call can also have options that are specific to the function in question. It must be bound to determine the shape and type.

Deprecated: use substrait-go expression functions instead.

func (*Call) Equals ¶

func (c *Call) Equals(other Expression) bool

func (*Call) FieldRef ¶

func (c *Call) FieldRef() *FieldRef

func (*Call) Hash ¶

func (c *Call) Hash() uint64

func (*Call) IsBound ¶

func (c *Call) IsBound() bool

func (*Call) IsNullLiteral ¶

func (c *Call) IsNullLiteral() bool

func (*Call) IsSatisfiable ¶

func (c *Call) IsSatisfiable() bool

func (*Call) IsScalarExpr ¶

func (c *Call) IsScalarExpr() bool

func (*Call) Release ¶

func (c *Call) Release()

func (*Call) String ¶

func (c *Call) String() string

func (*Call) Type ¶

func (c *Call) Type() arrow.DataType

type CastOptions ¶

// CastOptions is an alias for kernels.CastOptions. Instances can be obtained
// from DefaultCastOptions, NewCastOptions, SafeCastOptions and
// UnsafeCastOptions.
type CastOptions = kernels.CastOptions

func DefaultCastOptions ¶

func DefaultCastOptions(safe bool) *CastOptions

func NewCastOptions ¶

func NewCastOptions(dt arrow.DataType, safe bool) *CastOptions

func SafeCastOptions ¶

func SafeCastOptions(dt arrow.DataType) *CastOptions

func UnsafeCastOptions ¶

func UnsafeCastOptions(dt arrow.DataType) *CastOptions

type ChunkedDatum ¶

// ChunkedDatum contains a chunked array for use with expressions and compute.
type ChunkedDatum struct {
	// Value is the wrapped chunked array.
	Value *arrow.Chunked
}

ChunkedDatum contains a chunked array for use with expressions and compute.

func (*ChunkedDatum) Chunks ¶

func (d *ChunkedDatum) Chunks() []arrow.Array

func (*ChunkedDatum) Equals ¶

func (d *ChunkedDatum) Equals(other Datum) bool

func (ChunkedDatum) Kind ¶

func (ChunkedDatum) Kind() DatumKind

func (*ChunkedDatum) Len ¶

func (d *ChunkedDatum) Len() int64

func (*ChunkedDatum) NullN ¶

func (d *ChunkedDatum) NullN() int64

func (*ChunkedDatum) Release ¶

func (d *ChunkedDatum) Release()

func (*ChunkedDatum) String ¶

func (d *ChunkedDatum) String() string

func (*ChunkedDatum) Type ¶

func (d *ChunkedDatum) Type() arrow.DataType

type Datum ¶

// Datum is a variant interface for wrapping the various Arrow data
// structures. The interface also has unexported methods, so it cannot be
// implemented outside this package.
type Datum interface {
	fmt.Stringer
	// Kind reports the DatumKind of this datum.
	Kind() DatumKind
	// Len returns the length of the datum as an int64.
	Len() int64
	// Equals reports whether this datum is equal to the given one.
	Equals(Datum) bool
	// Release releases the datum.
	// NOTE(review): presumably drops this datum's reference to the wrapped
	// data — confirm the ownership rules (see NewDatumWithoutOwning).
	Release()
	// contains filtered or unexported methods
}

Datum is a variant interface for wrapping the various Arrow data structures. For now, the various Datum types just hold a Value, which is the type they are wrapping, but it might make sense in the future for those types to actually be aliases or to embed their types instead. Not sure yet.

func AbsoluteValue ¶

func AbsoluteValue(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error)

AbsoluteValue returns the AbsoluteValue for each element in the input argument. It accepts either a scalar or an array.

ArithmeticOptions specifies whether or not to check for overflows. Performance is faster when not explicitly checking for overflows, but the function will error on an overflow if NoCheckOverflow is false (default).

func Acos ¶

func Acos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Add ¶

func Add(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)

Add performs an addition between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is added to each value of the array.

ArithmeticOptions specifies whether or not to check for overflows, performance is faster if not explicitly checking for overflows but will error on an overflow if NoCheckOverflow is false (default).

func Asin ¶

func Asin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Atan ¶

func Atan(ctx context.Context, arg Datum) (Datum, error)

func Atan2 ¶

func Atan2(ctx context.Context, x, y Datum) (Datum, error)

func CallFunction ¶

func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error)

CallFunction is a one-shot invoker for all types of functions.

It will perform kernel-dispatch, argument checking, iteration of ChunkedArray inputs and wrapping of outputs.

To affect the execution options, you must call SetExecCtx and pass the resulting context in here.

func CastDatum ¶

func CastDatum(ctx context.Context, val Datum, opts *CastOptions) (Datum, error)

CastDatum is a convenience function for casting a Datum to another type. It is equivalent to calling CallFunction(ctx, "cast", opts, Datum) and should work for Scalar, Array or ChunkedArray Datums.

func Cos ¶

func Cos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Divide ¶

func Divide(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)

Divide performs a division between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is used with each value of the array.

ArithmeticOptions specifies whether or not to check for overflows, performance is faster if not explicitly checking for overflows but will error on an overflow if NoCheckOverflow is false (default).

Will error on divide by zero regardless of whether or not checking for overflows.

func Filter ¶

func Filter(ctx context.Context, values, filter Datum, options FilterOptions) (Datum, error)

Filter is a wrapper convenience that is equivalent to calling CallFunction(ctx, "filter", &options, values, filter) for filtering an input array (values) by a boolean array (filter). The two inputs must be the same length.

func Ln ¶

func Ln(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Log10 ¶

func Log10(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Log1p ¶

func Log1p(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Log2 ¶

func Log2(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Logb ¶

func Logb(ctx context.Context, opts ArithmeticOptions, x, base Datum) (Datum, error)

func Multiply ¶

func Multiply(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)

Multiply performs a multiplication between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is multiplied against each value of the array.

ArithmeticOptions specifies whether or not to check for overflows. Performance is faster when not explicitly checking for overflows, but an overflow will produce an error if NoCheckOverflow is false (default).

func Negate ¶

func Negate(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error)

Negate returns a result containing the negation of each element in the input argument. It accepts either a scalar or an array.

ArithmeticOptions specifies whether or not to check for overflows, or to throw an error on unsigned types.

func NewDatum ¶

func NewDatum(value interface{}) Datum

NewDatum will construct the appropriate Datum type based on what is passed in as the argument.

An arrow.Array gets an ArrayDatum
An array.Chunked gets a ChunkedDatum
An array.Record gets a RecordDatum
An array.Table gets a TableDatum
A scalar.Scalar gets a ScalarDatum

Anything else is passed to scalar.MakeScalar and receives a scalar datum of that appropriate type.

func NewDatumWithoutOwning ¶

func NewDatumWithoutOwning(value interface{}) Datum

func Power ¶

func Power(ctx context.Context, opts ArithmeticOptions, base, exp Datum) (Datum, error)

Power returns base**exp for each element in the input arrays. It should work for both Arrays and Scalars.

func Round ¶

func Round(ctx context.Context, opts RoundOptions, arg Datum) (Datum, error)

func RoundToMultiple ¶

func RoundToMultiple(ctx context.Context, opts RoundToMultipleOptions, arg Datum) (Datum, error)

func RunEndDecode ¶

func RunEndDecode(ctx context.Context, arg Datum) (Datum, error)

func RunEndEncode ¶

func RunEndEncode(ctx context.Context, opts RunEndEncodeOptions, arg Datum) (Datum, error)

func ShiftLeft ¶

func ShiftLeft(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error)

ShiftLeft only accepts integral types and shifts each element of the first argument to the left by the value of the corresponding element in the second argument.

The value to shift by should be >= 0 and < precision of the type.

func ShiftRight ¶

func ShiftRight(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error)

ShiftRight only accepts integral types and shifts each element of the first argument to the right by the value of the corresponding element in the second argument.

The value to shift by should be >= 0 and < precision of the type.

func Sign ¶

func Sign(ctx context.Context, input Datum) (Datum, error)

Sign returns -1, 0, or 1 depending on the sign of each element in the input. For x in the input:

	if x > 0: 1
	if x < 0: -1
	if x == 0: 0

func Sin ¶

func Sin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Subtract ¶

func Subtract(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error)

Subtract performs a subtraction between the passed in arguments (scalar or array) and returns the result. If one argument is a scalar and the other is an array, the scalar value is subtracted from each value of the array.

ArithmeticOptions specifies whether or not to check for overflows. Performance is faster when not explicitly checking for overflows, but an overflow will produce an error if NoCheckOverflow is false (default).

func Take ¶

func Take(ctx context.Context, opts TakeOptions, values, indices Datum) (Datum, error)

func Tan ¶

func Tan(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error)

func Unique ¶

func Unique(ctx context.Context, values Datum) (Datum, error)

type DatumKind ¶

type DatumKind int

DatumKind is an enum used for denoting which kind of type a datum is encapsulating

const (
	KindNone    DatumKind = iota // none
	KindScalar                   // scalar
	KindArray                    // array
	KindChunked                  // chunked_array
	KindRecord                   // record_batch
	KindTable                    // table
)

func (DatumKind) String ¶

func (i DatumKind) String() string

type EmptyDatum ¶

type EmptyDatum struct{}

EmptyDatum is the null case, a Datum with nothing in it.

func (EmptyDatum) Equals ¶

func (EmptyDatum) Equals(other Datum) bool

func (EmptyDatum) Kind ¶

func (EmptyDatum) Kind() DatumKind

func (EmptyDatum) Len ¶

func (EmptyDatum) Len() int64

func (EmptyDatum) Release ¶

func (EmptyDatum) Release()

func (EmptyDatum) String ¶

func (EmptyDatum) String() string

type ExecBatch ¶

type ExecBatch struct {
	Values []Datum
	// Guarantee is a predicate Expression guaranteed to evaluate to true for
	// all rows in this batch.
	// Guarantee Expression
	// Len is the semantic length of this ExecBatch. When the values are
	// all scalars, the length should be set to 1 for non-aggregate kernels.
	// Otherwise the length is taken from the array values. Aggregate kernels
	// can have an ExecBatch formed by projecting just the partition columns
	// from a batch in which case it would have scalar rows with length > 1
	//
	// If the array values are of length 0, then the length is 0 regardless of
	// whether any values are Scalar.
	Len int64
}

ExecBatch is a unit of work for kernel execution. It contains a collection of Array and Scalar values.

ExecBatch is semantically similar to a RecordBatch but for a SQL-style execution context. It represents a collection of records, but constant "columns" are represented by Scalar values rather than having to be converted into arrays with repeated values.

func (ExecBatch) NumValues ¶

func (e ExecBatch) NumValues() int

type ExecCtx ¶

type ExecCtx struct {
	// ChunkSize is the size used when iterating batches for execution
	// ChunkSize elements will be operated on at a time unless an argument
	// is a chunkedarray with a chunk that is smaller
	ChunkSize int64
	// PreallocContiguous determines whether preallocating memory for
	// execution of compute attempts to preallocate a full contiguous
	// buffer for all of the chunks beforehand.
	PreallocContiguous bool
	// Registry allows specifying the Function Registry to utilize
	// when searching for kernel implementations.
	Registry FunctionRegistry
	// ExecChannelSize is the size of the channel used for passing
	// exec results to the WrapResults function.
	ExecChannelSize int
	// NumParallel determines the number of parallel goroutines
	// allowed for parallel executions.
	NumParallel int
}

ExecCtx holds simple contextual information for execution such as the default ChunkSize for batch iteration, whether or not to ensure contiguous preallocations for kernels that want preallocation, and a reference to the desired function registry to use.

An ExecCtx should be placed into a context.Context by using SetExecCtx, and retrieved with GetExecCtx, to pass it along for execution.

func DefaultExecCtx ¶

func DefaultExecCtx() ExecCtx

DefaultExecCtx returns the default exec context which will be used if there is no ExecCtx set into the context for execution.

This can be called to get a copy of the default values which can then be modified to set into a context.

The default exec context uses the following values:

ChunkSize = DefaultMaxChunkSize (MaxInt64)
PreallocContiguous = true
Registry = GetFunctionRegistry()
ExecChannelSize = 10
NumParallel = runtime.NumCPU()

func GetExecCtx ¶

func GetExecCtx(ctx context.Context) ExecCtx

GetExecCtx returns an embedded ExecCtx from the provided context. If it does not contain an ExecCtx, then the default one is returned.

type Expression deprecated

type Expression interface {
	fmt.Stringer
	// IsBound returns true if this expression has been bound to a particular
	// Datum and/or Schema.
	IsBound() bool
	// IsScalarExpr returns true if this expression is composed only of scalar
	// literals, field references and calls to scalar functions.
	IsScalarExpr() bool
	// IsNullLiteral returns true if this expression is a literal and entirely
	// null.
	IsNullLiteral() bool
	// IsSatisfiable returns true if this expression could evaluate to true
	IsSatisfiable() bool
	// FieldRef returns a pointer to the underlying field reference, or nil if
	// this expression is not a field reference.
	FieldRef() *FieldRef
	// Type returns the datatype this expression will evaluate to.
	Type() arrow.DataType

	Hash() uint64
	Equals(Expression) bool

	// Release releases the underlying bound C++ memory that is allocated when
	// a Bind is performed. Any bound expression should get released to ensure
	// no memory leaks.
	Release()
}

Expression is an interface for mapping one datum to another. An expression is one of:

A literal Datum
A reference to a single (potentially nested) field of an input Datum
A call to a compute function, with arguments specified by other Expressions

Deprecated: use substrait-go expressions instead.

func And ¶

func And(lhs, rhs Expression, ops ...Expression) Expression

And constructs a tree of calls to and_kleene for boolean And logic taking an arbitrary number of values.

func Cast ¶

func Cast(ex Expression, dt arrow.DataType) Expression

func DeserializeExpr ¶

func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, error)

func Equal ¶

func Equal(lhs, rhs Expression) Expression

Equal is a convenience function for the equal function

func Greater ¶

func Greater(lhs, rhs Expression) Expression

Greater is shorthand for NewCall("greater",....)

func GreaterEqual ¶

func GreaterEqual(lhs, rhs Expression) Expression

GreaterEqual is shorthand for NewCall("greater_equal",....)

func IsNull ¶

func IsNull(lhs Expression, nanIsNull bool) Expression

IsNull creates an expression that returns true if the passed in expression is null, optionally treating NaN as null if desired.

func IsValid ¶

func IsValid(lhs Expression) Expression

IsValid is the inverse of IsNull

func Less ¶

func Less(lhs, rhs Expression) Expression

Less is shorthand for NewCall("less",....)

func LessEqual ¶

func LessEqual(lhs, rhs Expression) Expression

LessEqual is shorthand for NewCall("less_equal",....)

func NewCall ¶

func NewCall(name string, args []Expression, opts FunctionOptions) Expression

NewCall constructs an expression that represents a specific function call with the given arguments and options.

func NewFieldRef ¶

func NewFieldRef(field string) Expression

NewFieldRef is shorthand for NewRef(FieldRefName(field))

func NewLiteral ¶

func NewLiteral(arg interface{}) Expression

NewLiteral constructs a new literal expression from any value. It is passed to NewDatum which will construct the appropriate Datum and/or scalar value for the type provided.

func NewRef ¶

func NewRef(ref FieldRef) Expression

NewRef constructs a parameter expression which refers to a specific field

func Not ¶

func Not(expr Expression) Expression

Not creates a call to "invert" for the value specified.

func NotEqual ¶

func NotEqual(lhs, rhs Expression) Expression

NotEqual creates a call to not_equal

func NullLiteral ¶

func NullLiteral(dt arrow.DataType) Expression

func Or ¶

func Or(lhs, rhs Expression, ops ...Expression) Expression

Or constructs a tree of calls to or_kleene for boolean Or logic taking an arbitrary number of values.

func Project ¶

func Project(values []Expression, names []string) Expression

Project is shorthand for `make_struct` to produce a record batch output from a group of expressions.

type FieldPath ¶

type FieldPath []int

FieldPath represents a path to a nested field using indices of child fields. For example, given the indices {5, 9, 3} the field could be retrieved with: schema.Field(5).Type().(*arrow.StructType).Field(9).Type().(*arrow.StructType).Field(3)

Attempting to retrieve a child field using a FieldPath which is not valid for a given schema will get an error such as an out of range index, or an empty path.

FieldPaths provide for drilling down to potentially nested children for convenience of accepting a slice of fields, a schema or a datatype (which should contain child fields).

A fieldpath can also be used to retrieve a child arrow.Array or column from a record batch.

func (FieldPath) Get ¶

func (f FieldPath) Get(s *arrow.Schema) (*arrow.Field, error)

Get retrieves the corresponding nested child field by drilling through the schema's fields as per the field path.

func (FieldPath) GetColumn ¶

func (f FieldPath) GetColumn(batch arrow.Record) (arrow.Array, error)

GetColumn will return the correct child array by traversing the fieldpath going to the nested arrays of the columns in the record batch.

func (FieldPath) GetField ¶

func (f FieldPath) GetField(field arrow.Field) (*arrow.Field, error)

GetField is equivalent to GetFieldFromType(field.Type)

func (FieldPath) GetFieldFromSlice ¶

func (f FieldPath) GetFieldFromSlice(fields []arrow.Field) (*arrow.Field, error)

GetFieldFromSlice treats the slice as the top layer of fields, so the first value in the field path will index into the slice, and then drill down from there.

func (FieldPath) GetFieldFromType ¶

func (f FieldPath) GetFieldFromType(typ arrow.DataType) (*arrow.Field, error)

GetFieldFromType returns the nested field from a datatype by drilling into its child fields.

func (FieldPath) String ¶

func (f FieldPath) String() string

type FieldRef ¶

type FieldRef struct {
	// contains filtered or unexported fields
}

FieldRef is a descriptor of a (potentially nested) field within a schema.

Unlike FieldPath (which is exclusively indices of child fields), FieldRef may reference a field by name. It can be constructed from either a field index, field name, or field path.

Nested fields can be referenced as well, given the schema:

		arrow.NewSchema([]arrow.Field{
			{Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})},
			{Name: "b", Type: arrow.PrimitiveTypes.Int32},
		})

the following all indicate the nested field named "n":

FieldRefPath(FieldPath{0, 0})
FieldRefList("a", 0)
FieldRefList("a", "n")
FieldRefList(0, "n")
NewFieldRefFromDotPath(".a[0]")

FieldPaths matching a FieldRef are retrieved with the FindAll* functions. Multiple matches are possible because field names may be duplicated within a schema. For example:

aIsAmbiguous := arrow.NewSchema([]arrow.Field{
	{Name: "a", Type: arrow.PrimitiveTypes.Int32},
	{Name: "a", Type: arrow.PrimitiveTypes.Float32},
})
matches := FieldRefName("a").FindAll(aIsAmbiguous)
assert.Len(matches, 2)
assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)))
assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)))

func FieldRefIndex ¶

func FieldRefIndex(i int) FieldRef

FieldRefIndex is a convenience function to construct a FieldPath reference of a single index

func FieldRefList ¶

func FieldRefList(elems ...interface{}) FieldRef

FieldRefList takes an arbitrary number of arguments which can be either strings or ints. This will panic if anything other than a string or int is passed in.

func FieldRefName ¶

func FieldRefName(n string) FieldRef

FieldRefName constructs a FieldRef by name

func FieldRefPath ¶

func FieldRefPath(p FieldPath) FieldRef

FieldRefPath constructs a FieldRef from a given FieldPath

func NewFieldRefFromDotPath ¶

func NewFieldRefFromDotPath(dotpath string) (out FieldRef, err error)

NewFieldRefFromDotPath parses a dot path into a field ref.

	dot_path = '.' name
	         | '[' digit+ ']'
	         | dot_path+

Examples

".alpha" => FieldRefName("alpha")
"[2]" => FieldRefIndex(2)
".beta[3]" => FieldRefList("beta", 3)
"[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7)
".hello world" => FieldRefName("hello world")
`.\[y\]\\tho\.\` => FieldRefName(`[y]\tho.\`)

Note: when parsing a name, a '\' preceding any other character will be dropped from the resulting name. Therefore, if a name must contain the characters '.', '\', '[' or ']', they must be escaped with a preceding '\'.

func (*FieldRef) Equals ¶

func (f *FieldRef) Equals(other FieldRef) bool

func (*FieldRef) FieldPath ¶

func (f *FieldRef) FieldPath() FieldPath

FieldPath returns the fieldpath that this FieldRef uses, otherwise an empty FieldPath if it's not a FieldPath reference

func (FieldRef) FindAll ¶

func (f FieldRef) FindAll(fields []arrow.Field) []FieldPath

FindAll returns all the fieldpaths which this FieldRef matches in the given slice of fields.

func (FieldRef) FindAllField ¶

func (f FieldRef) FindAllField(field arrow.Field) []FieldPath

FindAllField returns all the fieldpaths that this FieldRef matches against the type of the given field.

func (FieldRef) FindOne ¶

func (f FieldRef) FindOne(schema *arrow.Schema) (FieldPath, error)

FindOne returns an error if the field isn't matched or if there are multiple matches; otherwise, it returns the path to the single valid match.

func (FieldRef) FindOneOrNone ¶

func (f FieldRef) FindOneOrNone(schema *arrow.Schema) (FieldPath, error)

FindOneOrNone is a convenience helper that will either return 1 fieldpath, or an empty fieldpath, and will return an error if there are multiple matches.

func (FieldRef) FindOneOrNoneRecord ¶

func (f FieldRef) FindOneOrNoneRecord(root arrow.Record) (FieldPath, error)

FindOneOrNoneRecord is like FindOneOrNone but for the schema of a record, returning an error only if there are multiple matches.

func (FieldRef) GetAllColumns ¶

func (f FieldRef) GetAllColumns(root arrow.Record) ([]arrow.Array, error)

GetAllColumns gets all the matching column arrays from the given record that this FieldRef references.

func (FieldRef) GetOneColumnOrNone ¶

func (f FieldRef) GetOneColumnOrNone(root arrow.Record) (arrow.Array, error)

GetOneColumnOrNone returns either nil or the referenced array if it can be found, erroring only if there are multiple ambiguous matches.

func (FieldRef) GetOneField ¶

func (f FieldRef) GetOneField(schema *arrow.Schema) (*arrow.Field, error)

GetOneField will return a pointer to a field or an error if it is not found or if there are multiple matches.

func (FieldRef) GetOneOrNone ¶

func (f FieldRef) GetOneOrNone(schema *arrow.Schema) (*arrow.Field, error)

GetOneOrNone will return the field if it is found or nil if it is not, and only errors if there are multiple matches.

func (FieldRef) Hash ¶

func (f FieldRef) Hash(seed maphash.Seed) uint64

Hash produces a hash of this field reference and takes in a seed so that it can maintain consistency across multiple places, processes, etc.

func (*FieldRef) IsFieldPath ¶

func (f *FieldRef) IsFieldPath() bool

IsFieldPath returns true if this FieldRef uses a fieldpath

func (*FieldRef) IsName ¶

func (f *FieldRef) IsName() bool

IsName returns true if this fieldref is a name reference

func (*FieldRef) IsNested ¶

func (f *FieldRef) IsNested() bool

IsNested returns true if this FieldRef expects to represent a nested field.

func (*FieldRef) Name ¶

func (f *FieldRef) Name() string

Name returns the name of the field this references if it is a Name reference, otherwise the empty string

func (FieldRef) String ¶

func (f FieldRef) String() string

type FilterOptions = kernels.FilterOptions

func DefaultFilterOptions ¶

func DefaultFilterOptions() *FilterOptions

type FuncKind ¶

type FuncKind int8

FuncKind is an enum representing the type of a function

const (
	// A function that performs scalar data operations on whole arrays
	// of data. Can generally process Array or Scalar values. The size
	// of the output will be the same as the size (or broadcasted size,
	// in the case of mixing Array and Scalar inputs) of the input.
	FuncScalar FuncKind = iota // Scalar
	// A function with array input and output whose behavior depends on
	// the values of the entire arrays passed, rather than the value of
	// each scalar value.
	FuncVector // Vector
	// A function that computes a scalar summary statistic from array input.
	FuncScalarAgg // ScalarAggregate
	// A function that computes grouped summary statistics from array
	// input and an array of group identifiers.
	FuncHashAgg // HashAggregate
	// A function that dispatches to other functions and does not contain
	// its own kernels.
	FuncMeta // Meta
)

func (FuncKind) String ¶

func (i FuncKind) String() string

type Function ¶

type Function interface {
	Name() string
	Kind() FuncKind
	Arity() Arity
	Doc() FunctionDoc
	NumKernels() int
	Execute(context.Context, FunctionOptions, ...Datum) (Datum, error)
	DispatchExact(...arrow.DataType) (exec.Kernel, error)
	DispatchBest(...arrow.DataType) (exec.Kernel, error)
	DefaultOptions() FunctionOptions
	Validate() error
}

type FunctionDoc ¶

type FunctionDoc struct {
	// A one-line summary of the function, using a verb.
	//
	// For example, "Add two numeric arrays or scalars"
	Summary string
	// A detailed description of the function, meant to follow the summary.
	Description string
	// Symbolic names (identifiers) for the function arguments.
	//
	// Can be used to generate nicer function signatures.
	ArgNames []string
	// Name of the options struct type, if any
	OptionsType string
	// Whether or not options are required for function execution.
	//
	// If false, then either there are no options for this function,
	// or there is a usable default options value.
	OptionsRequired bool
}

var EmptyFuncDoc FunctionDoc

EmptyFuncDoc is a reusable empty function doc definition for convenience.

type FunctionOptions ¶

type FunctionOptions interface {
	TypeName() string
}

FunctionOptions can be any type which has a TypeName function. The fields of the type will be used (via reflection) to determine the information to propagate when serializing to pass to the C++ for execution.

type FunctionOptionsCloneable ¶

type FunctionOptionsCloneable interface {
	Clone() FunctionOptions
}

type FunctionOptionsEqual ¶

type FunctionOptionsEqual interface {
	Equals(FunctionOptions) bool
}

type FunctionRegistry ¶

type FunctionRegistry interface {
	CanAddFunction(fn Function, allowOverwrite bool) bool
	AddFunction(fn Function, allowOverwrite bool) bool
	CanAddAlias(target, source string) bool
	AddAlias(target, source string) bool
	GetFunction(name string) (Function, bool)
	GetFunctionNames() []string
	NumFunctions() int
	// contains filtered or unexported methods
}

func GetFunctionRegistry ¶

func GetFunctionRegistry() FunctionRegistry

func NewChildRegistry ¶

func NewChildRegistry(parent FunctionRegistry) FunctionRegistry

func NewRegistry ¶

func NewRegistry() FunctionRegistry

type KernelExecutor ¶

type KernelExecutor interface {
	// Init must be called *after* the kernel's init method and any
	// KernelState must be set into the KernelCtx *before* calling
	// this Init method. This is to facilitate the case where
	// Init may be expensive and does not need to be called
	// again for each execution of the kernel. For example,
	// the same lookup table can be re-used for all scanned batches
	// in a dataset filter.
	Init(*exec.KernelCtx, exec.KernelInitArgs) error
	// Execute the kernel for the provided batch and pass the resulting
	// Datum values to the provided channel.
	Execute(context.Context, *ExecBatch, chan<- Datum) error
	// WrapResults exists for the case where an executor wants to post process
	// the batches of result datums, such as creating a ChunkedArray from
	// multiple output batches and so on. Results from individual batch
	// executions should be read from the out channel, and WrapResults should
	// return the final Datum result.
	WrapResults(ctx context.Context, out <-chan Datum, chunkedArgs bool) Datum
	// CheckResultType checks the actual result type against the resolved
	// output type. If the types don't match an error is returned
	CheckResultType(out Datum) error
	// Clear resets the state in the executor so that it can be reused.
	Clear()
}

KernelExecutor is the interface for all executors to initialize and call kernel execution functions on batches.

func NewScalarExecutor ¶

func NewScalarExecutor() KernelExecutor

type Literal deprecated

type Literal struct {
	Literal Datum
}

Literal is an expression denoting a literal Datum which could be any value as a scalar, an array, or so on.

Deprecated: use substrait-go expressions Literal instead.

func (*Literal) Equals ¶

func (l *Literal) Equals(other Expression) bool

func (Literal) FieldRef ¶

func (Literal) FieldRef() *FieldRef

func (*Literal) Hash ¶

func (l *Literal) Hash() uint64

func (*Literal) IsBound ¶

func (l *Literal) IsBound() bool

func (*Literal) IsNullLiteral ¶

func (l *Literal) IsNullLiteral() bool

func (*Literal) IsSatisfiable ¶

func (l *Literal) IsSatisfiable() bool

func (*Literal) IsScalarExpr ¶

func (l *Literal) IsScalarExpr() bool

func (*Literal) Release ¶

func (l *Literal) Release()

func (*Literal) String ¶

func (l *Literal) String() string

func (*Literal) Type ¶

func (l *Literal) Type() arrow.DataType

type MakeStructOptions ¶

type MakeStructOptions struct {
	FieldNames       []string          `compute:"field_names"`
	FieldNullability []bool            `compute:"field_nullability"`
	FieldMetadata    []*arrow.Metadata `compute:"field_metadata"`
}

func (MakeStructOptions) TypeName ¶

func (MakeStructOptions) TypeName() string

type MetaFunction ¶

type MetaFunction struct {
	// contains filtered or unexported fields
}

MetaFunction is a function which dispatches to other functions; the impl must not be nil.

For Array, ChunkedArray and Scalar datums, this may rely on the execution of concrete function types, but this must handle other Datum kinds on its own.

func NewMetaFunction ¶

func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) *MetaFunction

NewMetaFunction constructs a new MetaFunction which will call the provided impl for dispatching with the expected arity.

Will panic if impl is nil.

func (*MetaFunction) Arity ¶

func (b *MetaFunction) Arity() Arity

func (*MetaFunction) DefaultOptions ¶

func (b *MetaFunction) DefaultOptions() FunctionOptions

func (*MetaFunction) DispatchBest ¶

func (m *MetaFunction) DispatchBest(...arrow.DataType) (exec.Kernel, error)

func (*MetaFunction) DispatchExact ¶

func (m *MetaFunction) DispatchExact(...arrow.DataType) (exec.Kernel, error)

func (*MetaFunction) Doc ¶

func (b *MetaFunction) Doc() FunctionDoc

func (*MetaFunction) Execute ¶

func (m *MetaFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)

func (*MetaFunction) Kind ¶

func (b *MetaFunction) Kind() FuncKind

func (*MetaFunction) Name ¶

func (b *MetaFunction) Name() string

func (MetaFunction) NumKernels ¶

func (MetaFunction) NumKernels() int

func (*MetaFunction) Validate ¶

func (b *MetaFunction) Validate() error

type MetaFunctionImpl ¶

type MetaFunctionImpl func(context.Context, FunctionOptions, ...Datum) (Datum, error)

MetaFunctionImpl is the signature needed for implementing a MetaFunction which is a function that dispatches to another function instead.

type NullOptions ¶

type NullOptions struct {
	NanIsNull bool `compute:"nan_is_null"`
}

func (NullOptions) TypeName ¶

func (NullOptions) TypeName() string

type NullSelectionBehavior ¶

type NullSelectionBehavior = kernels.NullSelectionBehavior

type Parameter deprecated

type Parameter struct {
	// contains filtered or unexported fields
}

Parameter represents a field reference and needs to be bound in order to determine its type and shape.

Deprecated: use substrait-go field references instead.

func (*Parameter) Equals ¶

func (p *Parameter) Equals(other Expression) bool

func (*Parameter) FieldRef ¶

func (p *Parameter) FieldRef() *FieldRef

func (*Parameter) Hash ¶

func (p *Parameter) Hash() uint64

func (*Parameter) IsBound ¶

func (p *Parameter) IsBound() bool

func (Parameter) IsNullLiteral ¶

func (Parameter) IsNullLiteral() bool

func (*Parameter) IsSatisfiable ¶

func (p *Parameter) IsSatisfiable() bool

func (*Parameter) IsScalarExpr ¶

func (p *Parameter) IsScalarExpr() bool

func (*Parameter) Release ¶

func (p *Parameter) Release()

func (*Parameter) String ¶

func (p *Parameter) String() string

func (*Parameter) Type ¶

func (p *Parameter) Type() arrow.DataType

type RecordDatum ¶

type RecordDatum struct {
	Value arrow.Record
}

RecordDatum contains an arrow.Record for passing a full record to an expression or to compute.

func (*RecordDatum) Equals ¶

func (r *RecordDatum) Equals(other Datum) bool

func (RecordDatum) Kind ¶

func (RecordDatum) Kind() DatumKind

func (*RecordDatum) Len ¶

func (r *RecordDatum) Len() int64

func (*RecordDatum) Release ¶

func (r *RecordDatum) Release()

func (*RecordDatum) Schema ¶

func (r *RecordDatum) Schema() *arrow.Schema

func (RecordDatum) String ¶

func (RecordDatum) String() string

type RoundMode ¶

type RoundMode = kernels.RoundMode

type RoundOptions ¶

type RoundOptions = kernels.RoundOptions

type RoundToMultipleOptions ¶

type RoundToMultipleOptions = kernels.RoundToMultipleOptions

type RunEndEncodeOptions ¶

type RunEndEncodeOptions = kernels.RunEndEncodeState

type ScalarDatum ¶

type ScalarDatum struct {
	Value scalar.Scalar
}

ScalarDatum contains a scalar value

func (ScalarDatum) Chunks ¶

func (ScalarDatum) Chunks() []arrow.Array

func (*ScalarDatum) Equals ¶

func (d *ScalarDatum) Equals(other Datum) bool

func (ScalarDatum) Kind ¶

func (ScalarDatum) Kind() DatumKind

func (ScalarDatum) Len ¶

func (ScalarDatum) Len() int64

func (*ScalarDatum) NullN ¶

func (d *ScalarDatum) NullN() int64

func (*ScalarDatum) Release ¶

func (d *ScalarDatum) Release()

func (*ScalarDatum) String ¶

func (d *ScalarDatum) String() string

func (*ScalarDatum) ToScalar ¶

func (d *ScalarDatum) ToScalar() (scalar.Scalar, error)

func (*ScalarDatum) Type ¶

func (d *ScalarDatum) Type() arrow.DataType

type ScalarFunction ¶

type ScalarFunction struct {
	// contains filtered or unexported fields
}

A ScalarFunction is a function that executes element-wise operations on arrays or scalars, and therefore whose results generally do not depend on the order of the values in the arguments. Accepts and returns arrays that are all of the same size. These functions roughly correspond to the functions used in most SQL expressions.

func NewScalarFunction ¶

func NewScalarFunction(name string, arity Arity, doc FunctionDoc) *ScalarFunction

NewScalarFunction constructs a new ScalarFunction object with the passed in name, arity and function doc.

func (*ScalarFunction) AddKernel ¶

func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error

AddKernel adds the provided kernel to the list of kernels this function has. A copy of the kernel is added to the slice of kernels, which means that a given kernel object can be created, added and then reused to add other kernels.

func (*ScalarFunction) AddNewKernel ¶

func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error

AddNewKernel constructs a new kernel with the provided signature and execution/init functions and then adds it to the function's list of kernels. This assumes default null handling (intersection of validity bitmaps)

func (*ScalarFunction) DispatchBest ¶

func (s *ScalarFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error)

func (*ScalarFunction) DispatchExact ¶

func (s *ScalarFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error)

func (*ScalarFunction) Execute ¶

func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)

Execute uses the passed in context, function options and arguments to eagerly execute the function using kernel dispatch, batch iteration and memory allocation details as defined by the kernel.

If opts is nil, then the DefaultOptions() will be used.

func (*ScalarFunction) Kernels ¶

func (fi *ScalarFunction) Kernels() []*KT

func (*ScalarFunction) NumKernels ¶

func (fi *ScalarFunction) NumKernels() int

func (*ScalarFunction) SetDefaultOptions ¶

func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions)

type SetLookupOptions ¶

type SetLookupOptions struct {
	ValueSet  Datum `compute:"value_set"`
	SkipNulls bool  `compute:"skip_nulls"`
}

func (*SetLookupOptions) Equals ¶

func (s *SetLookupOptions) Equals(other FunctionOptions) bool

func (*SetLookupOptions) FromStructScalar ¶

func (s *SetLookupOptions) FromStructScalar(sc *scalar.Struct) error

func (*SetLookupOptions) Release ¶

func (s *SetLookupOptions) Release()

func (SetLookupOptions) TypeName ¶

func (SetLookupOptions) TypeName() string

type StrptimeOptions ¶

type StrptimeOptions struct {
	Format string         `compute:"format"`
	Unit   arrow.TimeUnit `compute:"unit"`
}

func (StrptimeOptions) TypeName ¶

func (StrptimeOptions) TypeName() string

type TableDatum ¶

type TableDatum struct {
	Value arrow.Table
}

TableDatum contains a table so that multiple record batches can be worked with together as a single table when passed to compute and expression handling.

func (*TableDatum) Equals ¶

func (d *TableDatum) Equals(other Datum) bool

func (TableDatum) Kind ¶

func (TableDatum) Kind() DatumKind

func (*TableDatum) Len ¶

func (d *TableDatum) Len() int64

func (*TableDatum) Release ¶

func (d *TableDatum) Release()

func (*TableDatum) Schema ¶

func (d *TableDatum) Schema() *arrow.Schema

func (TableDatum) String ¶

func (TableDatum) String() string

type TableLikeDatum ¶

type TableLikeDatum interface {
	Datum
	Schema() *arrow.Schema
}

TableLikeDatum is an interface type for specifying either a RecordBatch or a Table, as both contain a schema as opposed to just a single data type.

type TakeOptions ¶

type TakeOptions = kernels.TakeOptions

func DefaultTakeOptions ¶

func DefaultTakeOptions() *TakeOptions

type VectorFunction ¶

type VectorFunction struct {
	// contains filtered or unexported fields
}

func NewVectorFunction ¶

func NewVectorFunction(name string, arity Arity, doc FunctionDoc) *VectorFunction

func (*VectorFunction) AddKernel ¶

func (f *VectorFunction) AddKernel(kernel exec.VectorKernel) error

func (*VectorFunction) AddNewKernel ¶

func (f *VectorFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error

func (*VectorFunction) DispatchBest ¶

func (f *VectorFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error)

func (*VectorFunction) DispatchExact ¶

func (f *VectorFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error)

func (*VectorFunction) Execute ¶

func (f *VectorFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error)

func (*VectorFunction) Kernels ¶

func (fi *VectorFunction) Kernels() []*KT

func (*VectorFunction) NumKernels ¶

func (fi *VectorFunction) NumKernels() int

func (*VectorFunction) SetDefaultOptions ¶

func (f *VectorFunction) SetDefaultOptions(opts FunctionOptions)

Directories ¶

Path	Synopsis
exec
exprs
internal
kernels	Package kernels defines all of the computation kernels for the compute library.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL