Documentation ¶
Index ¶
- Constants
- Variables
- func ExecuteScalarExpression(ctx context.Context, inputSchema *arrow.Schema, expression expr.Expression, ...) (compute.Datum, error)
- func ExecuteScalarSubstrait(ctx context.Context, expression *expr.Extended, partialInput compute.Datum) (compute.Datum, error)
- func FieldsFromSubstrait(typeList []types.Type, nextName func() string, ext ExtensionIDSet) (out []arrow.Field, err error)
- func FromSubstraitType(t types.Type, ext ExtensionIDSet) (arrow.DataType, bool, error)
- func GetRefField(ref expr.ReferenceSegment, fields []arrow.Field) (*arrow.Field, error)
- func GetRefSchema(ref expr.ReferenceSegment, schema *arrow.Schema) (*arrow.Field, error)
- func GetReferencedValue(mem memory.Allocator, ref expr.ReferenceSegment, value compute.Datum, ...) (compute.Datum, error)
- func GetScalar(ref expr.ReferenceSegment, s scalar.Scalar, mem memory.Allocator, ...) (scalar.Scalar, error)
- func IsNullable(t types.Type) bool
- func NewFieldRef(ref compute.FieldRef, schema *arrow.Schema, ext ExtensionIDSet) (*expr.FieldReference, error)
- func NewFieldRefFromDotPath(dotpath string, rootSchema *arrow.Schema) (expr.ReferenceSegment, error)
- func NewScalarCall(reg ExtensionIDSet, fn string, opts []*types.FunctionOption, ...) (*expr.ScalarFunction, error)
- func RefFromFieldPath(field compute.FieldPath) expr.ReferenceSegment
- func ToArrowSchema(base types.NamedStruct, ext ExtensionIDSet) (*arrow.Schema, error)
- func ToSubstraitType(dt arrow.DataType, nullable bool, ext ExtensionIDSet) (types.Type, error)
- func WithExtensionIDSet(ctx context.Context, ext ExtensionIDSet) context.Context
- func WithExtensionRegistry(ctx context.Context, reg *ExtensionIDRegistry) context.Context
- type Builder
- type ExprBuilder
- func (e *ExprBuilder) CallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) (Builder, error)
- func (e *ExprBuilder) Cast(from Builder, to arrow.DataType) (Builder, error)
- func (e *ExprBuilder) FieldIndex(i int) Builder
- func (e *ExprBuilder) FieldPath(path compute.FieldPath) Builder
- func (e *ExprBuilder) FieldRef(field string) Builder
- func (e *ExprBuilder) FieldRefList(elems ...any) Builder
- func (e *ExprBuilder) Literal(l expr.Literal) Builder
- func (*ExprBuilder) Must(b Builder, err error) Builder
- func (e *ExprBuilder) MustCallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) Builder
- func (e *ExprBuilder) SetInputSchema(s *arrow.Schema) error
- func (e *ExprBuilder) WrapLiteral(l expr.Literal, err error) Builder
- type ExtensionIDRegistry
- func (e *ExtensionIDRegistry) AddArrowToSubstrait(name string, fn arrowToSubstrait) error
- func (e *ExtensionIDRegistry) AddSubstraitScalarToArrow(id extensions.ID, toArrow substraitToArrow) error
- func (e *ExtensionIDRegistry) GetArrowToSubstrait(name string) (conv arrowToSubstrait, ok bool)
- func (e *ExtensionIDRegistry) GetIDByType(typ arrow.DataType) (extensions.ID, bool)
- func (e *ExtensionIDRegistry) GetSubstraitScalarToArrow(id extensions.ID) (substraitToArrow, bool)
- func (e *ExtensionIDRegistry) GetTypeByID(id extensions.ID) (arrow.DataType, bool)
- func (e *ExtensionIDRegistry) RegisterType(id extensions.ID, dt arrow.DataType) error
- type ExtensionIDSet
Constants ¶
const ( // URI for official Arrow Substrait Extension Types ArrowExtTypesUri = "https://github.com/joechenrh/arrow/blob/main/format/substrait/extension_types.yaml" SubstraitDefaultURIPrefix = extensions.SubstraitDefaultURIPrefix // URI for official Substrait Arithmetic funcs extensions SubstraitArithmeticFuncsURI = SubstraitDefaultURIPrefix + "functions_arithmetic.yaml" // URI for official Substrait Comparison funcs extensions SubstraitComparisonFuncsURI = SubstraitDefaultURIPrefix + "functions_comparison.yaml" SubstraitBooleanFuncsURI = SubstraitDefaultURIPrefix + "functions_boolean.yaml" TimestampTzTimezone = "UTC" )
Variables ¶
var DefaultExtensionIDRegistry = NewExtensionIDRegistry()
DefaultExtensionIDRegistry is the default extension registry that will contain the Arrow extension type variations and types.
Functions ¶
func ExecuteScalarExpression ¶
func ExecuteScalarExpression(ctx context.Context, inputSchema *arrow.Schema, expression expr.Expression, partialInput compute.Datum) (compute.Datum, error)
ExecuteScalarExpression executes the given substrait expression using the provided datum as input. It will first create an exec batch using the input schema and the datum. The datum may have missing or incorrectly ordered columns while the input schema should describe the expected input schema for the expression. Missing fields will be replaced with null scalars and incorrectly ordered columns will be re-ordered according to the schema.
You can provide an allocator to use through the context via compute.WithAllocator.
You can provide the ExtensionIDSet to use through the context via WithExtensionIDSet.
func ExecuteScalarSubstrait ¶
func ExecuteScalarSubstrait(ctx context.Context, expression *expr.Extended, partialInput compute.Datum) (compute.Datum, error)
ExecuteScalarSubstrait uses the provided Substrait extended expression to determine the expected input schema (replacing missing fields in the partial input datum with null scalars and re-ordering columns if necessary) and ExtensionIDSet to use. You can provide the extension registry to use through the context via WithExtensionRegistry, otherwise the default Arrow registry will be used. You can provide a memory.Allocator to use the same way via compute.WithAllocator.
func FieldsFromSubstrait ¶
func FieldsFromSubstrait(typeList []types.Type, nextName func() string, ext ExtensionIDSet) (out []arrow.Field, err error)
FieldsFromSubstrait produces a list of arrow fields from a list of substrait types (such as the fields of a StructType) using nextName to determine the names for the fields.
func FromSubstraitType ¶
FromSubstraitType returns the appropriate Arrow data type for the given substrait type, using the extension set if necessary. Since Substrait types also carry their nullability, the nullability is returned along with the data type.
func GetRefField ¶
GetRefField evaluates the substrait field reference to retrieve the referenced field or return an error.
func GetRefSchema ¶
GetRefSchema evaluates the provided substrait field reference against the schema to retrieve the referenced (potentially nested) field.
func GetReferencedValue ¶
func GetReferencedValue(mem memory.Allocator, ref expr.ReferenceSegment, value compute.Datum, ext ExtensionIDSet) (compute.Datum, error)
GetReferencedValue retrieves the referenced (potentially nested) value from the provided datum which may be a scalar, array, or record batch.
func GetScalar ¶
func GetScalar(ref expr.ReferenceSegment, s scalar.Scalar, mem memory.Allocator, ext ExtensionIDSet) (scalar.Scalar, error)
GetScalar returns the evaluated referenced scalar value from the provided scalar which must be appropriate to the type of reference.
A StructFieldRef can only reference against a Struct-type scalar, a ListElementRef can only reference against a List or LargeList scalar, and a MapKeyRef will only reference against a Map scalar. An error is returned if following the reference children ends up with an invalid nested reference object.
func IsNullable ¶
IsNullable is a convenience function that reports whether a substrait type is nullable, based on whether its Nullability is set to NullabilityRequired.
func NewFieldRef ¶
func NewFieldRef(ref compute.FieldRef, schema *arrow.Schema, ext ExtensionIDSet) (*expr.FieldReference, error)
NewFieldRef constructs a properly typed substrait field reference segment from a given arrow field reference, schema, and extension set (for resolving substrait types).
func NewFieldRefFromDotPath ¶
func NewFieldRefFromDotPath(dotpath string, rootSchema *arrow.Schema) (expr.ReferenceSegment, error)
NewFieldRefFromDotPath constructs a substrait reference segment from a dot path and the base schema.
dot_path = '.' name
         | '[' digit+ ']'
         | dot_path+
Examples ¶
Assume root schema of {alpha: i32, beta: struct<gamma: list<i32>>, delta: map<string, i32>}
".alpha" => StructFieldRef(0)
"[2]" => StructFieldRef(2)
".beta[0]" => StructFieldRef(1, StructFieldRef(0))
"[1].gamma[3]" => StructFieldRef(1, StructFieldRef(0, ListElementRef(3)))
".delta.foobar" => StructFieldRef(2, MapKeyRef("foobar"))
Note: when parsing a name, a '\' preceding any other character will be dropped from the resulting name. Therefore if a name must contain the characters '.', '\', '[', or ']' then they must be escaped with a preceding '\'.
func NewScalarCall ¶
func NewScalarCall(reg ExtensionIDSet, fn string, opts []*types.FunctionOption, args ...types.FuncArg) (*expr.ScalarFunction, error)
NewScalarCall constructs a substrait ScalarFunction expression with the provided options and arguments.
The function name (fn) is looked up in the internal Arrow DefaultExtensionIDRegistry to ensure it exists and to convert from the Arrow function name to the substrait function name. It is then looked up using the DefaultCollection from the substrait extensions module to find the declaration. If it cannot be found, we try constructing the compound signature name by getting the types of the arguments which were passed and appending them to the function name appropriately.
An error is returned if the function cannot be resolved.
func RefFromFieldPath ¶
func RefFromFieldPath(field compute.FieldPath) expr.ReferenceSegment
RefFromFieldPath constructs a substrait field reference segment from a compute.FieldPath which should be a slice of integers indicating nested field paths to travel. This will return a series of StructFieldRef's whose child is the next element in the field path.
func ToArrowSchema ¶
func ToArrowSchema(base types.NamedStruct, ext ExtensionIDSet) (*arrow.Schema, error)
ToArrowSchema takes a substrait NamedStruct and an extension set (for type resolution mapping) and creates the equivalent Arrow Schema.
func ToSubstraitType ¶
ToSubstraitType converts an arrow data type to a Substrait Type. Since arrow types don't have a nullable flag (it is in the arrow.Field) but Substrait types do, the nullability must be passed in here.
func WithExtensionIDSet ¶
func WithExtensionIDSet(ctx context.Context, ext ExtensionIDSet) context.Context
func WithExtensionRegistry ¶
func WithExtensionRegistry(ctx context.Context, reg *ExtensionIDRegistry) context.Context
Types ¶
type Builder ¶
type Builder interface { expr.Builder expr.FuncArgBuilder }
Builder wraps the substrait-go expression Builder and FuncArgBuilder interfaces for a simple interface that can be passed around to build substrait expressions from Arrow data.
type ExprBuilder ¶
type ExprBuilder struct {
// contains filtered or unexported fields
}
ExprBuilder is the parent for building substrait expressions via Arrow types and functions.
The expectation is that it should be utilized like so:
bldr := NewExprBuilder(extSet)
bldr.SetInputSchema(arrowschema)
call, err := bldr.CallScalar("equal", nil, bldr.FieldRef("i32"),
	bldr.Literal(expr.NewPrimitiveLiteral(int32(0), false)))
ex, err := call.BuildExpr()
...
result, err := exprs.ExecuteScalarExpression(ctx, arrowschema, ex, input)
func NewExprBuilder ¶
func NewExprBuilder(extSet ExtensionIDSet) ExprBuilder
NewExprBuilder constructs a new Expression Builder that will use the provided extension set and registry.
func (*ExprBuilder) CallScalar ¶
func (e *ExprBuilder) CallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) (Builder, error)
CallScalar constructs a builder for a scalar function call. The function name is expected to be valid in the Arrow function registry which will map it properly to a substrait expression by resolving the types of the arguments. Examples are: "greater", "multiply", "equal", etc.
It can return arrow.ErrNotFound if no function mapping is found, and it will forward any error encountered when converting from an Arrow function to a substrait one.
func (*ExprBuilder) Cast ¶
Cast returns a Cast expression with the FailBehavior of ThrowException, erroring for invalid casts.
func (*ExprBuilder) FieldIndex ¶
func (e *ExprBuilder) FieldIndex(i int) Builder
FieldIndex is shorthand for creating a single field reference to the struct field index provided.
func (*ExprBuilder) FieldPath ¶
func (e *ExprBuilder) FieldPath(path compute.FieldPath) Builder
FieldPath uses a field path to construct a Field Reference expression.
func (*ExprBuilder) FieldRef ¶
func (e *ExprBuilder) FieldRef(field string) Builder
FieldRef constructs a field reference expression to the field with the given name from the input. It will be resolved to a field index when calling BuildExpr.
func (*ExprBuilder) FieldRefList ¶
func (e *ExprBuilder) FieldRefList(elems ...any) Builder
FieldRefList accepts a list of either integers or strings to construct a field reference expression from. This will panic if any of elems are not a string or int.
Field names will be resolved to their indexes when BuildExpr is called by using the provided Arrow schema.
func (*ExprBuilder) Literal ¶
func (e *ExprBuilder) Literal(l expr.Literal) Builder
Literal wraps a substrait literal to be used as an argument to building other expressions.
func (*ExprBuilder) Must ¶
func (*ExprBuilder) Must(b Builder, err error) Builder
Must is a convenience wrapper for any method that returns a Builder and an error, panicking if it receives an error and otherwise returning the Builder.
func (*ExprBuilder) MustCallScalar ¶
func (e *ExprBuilder) MustCallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) Builder
MustCallScalar is like CallScalar, but will panic on error rather than return it.
func (*ExprBuilder) SetInputSchema ¶
func (e *ExprBuilder) SetInputSchema(s *arrow.Schema) error
SetInputSchema sets the current Arrow schema that will be utilized for performing field reference and field type resolutions.
func (*ExprBuilder) WrapLiteral ¶
func (e *ExprBuilder) WrapLiteral(l expr.Literal, err error) Builder
WrapLiteral is a convenience for accepting functions like NewLiteral which can potentially return an error. If an error is encountered, it will be surfaced when BuildExpr is called.
type ExtensionIDRegistry ¶
type ExtensionIDRegistry struct {
// contains filtered or unexported fields
}
ExtensionIDRegistry manages a set of mappings between Arrow types and functions and their substrait equivalents.
func GetExtensionRegistry ¶
func GetExtensionRegistry(ctx context.Context) *ExtensionIDRegistry
func NewExtensionIDRegistry ¶
func NewExtensionIDRegistry() *ExtensionIDRegistry
NewExtensionIDRegistry initializes a new registry for use.
func (*ExtensionIDRegistry) AddArrowToSubstrait ¶
func (e *ExtensionIDRegistry) AddArrowToSubstrait(name string, fn arrowToSubstrait) error
AddArrowToSubstrait creates a mapping between the provided arrow compute function and a function which should provide the correct substrait ExtensionID and function options from that name.
func (*ExtensionIDRegistry) AddSubstraitScalarToArrow ¶
func (e *ExtensionIDRegistry) AddSubstraitScalarToArrow(id extensions.ID, toArrow substraitToArrow) error
AddSubstraitScalarToArrow creates a mapping between a given extension ID and a function which should return the corresponding Arrow compute function name along with any relevant FunctionOptions based on the ScalarFunction instance passed to it.
Any relevant options should be parsed from the ScalarFunction's options and used to ensure the correct arrow compute function is used and necessary options are passed.
func (*ExtensionIDRegistry) GetArrowToSubstrait ¶
func (e *ExtensionIDRegistry) GetArrowToSubstrait(name string) (conv arrowToSubstrait, ok bool)
GetArrowToSubstrait returns the mapped function to convert an arrow compute function to the corresponding Substrait ScalarFunction extension ID and options. False is returned as the second value if there is no mapping found.
func (*ExtensionIDRegistry) GetIDByType ¶
func (e *ExtensionIDRegistry) GetIDByType(typ arrow.DataType) (extensions.ID, bool)
GetIDByType is the inverse of GetTypeByID, returning the mapped substrait extension ID corresponding to the provided arrow data type. The second return is false if there is no mapping found.
func (*ExtensionIDRegistry) GetSubstraitScalarToArrow ¶
func (e *ExtensionIDRegistry) GetSubstraitScalarToArrow(id extensions.ID) (substraitToArrow, bool)
GetSubstraitScalarToArrow returns the mapped conversion function for a given substrait extension ID to convert a substrait ScalarFunction to the corresponding Arrow compute function call. False is returned as the second value if there is no mapping available.
func (*ExtensionIDRegistry) GetTypeByID ¶
func (e *ExtensionIDRegistry) GetTypeByID(id extensions.ID) (arrow.DataType, bool)
GetTypeByID returns the mapped arrow data type from the provided substrait extension id. If no mapping exists for this substrait extension id, the second return value will be false.
func (*ExtensionIDRegistry) RegisterType ¶
func (e *ExtensionIDRegistry) RegisterType(id extensions.ID, dt arrow.DataType) error
RegisterType creates a mapping between the given extension ID and the provided Arrow data type. If this extension ID or arrow type is already registered, an arrow.ErrInvalid error will be returned.
type ExtensionIDSet ¶
type ExtensionIDSet interface { GetArrowRegistry() *ExtensionIDRegistry GetSubstraitRegistry() expr.ExtensionRegistry DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) EncodeTypeVariation(dt arrow.DataType) (extensions.ID, uint32, bool) }
ExtensionIDSet is an interface for managing the mapping between arrow and substrait types and function extensions.
func GetExtensionIDSet ¶
func GetExtensionIDSet(ctx context.Context) ExtensionIDSet
func NewDefaultExtensionSet ¶
func NewDefaultExtensionSet() ExtensionIDSet
NewDefaultExtensionSet constructs an empty extension set using the default Arrow Extension registry and the default collection of substrait extensions from the Substrait-go repo.
func NewExtensionSet ¶
func NewExtensionSet(set expr.ExtensionRegistry, reg *ExtensionIDRegistry) ExtensionIDSet
NewExtensionSet creates a new extension set given a substrait extension registry, and an Arrow <--> Substrait registry for mapping substrait extensions to their Arrow equivalents. This extension set can then be used to manage a particular set of extensions in use by an expression or plan, so when serializing you only need to serialize the extensions that have been inserted into the extension set.
func NewExtensionSetDefault ¶
func NewExtensionSetDefault(set expr.ExtensionRegistry) ExtensionIDSet
NewExtensionSetDefault is a convenience function to create a new extension set using the Default arrow extension ID registry.
See NewExtensionSet for more info.