arrow

package
v0.24.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 5, 2024 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Overview

Package arrow provides a set of utility functions to access Arrow data structures.

Index

Constants

View Source
const (
	StrCode    int8 = 0
	I64Code    int8 = 1
	F64Code    int8 = 2
	BoolCode   int8 = 3
	BinaryCode int8 = 4
	CborCode   int8 = 5
)
View Source
const AbsentFieldID = -1
View Source
const BinarySig = "Bin"
View Source
const BoolSig = "Bol"
View Source
const DenseUnionSig = "DU"
View Source
const DictionarySig = "Dic"
View Source
const Duration = "Dur" // Duration in nanoseconds.
View Source
const F32Sig = "F32"
View Source
const F64Sig = "F64"
View Source
const FixedSizeBinarySig = "FSB"
View Source
const I16Sig = "I16"
View Source
const I32Sig = "I32"
View Source
const I64Sig = "I64"
View Source
const I8Sig = "I8"
View Source
const MapSig = "Map"
View Source
const SparseUnionSig = "SU"
View Source
const StringSig = "Str"
View Source
const Timestamp = "Tns" // Timestamp in nanoseconds.
View Source
const U16Sig = "U16"
View Source
const U32Sig = "U32"
View Source
const U64Sig = "U64"
View Source
const U8Sig = "U8"

Variables

View Source
var (
	// ErrInvalidArrayType is returned when an array is not of the expected type.
	ErrInvalidArrayType = errors.New("invalid arrow array type")

	// ErrNotStructType is returned when an array is not of type Struct.
	ErrNotStructType = errors.New("not arrow.StructType")
	// ErrNotListOfStructsType is returned when an array is not of type List of
	// structs.
	ErrNotListOfStructsType = errors.New("not arrow.ListType of arrow.StructType")
	// ErrNotListType is returned when an array is not of type list.
	ErrNotListType = errors.New("not an arrow.ListType")

	// ErrNotArrayStruct is returned when an array is not an array.Struct.
	ErrNotArrayStruct = errors.New("not an arrow array.Struct")
	// ErrNotArrayList is returned when an array is not an array.List.
	ErrNotArrayList = errors.New("not an arrow array.List")
	// ErrNotArrayListOfStructs is returned when an array is not an array.List
	// of array.Struct.
	ErrNotArrayListOfStructs = errors.New("not an Arrow array.List of array.Struct")

	// ErrDuplicateFieldName is returned when a field name is duplicated in the
	// same struct.
	ErrDuplicateFieldName = errors.New("duplicate field name")

	// ErrMissingFieldName is returned when a field name is missing in a struct.
	ErrMissingFieldName = errors.New("missing field name")
)

Functions

func BinaryFromArray

func BinaryFromArray(arr arrow.Array, row int) ([]byte, error)

BinaryFromArray returns the binary value for a specific row in an Arrow array.

func BinaryFromRecord

func BinaryFromRecord(record arrow.Record, fieldID int, row int) ([]byte, error)

BinaryFromRecord returns the []byte value for a specific row and column in an Arrow record. If the value is null, it returns nil.

func BinaryFromStruct

func BinaryFromStruct(arr arrow.Array, row int, id int) ([]byte, error)

BinaryFromStruct returns the []byte value for a specific field+row in an Arrow Array struct.

func BoolFromArray

func BoolFromArray(arr arrow.Array, row int) (bool, error)

BoolFromArray returns the bool value for a specific row in an Arrow array.

func BoolFromRecord

func BoolFromRecord(record arrow.Record, fieldID int, row int) (bool, error)

BoolFromRecord returns the bool value for a specific row and column in an Arrow record. If the value is null, it returns false.

func BoolFromStruct

func BoolFromStruct(arr arrow.Array, row int, id int) (bool, error)

BoolFromStruct returns the bool value for a specific field+row in an Arrow Array struct.

func DataTypeToID

func DataTypeToID(dt arrow.DataType) string

DataTypeToID creates a unique id for a data type.

func DurationFromArray

func DurationFromArray(arr arrow.Array, row int) (arrow.Duration, error)

DurationFromArray returns the duration value for a specific row in an Arrow array. This Arrow array can be either a duration array or a dictionary array.

func DurationFromRecord

func DurationFromRecord(record arrow.Record, fieldID int, row int) (arrow.Duration, error)

DurationFromRecord returns the duration value for a specific row and column in an Arrow record.

func F64FromArray

func F64FromArray(arr arrow.Array, row int) (float64, error)

F64FromArray returns the float64 value for a specific row in an Arrow array.

func F64FromRecord

func F64FromRecord(record arrow.Record, fieldID int, row int) (float64, error)

F64FromRecord returns the float64 value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func F64FromStruct

func F64FromStruct(arr arrow.Array, row int, id int) (float64, error)

F64FromStruct returns the float64 value for a specific field+row in an Arrow Array struct.

func F64OrNilFromArray

func F64OrNilFromArray(arr arrow.Array, row int) (*float64, error)

F64OrNilFromArray returns a pointer to the float64 value for a specific row in an Arrow array or nil if the value is null.

func F64OrNilFromRecord

func F64OrNilFromRecord(record arrow.Record, fieldID int, row int) (*float64, error)

F64OrNilFromRecord returns a pointer to the float64 value for a specific row and column in an Arrow record. If the value is null, it returns nil.

func FieldIDFromSchema

func FieldIDFromSchema(schema *arrow.Schema, fieldName string) (int, error)

FieldIDFromSchema returns the field id of a field from an Arrow schema or AbsentFieldID for an unknown field.

An error is returned if the field is duplicated.

func FieldIDFromStruct

func FieldIDFromStruct(dt *arrow.StructType, fieldName string) (int, *arrow.DataType)

FieldIDFromStruct returns the field id of a named field from an Arrow struct or AbsentFieldID for an unknown field.

func FieldToID

func FieldToID(field *arrow.Field) string

FieldToID creates a unique id for a field.

func FixedSizeBinaryFieldByIDFromRecord

func FixedSizeBinaryFieldByIDFromRecord(record arrow.Record, fieldID int, row int) ([]byte, error)

FixedSizeBinaryFieldByIDFromRecord returns the fixed size binary value of a field id for a specific row. If the value is null, it returns nil.

func FixedSizeBinaryFromArray

func FixedSizeBinaryFromArray(arr arrow.Array, row int) ([]byte, error)

FixedSizeBinaryFromArray returns the fixed size binary value for a specific row in an Arrow array.

func FixedSizeBinaryFromRecord

func FixedSizeBinaryFromRecord(record arrow.Record, fieldID int, row int) ([]byte, error)

FixedSizeBinaryFromRecord returns the fixed size binary value of a field id for a specific row.

func I32FromArray

func I32FromArray(arr arrow.Array, row int) (int32, error)

I32FromArray returns the int32 value for a specific row in an Arrow array. This Arrow array can be either an int32 array or a dictionary-encoded array.

func I32FromRecord

func I32FromRecord(record arrow.Record, fieldID int, row int) (int32, error)

I32FromRecord returns the int32 value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func I32FromStruct

func I32FromStruct(arr arrow.Array, row int, id int) (int32, error)

I32FromStruct returns the int32 value for a specific field+row in an Arrow Array struct.

func I64FromArray

func I64FromArray(arr arrow.Array, row int) (int64, error)

I64FromArray returns the int64 value for a specific row in an Arrow array. This Arrow array can be either an int64 array or a dictionary-encoded int64 array.

func I64FromRecord

func I64FromRecord(record arrow.Record, fieldID int, row int) (int64, error)

I64FromRecord returns the int64 value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func I64FromStruct

func I64FromStruct(arr arrow.Array, row int, id int) (int64, error)

I64FromStruct returns the int64 value for a specific field+row in an Arrow Array struct.

func I64OrNilFromRecord

func I64OrNilFromRecord(record arrow.Record, fieldID int, row int) (*int64, error)

I64OrNilFromRecord returns a pointer to the int64 value for a specific row and column in an Arrow record. If the value is null, it returns nil.

func ListOfStructsFieldIDFromSchema

func ListOfStructsFieldIDFromSchema(schema *arrow.Schema, fieldName string) (int, *arrow.StructType, error)

ListOfStructsFieldIDFromSchema returns the field id of a list of structs field from an Arrow schema or AbsentFieldID for an unknown field.

An error is returned if the field is not a list of structs.

func ListOfStructsFieldIDFromStruct

func ListOfStructsFieldIDFromStruct(dt *arrow.StructType, fieldName string) (int, *arrow.StructType, error)

ListOfStructsFieldIDFromStruct returns the field id of a list of structs field from an Arrow struct or AbsentFieldID if the field is not found.

An error is returned if the field is not a list of structs.

func ListValuesByIDFromRecord

func ListValuesByIDFromRecord(record arrow.Record, fieldID int, row int) (arr arrow.Array, start int, end int, err error)

ListValuesByIDFromRecord returns the list array for a field id for a specific row.

func MandatoryFieldIDFromSchema

func MandatoryFieldIDFromSchema(schema *arrow.Schema, fieldName string) (int, error)

MandatoryFieldIDFromSchema returns the field id of a field from an Arrow schema or an error if the field is not present or duplicated.

func NullableU16FromRecord

func NullableU16FromRecord(record arrow.Record, fieldID int, row int) (*uint16, error)

NullableU16FromRecord returns the uint16 value for a specific row and column in an Arrow record. If the value is null, it returns nil.

func NullableU16FromStruct

func NullableU16FromStruct(structArr *array.Struct, row int, fieldID int) (*uint16, error)

NullableU16FromStruct returns a pointer to a uint16 value for a specific row in an Arrow struct or nil if the field doesn't exist.

func NullableU32FromRecord

func NullableU32FromRecord(record arrow.Record, fieldID int, row int) (*uint32, error)

NullableU32FromRecord returns the uint32 value for a specific row and column in an Arrow record. If the value is null, it returns nil.

func OptionalFieldIDFromStruct

func OptionalFieldIDFromStruct(dt *arrow.StructType, fieldName string) (id int)

OptionalFieldIDFromStruct returns the field id of a named field from an Arrow struct or AbsentFieldID if the field is unknown.

func PrintRecord

func PrintRecord(name string, record arrow.Record, maxRows int)

PrintRecord prints the contents of an Arrow record to stdout.

func PrintRecordWithProgression added in v0.5.0

func PrintRecordWithProgression(name string, record arrow.Record, maxRows, countPrints, maxPrints int)

PrintRecordWithProgression prints the contents of an Arrow record to stdout.

func SchemaToID

func SchemaToID(schema *arrow.Schema) string

SchemaToID creates a unique id for a schema. Fields are sorted by name before creating the id (done at each nested level).

func ShowDataType

func ShowDataType(dt arrow.DataType, prefix string)

func ShowField

func ShowField(field *arrow.Field, prefix string)

func ShowSchema

func ShowSchema(schema *arrow.Schema, schemaName string, prefix string)

func StringFromArray

func StringFromArray(arr arrow.Array, row int) (string, error)

StringFromArray returns the string value for a specific row in an Arrow array.

func StringFromRecord

func StringFromRecord(record arrow.Record, fieldID int, row int) (string, error)

StringFromRecord returns the string value for a specific row and column in an Arrow record. If the value is null, it returns an empty string.

func StringFromStruct

func StringFromStruct(arr arrow.Array, row int, id int) (string, error)

StringFromStruct returns the string value for a specific row in an Arrow struct.

func StructFieldIDFromSchema

func StructFieldIDFromSchema(schema *arrow.Schema, fieldName string) (int, *arrow.StructType, error)

StructFieldIDFromSchema returns the field id of a struct field from an Arrow schema or AbsentFieldID for an unknown field.

An error is returned if the field is not a struct.

func StructFieldIDFromStruct

func StructFieldIDFromStruct(dt *arrow.StructType, fieldName string) (int, *arrow.StructType, error)

StructFieldIDFromStruct returns the field id of a struct field from an Arrow struct or AbsentFieldID for an unknown field.

An error is returned if the field is not a struct.

func StructFromRecord

func StructFromRecord(record arrow.Record, fieldID int, row int) (sarr *array.Struct, err error)

StructFromRecord returns the struct array for a specific row and column in an Arrow record. If the value is null, it returns nil.

func StructFromSparseUnion

func StructFromSparseUnion(dt *arrow.SparseUnionType, code int8) *arrow.StructType

func TimestampFromArray

func TimestampFromArray(arr arrow.Array, row int) (arrow.Timestamp, error)

TimestampFromArray returns the timestamp value for a specific row in an Arrow array.

func TimestampFromRecord

func TimestampFromRecord(record arrow.Record, fieldID int, row int) (arrow.Timestamp, error)

TimestampFromRecord returns the timestamp value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func U16FromArray

func U16FromArray(arr arrow.Array, row int) (uint16, error)

U16FromArray returns the uint16 value for a specific row in an Arrow array.

func U16FromRecord

func U16FromRecord(record arrow.Record, fieldID int, row int) (uint16, error)

U16FromRecord returns the uint16 value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func U16FromStruct

func U16FromStruct(structArr *array.Struct, row int, fieldID int) (uint16, error)

U16FromStruct returns the uint16 value for a specific row in an Arrow struct or 0 if the field doesn't exist.

func U32FromArray

func U32FromArray(arr arrow.Array, row int) (uint32, error)

U32FromArray returns the uint32 value for a specific row in an Arrow array.

func U32FromRecord

func U32FromRecord(record arrow.Record, fieldID int, row int) (uint32, error)

U32FromRecord returns the uint32 value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func U32FromStruct

func U32FromStruct(structArr *array.Struct, row int, fieldID int) (uint32, error)

U32FromStruct returns the uint32 value for a specific row in an Arrow struct or 0 if the field doesn't exist.

func U64FromArray

func U64FromArray(arr arrow.Array, row int) (uint64, error)

U64FromArray returns the uint64 value for a specific row in an Arrow array.

func U64FromRecord

func U64FromRecord(record arrow.Record, fieldID int, row int) (uint64, error)

U64FromRecord returns the uint64 value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func U8FromArray

func U8FromArray(arr arrow.Array, row int) (uint8, error)

U8FromArray returns the uint8 value for a specific row in an Arrow array.

func U8FromRecord

func U8FromRecord(record arrow.Record, fieldID int, row int) (uint8, error)

U8FromRecord returns the uint8 value for a specific row and column in an Arrow record. If the value is null, it returns 0.

func U8FromStruct

func U8FromStruct(structArr *array.Struct, row int, fieldID int) (uint8, error)

U8FromStruct returns the uint8 value for a specific row in an Arrow struct or 0 if the field doesn't exist.

Types

type Fields

type Fields []SortableField

func (Fields) Len

func (d Fields) Len() int

func (Fields) Less

func (d Fields) Less(i, j int) bool

func (Fields) Swap

func (d Fields) Swap(i, j int)

type ListOfStructs

type ListOfStructs struct {
	// contains filtered or unexported fields
}

ListOfStructs is a wrapper around an Arrow list of structs used to expose utility functions.

func ListOfStructsFromArray

func ListOfStructsFromArray(arr arrow.Array, row int) (*ListOfStructs, error)

func ListOfStructsFromRecord

func ListOfStructsFromRecord(record arrow.Record, fieldID int, row int) (*ListOfStructs, error)

ListOfStructsFromRecord returns the struct type and an array of structs for a given field id.

func ListOfStructsFromStruct

func ListOfStructsFromStruct(parent *array.Struct, fieldID int, row int) (*ListOfStructs, error)

ListOfStructsFromStruct returns a ListOfStructs from a struct field.

func (*ListOfStructs) Array

func (los *ListOfStructs) Array() *array.Struct

Array returns the underlying arrow array.

func (*ListOfStructs) BinaryFieldByID

func (los *ListOfStructs) BinaryFieldByID(fieldID int, row int) ([]byte, error)

BinaryFieldByID returns the binary value of a field id for a specific row.

func (*ListOfStructs) BinaryFieldByName

func (los *ListOfStructs) BinaryFieldByName(name string, row int) ([]byte, error)

BinaryFieldByName returns the binary value of a named field for a specific row.

func (*ListOfStructs) BoolFieldByID

func (los *ListOfStructs) BoolFieldByID(fieldID int, row int) (bool, error)

BoolFieldByID returns the bool value of a field id for a specific row.

func (*ListOfStructs) BoolFieldByName

func (los *ListOfStructs) BoolFieldByName(name string, row int) (bool, error)

BoolFieldByName returns the bool value of a named field for a specific row.

func (*ListOfStructs) DataType

func (los *ListOfStructs) DataType() *arrow.StructType

DataType returns the underlying arrow struct type.

func (*ListOfStructs) DurationFieldByID

func (los *ListOfStructs) DurationFieldByID(fieldID int, row int) (arrow.Duration, error)

DurationFieldByID returns the duration value of a field id for a specific row or 0 if the field doesn't exist.

func (*ListOfStructs) End

func (los *ListOfStructs) End() int

End returns the end index of the list of structs.

func (*ListOfStructs) F64FieldByID

func (los *ListOfStructs) F64FieldByID(fieldID int, row int) (float64, error)

F64FieldByID returns the float64 value of a field id for a specific row.

func (*ListOfStructs) F64FieldByName

func (los *ListOfStructs) F64FieldByName(name string, row int) (float64, error)

F64FieldByName returns the float64 value of a named field for a specific row.

func (*ListOfStructs) F64OrNilFieldByID

func (los *ListOfStructs) F64OrNilFieldByID(fieldID int, row int) (*float64, error)

F64OrNilFieldByID returns the float64 value of a field id for a specific row or nil if the field is null.

func (*ListOfStructs) Field

func (los *ListOfStructs) Field(name string) (arrow.Array, bool)

Field returns the field array of a named field. The boolean return value indicates whether the field was found.

func (*ListOfStructs) FieldByID

func (los *ListOfStructs) FieldByID(id int) arrow.Array

FieldByID returns the field array of a field id.

func (*ListOfStructs) FieldIdx

func (los *ListOfStructs) FieldIdx(name string) (int, bool)

FieldIdx returns the field id of a named field. The boolean return value indicates whether the field was found.

func (*ListOfStructs) FixedSizeBinaryFieldByID

func (los *ListOfStructs) FixedSizeBinaryFieldByID(fieldID int, row int) ([]byte, error)

FixedSizeBinaryFieldByID returns the fixed size binary value of a field id for a specific row.

func (*ListOfStructs) FixedSizeBinaryFieldByName

func (los *ListOfStructs) FixedSizeBinaryFieldByName(name string, row int) ([]byte, error)

FixedSizeBinaryFieldByName returns the fixed size binary value of a named field for a specific row.

func (*ListOfStructs) I32FieldByID

func (los *ListOfStructs) I32FieldByID(fieldID int, row int) (int32, error)

I32FieldByID returns the int32 value of a field id for a specific row.

func (*ListOfStructs) I32FieldByName

func (los *ListOfStructs) I32FieldByName(name string, row int) (int32, error)

I32FieldByName returns the int32 value of a named field for a specific row.

func (*ListOfStructs) I64FieldByID

func (los *ListOfStructs) I64FieldByID(fieldID int, row int) (int64, error)

I64FieldByID returns the int64 value of a field id for a specific row.

func (*ListOfStructs) I64FieldByName

func (los *ListOfStructs) I64FieldByName(name string, row int) (int64, error)

I64FieldByName returns the int64 value of a named field for a specific row.

func (*ListOfStructs) IsNull

func (los *ListOfStructs) IsNull(row int) bool

IsNull returns true if the row is null.

func (*ListOfStructs) ListOfStructsById

func (los *ListOfStructs) ListOfStructsById(row int, fieldID int) (*ListOfStructs, error)

ListOfStructsById returns the list of structs for a field id for a specific row.

func (*ListOfStructs) ListValuesById

func (los *ListOfStructs) ListValuesById(row int, fieldID int) (arr arrow.Array, start int, end int, err error)

ListValuesById returns the list array for a field id for a specific row.

func (*ListOfStructs) NullableU16FieldByID

func (los *ListOfStructs) NullableU16FieldByID(fieldID int, row int) (*uint16, error)

NullableU16FieldByID returns the uint16 value of a field id for a specific row or nil if the field doesn't exist.

func (*ListOfStructs) OptionalTimestampFieldByID

func (los *ListOfStructs) OptionalTimestampFieldByID(fieldID int, row int) *pcommon.Timestamp

OptionalTimestampFieldByID returns the timestamp value of a field id for a specific row or nil if the field is null.

func (*ListOfStructs) Start

func (los *ListOfStructs) Start() int

Start returns the start index of the list of structs.

func (*ListOfStructs) StringFieldByID

func (los *ListOfStructs) StringFieldByID(fieldID int, row int) (string, error)

StringFieldByID returns the string value of a field id for a specific row or empty string if the field doesn't exist.

func (*ListOfStructs) StringFieldByName

func (los *ListOfStructs) StringFieldByName(name string, row int) (string, error)

StringFieldByName returns the string value of a named field for a specific row.

func (*ListOfStructs) StructArray

func (los *ListOfStructs) StructArray(name string, row int) (*arrow.StructType, *array.Struct, error)

StructArray returns the underlying arrow array for a named field for a specific row.

func (*ListOfStructs) StructByID

func (los *ListOfStructs) StructByID(fieldID int, row int) (*arrow.StructType, *array.Struct, error)

StructByID returns the underlying arrow struct type and arrow array for a field id for a specific row.

func (*ListOfStructs) TimestampFieldByID

func (los *ListOfStructs) TimestampFieldByID(fieldID int, row int) (arrow.Timestamp, error)

TimestampFieldByID returns the timestamp value of a field id for a specific row or a zero timestamp if the field doesn't exist.

func (*ListOfStructs) U16FieldByID

func (los *ListOfStructs) U16FieldByID(fieldID int, row int) (uint16, error)

U16FieldByID returns the uint16 value of a field id for a specific row or 0 if the field doesn't exist.

func (*ListOfStructs) U32FieldByID

func (los *ListOfStructs) U32FieldByID(fieldID int, row int) (uint32, error)

U32FieldByID returns the uint32 value of a field id for a specific row or 0 if the field doesn't exist.

func (*ListOfStructs) U32FieldByName

func (los *ListOfStructs) U32FieldByName(name string, row int) (uint32, error)

U32FieldByName returns the uint32 value of a named field for a specific row.

func (*ListOfStructs) U64FieldByID

func (los *ListOfStructs) U64FieldByID(fieldID int, row int) (uint64, error)

U64FieldByID returns the uint64 value of a field id for a specific row or 0 if the field doesn't exist.

func (*ListOfStructs) U64FieldByName

func (los *ListOfStructs) U64FieldByName(name string, row int) (uint64, error)

U64FieldByName returns the uint64 value of a named field for a specific row.

type SortableField

type SortableField struct {
	// contains filtered or unexported fields
}

SortableField is a wrapper around arrow.Field that implements sort.Interface.

type TextColumn added in v0.5.0

type TextColumn struct {
	Name   string
	MaxLen int
	Values []string
}

TextColumn stores the contents of a column for printing. MaxLen is used to determine the column width.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL