Documentation ¶
Index ¶
- Variables
- func AddParquetFieldIds(record arrow.Record) arrow.Record
- func CastArraysToBaseDataType[T arrow.Array](arrays ...arrow.Array) ([]T, error)
- func CompareRecordRows(record1, record2 arrow.Record, index1, index2 int, fields ...string) (int, error)
- func ConcatenateRecords(mem *memory.GoAllocator, records ...arrow.Record) (arrow.Record, error)
- func DeduplicateRecord(mem *memory.GoAllocator, record arrow.Record, columns []string, ...) (arrow.Record, error)
- func FErrSchemasNotEqual(record1, record2 arrow.Record, fields ...string) error
- func MockData(mem *memory.GoAllocator, size int, method string) arrow.Record
- func RankArray(mem *memory.GoAllocator, arr arrow.Array) (*array.Uint32, error)
- func RankedSort(mem *memory.GoAllocator, previousArray, currentArray arrow.Array) (*array.Uint32, error)
- func ReadParquetFile(ctx context.Context, mem *memory.GoAllocator, filePath string) ([]arrow.Record, error)
- func RecordSchemasEqual(record1 arrow.Record, record2 arrow.Record, fields ...string) bool
- func RecordsEqual(rec1, rec2 arrow.Record, fields ...string) bool
- func SchemaSubSetEqual(schema1 *arrow.Schema, schema2 *arrow.Schema, fields ...string) bool
- func SortRecord(mem *memory.GoAllocator, record arrow.Record, columns []string) (arrow.Record, error)
- func TakeArray(mem *memory.GoAllocator, arr arrow.Array, indices *array.Uint32) (arrow.Array, error)
- func TakeMultipleArrays(mem *memory.GoAllocator, arrs []arrow.Array, indices arrow.Record) (arrow.Array, error)
- func TakeMultipleRecords(mem *memory.GoAllocator, records []arrow.Record, indices arrow.Record) (arrow.Record, error)
- func TakeRecord(mem *memory.GoAllocator, record arrow.Record, indices *array.Uint32) (arrow.Record, error)
- func TakeRecordColumns(rec arrow.Record, columnNames []string) (arrow.Record, error)
- func WriteRecordToParquetFile(ctx context.Context, mem *memory.GoAllocator, record arrow.Record, ...) error
- func ZeroUint32Array(mem *memory.GoAllocator, length int) *array.Uint32
- type ParquetFile
Constants ¶
This section is empty.
Variables ¶
var ( ErrUnsupportedDataType = errors.New("unsupported data type") ErrColumnNotFound = errors.New("column not found") ErrRecordNotComplete = errors.New("record not complete") ErrNoDataLeft = errors.New("no data left") ErrSchemasNotEqual = errors.New("schemas not equal") ErrDataTypesNotEqual = errors.New("data types not equal") ErrNoDataSupplied = errors.New("no data supplied") ErrIndexOutOfBounds = errors.New("index out of bounds") ErrNullValuesNotAllowed = errors.New("null values not allowed") ErrColumnNamesRequired = errors.New("column names required") ErrNoColumnsProvided = errors.New("no columns provided") )
var TEST_SIZES = []int{10_000, 100_000, 1_000_000}
Functions ¶
func CastArraysToBaseDataType ¶
Take in a slice of arrays and return the arrays in a new slice where each array is in its base data type.
func CompareRecordRows ¶
func CompareRecordRows(record1, record2 arrow.Record, index1, index2 int, fields ...string) (int, error)
Determines if the row at index1 in record1 is less/equal/greater than the row at index2 in record2. If the column list is empty then all columns will be compared; otherwise only the columns in the list will be compared. It's assumed that each record only has one column with the same name or that each column with the same name has the same values. Less than is -1, equal to is 0 and greater than is 1.
func ConcatenateRecords ¶
Concatenate the records together. The records must all have the same schema. The resulting record will have all data from each record in the order they were supplied and will be a new record with all data copied.
func DeduplicateRecord ¶
func DeduplicateRecord(mem *memory.GoAllocator, record arrow.Record, columns []string, presortedByColumnsNames bool) (arrow.Record, error)
Takes a record and deduplicates the rows based on the subset of columns provided. The rows are not guaranteed to be in any particular order. All columns from the input record will be returned in the result record.
func FErrSchemasNotEqual ¶
func RankedSort ¶
func ReadParquetFile ¶
func ReadParquetFile(ctx context.Context, mem *memory.GoAllocator, filePath string) ([]arrow.Record, error)
Read a parquet file and return the records for all row groups in the file.
func RecordSchemasEqual ¶
Checks if the two records' schemas are equal using only the field/column names provided.
func RecordsEqual ¶
Compares the two records using just the field/column names provided.
func SchemaSubSetEqual ¶
Checks if the two record schemas are equal using only the field/column names provided.
func SortRecord ¶
func SortRecord(mem *memory.GoAllocator, record arrow.Record, columns []string) (arrow.Record, error)
Sort the record based on the provided columns. The returned record will be sorted in ascending order.
func TakeMultipleArrays ¶ added in v0.1.4
func TakeMultipleRecords ¶
func TakeMultipleRecords(mem *memory.GoAllocator, records []arrow.Record, indices arrow.Record) (arrow.Record, error)
Take all rows from the input records based on the input indices record. The indices record should contain two UINT32 columns. The first column should contain the index of the record in the "records" slice and the second column should contain the index of the row in that record. The record returned contains data copied from the original records.
func TakeRecord ¶
func TakeRecord(mem *memory.GoAllocator, record arrow.Record, indices *array.Uint32) (arrow.Record, error)
Take all rows from the input record based on the input indices array. The resulting record contains data copied from the original record.
func TakeRecordColumns ¶
Take all columns from the record with the given names. The columns are not copied, but referenced from the original record.
func WriteRecordToParquetFile ¶
func WriteRecordToParquetFile(ctx context.Context, mem *memory.GoAllocator, record arrow.Record, filePath string) error
Write a record to a parquet file.
func ZeroUint32Array ¶
func ZeroUint32Array(mem *memory.GoAllocator, length int) *array.Uint32
Creates a new arrow.Uint32 array with all elements set to zero.