Versions in this module Expand all Collapse all v0 v0.0.1 Mar 23, 2024 Changes in this version + const ColumnIndexSize + const DynamicColumnsKey + var ErrMalformedDynamicColumns = errors.New("malformed dynamic columns string") + var ErrNoDynamicColumns = errors.New("no dynamic columns metadata found, it must be present") + var GenerateTestSamples = samples.GenerateTestSamples + var LabelColumn = samples.LabelColumn + var NewNestedSampleSchema = samples.NewNestedSampleSchema + var NewTestSamples = samples.NewTestSamples + var PrehashedSampleDefinition = samples.PrehashedSampleDefinition + var SampleDefinition = samples.SampleDefinition + var SampleDefinitionWithFloat = samples.SampleDefinitionWithFloat + func DefinitionFromParquetFile(file *parquet.File) (*schemapb.Schema, error) + func FieldByName(fields []parquet.Field, name string) parquet.Field + func FindChildIndex(fields []parquet.Field, name string) int + func FindHashedColumn(col string, fields []arrow.Field) int + func HashArray(arr arrow.Array) []uint64 + func HashedColumnName(col string) string + func IsHashedColumn(col string) bool + func MergeDeduplicatedDynCols(dyn []string) []string + func MergeDynamicColumnSets(sets []map[string][]string) map[string][]string + func ParquetSchemaFromV2Definition(def *schemav2pb.Schema) *parquet.Schema + func PrehashColumns(schema *Schema, r arrow.Record) arrow.Record + func RemoveHashedColumns(r arrow.Record) arrow.Record + func SortingColumnsFromDef(def *schemav2pb.Schema) ([]parquet.SortingColumn, error) + func ToSnakeCase(str string) string + func ValuesForIndex(row parquet.Row, index int) []parquet.Value + type Buffer struct + func ToBuffer(s Samples, schema *Schema) (*Buffer, error) + func (b *Buffer) Clone() (*Buffer, error) + func (b *Buffer) ColumnChunks() []parquet.ColumnChunk + func (b *Buffer) DynamicColumns() map[string][]string + func (b *Buffer) DynamicRows() DynamicRowReader + func (b *Buffer) NumRows() int64 + func (b *Buffer) Reset() + func (b *Buffer) Rows() parquet.Rows + func (b *Buffer) Schema() *parquet.Schema + func (b *Buffer) Size() int64 + func (b *Buffer) Sort() + func (b *Buffer) SortingColumns() []parquet.SortingColumn + func (b *Buffer) String() string + func (b *Buffer) WriteRowGroup(rg parquet.RowGroup) (int64, error) + func (b *Buffer) WriteRows(rows []parquet.Row) (int, error) + type ColumnDefinition struct + Dynamic bool + Name string + PreHash bool + StorageLayout parquet.Node + type DynamicRow struct + DynamicColumns map[string][]string + Row parquet.Row + Schema *parquet.Schema + func NewDynamicRow(row parquet.Row, schema *parquet.Schema, dyncols map[string][]string, ...) *DynamicRow + type DynamicRowGroup interface + DynamicColumns func() map[string][]string + DynamicRows func() DynamicRowReader + func Concat(fields []parquet.Field, drg ...DynamicRowGroup) DynamicRowGroup + type DynamicRowGroupMergeAdapter struct + func NewDynamicRowGroupMergeAdapter(schema *parquet.Schema, sortingColumns []parquet.SortingColumn, ...) *DynamicRowGroupMergeAdapter + func (a *DynamicRowGroupMergeAdapter) ColumnChunks() []parquet.ColumnChunk + func (a *DynamicRowGroupMergeAdapter) NumRows() int64 + func (a *DynamicRowGroupMergeAdapter) Rows() parquet.Rows + func (a *DynamicRowGroupMergeAdapter) Schema() *parquet.Schema + func (a *DynamicRowGroupMergeAdapter) SortingColumns() []parquet.SortingColumn + type DynamicRowReader interface + Close func() error + ReadRows func(*DynamicRows) (int, error) + type DynamicRowSorter struct + func NewDynamicRowSorter(schema *Schema, rows *DynamicRows) *DynamicRowSorter + func (d *DynamicRowSorter) Len() int + func (d *DynamicRowSorter) Less(i, j int) bool + func (d *DynamicRowSorter) Swap(i, j int) + type DynamicRows struct + DynamicColumns map[string][]string + Rows []parquet.Row + Schema *parquet.Schema + func NewDynamicRows(rows []parquet.Row, schema *parquet.Schema, dynamicColumns map[string][]string, ...) *DynamicRows + func (r *DynamicRows) Get(i int) *DynamicRow + func (r *DynamicRows) GetCopy(i int) *DynamicRow + func (r *DynamicRows) IsSorted(schema *Schema) bool + type MergeOption func(m *mergeOption) + func WithAlreadySorted() MergeOption + func WithDynamicCols(cols map[string][]string) MergeOption + type MergedRowGroup struct + DynCols map[string][]string + func (r *MergedRowGroup) DynamicColumns() map[string][]string + func (r *MergedRowGroup) DynamicRows() DynamicRowReader + func (r *MergedRowGroup) String() string + type NilColumnChunk struct + func NewNilColumnChunk(typ parquet.Type, columnIndex, numValues int) *NilColumnChunk + func (c *NilColumnChunk) BloomFilter() parquet.BloomFilter + func (c *NilColumnChunk) Column() int + func (c *NilColumnChunk) ColumnIndex() (parquet.ColumnIndex, error) + func (c *NilColumnChunk) NumValues() int64 + func (c *NilColumnChunk) OffsetIndex() (parquet.OffsetIndex, error) + func (c *NilColumnChunk) Pages() parquet.Pages + func (c *NilColumnChunk) Type() parquet.Type + type ParquetWriter interface + Close func() error + Flush func() error + Reset func(writer io.Writer) + Schema func() *parquet.Schema + Write func(rows []any) (int, error) + WriteRows func(rows []parquet.Row) (int, error) + type PooledBuffer struct + type PooledParquetSchema struct + Schema *parquet.Schema + type PooledWriter struct + type Sample = samples.Sample + type Samples = samples.Samples + type Schema struct + UniquePrimaryIndex bool + func NewSampleSchema() *Schema + func SchemaFromDefinition(msg proto.Message) (*Schema, error) + func SchemaFromParquetFile(file *parquet.File) (*Schema, error) + func (s *Schema) Cmp(a, b *DynamicRow) int + func (s *Schema) ColumnByName(name string) (ColumnDefinition, bool) + func (s *Schema) ColumnDefinitionsForSortingColumns() []ColumnDefinition + func (s *Schema) Columns() []ColumnDefinition + func (s *Schema) Definition() proto.Message + func (s *Schema) FindColumn(column string) (ColumnDefinition, bool) + func (s *Schema) FindDynamicColumn(dynamicColumnName string) (ColumnDefinition, bool) + func (s *Schema) FindDynamicColumnForConcreteColumn(column string) (ColumnDefinition, bool) + func (s *Schema) GetBuffer(dynamicColumns map[string][]string) (*PooledBuffer, error) + func (s *Schema) GetDynamicParquetSchema(dynamicColumns map[string][]string) (*PooledParquetSchema, error) + func (s *Schema) GetParquetSortingSchema(dynamicColumns map[string][]string) (*PooledParquetSchema, error) + func (s *Schema) GetWriter(w io.Writer, dynamicColumns map[string][]string, sorting bool) (*PooledWriter, error) + func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup, options ...MergeOption) (DynamicRowGroup, error) + func (s *Schema) Name() string + func (s *Schema) NewBuffer(dynamicColumns map[string][]string) (*Buffer, error) + func (s *Schema) NewBufferV2(dynamicColumns ...*schemav2pb.Node) (*Buffer, error) + func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string, sorting bool, ...) (ParquetWriter, error) + func (s *Schema) ParquetSchema() *parquet.Schema + func (s *Schema) PutBuffer(b *PooledBuffer) + func (s *Schema) PutPooledParquetSchema(ps *PooledParquetSchema) + func (s *Schema) PutWriter(w *PooledWriter) + func (s *Schema) ResetBuffers() + func (s *Schema) ResetWriters() + func (s *Schema) RowLessThan(a, b *DynamicRow) bool + func (s *Schema) SerializeBuffer(w io.Writer, buffer *Buffer) error + func (s *Schema) SortingColumns() []SortingColumn + func (s Schema) ParquetSortingColumns(dynamicColumns map[string][]string) []parquet.SortingColumn + type SerializedBuffer struct + func NewSerializedBuffer(f *parquet.File) (*SerializedBuffer, error) + func ReaderFromBytes(buf []byte) (*SerializedBuffer, error) + func (b *SerializedBuffer) DynamicColumns() map[string][]string + func (b *SerializedBuffer) DynamicRowGroup(i int) DynamicRowGroup + func (b *SerializedBuffer) DynamicRows() DynamicRowReader + func (b *SerializedBuffer) MultiDynamicRowGroup() DynamicRowGroup + func (b *SerializedBuffer) NumRowGroups() int + func (b *SerializedBuffer) NumRows() int64 + func (b *SerializedBuffer) ParquetFile() *parquet.File + func (b *SerializedBuffer) Reader() *parquet.GenericReader[any] + func (b *SerializedBuffer) String() string + type SortingColumn interface + ColumnName func() string + func Ascending(column string) SortingColumn + func Descending(column string) SortingColumn + func NullsFirst(sortingColumn SortingColumn) SortingColumn + type StorageLayout interface + GetCompressionInt32 func() int32 + GetEncodingInt32 func() int32 + GetNullable func() bool + GetRepeated func() bool + GetTypeInt32 func() int32 + func StorageLayoutWrapper(_ *schemav2pb.StorageLayout) StorageLayout