Documentation ¶
Index ¶
- Constants
- Variables
- func FieldByName(schema *parquet.Schema, name string) parquet.Field
- func FindChildIndex(schema *parquet.Schema, name string) int
- func ValuesForIndex(row parquet.Row, index int) []parquet.Value
- type Buffer
- func (b *Buffer) Clone() (*Buffer, error)
- func (b *Buffer) ColumnChunks() []parquet.ColumnChunk
- func (b *Buffer) DynamicColumns() map[string][]string
- func (b *Buffer) DynamicRows() DynamicRowReader
- func (b *Buffer) NumRows() int64
- func (b *Buffer) Rows() parquet.Rows
- func (b *Buffer) Schema() *parquet.Schema
- func (b *Buffer) Sort()
- func (b *Buffer) SortingColumns() []parquet.SortingColumn
- func (b *Buffer) WriteRowGroup(rg parquet.RowGroup) (int64, error)
- func (b *Buffer) WriteRows(rows []parquet.Row) (int, error)
- type ColumnDefinition
- type DynamicRow
- type DynamicRowGroup
- type DynamicRowReader
- type DynamicRows
- type Label
- type MergedRowGroup
- type NilColumnChunk
- func (c *NilColumnChunk) BloomFilter() parquet.BloomFilter
- func (c *NilColumnChunk) Column() int
- func (c *NilColumnChunk) ColumnIndex() parquet.ColumnIndex
- func (c *NilColumnChunk) NumValues() int64
- func (c *NilColumnChunk) OffsetIndex() parquet.OffsetIndex
- func (c *NilColumnChunk) Pages() parquet.Pages
- func (c *NilColumnChunk) Type() parquet.Type
- type Sample
- type Samples
- type Schema
- func (s *Schema) ColumnByName(name string) (ColumnDefinition, bool)
- func (s *Schema) Columns() []ColumnDefinition
- func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup) (DynamicRowGroup, error)
- func (s *Schema) NewBuffer(dynamicColumns map[string][]string) (*Buffer, error)
- func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string) (*parquet.Writer, error)
- func (s *Schema) RowLessThan(a, b *DynamicRow) bool
- func (s *Schema) SerializeBuffer(buffer *Buffer) ([]byte, error)
- type SerializedBuffer
- func (b *SerializedBuffer) DynamicColumns() map[string][]string
- func (b *SerializedBuffer) DynamicRowGroup(i int) DynamicRowGroup
- func (b *SerializedBuffer) DynamicRows() DynamicRowReader
- func (b *SerializedBuffer) NumRowGroups() int
- func (b *SerializedBuffer) NumRows() int64
- func (b *SerializedBuffer) ParquetFile() *parquet.File
- func (b *SerializedBuffer) Reader() *parquet.Reader
- type SortingColumn
Constants ¶
const (
// The size of the column indices in parquet files.
ColumnIndexSize = 16
)
const (
DynamicColumnsKey = "dynamic_columns"
)
Variables ¶
var ErrMalformedDynamicColumns = errors.New("malformed dynamic columns string")
var ErrNoDynamicColumns = errors.New("no dynamic columns metadata found, it must be present")
Functions ¶
func FieldByName ¶
func FieldByName(schema *parquet.Schema, name string) parquet.Field
func FindChildIndex ¶
func ValuesForIndex ¶
func ValuesForIndex(row parquet.Row, index int) []parquet.Value
Types ¶
type Buffer ¶
type Buffer struct {
// contains filtered or unexported fields
}
Buffer represents a batch of rows with a concrete set of dynamic column names representing how its parquet schema was created off of a dynamic parquet schema.
func (*Buffer) ColumnChunks ¶
func (b *Buffer) ColumnChunks() []parquet.ColumnChunk
ColumnChunks returns the list of parquet.ColumnChunk of the buffer. Each chunk contains all the pages associated with one of this row group's columns. Implements the parquet.RowGroup interface.
func (*Buffer) DynamicColumns ¶
DynamicColumns returns the concrete dynamic column names of the buffer. It implements the DynamicRowGroup interface.
func (*Buffer) DynamicRows ¶
func (b *Buffer) DynamicRows() DynamicRowReader
DynamicRows returns an iterator for the rows in the buffer. It implements the DynamicRowGroup interface.
func (*Buffer) NumRows ¶
NumRows returns the number of rows in the buffer. Implements the parquet.RowGroup interface.
func (*Buffer) Rows ¶
func (b *Buffer) Rows() parquet.Rows
Rows returns an iterator for the rows in the buffer. It implements the parquet.RowGroup interface.
func (*Buffer) Schema ¶
func (b *Buffer) Schema() *parquet.Schema
Schema returns the concrete parquet.Schema of the buffer. Implements the parquet.RowGroup interface.
func (*Buffer) SortingColumns ¶
func (b *Buffer) SortingColumns() []parquet.SortingColumn
SortingColumns returns the concrete slice of parquet.SortingColumns of the buffer. Implements the parquet.RowGroup interface.
func (*Buffer) WriteRowGroup ¶
WriteRowGroup writes the given row group to the buffer.
type ColumnDefinition ¶
ColumnDefinition describes a column in a dynamic parquet schema.
type DynamicRow ¶
type DynamicRowGroup ¶
type DynamicRowGroup interface { parquet.RowGroup // DynamicColumns returns the concrete dynamic column names that were used // to create its concrete parquet schema with a dynamic parquet schema. DynamicColumns() map[string][]string // DynamicRows returns an iterator over the rows in the row group. DynamicRows() DynamicRowReader }
DynamicRowGroup is a parquet.RowGroup that can describe the concrete dynamic columns.
func Concat ¶
func Concat(drg ...DynamicRowGroup) DynamicRowGroup
type DynamicRowReader ¶
type DynamicRowReader interface { ReadRows(*DynamicRows) (int, error) Close() error }
DynamicRowReader is an iterator over the rows in a DynamicRowGroup.
type DynamicRows ¶
type DynamicRows struct { Rows []parquet.Row Schema *parquet.Schema DynamicColumns map[string][]string }
func (*DynamicRows) Get ¶
func (r *DynamicRows) Get(i int) *DynamicRow
func (*DynamicRows) GetCopy ¶
func (r *DynamicRows) GetCopy(i int) *DynamicRow
type MergedRowGroup ¶
MergedRowGroup allows wrapping any parquet.RowGroup to implement the DynamicRowGroup interface by specifying the concrete dynamic column names the RowGroup's schema contains.
func (*MergedRowGroup) DynamicColumns ¶
func (r *MergedRowGroup) DynamicColumns() map[string][]string
DynamicColumns returns the concrete dynamic column names that were used to create its concrete parquet schema with a dynamic parquet schema. Implements the DynamicRowGroup interface.
func (*MergedRowGroup) DynamicRows ¶
func (r *MergedRowGroup) DynamicRows() DynamicRowReader
DynamicRows returns an iterator over the rows in the row group. Implements the DynamicRowGroup interface.
type NilColumnChunk ¶
type NilColumnChunk struct {
// contains filtered or unexported fields
}
NilColumnChunk is a column chunk that contains a single page with all null values of the given type, given length and column index of the parent schema. It implements the parquet.ColumnChunk interface.
func NewNilColumnChunk ¶
func NewNilColumnChunk(typ parquet.Type, columnIndex, numValues int) *NilColumnChunk
NewNilColumnChunk creates a new column chunk configured with the given type, column index and number of values in the page.
func (*NilColumnChunk) BloomFilter ¶
func (c *NilColumnChunk) BloomFilter() parquet.BloomFilter
BloomFilter returns the bloom filter of the column chunk. Since the NilColumnChunk is a virtual column chunk only for in-memory purposes, it returns nil. Implements the parquet.ColumnChunk interface.
func (*NilColumnChunk) Column ¶
func (c *NilColumnChunk) Column() int
Column returns the index of the column chunk within the parent schema. Implements the parquet.ColumnChunk interface.
func (*NilColumnChunk) ColumnIndex ¶
func (c *NilColumnChunk) ColumnIndex() parquet.ColumnIndex
ColumnIndex returns the column index of the column chunk. Since the NilColumnChunk is a virtual column chunk only for in-memory purposes, it returns nil. Implements the parquet.ColumnChunk interface.
func (*NilColumnChunk) NumValues ¶
func (c *NilColumnChunk) NumValues() int64
NumValues returns the number of values in the column chunk. Implements the parquet.ColumnChunk interface.
func (*NilColumnChunk) OffsetIndex ¶
func (c *NilColumnChunk) OffsetIndex() parquet.OffsetIndex
OffsetIndex returns the offset index of the column chunk. Since the NilColumnChunk is a virtual column chunk only for in-memory purposes, it returns nil. Implements the parquet.ColumnChunk interface.
func (*NilColumnChunk) Pages ¶
func (c *NilColumnChunk) Pages() parquet.Pages
Pages returns an iterator for all pages within the column chunk. This iterator will only ever return a single page filled with all null values of the configured amount. Implements the parquet.ColumnChunk interface.
func (*NilColumnChunk) Type ¶
func (c *NilColumnChunk) Type() parquet.Type
Type returns the type of the column chunk. Implements the parquet.ColumnChunk interface.
type Sample ¶
type Sample struct { ExampleType string Labels []Label Stacktrace []uuid.UUID Timestamp int64 Value int64 }
func (Sample) ToParquetRow ¶
type Samples ¶
type Samples []Sample
func NewTestSamples ¶
func NewTestSamples() Samples
func (Samples) SampleLabelNames ¶
type Schema ¶
type Schema struct {
// contains filtered or unexported fields
}
Schema is a dynamic parquet schema. It extends a parquet schema so that any column definition that is dynamic has its concrete columns created dynamically as each column name is seen for the first time.
func NewSampleSchema ¶
func NewSampleSchema() *Schema
func NewSchema ¶
func NewSchema( name string, columns []ColumnDefinition, sortingColumns []SortingColumn, ) *Schema
NewSchema creates a new dynamic parquet schema with the given name, column definitions and sorting columns. The order of the sorting columns is important as it determines the order in which data is written to a file or laid out in memory.
func (*Schema) ColumnByName ¶
func (s *Schema) ColumnByName(name string) (ColumnDefinition, bool)
func (*Schema) Columns ¶
func (s *Schema) Columns() []ColumnDefinition
func (*Schema) MergeDynamicRowGroups ¶
func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup) (DynamicRowGroup, error)
MergeDynamicRowGroups merges the given dynamic row groups into a single dynamic row group. It merges the parquet schema in a non-conflicting way by merging all the concrete dynamic column names and generating a superset parquet schema that all given dynamic row groups are compatible with.
func (*Schema) NewBuffer ¶
NewBuffer returns a new buffer with a concrete parquet schema generated using the given concrete dynamic column names.
func (*Schema) NewWriter ¶
func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string) (*parquet.Writer, error)
NewWriter returns a new parquet writer with a concrete parquet schema generated using the given concrete dynamic column names.
func (*Schema) RowLessThan ¶
func (s *Schema) RowLessThan(a, b *DynamicRow) bool
type SerializedBuffer ¶
type SerializedBuffer struct {
// contains filtered or unexported fields
}
func NewSerializedBuffer ¶
func NewSerializedBuffer(f *parquet.File) (*SerializedBuffer, error)
func ReaderFromBytes ¶
func ReaderFromBytes(buf []byte) (*SerializedBuffer, error)
func (*SerializedBuffer) DynamicColumns ¶
func (b *SerializedBuffer) DynamicColumns() map[string][]string
func (*SerializedBuffer) DynamicRowGroup ¶
func (b *SerializedBuffer) DynamicRowGroup(i int) DynamicRowGroup
func (*SerializedBuffer) DynamicRows ¶
func (b *SerializedBuffer) DynamicRows() DynamicRowReader
func (*SerializedBuffer) NumRowGroups ¶
func (b *SerializedBuffer) NumRowGroups() int
func (*SerializedBuffer) NumRows ¶
func (b *SerializedBuffer) NumRows() int64
func (*SerializedBuffer) ParquetFile ¶
func (b *SerializedBuffer) ParquetFile() *parquet.File
func (*SerializedBuffer) Reader ¶
func (b *SerializedBuffer) Reader() *parquet.Reader
type SortingColumn ¶
type SortingColumn interface {
parquet.SortingColumn
ColumnName() string
}
SortingColumn describes a column to sort by in a dynamic parquet schema.
func Ascending ¶
func Ascending(column string) SortingColumn
Ascending constructs a SortingColumn value which dictates to sort by the column in ascending order.
func Descending ¶
func Descending(column string) SortingColumn
Descending constructs a SortingColumn value which dictates to sort by the column in descending order.
func NullsFirst ¶
func NullsFirst(sortingColumn SortingColumn) SortingColumn
NullsFirst wraps the SortingColumn passed as argument so that it instructs the row group to place null values first in the column.