Documentation ¶
Index ¶
- Constants
- func AddMetricsToContext(ctx context.Context, m *Metrics) context.Context
- func CompareRowNumbers(upToDefinitionLevel int, a, b RowNumber) int
- func GetColumnIndexByPath(pf *pq.File, s string) (index, depth int)
- func HasColumn(pf *pq.File, s string) bool
- func NewMultiRepeatedPageIterator[T any](iters ...iter.Iterator[*RepeatedRow[T]]) iter.Iterator[*MultiRepeatedRow[T]]
- func NewRepeatedPageIterator[T any](ctx context.Context, rows iter.Iterator[T], rgs []parquet.RowGroup, column int, ...) iter.Iterator[*RepeatedRow[T]]
- type BinaryJoinIterator
- type EqualInt64Predicate
- type GroupPredicate
- type InstrumentedPredicate
- type IntBetweenPredicate
- type Iterator
- type IteratorResult
- func (r *IteratorResult) Append(rr *IteratorResult)
- func (r *IteratorResult) AppendValue(k string, v parquet.Value)
- func (r *IteratorResult) Columns(buffer [][]parquet.Value, names ...string) [][]parquet.Value
- func (r *IteratorResult) Reset()
- func (r *IteratorResult) ToMap() map[string][]parquet.Value
- type KeyValueGroupPredicate
- type Metrics
- type MultiRepeatedRow
- type Predicate
- type RepeatedRow
- type RowGetter
- type RowNumber
- type RowNumberIterator
- type RowNumberWithDefinitionLevel
- type StringInPredicate
- type SubstringPredicate
- type SyncIterator
- type UnionIterator
Constants ¶
const MaxDefinitionLevel = 5
Variables ¶
This section is empty.
Functions ¶
func AddMetricsToContext ¶
func CompareRowNumbers ¶
CompareRowNumbers compares the sequences of row numbers in a and b for partial equality, descending from top-level through the given definition level. For example, definition level 1 means that row numbers are compared at two levels of nesting, the top-level and 1 level of nesting below.
func NewMultiRepeatedPageIterator ¶
func NewMultiRepeatedPageIterator[T any](iters ...iter.Iterator[*RepeatedRow[T]]) iter.Iterator[*MultiRepeatedRow[T]]
NewMultiRepeatedPageIterator returns an iterator that iterates over the values of repeated columns nested together. Each column is iterated over in parallel. If one column is finished, the iterator will return false.
func NewRepeatedPageIterator ¶
func NewRepeatedPageIterator[T any]( ctx context.Context, rows iter.Iterator[T], rgs []parquet.RowGroup, column int, readSize int, ) iter.Iterator[*RepeatedRow[T]]
NewRepeatedPageIterator returns an iterator that iterates over the repeated values in a column. The iterator can only seek forward and so rows should be sorted by row number.
Types ¶
type BinaryJoinIterator ¶
type BinaryJoinIterator struct {
// contains filtered or unexported fields
}
func NewBinaryJoinIterator ¶
func NewBinaryJoinIterator(definitionLevel int, left, right Iterator) *BinaryJoinIterator
func (*BinaryJoinIterator) At ¶
func (bj *BinaryJoinIterator) At() *IteratorResult
func (*BinaryJoinIterator) Close ¶
func (bj *BinaryJoinIterator) Close() error
func (*BinaryJoinIterator) Err ¶
func (bj *BinaryJoinIterator) Err() error
func (*BinaryJoinIterator) Next ¶
func (bj *BinaryJoinIterator) Next() bool
func (*BinaryJoinIterator) Seek ¶
func (bj *BinaryJoinIterator) Seek(to RowNumberWithDefinitionLevel) bool
type EqualInt64Predicate ¶
type EqualInt64Predicate int64
func NewEqualInt64Predicate ¶
func NewEqualInt64Predicate(value int64) EqualInt64Predicate
func (EqualInt64Predicate) KeepColumnChunk ¶
func (p EqualInt64Predicate) KeepColumnChunk(c pq.ColumnChunk) bool
type GroupPredicate ¶
type GroupPredicate interface {
KeepGroup(*IteratorResult) bool
}
type InstrumentedPredicate ¶
type InstrumentedPredicate struct { InspectedColumnChunks atomic.Int64 InspectedPages atomic.Int64 InspectedValues atomic.Int64 KeptColumnChunks atomic.Int64 KeptPages atomic.Int64 KeptValues atomic.Int64 // contains filtered or unexported fields }
func (*InstrumentedPredicate) KeepColumnChunk ¶
func (p *InstrumentedPredicate) KeepColumnChunk(c pq.ColumnChunk) bool
type IntBetweenPredicate ¶
type IntBetweenPredicate struct {
// contains filtered or unexported fields
}
IntBetweenPredicate checks for an int between the bounds [min,max], inclusive.
func NewIntBetweenPredicate ¶
func NewIntBetweenPredicate(min, max int64) *IntBetweenPredicate
func (*IntBetweenPredicate) KeepColumnChunk ¶
func (p *IntBetweenPredicate) KeepColumnChunk(c pq.ColumnChunk) bool
type Iterator ¶
type Iterator = iter.SeekIterator[*IteratorResult, RowNumberWithDefinitionLevel]
Iterator is the interface that every iterator follows, which allows iterators to be composed.
func NewErrIterator ¶
type IteratorResult ¶
type IteratorResult struct { RowNumber RowNumber Entries []struct { V parquet.Value RowValue interface{} // contains filtered or unexported fields } }
IteratorResult is a row of data with a row number and named columns of data. Internally it has an unstructured list for efficient collection. The ToMap() function can be used to make inspection easier.
func (*IteratorResult) Append ¶
func (r *IteratorResult) Append(rr *IteratorResult)
func (*IteratorResult) AppendValue ¶
func (r *IteratorResult) AppendValue(k string, v parquet.Value)
func (*IteratorResult) Columns ¶
func (r *IteratorResult) Columns(buffer [][]parquet.Value, names ...string) [][]parquet.Value
Columns gets the values for each named column. The order of returned values matches the order of names given. This is more efficient than converting to a map.
func (*IteratorResult) Reset ¶
func (r *IteratorResult) Reset()
func (*IteratorResult) ToMap ¶
func (r *IteratorResult) ToMap() map[string][]parquet.Value
ToMap converts the unstructured list of data into a map containing an entry for each column, and the lists of values. The order of columns is not preserved, but the order of values within each column is.
type KeyValueGroupPredicate ¶
type KeyValueGroupPredicate struct {
// contains filtered or unexported fields
}
KeyValueGroupPredicate takes key/value pairs and checks if the group contains all of them. This is the only predicate/iterator that is knowledgeable about our trace or search contents. I'd like to change that and make it generic, but it's quite complex and I have not figured it out yet.
func NewKeyValueGroupPredicate ¶
func NewKeyValueGroupPredicate(keys, values []string) *KeyValueGroupPredicate
func (*KeyValueGroupPredicate) KeepGroup ¶
func (a *KeyValueGroupPredicate) KeepGroup(group *IteratorResult) bool
KeepGroup checks if the given group contains all of the requested key/value pairs.
type Metrics ¶
type Metrics struct {
// contains filtered or unexported fields
}
func NewMetrics ¶
func NewMetrics(reg prometheus.Registerer) *Metrics
type MultiRepeatedRow ¶
type MultiRepeatedRow[T any] struct { Row T Values [][]parquet.Value }
type Predicate ¶
type Predicate interface { KeepColumnChunk(cc pq.ColumnChunk) bool KeepPage(page pq.Page) bool KeepValue(pq.Value) bool }
Predicate is a pushdown predicate that can be applied at the chunk, page, and value levels.
func NewMapPredicate ¶
func NewMapPredicate[K constraints.Integer, V any](m map[K]V) Predicate
func NewStringInPredicate ¶
type RepeatedRow ¶
type RepeatedRow[T any] struct { Row T Values []parquet.Value }
type RowNumber ¶
type RowNumber [MaxDefinitionLevel + 1]int64
RowNumber is the sequence of row numbers uniquely identifying a value in a tree of nested columns, starting at the top-level and including another row number for each level of nesting. -1 is a placeholder for undefined at lower levels. RowNumbers can be compared for full equality using the == operator, or can be compared partially, looking for equal lineages down to a certain level. For example given the following tree, the row numbers would be:
A          0, -1, -1
  B        0,  0, -1
  C        0,  1, -1
    D      0,  1,  0
  E        0,  2, -1
Currently supports 6 levels of nesting which should be enough for anybody. :)
func EmptyRowNumber ¶
func EmptyRowNumber() RowNumber
EmptyRowNumber creates an empty invalid row number.
func MaxRowNumber ¶
func MaxRowNumber() RowNumber
MaxRowNumber is a helper that represents the maximum(-ish) representable value.
func TruncateRowNumber ¶
func TruncateRowNumber(t RowNumberWithDefinitionLevel) RowNumber
func (*RowNumber) Next ¶
Next increments and resets the row numbers according to the given repetition and definition levels. Examples from the Dremel whitepaper: https://storage.googleapis.com/pub-tools-public-publication-data/pdf/36632.pdf
Name.Language.Country
value  | r | d | expected RowNumber
-------|---|---|-------------------
       |   |   | { -1, -1, -1, -1 } <-- starting position
us     | 0 | 3 | { 0, 0, 0, 0 }
null   | 2 | 2 | { 0, 0, 1, -1 }
null   | 1 | 1 | { 0, 1, -1, -1 }
gb     | 1 | 3 | { 0, 2, 0, 0 }
null   | 0 | 1 | { 1, 0, -1, -1 }
func (RowNumber) Preceding ¶
Preceding returns the largest representable row number that is immediately prior to this one. Think of it like math.NextAfter but for segmented row numbers. Examples:
RowNumber 1000.0.0 (defined at 3 levels) is preceded by 999.max.max
RowNumber 1000.-1.-1 (defined at 1 level) is preceded by 999.-1.-1
type RowNumberIterator ¶
func NewRowNumberIterator ¶
func NewRowNumberIterator[T any](iter iter.Iterator[T]) *RowNumberIterator[T]
func (*RowNumberIterator[T]) At ¶
func (r *RowNumberIterator[T]) At() *IteratorResult
func (*RowNumberIterator[T]) Err ¶
func (r *RowNumberIterator[T]) Err() error
func (*RowNumberIterator[T]) Next ¶
func (r *RowNumberIterator[T]) Next() bool
func (*RowNumberIterator[T]) Seek ¶
func (r *RowNumberIterator[T]) Seek(to RowNumberWithDefinitionLevel) bool
type StringInPredicate ¶
type StringInPredicate struct {
// contains filtered or unexported fields
}
StringInPredicate checks for any of the given strings.
func (*StringInPredicate) KeepColumnChunk ¶
func (p *StringInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool
type SubstringPredicate ¶
type SubstringPredicate struct {
// contains filtered or unexported fields
}
func NewSubstringPredicate ¶
func NewSubstringPredicate(substring string) *SubstringPredicate
func (*SubstringPredicate) KeepColumnChunk ¶
func (p *SubstringPredicate) KeepColumnChunk(_ pq.ColumnChunk) bool
type SyncIterator ¶
type SyncIterator struct {
// contains filtered or unexported fields
}
SyncIterator is a synchronous column iterator. It scans through the given row groups and column, and applies the optional predicate to each chunk, page, and value. Results are read by calling Next() until it returns false.
func NewSyncIterator ¶
func (*SyncIterator) At ¶
func (c *SyncIterator) At() *IteratorResult
func (*SyncIterator) Close ¶
func (c *SyncIterator) Close() error
func (*SyncIterator) Err ¶
func (c *SyncIterator) Err() error
func (*SyncIterator) Next ¶
func (c *SyncIterator) Next() bool
func (*SyncIterator) Seek ¶
func (c *SyncIterator) Seek(to RowNumberWithDefinitionLevel) bool
Seek moves this iterator to the next result that is greater than or equal to the given row number (and based on the given definition level).
type UnionIterator ¶
type UnionIterator struct {
// contains filtered or unexported fields
}
UnionIterator produces all results for all given iterators. When iterators align to the same row, based on the configured definition level, then the results are returned together. Else the next matching iterator is returned.
func NewUnionIterator ¶
func NewUnionIterator(definitionLevel int, iters []Iterator, pred GroupPredicate) *UnionIterator
func (*UnionIterator) At ¶
func (u *UnionIterator) At() *IteratorResult
func (*UnionIterator) Close ¶
func (u *UnionIterator) Close() error
func (*UnionIterator) Err ¶
func (u *UnionIterator) Err() error
func (*UnionIterator) Next ¶
func (u *UnionIterator) Next() bool
func (*UnionIterator) Seek ¶
func (u *UnionIterator) Seek(to RowNumberWithDefinitionLevel) bool