query

package
v1.11.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 19, 2024 License: AGPL-3.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
const MaxDefinitionLevel = 5

Variables

View Source
var ErrSeekOutOfRange = fmt.Errorf("bug: south row is out of range")

Functions

func AddMetricsToContext

func AddMetricsToContext(ctx context.Context, m *Metrics) context.Context

func CloneParquetValues added in v1.2.0

func CloneParquetValues(values []parquet.Value) []parquet.Value

func CloneUint32ParquetValues added in v1.2.0

func CloneUint32ParquetValues(values []parquet.Value) []uint32

func CloneUint64ParquetValues added in v1.2.0

func CloneUint64ParquetValues(values []parquet.Value) []uint64

func CompareRowNumbers

func CompareRowNumbers(upToDefinitionLevel int, a, b RowNumber) int

CompareRowNumbers compares the sequences of row numbers in a and b for partial equality, descending from top-level through the given definition level. For example, definition level 1 means that row numbers are compared at two levels of nesting, the top-level and 1 level of nesting below.

func GetColumnIndexByPath

func GetColumnIndexByPath(root *parquet.Column, s string) (index, depth int)

func HasColumn

func HasColumn(root *parquet.Column, s string) bool

func NewMultiColumnIterator added in v1.2.0

func NewMultiColumnIterator(
	ctx context.Context,
	rows iter.Iterator[int64],
	batchSize int,
	rowGroups []parquet.RowGroup,
	columns ...int,
) iter.Iterator[[][]parquet.Value]

func NewRepeatedRowColumnIterator added in v1.2.0

func NewRepeatedRowColumnIterator(ctx context.Context, rows iter.Iterator[int64], rgs []parquet.RowGroup, column int) iter.Iterator[[]parquet.Value]

func NewRepeatedRowIterator added in v1.2.0

func NewRepeatedRowIterator[T any](
	ctx context.Context,
	rows iter.Iterator[T],
	rowGroups []parquet.RowGroup,
	columns ...int,
) iter.Iterator[RepeatedRow[T]]

func NewRepeatedRowIteratorBatchSize added in v1.11.0

func NewRepeatedRowIteratorBatchSize[T any](
	ctx context.Context,
	rows iter.Iterator[T],
	rowGroups []parquet.RowGroup,
	batchSize int64,
	columns ...int,
) iter.Iterator[RepeatedRow[T]]

func ReleaseParquetValues added in v1.2.0

func ReleaseParquetValues(b [][]parquet.Value)

func ReleaseUint32Values added in v1.2.0

func ReleaseUint32Values(b [][]uint32)

func ReleaseUint64Values added in v1.2.0

func ReleaseUint64Values(b [][]uint64)

func RowGroupBoundaries added in v1.2.0

func RowGroupBoundaries(groups []parquet.RowGroup) []int64

func SplitRows added in v1.2.0

func SplitRows(rows, groups []int64) [][]int64

func WrapWithRowNumber added in v1.2.0

func WrapWithRowNumber[T any](it iter.Iterator[T]) iter.Iterator[int64]

Types

type BinaryJoinIterator

type BinaryJoinIterator struct {
	// contains filtered or unexported fields
}

func NewBinaryJoinIterator

func NewBinaryJoinIterator(definitionLevel int, left, right Iterator) *BinaryJoinIterator

func (*BinaryJoinIterator) At

func (*BinaryJoinIterator) Close

func (bj *BinaryJoinIterator) Close() error

func (*BinaryJoinIterator) Err

func (bj *BinaryJoinIterator) Err() error

func (*BinaryJoinIterator) Next

func (bj *BinaryJoinIterator) Next() bool

func (*BinaryJoinIterator) Seek

type EqualInt64Predicate

type EqualInt64Predicate int64

func NewEqualInt64Predicate

func NewEqualInt64Predicate(value int64) EqualInt64Predicate

func (EqualInt64Predicate) KeepColumnChunk

func (p EqualInt64Predicate) KeepColumnChunk(ci pq.ColumnIndex) bool

func (EqualInt64Predicate) KeepPage

func (p EqualInt64Predicate) KeepPage(page pq.Page) bool

func (EqualInt64Predicate) KeepValue

func (p EqualInt64Predicate) KeepValue(v pq.Value) bool

type GroupPredicate

type GroupPredicate interface {
	KeepGroup(*IteratorResult) bool
}

type InstrumentedPredicate

type InstrumentedPredicate struct {
	InspectedColumnChunks atomic.Int64
	InspectedPages        atomic.Int64
	InspectedValues       atomic.Int64
	KeptColumnChunks      atomic.Int64
	KeptPages             atomic.Int64
	KeptValues            atomic.Int64
	// contains filtered or unexported fields
}

func (*InstrumentedPredicate) KeepColumnChunk

func (p *InstrumentedPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool

func (*InstrumentedPredicate) KeepPage

func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool

func (*InstrumentedPredicate) KeepValue

func (p *InstrumentedPredicate) KeepValue(v pq.Value) bool

type IntBetweenPredicate

type IntBetweenPredicate struct {
	// contains filtered or unexported fields
}

IntBetweenPredicate checks for int between the bounds [min,max] inclusive

func NewIntBetweenPredicate

func NewIntBetweenPredicate(min, max int64) *IntBetweenPredicate

func (*IntBetweenPredicate) KeepColumnChunk

func (p *IntBetweenPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool

func (*IntBetweenPredicate) KeepPage

func (p *IntBetweenPredicate) KeepPage(page pq.Page) bool

func (*IntBetweenPredicate) KeepValue

func (p *IntBetweenPredicate) KeepValue(v pq.Value) bool

type Iterator

Iterator is the interface that every iterator follows, which allows iterators to be composed.

func NewErrIterator

func NewErrIterator(err error) Iterator

type IteratorResult

type IteratorResult struct {
	RowNumber RowNumber
	Entries   []struct {
		V        parquet.Value
		RowValue interface{}
		// contains filtered or unexported fields
	}
}

IteratorResult is a row of data with a row number and named columns of data. Internally it has an unstructured list for efficient collection. The ToMap() function can be used to make inspection easier.

func (*IteratorResult) Append

func (r *IteratorResult) Append(rr *IteratorResult)

func (*IteratorResult) AppendValue

func (r *IteratorResult) AppendValue(k string, v parquet.Value)

func (*IteratorResult) ColumnValue added in v1.4.0

func (r *IteratorResult) ColumnValue(name string) parquet.Value

func (*IteratorResult) Columns

func (r *IteratorResult) Columns(buffer [][]parquet.Value, names ...string) [][]parquet.Value

Columns gets the values for each named column. The order of returned values matches the order of names given. This is more efficient than converting to a map.

func (*IteratorResult) Reset

func (r *IteratorResult) Reset()

func (*IteratorResult) ToMap

func (r *IteratorResult) ToMap() map[string][]parquet.Value

ToMap converts the unstructured list of data into a map containing an entry for each column, and the lists of values. The order of columns is not preserved, but the order of values within each column is.

type KeyValueGroupPredicate

type KeyValueGroupPredicate struct {
	// contains filtered or unexported fields
}

KeyValueGroupPredicate takes key/value pairs and checks if the group contains all of them. This is the only predicate/iterator that is knowledgeable about our trace or search contents. I'd like to change that and make it generic, but it's quite complex and I have not figured it out yet.

func NewKeyValueGroupPredicate

func NewKeyValueGroupPredicate(keys, values []string) *KeyValueGroupPredicate

func (*KeyValueGroupPredicate) KeepGroup

func (a *KeyValueGroupPredicate) KeepGroup(group *IteratorResult) bool

KeepGroup checks if the given group contains all of the requested key/value pairs.

type Metrics

type Metrics struct {
	// contains filtered or unexported fields
}

func NewMetrics

func NewMetrics(reg prometheus.Registerer) *Metrics

func (*Metrics) Unregister added in v1.3.0

func (m *Metrics) Unregister()

type Predicate

type Predicate interface {
	KeepColumnChunk(ci pq.ColumnIndex) bool
	KeepPage(page pq.Page) bool
	KeepValue(pq.Value) bool
}

Predicate is a pushdown predicate that can be applied at the chunk, page, and value levels.

func NewMapPredicate

func NewMapPredicate[K constraints.Integer, V any](m map[K]V) Predicate

func NewStringInPredicate

func NewStringInPredicate(ss []string) Predicate

type RepeatedRow

type RepeatedRow[T any] struct {
	Row    T
	Values [][]parquet.Value
}

type RowGetter

type RowGetter interface {
	RowNumber() int64
}

type RowNumber

type RowNumber [MaxDefinitionLevel + 1]int64

RowNumber is the sequence of row numbers uniquely identifying a value in a tree of nested columns, starting at the top-level and including another row number for each level of nesting. -1 is a placeholder for undefined at lower levels. RowNumbers can be compared for full equality using the == operator, or can be compared partially, looking for equal lineages down to a certain level. For example, given the following tree, the row numbers would be:

A          0, -1, -1
  B        0,  0, -1
  C        0,  1, -1
    D      0,  1,  0
  E        0,  2, -1

Currently supports 6 levels of nesting which should be enough for anybody. :)

func EmptyRowNumber

func EmptyRowNumber() RowNumber

EmptyRowNumber creates an empty invalid row number.

func MaxRowNumber

func MaxRowNumber() RowNumber

MaxRowNumber is a helper that represents the maximum(-ish) representable value.

func (*RowNumber) Next

func (t *RowNumber) Next(repetitionLevel, definitionLevel int)

Next increments and resets the row numbers according to the given repetition and definition levels. Examples from the Dremel whitepaper: https://storage.googleapis.com/pub-tools-public-publication-data/pdf/36632.pdf

Name.Language.Country

	value  | r | d | expected RowNumber
	-------|---|---|-------------------
	       |   |   | { -1, -1, -1, -1 }  <-- starting position
	us     | 0 | 3 | {  0,  0,  0,  0 }
	null   | 2 | 2 | {  0,  0,  1, -1 }
	null   | 1 | 1 | {  0,  1, -1, -1 }
	gb     | 1 | 3 | {  0,  2,  0,  0 }
	null   | 0 | 1 | {  1,  0, -1, -1 }

func (RowNumber) Preceding

func (t RowNumber) Preceding() RowNumber

Preceding returns the largest representable row number that is immediately prior to this one. Think of it like math.NextAfter but for segmented row numbers. Examples:

	RowNumber 1000.0.0 (defined at 3 levels) is preceded by 999.max.max
	RowNumber 1000.-1.-1 (defined at 1 level) is preceded by 999.-1.-1

func (*RowNumber) Skip

func (t *RowNumber) Skip(numRows int64)

Skip rows at the root-level.

func (*RowNumber) Valid

func (t *RowNumber) Valid() bool

type RowNumberIterator

type RowNumberIterator[T any] struct {
	iter.Iterator[T]
	// contains filtered or unexported fields
}

func NewRowNumberIterator

func NewRowNumberIterator[T any](iter iter.Iterator[T]) *RowNumberIterator[T]

func (*RowNumberIterator[T]) At

func (r *RowNumberIterator[T]) At() *IteratorResult

func (*RowNumberIterator[T]) Err

func (r *RowNumberIterator[T]) Err() error

func (*RowNumberIterator[T]) Next

func (r *RowNumberIterator[T]) Next() bool

func (*RowNumberIterator[T]) Seek

type RowNumberWithDefinitionLevel

type RowNumberWithDefinitionLevel struct {
	RowNumber       RowNumber
	DefinitionLevel int
}

type StringInPredicate

type StringInPredicate struct {
	// contains filtered or unexported fields
}

StringInPredicate checks for any of the given strings.

func (*StringInPredicate) KeepColumnChunk

func (p *StringInPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool

func (*StringInPredicate) KeepPage

func (p *StringInPredicate) KeepPage(page pq.Page) bool

func (*StringInPredicate) KeepValue

func (p *StringInPredicate) KeepValue(v pq.Value) bool

type SubstringPredicate

type SubstringPredicate struct {
	// contains filtered or unexported fields
}

func NewSubstringPredicate

func NewSubstringPredicate(substring string) *SubstringPredicate

func (*SubstringPredicate) KeepColumnChunk

func (p *SubstringPredicate) KeepColumnChunk(_ pq.ColumnIndex) bool

func (*SubstringPredicate) KeepPage

func (p *SubstringPredicate) KeepPage(page pq.Page) bool

func (*SubstringPredicate) KeepValue

func (p *SubstringPredicate) KeepValue(v pq.Value) bool

type SyncIterator

type SyncIterator struct {
	// contains filtered or unexported fields
}

SyncIterator is a synchronous column iterator. It scans through the given row groups and column, and applies the optional predicate to each chunk, page, and value. Results are read by calling Next() until it returns false.

func NewSyncIterator

func NewSyncIterator(ctx context.Context, rgs []parquet.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string) *SyncIterator

func (*SyncIterator) At

func (c *SyncIterator) At() *IteratorResult

func (*SyncIterator) Close

func (c *SyncIterator) Close() error

func (*SyncIterator) Err

func (c *SyncIterator) Err() error

func (*SyncIterator) Next

func (c *SyncIterator) Next() bool

func (*SyncIterator) Seek

Seek moves this iterator to the next result that is greater than or equal to the given row number (and based on the given definition level).

type UnionIterator

type UnionIterator struct {
	// contains filtered or unexported fields
}

UnionIterator produces all results for all given iterators. When iterators align to the same row, based on the configured definition level, their results are returned together. Otherwise, the result of the next matching iterator is returned.

func NewUnionIterator

func NewUnionIterator(definitionLevel int, iters []Iterator, pred GroupPredicate) *UnionIterator

func (*UnionIterator) At

func (u *UnionIterator) At() *IteratorResult

func (*UnionIterator) Close

func (u *UnionIterator) Close() error

func (*UnionIterator) Err

func (u *UnionIterator) Err() error

func (*UnionIterator) Next

func (u *UnionIterator) Next() bool

func (*UnionIterator) Seek

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL