dynparquet

package

v0.0.0-...-969a4e3 Latest Latest Go to latest Published: Jun 16, 2022 License: Apache-2.0 Imports: 8 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/polarsignals/arcticdb

Documentation ¶

Index ¶

Constants
Variables
func FieldByName(schema *parquet.Schema, name string) parquet.Field
func FindChildIndex(schema *parquet.Schema, name string) int
func ValuesForIndex(row parquet.Row, index int) []parquet.Value
type Buffer
- func (b *Buffer) Clone() (*Buffer, error)
- func (b *Buffer) ColumnChunks() []parquet.ColumnChunk
- func (b *Buffer) DynamicColumns() map[string][]string
- func (b *Buffer) DynamicRows() DynamicRowReader
- func (b *Buffer) NumRows() int64
- func (b *Buffer) Rows() parquet.Rows
- func (b *Buffer) Schema() *parquet.Schema
- func (b *Buffer) Sort()
- func (b *Buffer) SortingColumns() []parquet.SortingColumn
- func (b *Buffer) WriteRowGroup(rg parquet.RowGroup) (int64, error)
- func (b *Buffer) WriteRows(rows []parquet.Row) (int, error)
type ColumnDefinition
type DynamicRow
type DynamicRowGroup
- func Concat(drg ...DynamicRowGroup) DynamicRowGroup
type DynamicRowReader
type DynamicRows
- func (r *DynamicRows) Get(i int) *DynamicRow
- func (r *DynamicRows) GetCopy(i int) *DynamicRow
type Label
type MergedRowGroup
- func (r *MergedRowGroup) DynamicColumns() map[string][]string
- func (r *MergedRowGroup) DynamicRows() DynamicRowReader
type NilColumnChunk
- func NewNilColumnChunk(typ parquet.Type, columnIndex, numValues int) *NilColumnChunk
- func (c *NilColumnChunk) BloomFilter() parquet.BloomFilter
- func (c *NilColumnChunk) Column() int
- func (c *NilColumnChunk) ColumnIndex() parquet.ColumnIndex
- func (c *NilColumnChunk) NumValues() int64
- func (c *NilColumnChunk) OffsetIndex() parquet.OffsetIndex
- func (c *NilColumnChunk) Pages() parquet.Pages
- func (c *NilColumnChunk) Type() parquet.Type
type Sample
- func (s Sample) ToParquetRow(labelNames []string) parquet.Row
type Samples
- func NewTestSamples() Samples
- func (s Samples) SampleLabelNames() []string
- func (s Samples) ToBuffer(schema *Schema) (*Buffer, error)
type Schema
- func NewSampleSchema() *Schema
- func NewSchema(name string, columns []ColumnDefinition, sortingColumns []SortingColumn) *Schema
- func (s *Schema) ColumnByName(name string) (ColumnDefinition, bool)
- func (s *Schema) Columns() []ColumnDefinition
- func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup) (DynamicRowGroup, error)
- func (s *Schema) NewBuffer(dynamicColumns map[string][]string) (*Buffer, error)
- func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string) (*parquet.Writer, error)
- func (s *Schema) RowLessThan(a, b *DynamicRow) bool
- func (s *Schema) SerializeBuffer(buffer *Buffer) ([]byte, error)
type SerializedBuffer
- func NewSerializedBuffer(f *parquet.File) (*SerializedBuffer, error)
- func ReaderFromBytes(buf []byte) (*SerializedBuffer, error)
- func (b *SerializedBuffer) DynamicColumns() map[string][]string
- func (b *SerializedBuffer) DynamicRowGroup(i int) DynamicRowGroup
- func (b *SerializedBuffer) DynamicRows() DynamicRowReader
- func (b *SerializedBuffer) NumRowGroups() int
- func (b *SerializedBuffer) NumRows() int64
- func (b *SerializedBuffer) ParquetFile() *parquet.File
- func (b *SerializedBuffer) Reader() *parquet.Reader
type SortingColumn
- func Ascending(column string) SortingColumn
- func Descending(column string) SortingColumn
- func NullsFirst(sortingColumn SortingColumn) SortingColumn

Constants ¶

View Source

const (
	// The size of the column indices in parquet files.
	ColumnIndexSize = 16
)

View Source

const (
	DynamicColumnsKey = "dynamic_columns"
)

Variables ¶

View Source

var ErrMalformedDynamicColumns = errors.New("malformed dynamic columns string")

View Source

var ErrNoDynamicColumns = errors.New("no dynamic columns metadata found, it must be present")

Functions ¶

func FieldByName ¶

func FieldByName(schema *parquet.Schema, name string) parquet.Field

func FindChildIndex ¶

func FindChildIndex(schema *parquet.Schema, name string) int

func ValuesForIndex ¶

func ValuesForIndex(row parquet.Row, index int) []parquet.Value

Types ¶

type Buffer ¶

type Buffer struct {
	// contains filtered or unexported fields
}

Buffer represents a batch of rows with a concrete set of dynamic column names representing how its parquet schema was created off of a dynamic parquet schema.

func (*Buffer) Clone ¶

func (b *Buffer) Clone() (*Buffer, error)

func (*Buffer) ColumnChunks ¶

func (b *Buffer) ColumnChunks() []parquet.ColumnChunk

ColumnChunks returns the list of parquet.ColumnChunk of the buffer. Each chunk contains all the pages associated with one column of this row group. Implements the parquet.RowGroup interface.

func (*Buffer) DynamicColumns ¶

func (b *Buffer) DynamicColumns() map[string][]string

DynamicColumns returns the concrete dynamic column names of the buffer. It implements the DynamicRowGroup interface.

func (*Buffer) DynamicRows ¶

func (b *Buffer) DynamicRows() DynamicRowReader

DynamicRows returns an iterator for the rows in the buffer. It implements the DynamicRowGroup interface.

func (*Buffer) NumRows ¶

func (b *Buffer) NumRows() int64

NumRows returns the number of rows in the buffer. Implements the parquet.RowGroup interface.

func (*Buffer) Rows ¶

func (b *Buffer) Rows() parquet.Rows

Rows returns an iterator for the rows in the buffer. It implements the parquet.RowGroup interface.

func (*Buffer) Schema ¶

func (b *Buffer) Schema() *parquet.Schema

Schema returns the concrete parquet.Schema of the buffer. Implements the parquet.RowGroup interface.

func (*Buffer) Sort ¶

func (b *Buffer) Sort()

func (*Buffer) SortingColumns ¶

func (b *Buffer) SortingColumns() []parquet.SortingColumn

SortingColumns returns the concrete slice of parquet.SortingColumns of the buffer. Implements the parquet.RowGroup interface.

func (*Buffer) WriteRowGroup ¶

func (b *Buffer) WriteRowGroup(rg parquet.RowGroup) (int64, error)

WriteRowGroup writes a single row group to the buffer.

func (*Buffer) WriteRows ¶

func (b *Buffer) WriteRows(rows []parquet.Row) (int, error)

WriteRows writes the given rows to the buffer.

type ColumnDefinition ¶

type ColumnDefinition struct {
	Name          string
	StorageLayout parquet.Node
	Dynamic       bool
}

ColumnDefinition describes a column in a dynamic parquet schema.

type DynamicRow ¶

type DynamicRow struct {
	Row            parquet.Row
	Schema         *parquet.Schema
	DynamicColumns map[string][]string
}

type DynamicRowGroup ¶

type DynamicRowGroup interface {
	parquet.RowGroup
	// DynamicColumns returns the concrete dynamic column names that were used
	// to create its concrete parquet schema with a dynamic parquet schema.
	DynamicColumns() map[string][]string
	// DynamicRows returns an iterator over the rows in the row group.
	DynamicRows() DynamicRowReader
}

DynamicRowGroup is a parquet.RowGroup that can describe the concrete dynamic columns.

func Concat ¶

func Concat(drg ...DynamicRowGroup) DynamicRowGroup

type DynamicRowReader ¶

type DynamicRowReader interface {
	ReadRows(*DynamicRows) (int, error)
	Close() error
}

DynamicRowReader is an iterator over the rows in a DynamicRowGroup.

type DynamicRows ¶

type DynamicRows struct {
	Rows           []parquet.Row
	Schema         *parquet.Schema
	DynamicColumns map[string][]string
}

func (*DynamicRows) Get ¶

func (r *DynamicRows) Get(i int) *DynamicRow

func (*DynamicRows) GetCopy ¶

func (r *DynamicRows) GetCopy(i int) *DynamicRow

type Label ¶

type Label struct {
	Name  string
	Value string
}

type MergedRowGroup ¶

type MergedRowGroup struct {
	parquet.RowGroup
	DynCols map[string][]string
}

MergedRowGroup allows wrapping any parquet.RowGroup to implement the DynamicRowGroup interface by specifying the concrete dynamic column names the RowGroup's schema contains.

func (*MergedRowGroup) DynamicColumns ¶

func (r *MergedRowGroup) DynamicColumns() map[string][]string

DynamicColumns returns the concrete dynamic column names that were used to create its concrete parquet schema with a dynamic parquet schema. Implements the DynamicRowGroup interface.

func (*MergedRowGroup) DynamicRows ¶

func (r *MergedRowGroup) DynamicRows() DynamicRowReader

DynamicRows returns an iterator over the rows in the row group. Implements the DynamicRowGroup interface.

type NilColumnChunk ¶

type NilColumnChunk struct {
	// contains filtered or unexported fields
}

NilColumnChunk is a column chunk that contains a single page with all null values of the given type, given length and column index of the parent schema. It implements the parquet.ColumnChunk interface.

func NewNilColumnChunk ¶

func NewNilColumnChunk(typ parquet.Type, columnIndex, numValues int) *NilColumnChunk

NewNilColumnChunk creates a new column chunk configured with the given type, column index and number of values in the page.

func (*NilColumnChunk) BloomFilter ¶

func (c *NilColumnChunk) BloomFilter() parquet.BloomFilter

BloomFilter returns the bloomfilter of the column chunk. Since the NilColumnChunk is a virtual column chunk only for in-memory purposes, it returns nil. Implements the parquet.ColumnChunk interface.

func (*NilColumnChunk) Column ¶

func (c *NilColumnChunk) Column() int

Column returns the index of the column chunk within the parent schema. Implements the parquet.ColumnChunk interface.

func (*NilColumnChunk) ColumnIndex ¶

func (c *NilColumnChunk) ColumnIndex() parquet.ColumnIndex

ColumnIndex returns the column index of the column chunk. Since the NilColumnChunk is a virtual column chunk only for in-memory purposes, it returns nil. Implements the parquet.ColumnChunk interface.

func (*NilColumnChunk) NumValues ¶

func (c *NilColumnChunk) NumValues() int64

NumValues returns the number of values in the column chunk. Implements the parquet.ColumnChunk interface.

func (*NilColumnChunk) OffsetIndex ¶

func (c *NilColumnChunk) OffsetIndex() parquet.OffsetIndex

OffsetIndex returns the offset index of the column chunk. Since the NilColumnChunk is a virtual column chunk only for in-memory purposes, it returns nil. Implements the parquet.ColumnChunk interface.

func (*NilColumnChunk) Pages ¶

func (c *NilColumnChunk) Pages() parquet.Pages

Pages returns an iterator for all pages within the column chunk. This iterator will only ever return a single page filled with all null values of the configured amount. Implements the parquet.ColumnChunk interface.

func (*NilColumnChunk) Type ¶

func (c *NilColumnChunk) Type() parquet.Type

Type returns the type of the column chunk. Implements the parquet.ColumnChunk interface.

type Sample ¶

type Sample struct {
	ExampleType string
	Labels      []Label
	Stacktrace  []uuid.UUID
	Timestamp   int64
	Value       int64
}

func (Sample) ToParquetRow ¶

func (s Sample) ToParquetRow(labelNames []string) parquet.Row

type Samples ¶

type Samples []Sample

func NewTestSamples ¶

func NewTestSamples() Samples

func (Samples) SampleLabelNames ¶

func (s Samples) SampleLabelNames() []string

func (Samples) ToBuffer ¶

func (s Samples) ToBuffer(schema *Schema) (*Buffer, error)

type Schema ¶

type Schema struct {
	// contains filtered or unexported fields
}

Schema is a dynamic parquet schema. It extends a parquet schema so that, for any column definition marked as dynamic, the concrete columns are created dynamically the first time their column name is seen.

func NewSampleSchema ¶

func NewSampleSchema() *Schema

func NewSchema ¶

func NewSchema(
	name string,
	columns []ColumnDefinition,
	sortingColumns []SortingColumn,
) *Schema

NewSchema creates a new dynamic parquet schema with the given name, column definitions and sorting columns. The order of the sorting columns is important as it determines the order in which data is written to a file or laid out in memory.

func (*Schema) ColumnByName ¶

func (s *Schema) ColumnByName(name string) (ColumnDefinition, bool)

func (*Schema) Columns ¶

func (s *Schema) Columns() []ColumnDefinition

func (*Schema) MergeDynamicRowGroups ¶

func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup) (DynamicRowGroup, error)

MergeDynamicRowGroups merges the given dynamic row groups into a single dynamic row group. It merges the parquet schema in a non-conflicting way by merging all the concrete dynamic column names and generating a superset parquet schema that all given dynamic row groups are compatible with.

func (*Schema) NewBuffer ¶

func (s *Schema) NewBuffer(dynamicColumns map[string][]string) (*Buffer, error)

NewBuffer returns a new buffer with a concrete parquet schema generated using the given concrete dynamic column names.

func (*Schema) NewWriter ¶

func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string) (*parquet.Writer, error)

NewWriter returns a new parquet writer with a concrete parquet schema generated using the given concrete dynamic column names.

func (*Schema) RowLessThan ¶

func (s *Schema) RowLessThan(a, b *DynamicRow) bool

func (*Schema) SerializeBuffer ¶

func (s *Schema) SerializeBuffer(buffer *Buffer) ([]byte, error)

SerializeBuffer serializes the given buffer and returns the result as a byte slice.

type SerializedBuffer ¶

type SerializedBuffer struct {
	// contains filtered or unexported fields
}

func NewSerializedBuffer ¶

func NewSerializedBuffer(f *parquet.File) (*SerializedBuffer, error)

func ReaderFromBytes ¶

func ReaderFromBytes(buf []byte) (*SerializedBuffer, error)

func (*SerializedBuffer) DynamicColumns ¶

func (b *SerializedBuffer) DynamicColumns() map[string][]string

func (*SerializedBuffer) DynamicRowGroup ¶

func (b *SerializedBuffer) DynamicRowGroup(i int) DynamicRowGroup

func (*SerializedBuffer) DynamicRows ¶

func (b *SerializedBuffer) DynamicRows() DynamicRowReader

func (*SerializedBuffer) NumRowGroups ¶

func (b *SerializedBuffer) NumRowGroups() int

func (*SerializedBuffer) NumRows ¶

func (b *SerializedBuffer) NumRows() int64

func (*SerializedBuffer) ParquetFile ¶

func (b *SerializedBuffer) ParquetFile() *parquet.File

func (*SerializedBuffer) Reader ¶

func (b *SerializedBuffer) Reader() *parquet.Reader

type SortingColumn ¶

type SortingColumn interface {
	parquet.SortingColumn
	ColumnName() string
}

SortingColumn describes a column to sort by in a dynamic parquet schema.

func Ascending ¶

func Ascending(column string) SortingColumn

Ascending constructs a SortingColumn value which dictates to sort by the column in ascending order.

func Descending ¶

func Descending(column string) SortingColumn

Descending constructs a SortingColumn value which dictates to sort by the column in descending order.

func NullsFirst ¶

func NullsFirst(sortingColumn SortingColumn) SortingColumn

NullsFirst wraps the SortingColumn passed as argument so that it instructs the row group to place null values first in the column.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL