parquet

package
v1.4.0
Published: Feb 6, 2024 License: AGPL-3.0 Imports: 14 Imported by: 0

Documentation


Constants

This section is empty.

Variables

var (
	EmptyRowReader = &emptyRowReader{}
)

Functions

func CopyAsRowGroups

func CopyAsRowGroups(dst RowWriterFlusher, src parquet.RowReader, rowGroupNumCount int) (total uint64, rowGroupCount uint64, err error)

CopyAsRowGroups copies rows from src to dst, flushing a row group on dst after every rowGroupNumCount rows read. It returns the total number of rows copied and the number of row groups written. Flush is called on dst to start each new row group.
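
A minimal sketch of one way to use this, assuming *parquet.Writer and *parquet.Buffer from github.com/parquet-go/parquet-go (a writer with WriteRows and Flush should satisfy RowWriterFlusher); the "io" import, the schema, and the 10_000-row row-group size are example choices:

func writeBuffered(out io.Writer, schema *parquet.Schema, buf *parquet.Buffer) (uint64, uint64, error) {
	w := parquet.NewWriter(out, schema)
	rows := buf.Rows()
	defer rows.Close()
	// Cut a new row group in the output after every 10_000 rows read from buf.
	total, groups, err := CopyAsRowGroups(w, rows, 10_000)
	if err != nil {
		return total, groups, err
	}
	return total, groups, w.Close()
}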

func NewGroupField

func NewGroupField(name string, node parquet.Node) parquet.Field

func NewMergeRowReader

func NewMergeRowReader(readers []parquet.RowReader, maxValue parquet.Row, less func(parquet.Row, parquet.Row) bool) parquet.RowReader

NewMergeRowReader returns a RowReader that k-way merges the given readers using the less function. Each reader must already be sorted according to the less function.
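
A minimal sketch, assuming the readers are each sorted ascending by their first column and that column is an int64, and assuming maxValue acts as a sentinel row that compares greater than any real row under less; requires the "math" and github.com/parquet-go/parquet-go imports:

func mergeByFirstColumn(readers []parquet.RowReader) ([]parquet.Row, error) {
	// Compare rows by their first column (an int64 in this example).
	less := func(a, b parquet.Row) bool { return a[0].Int64() < b[0].Int64() }
	// Sentinel row that sorts after every real row.
	sentinel := parquet.Row{parquet.Int64Value(math.MaxInt64)}
	return ReadAll(NewMergeRowReader(readers, sentinel, less))
}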

func ReadAll

func ReadAll(r parquet.RowReader) ([]parquet.Row, error)

func ReadAllWithBufferSize

func ReadAllWithBufferSize(r parquet.RowReader, bufferSize int) ([]parquet.Row, error)
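
For illustration, a sketch that loads every row of a file's first row group into memory, assuming parquet.OpenFile and the RowGroups/Rows accessors from github.com/parquet-go/parquet-go plus the "os" import; the file path, the assumption that at least one row group exists, and the 1024-row buffer size are example choices:

func readFirstRowGroup(path string) ([]parquet.Row, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	info, err := f.Stat()
	if err != nil {
		return nil, err
	}
	pf, err := parquet.OpenFile(f, info.Size())
	if err != nil {
		return nil, err
	}
	rows := pf.RowGroups()[0].Rows()
	defer rows.Close()
	// Read everything, 1024 rows per underlying ReadRows call.
	return ReadAllWithBufferSize(rows, 1024)
}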

Types

type BufferedRowReaderIterator

type BufferedRowReaderIterator struct {
	// contains filtered or unexported fields
}

func NewBufferedRowReaderIterator

func NewBufferedRowReaderIterator(reader parquet.RowReader, bufferSize int) *BufferedRowReaderIterator

NewBufferedRowReaderIterator returns a new `iter.Iterator[parquet.Row]` from a RowReader. The iterator will buffer `bufferSize` rows from the reader.
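
A minimal usage sketch; the 64-row buffer size and the callback are example choices:

func forEachRow(r parquet.RowReader, fn func(parquet.Row) error) error {
	// Buffer 64 rows per underlying ReadRows call.
	it := NewBufferedRowReaderIterator(r, 64)
	defer it.Close()
	for it.Next() {
		if err := fn(it.At()); err != nil {
			return err
		}
	}
	return it.Err()
}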

func (*BufferedRowReaderIterator) At

func (r *BufferedRowReaderIterator) At() parquet.Row

func (*BufferedRowReaderIterator) Close

func (r *BufferedRowReaderIterator) Close() error

func (*BufferedRowReaderIterator) Err

func (*BufferedRowReaderIterator) Next

func (r *BufferedRowReaderIterator) Next() bool

type ErrRowReader

type ErrRowReader struct {
	// contains filtered or unexported fields
}

func NewErrRowReader

func NewErrRowReader(err error) *ErrRowReader

func (ErrRowReader) ReadRows

func (e ErrRowReader) ReadRows(rows []parquet.Row) (int, error)
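
One way this can be used is to defer a setup failure so it surfaces on the first ReadRows call, for example when assembling a list of readers; openRowReader below is hypothetical:

func openAll(paths []string) []parquet.RowReader {
	readers := make([]parquet.RowReader, 0, len(paths))
	for _, p := range paths {
		r, err := openRowReader(p) // openRowReader is a hypothetical helper
		if err != nil {
			// Defer the failure: it is reported when the reader is first read.
			readers = append(readers, NewErrRowReader(err))
			continue
		}
		readers = append(readers, r)
	}
	return readers
}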

type Group

type Group []parquet.Field

Group allows writing a schema with a custom field order, unlike parquet.Group, which sorts fields alphabetically because it is backed by a map.
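
A minimal sketch of building a schema whose fields keep the order in which they are written, using NewGroupField together with node constructors from github.com/parquet-go/parquet-go; the schema name, field names, and types are example choices:

var exampleSchema = parquet.NewSchema("example", Group{
	NewGroupField("timestamp", parquet.Int(64)),
	NewGroupField("name", parquet.String()),
	NewGroupField("labels", parquet.Optional(parquet.String())),
})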

func (Group) Compression

func (g Group) Compression() compress.Codec

func (Group) Encoding

func (g Group) Encoding() encoding.Encoding

func (Group) Fields

func (g Group) Fields() []parquet.Field

func (Group) GoType

func (g Group) GoType() reflect.Type

func (Group) Leaf

func (g Group) Leaf() bool

func (Group) Optional

func (g Group) Optional() bool

func (Group) Repeated

func (g Group) Repeated() bool

func (Group) Required

func (g Group) Required() bool

func (Group) String

func (g Group) String() string

func (Group) Type

func (g Group) Type() parquet.Type

type IteratorRowReader

type IteratorRowReader struct {
	iter.Iterator[parquet.Row]
}

func NewIteratorRowReader

func NewIteratorRowReader(it iter.Iterator[parquet.Row]) *IteratorRowReader

NewIteratorRowReader returns a RowReader that reads rows from the given iterator.
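
For illustration, a sketch that adapts an iterator back into a parquet.RowReader so it can feed CopyAsRowGroups; iter here is the module's iterator package used in the type definition above, and the 1000-row row-group size is an example choice:

func copyFromIterator(dst RowWriterFlusher, it iter.Iterator[parquet.Row]) (uint64, uint64, error) {
	// Wrap the iterator so it can be consumed as a parquet.RowReader,
	// cutting a row group every 1000 rows.
	return CopyAsRowGroups(dst, NewIteratorRowReader(it), 1000)
}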

func (*IteratorRowReader) ReadRows

func (it *IteratorRowReader) ReadRows(rows []parquet.Row) (int, error)

type RowWriterFlusher

type RowWriterFlusher interface {
	parquet.RowWriter
	Flush() error
}
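
Any writer that provides both WriteRows and Flush satisfies this interface; *parquet.Writer from github.com/parquet-go/parquet-go appears to provide both, and the sketch below checks that assumption at compile time:

var _ RowWriterFlusher = (*parquet.Writer)(nil)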
