Documentation ¶
Index ¶
- func DecodeDictChunk(chunk *Chunk)
- func ReadDataPageValues(bytesReader *bytes.Reader, encodingMethod parquet.Encoding, ...) ([]interface{}, error)
- func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)
- type Chunk
- type DictRecType
- type Page
- func DictRecToDictPage(dictRec *DictRecType, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
- func NewDataPage() *Page
- func NewDictPage() *Page
- func NewPage() *Page
- func ReadPage(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, ...) (*Page, int64, int64, error)
- func ReadPage2(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, ...) (*Page, int64, int64, error)
- func ReadPageRawData(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, ...) (*Page, error)
- func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)
- func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, ...) ([]*Page, int64)
- func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte
- func (page *Page) DataPageV2Compress(compressType parquet.CompressionCodec) []byte
- func (page *Page) Decode(dictPage *Page)
- func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32, values []int32) []byte
- func (page *Page) DictPageCompress(compressType parquet.CompressionCodec, pT parquet.Type) []byte
- func (page *Page) EncodingValues(valuesBuf []interface{}) []byte
- func (p *Page) GetRLDLFromRawData(schemaHandler *schema.SchemaHandler) (int64, int64, error)
- func (p *Page) GetValueFromRawData(schemaHandler *schema.SchemaHandler) error
- type RowGroup
- type Table
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ReadDataPageValues ¶
func ReadDataPageValues(bytesReader *bytes.Reader, encodingMethod parquet.Encoding, dataType parquet.Type, convertedType parquet.ConvertedType, cnt uint64, bitWidth uint64) ([]interface{}, error)
Read data page values
func ReadPageHeader ¶
func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)
Read page header
Types ¶
type Chunk ¶
type Chunk struct { Pages []*Page ChunkHeader *parquet.ColumnChunk }
Chunk stores the ColumnChunk in parquet file
func PagesToDictChunk ¶
Convert several pages to one chunk with dict page first
func ReadChunk ¶
func ReadChunk(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, chunkHeader *parquet.ColumnChunk) (*Chunk, error)
Read one chunk from a parquet file (Deprecated)
type DictRecType ¶
func NewDictRec ¶
func NewDictRec(pT parquet.Type) *DictRecType
type Page ¶
type Page struct { //Header of a page Header *parquet.PageHeader //Table to store values DataTable *Table //Compressed data of the page, which is written in parquet file RawData []byte //Compress type: gzip/snappy/zstd/none CompressType parquet.CompressionCodec //Schema Schema *parquet.SchemaElement //Path in schema(include the root) Path []string //Maximum of the values MaxVal interface{} //Minimum of the values MinVal interface{} //NullCount NullCount *int64 //Tag info Info *common.Tag PageSize int32 }
Page is used to store the page data
func DictRecToDictPage ¶
func DictRecToDictPage(dictRec *DictRecType, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
func ReadPage ¶
func ReadPage(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)
Read page from parquet file
func ReadPage2 ¶
func ReadPage2(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)
ReadPage2 is a test function; it has the same signature as ReadPage.
func ReadPageRawData ¶
func ReadPageRawData(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, error)
Read page RawData
func TableToDataPages ¶
func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)
Convert a table to data pages
func TableToDictDataPages ¶
func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, compressType parquet.CompressionCodec) ([]*Page, int64)
Convert a table to dict data pages
func (*Page) DataPageCompress ¶
func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte
Compress the data page to parquet file
func (*Page) DataPageV2Compress ¶
func (page *Page) DataPageV2Compress(compressType parquet.CompressionCodec) []byte
Compress data page v2 to parquet file
func (*Page) DictDataPageCompress ¶
func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32, values []int32) []byte
Compress the dict data page to parquet file
func (*Page) DictPageCompress ¶
Compress the dict page to parquet file
func (*Page) EncodingValues ¶
Encoding values
func (*Page) GetRLDLFromRawData ¶
Get RepetitionLevels and DefinitionLevels from RawData
func (*Page) GetValueFromRawData ¶
func (p *Page) GetValueFromRawData(schemaHandler *schema.SchemaHandler) error
Get values from raw data
type RowGroup ¶
RowGroup stores the RowGroup in parquet file
func ReadRowGroup ¶
func ReadRowGroup(rowGroupHeader *parquet.RowGroup, PFile source.ParquetFile, schemaHandler *schema.SchemaHandler, NP int64) (*RowGroup, error)
Read one RowGroup from a parquet file (Deprecated)
func (*RowGroup) RowGroupToTableMap ¶
Convert a RowGroup to table map
type Table ¶
type Table struct { //Repetition type of the values: REQUIRED/OPTIONAL/REPEATED RepetitionType parquet.FieldRepetitionType //Schema Schema *parquet.SchemaElement //Path of this column Path []string //Maximum of definition levels MaxDefinitionLevel int32 //Maximum of repetition levels MaxRepetitionLevel int32 //Parquet values Values []interface{} //Definition Levels slice DefinitionLevels []int32 //Repetition Levels slice RepetitionLevels []int32 //Tag info Info *common.Tag }
Table is the core data structure used to store the values
func NewEmptyTable ¶
func NewEmptyTable() *Table