Documentation ¶
Index ¶
- func DecodeDictChunk(chunk *Chunk)
- func ReadDataPageValues(bytesReader *bytes.Reader, encoding parquet.Encoding, dataType parquet.Type, ...) ([]interface{}, error)
- func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)
- type Chunk
- type DictRecType
- type Page
- func DictRecToDictPage(dictRec *DictRecType, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
- func NewDataPage() *Page
- func NewDictPage() *Page
- func NewPage() *Page
- func ReadPage(thriftReader *thrift.TBufferedTransport, ...) (*Page, int64, int64, error)
- func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)
- func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, ...) ([]*Page, int64)
- func TableToDictPage(table *Table, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
- func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte
- func (page *Page) DataPageV2Compress(compressType parquet.CompressionCodec) []byte
- func (page *Page) Decode(dictPage *Page)
- func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32) []byte
- func (page *Page) DictPageCompress(compressType parquet.CompressionCodec) []byte
- func (page *Page) EncodingValues(valuesBuf []interface{}) []byte
- type RowGroup
- type Table
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ReadDataPageValues ¶
func ReadDataPageValues(bytesReader *bytes.Reader, encoding parquet.Encoding, dataType parquet.Type, convertedType parquet.ConvertedType, cnt uint64, bitWidth uint64) ([]interface{}, error)
Read data page values
func ReadPageHeader ¶
func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)
Read page header
Types ¶
type Chunk ¶
type Chunk struct { Pages []*Page ChunkHeader *parquet.ColumnChunk }
Chunk stores a ColumnChunk of a parquet file.
func PagesToDictChunk ¶
Convert several pages to one chunk with dict page first
func ReadChunk ¶
func ReadChunk(thriftReader *thrift.TBufferedTransport, schemaHandler *SchemaHandler.SchemaHandler, chunkHeader *parquet.ColumnChunk) (*Chunk, error)
Read one chunk from a parquet file. Deprecated.
type DictRecType ¶
type DictRecType struct { DictMap map[interface{}]int32 DictSlice []interface{} }
func NewDictRec ¶
func NewDictRec() *DictRecType
type Page ¶
type Page struct {
	//Header of a page
	Header *parquet.PageHeader
	//Table to store values
	DataTable *Table
	//Compressed data of the page, which is written in parquet file
	RawData []byte
	//Compress type: gzip/snappy/none
	CompressType parquet.CompressionCodec
	//Parquet type of the values in the page
	DataType parquet.Type
	//Path in schema(include the root)
	Path []string
	//Maximum of the values
	MaxVal interface{}
	//Minimum of the values
	MinVal interface{}
	//Tag info
	Info *Common.Tag
	PageSize int32
}
Page is used to store the page data
func DictRecToDictPage ¶
func DictRecToDictPage(dictRec *DictRecType, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
func ReadPage ¶
func ReadPage(thriftReader *thrift.TBufferedTransport, schemaHandler *SchemaHandler.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)
Read a page from a parquet file
func TableToDataPages ¶
func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)
Convert a table to data pages
func TableToDictDataPages ¶
func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, compressType parquet.CompressionCodec) ([]*Page, int64)
Convert a table to dict data pages
func TableToDictPage ¶
func TableToDictPage(table *Table, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
Convert a table to dict page
func (*Page) DataPageCompress ¶
func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte
Compress the data page to parquet file
func (*Page) DataPageV2Compress ¶
func (page *Page) DataPageV2Compress(compressType parquet.CompressionCodec) []byte
Compress data page v2 to parquet file
func (*Page) DictDataPageCompress ¶
func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32) []byte
Compress the dict data page to parquet file
func (*Page) DictPageCompress ¶
func (page *Page) DictPageCompress(compressType parquet.CompressionCodec) []byte
Compress the dict page to parquet file
func (*Page) EncodingValues ¶
Encode values
type RowGroup ¶
RowGroup stores a row group of a parquet file.
func ReadRowGroup ¶
func ReadRowGroup(rowGroupHeader *parquet.RowGroup, PFile ParquetFile.ParquetFile, schemaHandler *SchemaHandler.SchemaHandler, NP int64) (*RowGroup, error)
Read one RowGroup from a parquet file. Deprecated.
func (*RowGroup) RowGroupToTableMap ¶
Convert a RowGroup to table map
type Table ¶
type Table struct {
	//Repetition type of the values: REQUIRED/OPTIONAL/REPEATED
	RepetitionType parquet.FieldRepetitionType
	//Parquet type
	Type parquet.Type
	//Path of this column
	Path []string
	//Maximum of definition levels
	MaxDefinitionLevel int32
	//Maximum of repetition levels
	MaxRepetitionLevel int32
	//Parquet values
	Values []interface{}
	//Definition Levels slice
	DefinitionLevels []int32
	//Repetition Levels slice
	RepetitionLevels []int32
	//Tag info
	Info *Common.Tag
}
Table is the core data structure used to store the values
func NewEmptyTable ¶
func NewEmptyTable() *Table