Documentation ¶
Index ¶
- type ColumnBufferType
- type ParquetReader
- func (self *ParquetReader) GetFooterSize() (uint32, error)
- func (self *ParquetReader) GetNumRows() int64
- func (self *ParquetReader) Read(dstInterface interface{}) error
- func (self *ParquetReader) ReadByNumber(maxReadNumber int) ([]interface{}, error)
- func (self *ParquetReader) ReadColumnByIndex(index int, num int) (values []interface{}, rls []int32, dls []int32, err error)
- func (self *ParquetReader) ReadColumnByPath(pathStr string, num int) (values []interface{}, rls []int32, dls []int32, err error)
- func (self *ParquetReader) ReadFooter() error
- func (self *ParquetReader) ReadPartial(dstInterface interface{}, prefixPath string) error
- func (self *ParquetReader) ReadPartialByNumber(maxReadNumber int, prefixPath string) ([]interface{}, error)
- func (self *ParquetReader) ReadStop()
- func (self *ParquetReader) RenameSchema()
- func (self *ParquetReader) SetSchemaHandlerFromJSON(jsonSchema string) error
- func (self *ParquetReader) SkipRows(num int64) error
- func (self *ParquetReader) SkipRowsByIndex(index int, num int)
- func (self *ParquetReader) SkipRowsByPath(pathStr string, num int) error
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ColumnBufferType ¶
type ColumnBufferType struct {
	PFile            source.ParquetFile
	ThriftReader     *thrift.TBufferedTransport
	SchemaHandler    *schema.SchemaHandler
	PathStr          string
	RowGroupIndex    int64
	ChunkHeader      *parquet.ColumnChunk
	ChunkReadValues  int64
	DictPage         *layout.Page
	DataTable        *layout.Table
	DataTableNumRows int64
}
func NewColumnBuffer ¶
func NewColumnBuffer(pFile source.ParquetFile, footer *parquet.FileMetaData, schemaHandler *schema.SchemaHandler, pathStr string) (*ColumnBufferType, error)
func (*ColumnBufferType) NextRowGroup ¶
func (self *ColumnBufferType) NextRowGroup() error
func (*ColumnBufferType) ReadPage ¶
func (self *ColumnBufferType) ReadPage() error
func (*ColumnBufferType) ReadPageForSkip ¶
func (self *ColumnBufferType) ReadPageForSkip() (*layout.Page, error)
func (*ColumnBufferType) ReadRows ¶
func (self *ColumnBufferType) ReadRows(num int64) (*layout.Table, int64)
func (*ColumnBufferType) SkipRows ¶
func (self *ColumnBufferType) SkipRows(num int64) int64
type ParquetReader ¶
type ParquetReader struct {
	SchemaHandler *schema.SchemaHandler
	NP            int64 // parallel number
	PFile         source.ParquetFile
	ColumnBuffers map[string]*ColumnBufferType
	// One reader can only read objects of one type
	ObjType        reflect.Type
	ObjPartialType reflect.Type
}
func NewParquetColumnReader ¶
func NewParquetColumnReader(pFile source.ParquetFile, np int64) (*ParquetReader, error)
NewParquetColumnReader creates a parquet column reader
func NewParquetReader ¶
func NewParquetReader(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetReader, error)
Create a parquet reader: obj is an object with schema tags or a JSON schema string
func (*ParquetReader) GetFooterSize ¶
func (self *ParquetReader) GetFooterSize() (uint32, error)
Get the footer size
func (*ParquetReader) GetNumRows ¶
func (self *ParquetReader) GetNumRows() int64
func (*ParquetReader) Read ¶
func (self *ParquetReader) Read(dstInterface interface{}) error
Read rows of parquet file and unmarshal all to dst
func (*ParquetReader) ReadByNumber ¶ added in v1.4.0
func (self *ParquetReader) ReadByNumber(maxReadNumber int) ([]interface{}, error)
Read maxReadNumber objects
func (*ParquetReader) ReadColumnByIndex ¶
func (self *ParquetReader) ReadColumnByIndex(index int, num int) (values []interface{}, rls []int32, dls []int32, err error)
ReadColumnByIndex reads column by index. The index of the first column is 0.
func (*ParquetReader) ReadColumnByPath ¶
func (self *ParquetReader) ReadColumnByPath(pathStr string, num int) (values []interface{}, rls []int32, dls []int32, err error)
ReadColumnByPath reads column by path in schema.
func (*ParquetReader) ReadFooter ¶
func (self *ParquetReader) ReadFooter() error
Read footer from parquet file
func (*ParquetReader) ReadPartial ¶ added in v1.4.0
func (self *ParquetReader) ReadPartial(dstInterface interface{}, prefixPath string) error
Read rows of parquet file and unmarshal the partial objects under prefixPath to dst
func (*ParquetReader) ReadPartialByNumber ¶ added in v1.4.0
func (self *ParquetReader) ReadPartialByNumber(maxReadNumber int, prefixPath string) ([]interface{}, error)
Read maxReadNumber partial objects
func (*ParquetReader) RenameSchema ¶
func (self *ParquetReader) RenameSchema()
Rename schema names to their in-names (InName)
func (*ParquetReader) SetSchemaHandlerFromJSON ¶
func (self *ParquetReader) SetSchemaHandlerFromJSON(jsonSchema string) error
func (*ParquetReader) SkipRows ¶
func (self *ParquetReader) SkipRows(num int64) error
Skip rows of parquet file
func (*ParquetReader) SkipRowsByIndex ¶
func (self *ParquetReader) SkipRowsByIndex(index int, num int)
func (*ParquetReader) SkipRowsByPath ¶
func (self *ParquetReader) SkipRowsByPath(pathStr string, num int) error
Click to show internal directories.
Click to hide internal directories.