Documentation ¶
Index ¶
- Variables
- type ColumnChunk
- func (p *ColumnChunk) GetFileOffset() int64
- func (p *ColumnChunk) GetFilePath() string
- func (p *ColumnChunk) GetMetaData() *ColumnMetaData
- func (p *ColumnChunk) IsSetFilePath() bool
- func (p *ColumnChunk) IsSetMetaData() bool
- func (p *ColumnChunk) Read(iprot thrift.TProtocol) error
- func (p *ColumnChunk) ReadField1(iprot thrift.TProtocol) error
- func (p *ColumnChunk) ReadField2(iprot thrift.TProtocol) error
- func (p *ColumnChunk) ReadField3(iprot thrift.TProtocol) error
- func (p *ColumnChunk) String() string
- func (p *ColumnChunk) Write(oprot thrift.TProtocol) error
- type ColumnMetaData
- func (p *ColumnMetaData) GetCodec() CompressionCodec
- func (p *ColumnMetaData) GetDataPageOffset() int64
- func (p *ColumnMetaData) GetDictionaryPageOffset() int64
- func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats
- func (p *ColumnMetaData) GetEncodings() []Encoding
- func (p *ColumnMetaData) GetIndexPageOffset() int64
- func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue
- func (p *ColumnMetaData) GetNumValues() int64
- func (p *ColumnMetaData) GetPathInSchema() []string
- func (p *ColumnMetaData) GetStatistics() *Statistics
- func (p *ColumnMetaData) GetTotalCompressedSize() int64
- func (p *ColumnMetaData) GetTotalUncompressedSize() int64
- func (p *ColumnMetaData) GetType() Type
- func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool
- func (p *ColumnMetaData) IsSetEncodingStats() bool
- func (p *ColumnMetaData) IsSetIndexPageOffset() bool
- func (p *ColumnMetaData) IsSetKeyValueMetadata() bool
- func (p *ColumnMetaData) IsSetStatistics() bool
- func (p *ColumnMetaData) Read(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField1(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField10(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField11(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField12(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField13(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField2(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField3(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField4(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField5(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField6(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField7(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField8(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) ReadField9(iprot thrift.TProtocol) error
- func (p *ColumnMetaData) String() string
- func (p *ColumnMetaData) Write(oprot thrift.TProtocol) error
- type CompressionCodec
- type ConvertedType
- type DataPageHeader
- func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding
- func (p *DataPageHeader) GetEncoding() Encoding
- func (p *DataPageHeader) GetNumValues() int32
- func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding
- func (p *DataPageHeader) GetStatistics() *Statistics
- func (p *DataPageHeader) IsSetStatistics() bool
- func (p *DataPageHeader) Read(iprot thrift.TProtocol) error
- func (p *DataPageHeader) ReadField1(iprot thrift.TProtocol) error
- func (p *DataPageHeader) ReadField2(iprot thrift.TProtocol) error
- func (p *DataPageHeader) ReadField3(iprot thrift.TProtocol) error
- func (p *DataPageHeader) ReadField4(iprot thrift.TProtocol) error
- func (p *DataPageHeader) ReadField5(iprot thrift.TProtocol) error
- func (p *DataPageHeader) String() string
- func (p *DataPageHeader) Write(oprot thrift.TProtocol) error
- type DataPageHeaderV2
- func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32
- func (p *DataPageHeaderV2) GetEncoding() Encoding
- func (p *DataPageHeaderV2) GetIsCompressed() bool
- func (p *DataPageHeaderV2) GetNumNulls() int32
- func (p *DataPageHeaderV2) GetNumRows() int32
- func (p *DataPageHeaderV2) GetNumValues() int32
- func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32
- func (p *DataPageHeaderV2) GetStatistics() *Statistics
- func (p *DataPageHeaderV2) IsSetIsCompressed() bool
- func (p *DataPageHeaderV2) IsSetStatistics() bool
- func (p *DataPageHeaderV2) Read(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField1(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField2(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField3(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField4(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField5(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField6(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField7(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) ReadField8(iprot thrift.TProtocol) error
- func (p *DataPageHeaderV2) String() string
- func (p *DataPageHeaderV2) Write(oprot thrift.TProtocol) error
- type DictionaryPageHeader
- func (p *DictionaryPageHeader) GetEncoding() Encoding
- func (p *DictionaryPageHeader) GetIsSorted() bool
- func (p *DictionaryPageHeader) GetNumValues() int32
- func (p *DictionaryPageHeader) IsSetIsSorted() bool
- func (p *DictionaryPageHeader) Read(iprot thrift.TProtocol) error
- func (p *DictionaryPageHeader) ReadField1(iprot thrift.TProtocol) error
- func (p *DictionaryPageHeader) ReadField2(iprot thrift.TProtocol) error
- func (p *DictionaryPageHeader) ReadField3(iprot thrift.TProtocol) error
- func (p *DictionaryPageHeader) String() string
- func (p *DictionaryPageHeader) Write(oprot thrift.TProtocol) error
- type Encoding
- type FieldRepetitionType
- type FileMetaData
- func (p *FileMetaData) GetCreatedBy() string
- func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue
- func (p *FileMetaData) GetNumRows() int64
- func (p *FileMetaData) GetRowGroups() []*RowGroup
- func (p *FileMetaData) GetSchema() []*SchemaElement
- func (p *FileMetaData) GetVersion() int32
- func (p *FileMetaData) IsSetCreatedBy() bool
- func (p *FileMetaData) IsSetKeyValueMetadata() bool
- func (p *FileMetaData) Read(iprot thrift.TProtocol) error
- func (p *FileMetaData) ReadField1(iprot thrift.TProtocol) error
- func (p *FileMetaData) ReadField2(iprot thrift.TProtocol) error
- func (p *FileMetaData) ReadField3(iprot thrift.TProtocol) error
- func (p *FileMetaData) ReadField4(iprot thrift.TProtocol) error
- func (p *FileMetaData) ReadField5(iprot thrift.TProtocol) error
- func (p *FileMetaData) ReadField6(iprot thrift.TProtocol) error
- func (p *FileMetaData) String() string
- func (p *FileMetaData) Write(oprot thrift.TProtocol) error
- type IndexPageHeader
- type KeyValue
- func (p *KeyValue) GetKey() string
- func (p *KeyValue) GetValue() string
- func (p *KeyValue) IsSetValue() bool
- func (p *KeyValue) Read(iprot thrift.TProtocol) error
- func (p *KeyValue) ReadField1(iprot thrift.TProtocol) error
- func (p *KeyValue) ReadField2(iprot thrift.TProtocol) error
- func (p *KeyValue) String() string
- func (p *KeyValue) Write(oprot thrift.TProtocol) error
- type PageEncodingStats
- func (p *PageEncodingStats) GetCount() int32
- func (p *PageEncodingStats) GetEncoding() Encoding
- func (p *PageEncodingStats) GetPageType() PageType
- func (p *PageEncodingStats) Read(iprot thrift.TProtocol) error
- func (p *PageEncodingStats) ReadField1(iprot thrift.TProtocol) error
- func (p *PageEncodingStats) ReadField2(iprot thrift.TProtocol) error
- func (p *PageEncodingStats) ReadField3(iprot thrift.TProtocol) error
- func (p *PageEncodingStats) String() string
- func (p *PageEncodingStats) Write(oprot thrift.TProtocol) error
- type PageHeader
- func (p *PageHeader) GetCompressedPageSize() int32
- func (p *PageHeader) GetCrc() int32
- func (p *PageHeader) GetDataPageHeader() *DataPageHeader
- func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2
- func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader
- func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader
- func (p *PageHeader) GetType() PageType
- func (p *PageHeader) GetUncompressedPageSize() int32
- func (p *PageHeader) IsSetCrc() bool
- func (p *PageHeader) IsSetDataPageHeader() bool
- func (p *PageHeader) IsSetDataPageHeaderV2() bool
- func (p *PageHeader) IsSetDictionaryPageHeader() bool
- func (p *PageHeader) IsSetIndexPageHeader() bool
- func (p *PageHeader) Read(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField1(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField2(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField3(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField4(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField5(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField6(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField7(iprot thrift.TProtocol) error
- func (p *PageHeader) ReadField8(iprot thrift.TProtocol) error
- func (p *PageHeader) String() string
- func (p *PageHeader) Write(oprot thrift.TProtocol) error
- type PageType
- type RowGroup
- func (p *RowGroup) GetColumns() []*ColumnChunk
- func (p *RowGroup) GetNumRows() int64
- func (p *RowGroup) GetSortingColumns() []*SortingColumn
- func (p *RowGroup) GetTotalByteSize() int64
- func (p *RowGroup) IsSetSortingColumns() bool
- func (p *RowGroup) Read(iprot thrift.TProtocol) error
- func (p *RowGroup) ReadField1(iprot thrift.TProtocol) error
- func (p *RowGroup) ReadField2(iprot thrift.TProtocol) error
- func (p *RowGroup) ReadField3(iprot thrift.TProtocol) error
- func (p *RowGroup) ReadField4(iprot thrift.TProtocol) error
- func (p *RowGroup) String() string
- func (p *RowGroup) Write(oprot thrift.TProtocol) error
- type SchemaElement
- func (p *SchemaElement) GetConvertedType() ConvertedType
- func (p *SchemaElement) GetFieldID() int32
- func (p *SchemaElement) GetName() string
- func (p *SchemaElement) GetNumChildren() int32
- func (p *SchemaElement) GetPrecision() int32
- func (p *SchemaElement) GetRepetitionType() FieldRepetitionType
- func (p *SchemaElement) GetScale() int32
- func (p *SchemaElement) GetType() Type
- func (p *SchemaElement) GetTypeLength() int32
- func (p *SchemaElement) IsSetConvertedType() bool
- func (p *SchemaElement) IsSetFieldID() bool
- func (p *SchemaElement) IsSetNumChildren() bool
- func (p *SchemaElement) IsSetPrecision() bool
- func (p *SchemaElement) IsSetRepetitionType() bool
- func (p *SchemaElement) IsSetScale() bool
- func (p *SchemaElement) IsSetType() bool
- func (p *SchemaElement) IsSetTypeLength() bool
- func (p *SchemaElement) Read(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField1(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField2(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField3(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField4(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField5(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField6(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField7(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField8(iprot thrift.TProtocol) error
- func (p *SchemaElement) ReadField9(iprot thrift.TProtocol) error
- func (p *SchemaElement) String() string
- func (p *SchemaElement) Write(oprot thrift.TProtocol) error
- type SortingColumn
- func (p *SortingColumn) GetColumnIdx() int32
- func (p *SortingColumn) GetDescending() bool
- func (p *SortingColumn) GetNullsFirst() bool
- func (p *SortingColumn) Read(iprot thrift.TProtocol) error
- func (p *SortingColumn) ReadField1(iprot thrift.TProtocol) error
- func (p *SortingColumn) ReadField2(iprot thrift.TProtocol) error
- func (p *SortingColumn) ReadField3(iprot thrift.TProtocol) error
- func (p *SortingColumn) String() string
- func (p *SortingColumn) Write(oprot thrift.TProtocol) error
- type Statistics
- func (p *Statistics) GetDistinctCount() int64
- func (p *Statistics) GetMax() []byte
- func (p *Statistics) GetMin() []byte
- func (p *Statistics) GetNullCount() int64
- func (p *Statistics) IsSetDistinctCount() bool
- func (p *Statistics) IsSetMax() bool
- func (p *Statistics) IsSetMin() bool
- func (p *Statistics) IsSetNullCount() bool
- func (p *Statistics) Read(iprot thrift.TProtocol) error
- func (p *Statistics) ReadField1(iprot thrift.TProtocol) error
- func (p *Statistics) ReadField2(iprot thrift.TProtocol) error
- func (p *Statistics) ReadField3(iprot thrift.TProtocol) error
- func (p *Statistics) ReadField4(iprot thrift.TProtocol) error
- func (p *Statistics) String() string
- func (p *Statistics) Write(oprot thrift.TProtocol) error
- type Type
Constants ¶
This section is empty.
Variables ¶
var ColumnChunk_FilePath_DEFAULT string
var ColumnMetaData_DictionaryPageOffset_DEFAULT int64
var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats
var ColumnMetaData_IndexPageOffset_DEFAULT int64
var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue
var DataPageHeaderV2_IsCompressed_DEFAULT bool = true
var DictionaryPageHeader_IsSorted_DEFAULT bool
var FileMetaData_CreatedBy_DEFAULT string
var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue
var GoUnusedProtection__ int
var KeyValue_Value_DEFAULT string
var PageHeader_Crc_DEFAULT int32
var RowGroup_SortingColumns_DEFAULT []*SortingColumn
var SchemaElement_FieldID_DEFAULT int32
var SchemaElement_NumChildren_DEFAULT int32
var SchemaElement_Precision_DEFAULT int32
var SchemaElement_Scale_DEFAULT int32
var SchemaElement_TypeLength_DEFAULT int32
var Statistics_DistinctCount_DEFAULT int64
var Statistics_Max_DEFAULT []byte
var Statistics_Min_DEFAULT []byte
var Statistics_NullCount_DEFAULT int64
Functions ¶
This section is empty.
Types ¶
type ColumnChunk ¶
type ColumnChunk struct { FilePath *string `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"` FileOffset int64 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"` MetaData *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"` }
Attributes:
- FilePath: File where column data is stored. If not set, assumed to be same file as
metadata. This path is relative to the current file.
- FileOffset: Byte offset in file_path to the ColumnMetaData *
- MetaData: Column metadata for this chunk. This is the same content as what is at
file_path/file_offset. Having it here has it replicated in the file metadata.
func NewColumnChunk ¶
func NewColumnChunk() *ColumnChunk
func (*ColumnChunk) GetFileOffset ¶
func (p *ColumnChunk) GetFileOffset() int64
func (*ColumnChunk) GetFilePath ¶
func (p *ColumnChunk) GetFilePath() string
func (*ColumnChunk) GetMetaData ¶
func (p *ColumnChunk) GetMetaData() *ColumnMetaData
func (*ColumnChunk) IsSetFilePath ¶
func (p *ColumnChunk) IsSetFilePath() bool
func (*ColumnChunk) IsSetMetaData ¶
func (p *ColumnChunk) IsSetMetaData() bool
func (*ColumnChunk) ReadField1 ¶
func (p *ColumnChunk) ReadField1(iprot thrift.TProtocol) error
func (*ColumnChunk) ReadField2 ¶
func (p *ColumnChunk) ReadField2(iprot thrift.TProtocol) error
func (*ColumnChunk) ReadField3 ¶
func (p *ColumnChunk) ReadField3(iprot thrift.TProtocol) error
func (*ColumnChunk) String ¶
func (p *ColumnChunk) String() string
type ColumnMetaData ¶
type ColumnMetaData struct { Type Type `thrift:"type,1,required" db:"type" json:"type"` Encodings []Encoding `thrift:"encodings,2,required" db:"encodings" json:"encodings"` PathInSchema []string `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"` Codec CompressionCodec `thrift:"codec,4,required" db:"codec" json:"codec"` NumValues int64 `thrift:"num_values,5,required" db:"num_values" json:"num_values"` TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"` TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"` KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"` DataPageOffset int64 `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"` IndexPageOffset *int64 `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"` DictionaryPageOffset *int64 `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"` Statistics *Statistics `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"` EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"` }
Description for column metadata
Attributes:
- Type: Type of this column *
- Encodings: Set of all encodings used for this column. The purpose is to validate
whether we can decode those pages. *
- PathInSchema: Path in schema *
- Codec: Compression codec *
- NumValues: Number of values in this column *
- TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) *
- TotalCompressedSize: total byte size of all compressed pages in this column chunk (including the headers) *
- KeyValueMetadata: Optional key/value metadata *
- DataPageOffset: Byte offset from beginning of file to first data page *
- IndexPageOffset: Byte offset from beginning of file to root index page *
- DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page *
- Statistics: optional statistics for this column chunk
- EncodingStats: Set of all encodings used for pages in this column chunk.
This information can be used to determine if all data pages are dictionary encoded for example *
var ColumnChunk_MetaData_DEFAULT *ColumnMetaData
func NewColumnMetaData ¶
func NewColumnMetaData() *ColumnMetaData
func (*ColumnMetaData) GetCodec ¶
func (p *ColumnMetaData) GetCodec() CompressionCodec
func (*ColumnMetaData) GetDataPageOffset ¶
func (p *ColumnMetaData) GetDataPageOffset() int64
func (*ColumnMetaData) GetDictionaryPageOffset ¶
func (p *ColumnMetaData) GetDictionaryPageOffset() int64
func (*ColumnMetaData) GetEncodingStats ¶
func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats
func (*ColumnMetaData) GetEncodings ¶
func (p *ColumnMetaData) GetEncodings() []Encoding
func (*ColumnMetaData) GetIndexPageOffset ¶
func (p *ColumnMetaData) GetIndexPageOffset() int64
func (*ColumnMetaData) GetKeyValueMetadata ¶
func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue
func (*ColumnMetaData) GetNumValues ¶
func (p *ColumnMetaData) GetNumValues() int64
func (*ColumnMetaData) GetPathInSchema ¶
func (p *ColumnMetaData) GetPathInSchema() []string
func (*ColumnMetaData) GetStatistics ¶
func (p *ColumnMetaData) GetStatistics() *Statistics
func (*ColumnMetaData) GetTotalCompressedSize ¶
func (p *ColumnMetaData) GetTotalCompressedSize() int64
func (*ColumnMetaData) GetTotalUncompressedSize ¶
func (p *ColumnMetaData) GetTotalUncompressedSize() int64
func (*ColumnMetaData) GetType ¶
func (p *ColumnMetaData) GetType() Type
func (*ColumnMetaData) IsSetDictionaryPageOffset ¶
func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool
func (*ColumnMetaData) IsSetEncodingStats ¶
func (p *ColumnMetaData) IsSetEncodingStats() bool
func (*ColumnMetaData) IsSetIndexPageOffset ¶
func (p *ColumnMetaData) IsSetIndexPageOffset() bool
func (*ColumnMetaData) IsSetKeyValueMetadata ¶
func (p *ColumnMetaData) IsSetKeyValueMetadata() bool
func (*ColumnMetaData) IsSetStatistics ¶
func (p *ColumnMetaData) IsSetStatistics() bool
func (*ColumnMetaData) ReadField1 ¶
func (p *ColumnMetaData) ReadField1(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField10 ¶
func (p *ColumnMetaData) ReadField10(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField11 ¶
func (p *ColumnMetaData) ReadField11(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField12 ¶
func (p *ColumnMetaData) ReadField12(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField13 ¶
func (p *ColumnMetaData) ReadField13(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField2 ¶
func (p *ColumnMetaData) ReadField2(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField3 ¶
func (p *ColumnMetaData) ReadField3(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField4 ¶
func (p *ColumnMetaData) ReadField4(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField5 ¶
func (p *ColumnMetaData) ReadField5(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField6 ¶
func (p *ColumnMetaData) ReadField6(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField7 ¶
func (p *ColumnMetaData) ReadField7(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField8 ¶
func (p *ColumnMetaData) ReadField8(iprot thrift.TProtocol) error
func (*ColumnMetaData) ReadField9 ¶
func (p *ColumnMetaData) ReadField9(iprot thrift.TProtocol) error
func (*ColumnMetaData) String ¶
func (p *ColumnMetaData) String() string
type CompressionCodec ¶
type CompressionCodec int64
Supported compression algorithms.
const ( CompressionCodec_UNCOMPRESSED CompressionCodec = 0 CompressionCodec_SNAPPY CompressionCodec = 1 CompressionCodec_GZIP CompressionCodec = 2 CompressionCodec_LZO CompressionCodec = 3 )
func CompressionCodecFromString ¶
func CompressionCodecFromString(s string) (CompressionCodec, error)
func CompressionCodecPtr ¶
func CompressionCodecPtr(v CompressionCodec) *CompressionCodec
func (CompressionCodec) MarshalText ¶
func (p CompressionCodec) MarshalText() ([]byte, error)
func (*CompressionCodec) Scan ¶
func (p *CompressionCodec) Scan(value interface{}) error
func (CompressionCodec) String ¶
func (p CompressionCodec) String() string
func (*CompressionCodec) UnmarshalText ¶
func (p *CompressionCodec) UnmarshalText(text []byte) error
type ConvertedType ¶
type ConvertedType int64
Common types used by frameworks (e.g. Hive, Pig) using Parquet. This helps map between types in those frameworks to the base types in Parquet. This is only metadata and not needed to read or write the data.
const ( ConvertedType_UTF8 ConvertedType = 0 ConvertedType_MAP ConvertedType = 1 ConvertedType_MAP_KEY_VALUE ConvertedType = 2 ConvertedType_LIST ConvertedType = 3 ConvertedType_ENUM ConvertedType = 4 ConvertedType_DECIMAL ConvertedType = 5 ConvertedType_DATE ConvertedType = 6 ConvertedType_TIME_MILLIS ConvertedType = 7 ConvertedType_TIME_MICROS ConvertedType = 8 ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 ConvertedType_UINT_8 ConvertedType = 11 ConvertedType_UINT_16 ConvertedType = 12 ConvertedType_UINT_32 ConvertedType = 13 ConvertedType_UINT_64 ConvertedType = 14 ConvertedType_INT_8 ConvertedType = 15 ConvertedType_INT_16 ConvertedType = 16 ConvertedType_INT_32 ConvertedType = 17 ConvertedType_INT_64 ConvertedType = 18 ConvertedType_JSON ConvertedType = 19 ConvertedType_BSON ConvertedType = 20 ConvertedType_INTERVAL ConvertedType = 21 )
var SchemaElement_ConvertedType_DEFAULT ConvertedType
func ConvertedTypeFromString ¶
func ConvertedTypeFromString(s string) (ConvertedType, error)
func ConvertedTypePtr ¶
func ConvertedTypePtr(v ConvertedType) *ConvertedType
func (ConvertedType) MarshalText ¶
func (p ConvertedType) MarshalText() ([]byte, error)
func (*ConvertedType) Scan ¶
func (p *ConvertedType) Scan(value interface{}) error
func (ConvertedType) String ¶
func (p ConvertedType) String() string
func (*ConvertedType) UnmarshalText ¶
func (p *ConvertedType) UnmarshalText(text []byte) error
type DataPageHeader ¶
type DataPageHeader struct { NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"` RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"` Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"` }
Data page header
Attributes:
- NumValues: Number of values, including NULLs, in this data page. *
- Encoding: Encoding used for this data page *
- DefinitionLevelEncoding: Encoding used for definition levels *
- RepetitionLevelEncoding: Encoding used for repetition levels *
- Statistics: Optional statistics for the data in this page*
var PageHeader_DataPageHeader_DEFAULT *DataPageHeader
func NewDataPageHeader ¶
func NewDataPageHeader() *DataPageHeader
func (*DataPageHeader) GetDefinitionLevelEncoding ¶
func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding
func (*DataPageHeader) GetEncoding ¶
func (p *DataPageHeader) GetEncoding() Encoding
func (*DataPageHeader) GetNumValues ¶
func (p *DataPageHeader) GetNumValues() int32
func (*DataPageHeader) GetRepetitionLevelEncoding ¶
func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding
func (*DataPageHeader) GetStatistics ¶
func (p *DataPageHeader) GetStatistics() *Statistics
func (*DataPageHeader) IsSetStatistics ¶
func (p *DataPageHeader) IsSetStatistics() bool
func (*DataPageHeader) ReadField1 ¶
func (p *DataPageHeader) ReadField1(iprot thrift.TProtocol) error
func (*DataPageHeader) ReadField2 ¶
func (p *DataPageHeader) ReadField2(iprot thrift.TProtocol) error
func (*DataPageHeader) ReadField3 ¶
func (p *DataPageHeader) ReadField3(iprot thrift.TProtocol) error
func (*DataPageHeader) ReadField4 ¶
func (p *DataPageHeader) ReadField4(iprot thrift.TProtocol) error
func (*DataPageHeader) ReadField5 ¶
func (p *DataPageHeader) ReadField5(iprot thrift.TProtocol) error
func (*DataPageHeader) String ¶
func (p *DataPageHeader) String() string
type DataPageHeaderV2 ¶
type DataPageHeaderV2 struct { NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"` NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"` DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"` RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"` IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed,omitempty"` Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"` }
New page format allowing reading levels without decompressing the data. Repetition and definition levels are uncompressed. The remaining section containing the data is compressed if is_compressed is true.
Attributes:
- NumValues: Number of values, including NULLs, in this data page. *
- NumNulls: Number of NULL values, in this data page.
Number of non-null = num_values - num_nulls which is also the number of values in the data section *
- NumRows: Number of rows in this data page, which means pages change on record boundaries (r = 0) *
- Encoding: Encoding used for data in this page *
- DefinitionLevelsByteLength: length of the definition levels
- RepetitionLevelsByteLength: length of the repetition levels
- IsCompressed: whether the values are compressed.
This means the section of the page between definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) is compressed with the compression_codec. If missing, it is considered compressed.
- Statistics: optional statistics for the data in this page
var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2
func NewDataPageHeaderV2 ¶
func NewDataPageHeaderV2() *DataPageHeaderV2
func (*DataPageHeaderV2) GetDefinitionLevelsByteLength ¶
func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32
func (*DataPageHeaderV2) GetEncoding ¶
func (p *DataPageHeaderV2) GetEncoding() Encoding
func (*DataPageHeaderV2) GetIsCompressed ¶
func (p *DataPageHeaderV2) GetIsCompressed() bool
func (*DataPageHeaderV2) GetNumNulls ¶
func (p *DataPageHeaderV2) GetNumNulls() int32
func (*DataPageHeaderV2) GetNumRows ¶
func (p *DataPageHeaderV2) GetNumRows() int32
func (*DataPageHeaderV2) GetNumValues ¶
func (p *DataPageHeaderV2) GetNumValues() int32
func (*DataPageHeaderV2) GetRepetitionLevelsByteLength ¶
func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32
func (*DataPageHeaderV2) GetStatistics ¶
func (p *DataPageHeaderV2) GetStatistics() *Statistics
func (*DataPageHeaderV2) IsSetIsCompressed ¶
func (p *DataPageHeaderV2) IsSetIsCompressed() bool
func (*DataPageHeaderV2) IsSetStatistics ¶
func (p *DataPageHeaderV2) IsSetStatistics() bool
func (*DataPageHeaderV2) ReadField1 ¶
func (p *DataPageHeaderV2) ReadField1(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) ReadField2 ¶
func (p *DataPageHeaderV2) ReadField2(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) ReadField3 ¶
func (p *DataPageHeaderV2) ReadField3(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) ReadField4 ¶
func (p *DataPageHeaderV2) ReadField4(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) ReadField5 ¶
func (p *DataPageHeaderV2) ReadField5(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) ReadField6 ¶
func (p *DataPageHeaderV2) ReadField6(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) ReadField7 ¶
func (p *DataPageHeaderV2) ReadField7(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) ReadField8 ¶
func (p *DataPageHeaderV2) ReadField8(iprot thrift.TProtocol) error
func (*DataPageHeaderV2) String ¶
func (p *DataPageHeaderV2) String() string
type DictionaryPageHeader ¶
type DictionaryPageHeader struct { NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"` }
TODO: *
Attributes:
- NumValues: Number of values in the dictionary *
- Encoding: Encoding using this dictionary page *
- IsSorted: If true, the entries in the dictionary are sorted in ascending order *
var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader
func NewDictionaryPageHeader ¶
func NewDictionaryPageHeader() *DictionaryPageHeader
func (*DictionaryPageHeader) GetEncoding ¶
func (p *DictionaryPageHeader) GetEncoding() Encoding
func (*DictionaryPageHeader) GetIsSorted ¶
func (p *DictionaryPageHeader) GetIsSorted() bool
func (*DictionaryPageHeader) GetNumValues ¶
func (p *DictionaryPageHeader) GetNumValues() int32
func (*DictionaryPageHeader) IsSetIsSorted ¶
func (p *DictionaryPageHeader) IsSetIsSorted() bool
func (*DictionaryPageHeader) Read ¶
func (p *DictionaryPageHeader) Read(iprot thrift.TProtocol) error
func (*DictionaryPageHeader) ReadField1 ¶
func (p *DictionaryPageHeader) ReadField1(iprot thrift.TProtocol) error
func (*DictionaryPageHeader) ReadField2 ¶
func (p *DictionaryPageHeader) ReadField2(iprot thrift.TProtocol) error
func (*DictionaryPageHeader) ReadField3 ¶
func (p *DictionaryPageHeader) ReadField3(iprot thrift.TProtocol) error
func (*DictionaryPageHeader) String ¶
func (p *DictionaryPageHeader) String() string
type Encoding ¶
type Encoding int64
Encodings supported by Parquet. Not all encodings are valid for all types. These enums are also used to specify the encoding of definition and repetition levels. See the accompanying doc for the details of the more complicated encodings.
func EncodingFromString ¶
func EncodingPtr ¶
func (Encoding) MarshalText ¶
func (*Encoding) UnmarshalText ¶
type FieldRepetitionType ¶
type FieldRepetitionType int64
Representation of Schemas
const ( FieldRepetitionType_REQUIRED FieldRepetitionType = 0 FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 FieldRepetitionType_REPEATED FieldRepetitionType = 2 )
var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType
func FieldRepetitionTypeFromString ¶
func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error)
func FieldRepetitionTypePtr ¶
func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType
func (FieldRepetitionType) MarshalText ¶
func (p FieldRepetitionType) MarshalText() ([]byte, error)
func (*FieldRepetitionType) Scan ¶
func (p *FieldRepetitionType) Scan(value interface{}) error
func (FieldRepetitionType) String ¶
func (p FieldRepetitionType) String() string
func (*FieldRepetitionType) UnmarshalText ¶
func (p *FieldRepetitionType) UnmarshalText(text []byte) error
type FileMetaData ¶
type FileMetaData struct { Version int32 `thrift:"version,1,required" db:"version" json:"version"` Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"` NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"` KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"` CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"` }
Description for file metadata
Attributes:
- Version: Version of this file *
- Schema: Parquet schema for this file. This schema contains metadata for all the columns.
The schema is represented as a tree with a single root. The nodes of the tree are flattened to a list by doing a depth-first traversal. The column metadata contains the path in the schema for that column which can be used to map columns to nodes in the schema. The first element is the root *
- NumRows: Number of rows in this file *
- RowGroups: Row groups in this file *
- KeyValueMetadata: Optional key/value metadata *
- CreatedBy: String for application that wrote this file. This should be in the format
<Application> version <App Version> (build <App Build Hash>). e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
func NewFileMetaData ¶
func NewFileMetaData() *FileMetaData
func (*FileMetaData) GetCreatedBy ¶
func (p *FileMetaData) GetCreatedBy() string
func (*FileMetaData) GetKeyValueMetadata ¶
func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue
func (*FileMetaData) GetNumRows ¶
func (p *FileMetaData) GetNumRows() int64
func (*FileMetaData) GetRowGroups ¶
func (p *FileMetaData) GetRowGroups() []*RowGroup
func (*FileMetaData) GetSchema ¶
func (p *FileMetaData) GetSchema() []*SchemaElement
func (*FileMetaData) GetVersion ¶
func (p *FileMetaData) GetVersion() int32
func (*FileMetaData) IsSetCreatedBy ¶
func (p *FileMetaData) IsSetCreatedBy() bool
func (*FileMetaData) IsSetKeyValueMetadata ¶
func (p *FileMetaData) IsSetKeyValueMetadata() bool
func (*FileMetaData) ReadField1 ¶
func (p *FileMetaData) ReadField1(iprot thrift.TProtocol) error
func (*FileMetaData) ReadField2 ¶
func (p *FileMetaData) ReadField2(iprot thrift.TProtocol) error
func (*FileMetaData) ReadField3 ¶
func (p *FileMetaData) ReadField3(iprot thrift.TProtocol) error
func (*FileMetaData) ReadField4 ¶
func (p *FileMetaData) ReadField4(iprot thrift.TProtocol) error
func (*FileMetaData) ReadField5 ¶
func (p *FileMetaData) ReadField5(iprot thrift.TProtocol) error
func (*FileMetaData) ReadField6 ¶
func (p *FileMetaData) ReadField6(iprot thrift.TProtocol) error
func (*FileMetaData) String ¶
func (p *FileMetaData) String() string
type IndexPageHeader ¶
type IndexPageHeader struct { }
var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader
func NewIndexPageHeader ¶
func NewIndexPageHeader() *IndexPageHeader
func (*IndexPageHeader) String ¶
func (p *IndexPageHeader) String() string
type KeyValue ¶
type KeyValue struct { Key string `thrift:"key,1,required" db:"key" json:"key"` Value *string `thrift:"value,2" db:"value" json:"value,omitempty"` }
Wrapper struct to store key values
Attributes:
- Key
- Value
func NewKeyValue ¶
func NewKeyValue() *KeyValue
func (*KeyValue) IsSetValue ¶
type PageEncodingStats ¶
type PageEncodingStats struct { PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"` Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` Count int32 `thrift:"count,3,required" db:"count" json:"count"` }
statistics of a given page type and encoding
Attributes:
- PageType: the page type (data/dic/...) *
- Encoding: encoding of the page *
- Count: number of pages of this type with this encoding *
func NewPageEncodingStats ¶
func NewPageEncodingStats() *PageEncodingStats
func (*PageEncodingStats) GetCount ¶
func (p *PageEncodingStats) GetCount() int32
func (*PageEncodingStats) GetEncoding ¶
func (p *PageEncodingStats) GetEncoding() Encoding
func (*PageEncodingStats) GetPageType ¶
func (p *PageEncodingStats) GetPageType() PageType
func (*PageEncodingStats) ReadField1 ¶
func (p *PageEncodingStats) ReadField1(iprot thrift.TProtocol) error
func (*PageEncodingStats) ReadField2 ¶
func (p *PageEncodingStats) ReadField2(iprot thrift.TProtocol) error
func (*PageEncodingStats) ReadField3 ¶
func (p *PageEncodingStats) ReadField3(iprot thrift.TProtocol) error
func (*PageEncodingStats) String ¶
func (p *PageEncodingStats) String() string
type PageHeader ¶
type PageHeader struct { Type PageType `thrift:"type,1,required" db:"type" json:"type"` UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"` CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"` Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"` DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"` IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"` DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"` DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"` }
Attributes:
- Type: the type of the page: indicates which of the *_header fields is set *
- UncompressedPageSize: Uncompressed page size in bytes (not including this header) *
- CompressedPageSize: Compressed page size in bytes (not including this header) *
- Crc: 32bit crc for the data below. This allows for disabling checksumming in HDFS
if only a few pages need to be read
- DataPageHeader
- IndexPageHeader
- DictionaryPageHeader
- DataPageHeaderV2
func NewPageHeader ¶
func NewPageHeader() *PageHeader
func (*PageHeader) GetCompressedPageSize ¶
func (p *PageHeader) GetCompressedPageSize() int32
func (*PageHeader) GetCrc ¶
func (p *PageHeader) GetCrc() int32
func (*PageHeader) GetDataPageHeader ¶
func (p *PageHeader) GetDataPageHeader() *DataPageHeader
func (*PageHeader) GetDataPageHeaderV2 ¶
func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2
func (*PageHeader) GetDictionaryPageHeader ¶
func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader
func (*PageHeader) GetIndexPageHeader ¶
func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader
func (*PageHeader) GetType ¶
func (p *PageHeader) GetType() PageType
func (*PageHeader) GetUncompressedPageSize ¶
func (p *PageHeader) GetUncompressedPageSize() int32
func (*PageHeader) IsSetCrc ¶
func (p *PageHeader) IsSetCrc() bool
func (*PageHeader) IsSetDataPageHeader ¶
func (p *PageHeader) IsSetDataPageHeader() bool
func (*PageHeader) IsSetDataPageHeaderV2 ¶
func (p *PageHeader) IsSetDataPageHeaderV2() bool
func (*PageHeader) IsSetDictionaryPageHeader ¶
func (p *PageHeader) IsSetDictionaryPageHeader() bool
func (*PageHeader) IsSetIndexPageHeader ¶
func (p *PageHeader) IsSetIndexPageHeader() bool
func (*PageHeader) ReadField1 ¶
func (p *PageHeader) ReadField1(iprot thrift.TProtocol) error
func (*PageHeader) ReadField2 ¶
func (p *PageHeader) ReadField2(iprot thrift.TProtocol) error
func (*PageHeader) ReadField3 ¶
func (p *PageHeader) ReadField3(iprot thrift.TProtocol) error
func (*PageHeader) ReadField4 ¶
func (p *PageHeader) ReadField4(iprot thrift.TProtocol) error
func (*PageHeader) ReadField5 ¶
func (p *PageHeader) ReadField5(iprot thrift.TProtocol) error
func (*PageHeader) ReadField6 ¶
func (p *PageHeader) ReadField6(iprot thrift.TProtocol) error
func (*PageHeader) ReadField7 ¶
func (p *PageHeader) ReadField7(iprot thrift.TProtocol) error
func (*PageHeader) ReadField8 ¶
func (p *PageHeader) ReadField8(iprot thrift.TProtocol) error
func (*PageHeader) String ¶
func (p *PageHeader) String() string
type PageType ¶
type PageType int64
func PageTypeFromString ¶
func PageTypePtr ¶
func (PageType) MarshalText ¶
func (*PageType) UnmarshalText ¶
type RowGroup ¶
type RowGroup struct { Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"` TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"` NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"` }
Attributes:
- Columns: Metadata for each column chunk in this row group.
This list must have the same order as the SchemaElement list in FileMetaData.
- TotalByteSize: Total byte size of all the uncompressed column data in this row group *
- NumRows: Number of rows in this row group *
- SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup.
The sorting columns can be a subset of all the columns.
func NewRowGroup ¶
func NewRowGroup() *RowGroup
func (*RowGroup) GetColumns ¶
func (p *RowGroup) GetColumns() []*ColumnChunk
func (*RowGroup) GetNumRows ¶
func (*RowGroup) GetSortingColumns ¶
func (p *RowGroup) GetSortingColumns() []*SortingColumn
func (*RowGroup) GetTotalByteSize ¶
func (*RowGroup) IsSetSortingColumns ¶
type SchemaElement ¶
type SchemaElement struct { Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"` TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"` RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"` Name string `thrift:"name,4,required" db:"name" json:"name"` NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"` ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"` Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"` Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"` FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"` }
Represents an element inside a schema definition.
- if it is a group (inner node) then type is undefined and num_children is defined
- if it is a primitive type (leaf) then type is defined and num_children is undefined
the nodes are listed in depth first traversal order.
Attributes:
- Type: Data type for this field. Not set if the current element is a non-leaf node
- TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values.
Otherwise, if specified, this is the maximum bit length to store any of the values. (e.g. a low cardinality INT col could have this set to 3). Note that this is in the schema, and therefore fixed for the entire file.
- RepetitionType: repetition of the field. The root of the schema does not have a repetition_type.
All other nodes must have one
- Name: Name of the field in the schema
- NumChildren: Nested fields. Since thrift does not support nested fields,
the nesting is flattened to a single list by a depth-first traversal. The children count is used to construct the nested relationship. This field is not set when the element is a primitive type
- ConvertedType: When the schema is the result of a conversion from another model,
this is used to record the original type to help with cross conversion.
- Scale: Used when this column contains decimal data.
See the DECIMAL converted type for more details.
- Precision
- FieldID: When the original schema supports field ids, this will save the
original field id in the parquet schema
func NewSchemaElement ¶
func NewSchemaElement() *SchemaElement
func (*SchemaElement) GetConvertedType ¶
func (p *SchemaElement) GetConvertedType() ConvertedType
func (*SchemaElement) GetFieldID ¶
func (p *SchemaElement) GetFieldID() int32
func (*SchemaElement) GetName ¶
func (p *SchemaElement) GetName() string
func (*SchemaElement) GetNumChildren ¶
func (p *SchemaElement) GetNumChildren() int32
func (*SchemaElement) GetPrecision ¶
func (p *SchemaElement) GetPrecision() int32
func (*SchemaElement) GetRepetitionType ¶
func (p *SchemaElement) GetRepetitionType() FieldRepetitionType
func (*SchemaElement) GetScale ¶
func (p *SchemaElement) GetScale() int32
func (*SchemaElement) GetType ¶
func (p *SchemaElement) GetType() Type
func (*SchemaElement) GetTypeLength ¶
func (p *SchemaElement) GetTypeLength() int32
func (*SchemaElement) IsSetConvertedType ¶
func (p *SchemaElement) IsSetConvertedType() bool
func (*SchemaElement) IsSetFieldID ¶
func (p *SchemaElement) IsSetFieldID() bool
func (*SchemaElement) IsSetNumChildren ¶
func (p *SchemaElement) IsSetNumChildren() bool
func (*SchemaElement) IsSetPrecision ¶
func (p *SchemaElement) IsSetPrecision() bool
func (*SchemaElement) IsSetRepetitionType ¶
func (p *SchemaElement) IsSetRepetitionType() bool
func (*SchemaElement) IsSetScale ¶
func (p *SchemaElement) IsSetScale() bool
func (*SchemaElement) IsSetType ¶
func (p *SchemaElement) IsSetType() bool
func (*SchemaElement) IsSetTypeLength ¶
func (p *SchemaElement) IsSetTypeLength() bool
func (*SchemaElement) ReadField1 ¶
func (p *SchemaElement) ReadField1(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField2 ¶
func (p *SchemaElement) ReadField2(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField3 ¶
func (p *SchemaElement) ReadField3(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField4 ¶
func (p *SchemaElement) ReadField4(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField5 ¶
func (p *SchemaElement) ReadField5(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField6 ¶
func (p *SchemaElement) ReadField6(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField7 ¶
func (p *SchemaElement) ReadField7(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField8 ¶
func (p *SchemaElement) ReadField8(iprot thrift.TProtocol) error
func (*SchemaElement) ReadField9 ¶
func (p *SchemaElement) ReadField9(iprot thrift.TProtocol) error
func (*SchemaElement) String ¶
func (p *SchemaElement) String() string
type SortingColumn ¶
type SortingColumn struct { ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"` Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"` NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"` }
Wrapper struct to specify sort order
Attributes:
- ColumnIdx: The column index (in this row group) *
- Descending: If true, indicates this column is sorted in descending order. *
- NullsFirst: If true, nulls will come before non-null values, otherwise,
nulls go at the end.
func NewSortingColumn ¶
func NewSortingColumn() *SortingColumn
func (*SortingColumn) GetColumnIdx ¶
func (p *SortingColumn) GetColumnIdx() int32
func (*SortingColumn) GetDescending ¶
func (p *SortingColumn) GetDescending() bool
func (*SortingColumn) GetNullsFirst ¶
func (p *SortingColumn) GetNullsFirst() bool
func (*SortingColumn) ReadField1 ¶
func (p *SortingColumn) ReadField1(iprot thrift.TProtocol) error
func (*SortingColumn) ReadField2 ¶
func (p *SortingColumn) ReadField2(iprot thrift.TProtocol) error
func (*SortingColumn) ReadField3 ¶
func (p *SortingColumn) ReadField3(iprot thrift.TProtocol) error
func (*SortingColumn) String ¶
func (p *SortingColumn) String() string
type Statistics ¶
type Statistics struct { Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"` Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"` NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"` DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"` }
Statistics per row group and per page. All fields are optional.
Attributes:
- Max: min and max value of the column, encoded in PLAIN encoding
- Min
- NullCount: count of null values in the column
- DistinctCount: count of distinct values occurring
var ColumnMetaData_Statistics_DEFAULT *Statistics
var DataPageHeaderV2_Statistics_DEFAULT *Statistics
var DataPageHeader_Statistics_DEFAULT *Statistics
func NewStatistics ¶
func NewStatistics() *Statistics
func (*Statistics) GetDistinctCount ¶
func (p *Statistics) GetDistinctCount() int64
func (*Statistics) GetMax ¶
func (p *Statistics) GetMax() []byte
func (*Statistics) GetMin ¶
func (p *Statistics) GetMin() []byte
func (*Statistics) GetNullCount ¶
func (p *Statistics) GetNullCount() int64
func (*Statistics) IsSetDistinctCount ¶
func (p *Statistics) IsSetDistinctCount() bool
func (*Statistics) IsSetMax ¶
func (p *Statistics) IsSetMax() bool
func (*Statistics) IsSetMin ¶
func (p *Statistics) IsSetMin() bool
func (*Statistics) IsSetNullCount ¶
func (p *Statistics) IsSetNullCount() bool
func (*Statistics) ReadField1 ¶
func (p *Statistics) ReadField1(iprot thrift.TProtocol) error
func (*Statistics) ReadField2 ¶
func (p *Statistics) ReadField2(iprot thrift.TProtocol) error
func (*Statistics) ReadField3 ¶
func (p *Statistics) ReadField3(iprot thrift.TProtocol) error
func (*Statistics) ReadField4 ¶
func (p *Statistics) ReadField4(iprot thrift.TProtocol) error
func (*Statistics) String ¶
func (p *Statistics) String() string
type Type ¶
type Type int64
Types supported by Parquet. These types are intended to be used in combination with the encodings to control the on disk storage format. For example INT16 is not included as a type since a good encoding of INT32 would handle this.
var SchemaElement_Type_DEFAULT Type