Documentation ¶
Index ¶
- Variables
- type BoundaryOrder
- type BsonType
- type ColumnChunk
- func (p *ColumnChunk) GetColumnIndexLength() int32
- func (p *ColumnChunk) GetColumnIndexOffset() int64
- func (p *ColumnChunk) GetFileOffset() int64
- func (p *ColumnChunk) GetFilePath() string
- func (p *ColumnChunk) GetMetaData() *ColumnMetaData
- func (p *ColumnChunk) GetOffsetIndexLength() int32
- func (p *ColumnChunk) GetOffsetIndexOffset() int64
- func (p *ColumnChunk) IsSetColumnIndexLength() bool
- func (p *ColumnChunk) IsSetColumnIndexOffset() bool
- func (p *ColumnChunk) IsSetFilePath() bool
- func (p *ColumnChunk) IsSetMetaData() bool
- func (p *ColumnChunk) IsSetOffsetIndexLength() bool
- func (p *ColumnChunk) IsSetOffsetIndexOffset() bool
- func (p *ColumnChunk) String() string
- type ColumnIndex
- func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder
- func (p *ColumnIndex) GetMaxValues() [][]byte
- func (p *ColumnIndex) GetMinValues() [][]byte
- func (p *ColumnIndex) GetNullCounts() []int64
- func (p *ColumnIndex) GetNullPages() []bool
- func (p *ColumnIndex) IsSetNullCounts() bool
- func (p *ColumnIndex) String() string
- type ColumnMetaData
- func (p *ColumnMetaData) GetCodec() CompressionCodec
- func (p *ColumnMetaData) GetDataPageOffset() int64
- func (p *ColumnMetaData) GetDictionaryPageOffset() int64
- func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats
- func (p *ColumnMetaData) GetEncodings() []Encoding
- func (p *ColumnMetaData) GetIndexPageOffset() int64
- func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue
- func (p *ColumnMetaData) GetNumValues() int64
- func (p *ColumnMetaData) GetPathInSchema() []string
- func (p *ColumnMetaData) GetStatistics() *Statistics
- func (p *ColumnMetaData) GetTotalCompressedSize() int64
- func (p *ColumnMetaData) GetTotalUncompressedSize() int64
- func (p *ColumnMetaData) GetType() Type
- func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool
- func (p *ColumnMetaData) IsSetEncodingStats() bool
- func (p *ColumnMetaData) IsSetIndexPageOffset() bool
- func (p *ColumnMetaData) IsSetKeyValueMetadata() bool
- func (p *ColumnMetaData) IsSetStatistics() bool
- func (p *ColumnMetaData) String() string
- type ColumnOrder
- type CompressionCodec
- type ConvertedType
- type DataPageHeader
- func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding
- func (p *DataPageHeader) GetEncoding() Encoding
- func (p *DataPageHeader) GetNumValues() int32
- func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding
- func (p *DataPageHeader) GetStatistics() *Statistics
- func (p *DataPageHeader) IsSetStatistics() bool
- func (p *DataPageHeader) String() string
- type DataPageHeaderV2
- func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32
- func (p *DataPageHeaderV2) GetEncoding() Encoding
- func (p *DataPageHeaderV2) GetIsCompressed() bool
- func (p *DataPageHeaderV2) GetNumNulls() int32
- func (p *DataPageHeaderV2) GetNumRows() int32
- func (p *DataPageHeaderV2) GetNumValues() int32
- func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32
- func (p *DataPageHeaderV2) GetStatistics() *Statistics
- func (p *DataPageHeaderV2) IsSetIsCompressed() bool
- func (p *DataPageHeaderV2) IsSetStatistics() bool
- func (p *DataPageHeaderV2) String() string
- type DateType
- type DecimalType
- type DictionaryPageHeader
- type Encoding
- type EnumType
- type FieldRepetitionType
- type FileMetaData
- func (p *FileMetaData) GetColumnOrders() []*ColumnOrder
- func (p *FileMetaData) GetCreatedBy() string
- func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue
- func (p *FileMetaData) GetNumRows() int64
- func (p *FileMetaData) GetRowGroups() []*RowGroup
- func (p *FileMetaData) GetSchema() []*SchemaElement
- func (p *FileMetaData) GetVersion() int32
- func (p *FileMetaData) IsSetColumnOrders() bool
- func (p *FileMetaData) IsSetCreatedBy() bool
- func (p *FileMetaData) IsSetKeyValueMetadata() bool
- func (meta *FileMetaData) Read(r io.Reader) error
- func (p *FileMetaData) String() string
- type IndexPageHeader
- type IntType
- type JsonType
- type KeyValue
- type ListType
- type LogicalType
- func (p *LogicalType) CountSetFieldsLogicalType() int
- func (p *LogicalType) GetBSON() *BsonType
- func (p *LogicalType) GetDATE() *DateType
- func (p *LogicalType) GetDECIMAL() *DecimalType
- func (p *LogicalType) GetENUM() *EnumType
- func (p *LogicalType) GetINTEGER() *IntType
- func (p *LogicalType) GetJSON() *JsonType
- func (p *LogicalType) GetLIST() *ListType
- func (p *LogicalType) GetMAP() *MapType
- func (p *LogicalType) GetSTRING() *StringType
- func (p *LogicalType) GetTIME() *TimeType
- func (p *LogicalType) GetTIMESTAMP() *TimestampType
- func (p *LogicalType) GetUNKNOWN() *NullType
- func (p *LogicalType) GetUUID() *UUIDType
- func (p *LogicalType) IsSetBSON() bool
- func (p *LogicalType) IsSetDATE() bool
- func (p *LogicalType) IsSetDECIMAL() bool
- func (p *LogicalType) IsSetENUM() bool
- func (p *LogicalType) IsSetINTEGER() bool
- func (p *LogicalType) IsSetJSON() bool
- func (p *LogicalType) IsSetLIST() bool
- func (p *LogicalType) IsSetMAP() bool
- func (p *LogicalType) IsSetSTRING() bool
- func (p *LogicalType) IsSetTIME() bool
- func (p *LogicalType) IsSetTIMESTAMP() bool
- func (p *LogicalType) IsSetUNKNOWN() bool
- func (p *LogicalType) IsSetUUID() bool
- func (p *LogicalType) String() string
- type MapType
- type MicroSeconds
- type MilliSeconds
- type NullType
- type OffsetIndex
- type PageEncodingStats
- type PageHeader
- func (p *PageHeader) GetCompressedPageSize() int32
- func (p *PageHeader) GetCrc() int32
- func (p *PageHeader) GetDataPageHeader() *DataPageHeader
- func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2
- func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader
- func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader
- func (p *PageHeader) GetType() PageType
- func (p *PageHeader) GetUncompressedPageSize() int32
- func (p *PageHeader) IsSetCrc() bool
- func (p *PageHeader) IsSetDataPageHeader() bool
- func (p *PageHeader) IsSetDataPageHeaderV2() bool
- func (p *PageHeader) IsSetDictionaryPageHeader() bool
- func (p *PageHeader) IsSetIndexPageHeader() bool
- func (ph *PageHeader) Read(r io.Reader) error
- func (p *PageHeader) String() string
- type PageLocation
- type PageType
- type RowGroup
- type SchemaElement
- func (p *SchemaElement) GetConvertedType() ConvertedType
- func (p *SchemaElement) GetFieldID() int32
- func (p *SchemaElement) GetLogicalType() *LogicalType
- func (p *SchemaElement) GetName() string
- func (p *SchemaElement) GetNumChildren() int32
- func (p *SchemaElement) GetPrecision() int32
- func (p *SchemaElement) GetRepetitionType() FieldRepetitionType
- func (p *SchemaElement) GetScale() int32
- func (p *SchemaElement) GetType() Type
- func (p *SchemaElement) GetTypeLength() int32
- func (p *SchemaElement) IsSetConvertedType() bool
- func (p *SchemaElement) IsSetFieldID() bool
- func (p *SchemaElement) IsSetLogicalType() bool
- func (p *SchemaElement) IsSetNumChildren() bool
- func (p *SchemaElement) IsSetPrecision() bool
- func (p *SchemaElement) IsSetRepetitionType() bool
- func (p *SchemaElement) IsSetScale() bool
- func (p *SchemaElement) IsSetType() bool
- func (p *SchemaElement) IsSetTypeLength() bool
- func (p *SchemaElement) String() string
- type SortingColumn
- type Statistics
- func (p *Statistics) GetDistinctCount() int64
- func (p *Statistics) GetMax() []byte
- func (p *Statistics) GetMaxValue() []byte
- func (p *Statistics) GetMin() []byte
- func (p *Statistics) GetMinValue() []byte
- func (p *Statistics) GetNullCount() int64
- func (p *Statistics) IsSetDistinctCount() bool
- func (p *Statistics) IsSetMax() bool
- func (p *Statistics) IsSetMaxValue() bool
- func (p *Statistics) IsSetMin() bool
- func (p *Statistics) IsSetMinValue() bool
- func (p *Statistics) IsSetNullCount() bool
- func (p *Statistics) String() string
- type StringType
- type TimeType
- type TimeUnit
- type TimestampType
- type Type
- type TypeDefinedOrder
- type UUIDType
Constants ¶
This section is empty.
Variables ¶
var ColumnChunk_ColumnIndexLength_DEFAULT int32
var ColumnChunk_ColumnIndexOffset_DEFAULT int64
var ColumnChunk_FilePath_DEFAULT string
var ColumnChunk_OffsetIndexLength_DEFAULT int32
var ColumnChunk_OffsetIndexOffset_DEFAULT int64
var ColumnIndex_NullCounts_DEFAULT []int64
var ColumnMetaData_DictionaryPageOffset_DEFAULT int64
var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats
var ColumnMetaData_IndexPageOffset_DEFAULT int64
var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue
var DataPageHeaderV2_IsCompressed_DEFAULT bool = true
var DictionaryPageHeader_IsSorted_DEFAULT bool
var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder
var FileMetaData_CreatedBy_DEFAULT string
var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue
var GoUnusedProtection__ int
var KeyValue_Value_DEFAULT string
var PageHeader_Crc_DEFAULT int32
var RowGroup_SortingColumns_DEFAULT []*SortingColumn
var SchemaElement_FieldID_DEFAULT int32
var SchemaElement_NumChildren_DEFAULT int32
var SchemaElement_Precision_DEFAULT int32
var SchemaElement_Scale_DEFAULT int32
var SchemaElement_TypeLength_DEFAULT int32
var Statistics_DistinctCount_DEFAULT int64
var Statistics_MaxValue_DEFAULT []byte
var Statistics_Max_DEFAULT []byte
var Statistics_MinValue_DEFAULT []byte
var Statistics_Min_DEFAULT []byte
var Statistics_NullCount_DEFAULT int64
Functions ¶
This section is empty.
Types ¶
type BoundaryOrder ¶
type BoundaryOrder int64
Enum to annotate whether lists of min/max elements inside ColumnIndex are ordered and if so, in which direction.
const ( BoundaryOrder_UNORDERED BoundaryOrder = 0 BoundaryOrder_ASCENDING BoundaryOrder = 1 BoundaryOrder_DESCENDING BoundaryOrder = 2 )
func BoundaryOrderFromString ¶
func BoundaryOrderFromString(s string) (BoundaryOrder, error)
func BoundaryOrderPtr ¶
func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder
func (BoundaryOrder) MarshalText ¶
func (p BoundaryOrder) MarshalText() ([]byte, error)
func (BoundaryOrder) String ¶
func (p BoundaryOrder) String() string
func (*BoundaryOrder) UnmarshalText ¶
func (p *BoundaryOrder) UnmarshalText(text []byte) error
type BsonType ¶
type BsonType struct { }
Embedded BSON logical type annotation
Allowed for physical types: BINARY
var LogicalType_BSON_DEFAULT *BsonType
func NewBsonType ¶
func NewBsonType() *BsonType
type ColumnChunk ¶
type ColumnChunk struct { FilePath *string `thrift:"file_path,1" json:"file_path,omitempty"` FileOffset int64 `thrift:"file_offset,2,required" json:"file_offset"` MetaData *ColumnMetaData `thrift:"meta_data,3" json:"meta_data,omitempty"` OffsetIndexOffset *int64 `thrift:"offset_index_offset,4" json:"offset_index_offset,omitempty"` OffsetIndexLength *int32 `thrift:"offset_index_length,5" json:"offset_index_length,omitempty"` ColumnIndexOffset *int64 `thrift:"column_index_offset,6" json:"column_index_offset,omitempty"` ColumnIndexLength *int32 `thrift:"column_index_length,7" json:"column_index_length,omitempty"` }
Attributes:
- FilePath: File where column data is stored. If not set, assumed to be same file as
metadata. This path is relative to the current file.
- FileOffset: Byte offset in file_path to the ColumnMetaData *
- MetaData: Column metadata for this chunk. This is the same content as what is at
file_path/file_offset. Having it here has it replicated in the file metadata.
- OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
- OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
- ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
- ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *
func NewColumnChunk ¶
func NewColumnChunk() *ColumnChunk
func (*ColumnChunk) GetColumnIndexLength ¶
func (p *ColumnChunk) GetColumnIndexLength() int32
func (*ColumnChunk) GetColumnIndexOffset ¶
func (p *ColumnChunk) GetColumnIndexOffset() int64
func (*ColumnChunk) GetFileOffset ¶
func (p *ColumnChunk) GetFileOffset() int64
func (*ColumnChunk) GetFilePath ¶
func (p *ColumnChunk) GetFilePath() string
func (*ColumnChunk) GetMetaData ¶
func (p *ColumnChunk) GetMetaData() *ColumnMetaData
func (*ColumnChunk) GetOffsetIndexLength ¶
func (p *ColumnChunk) GetOffsetIndexLength() int32
func (*ColumnChunk) GetOffsetIndexOffset ¶
func (p *ColumnChunk) GetOffsetIndexOffset() int64
func (*ColumnChunk) IsSetColumnIndexLength ¶
func (p *ColumnChunk) IsSetColumnIndexLength() bool
func (*ColumnChunk) IsSetColumnIndexOffset ¶
func (p *ColumnChunk) IsSetColumnIndexOffset() bool
func (*ColumnChunk) IsSetFilePath ¶
func (p *ColumnChunk) IsSetFilePath() bool
func (*ColumnChunk) IsSetMetaData ¶
func (p *ColumnChunk) IsSetMetaData() bool
func (*ColumnChunk) IsSetOffsetIndexLength ¶
func (p *ColumnChunk) IsSetOffsetIndexLength() bool
func (*ColumnChunk) IsSetOffsetIndexOffset ¶
func (p *ColumnChunk) IsSetOffsetIndexOffset() bool
func (*ColumnChunk) String ¶
func (p *ColumnChunk) String() string
type ColumnIndex ¶
type ColumnIndex struct { NullPages []bool `thrift:"null_pages,1,required" json:"null_pages"` MinValues [][]byte `thrift:"min_values,2,required" json:"min_values"` MaxValues [][]byte `thrift:"max_values,3,required" json:"max_values"` BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" json:"boundary_order"` NullCounts []int64 `thrift:"null_counts,5" json:"null_counts,omitempty"` }
Description for ColumnIndex. Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]
Attributes:
- NullPages: A list of Boolean values to determine the validity of the corresponding
min and max values. If true, a page contains only null values, and writers have to set the corresponding entries in min_values and max_values to byte[0], so that all lists have the same length. If false, the corresponding entries in min_values and max_values must be valid.
- MinValues: Two lists containing lower and upper bounds for the values of each page.
These may be the actual minimum and maximum values found on a page, but can also be (more compact) values that do not exist on a page. For example, instead of storing "Blart Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". Such more compact values must still be valid values within the column's logical type. Readers must make sure that list entries are populated before using them by inspecting null_pages.
- MaxValues
- BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in
which direction. This allows readers to perform binary searches in both lists. Readers cannot assume that max_values[i] <= min_values[i+1], even if the lists are ordered.
- NullCounts: A list containing the number of null values for each page *
func NewColumnIndex ¶
func NewColumnIndex() *ColumnIndex
func (*ColumnIndex) GetBoundaryOrder ¶
func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder
func (*ColumnIndex) GetMaxValues ¶
func (p *ColumnIndex) GetMaxValues() [][]byte
func (*ColumnIndex) GetMinValues ¶
func (p *ColumnIndex) GetMinValues() [][]byte
func (*ColumnIndex) GetNullCounts ¶
func (p *ColumnIndex) GetNullCounts() []int64
func (*ColumnIndex) GetNullPages ¶
func (p *ColumnIndex) GetNullPages() []bool
func (*ColumnIndex) IsSetNullCounts ¶
func (p *ColumnIndex) IsSetNullCounts() bool
func (*ColumnIndex) String ¶
func (p *ColumnIndex) String() string
type ColumnMetaData ¶
type ColumnMetaData struct { Type Type `thrift:"type,1,required" json:"type"` Encodings []Encoding `thrift:"encodings,2,required" json:"encodings"` PathInSchema []string `thrift:"path_in_schema,3,required" json:"path_in_schema"` Codec CompressionCodec `thrift:"codec,4,required" json:"codec"` NumValues int64 `thrift:"num_values,5,required" json:"num_values"` TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" json:"total_uncompressed_size"` TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" json:"total_compressed_size"` KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" json:"key_value_metadata,omitempty"` DataPageOffset int64 `thrift:"data_page_offset,9,required" json:"data_page_offset"` IndexPageOffset *int64 `thrift:"index_page_offset,10" json:"index_page_offset,omitempty"` DictionaryPageOffset *int64 `thrift:"dictionary_page_offset,11" json:"dictionary_page_offset,omitempty"` Statistics *Statistics `thrift:"statistics,12" json:"statistics,omitempty"` EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" json:"encoding_stats,omitempty"` }
Description for column metadata
Attributes:
- Type: Type of this column *
- Encodings: Set of all encodings used for this column. The purpose is to validate
whether we can decode those pages. *
- PathInSchema: Path in schema *
- Codec: Compression codec *
- NumValues: Number of values in this column *
- TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) *
- TotalCompressedSize: total byte size of all compressed pages in this column chunk (including the headers) *
- KeyValueMetadata: Optional key/value metadata *
- DataPageOffset: Byte offset from beginning of file to first data page *
- IndexPageOffset: Byte offset from beginning of file to root index page *
- DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page *
- Statistics: optional statistics for this column chunk
- EncodingStats: Set of all encodings used for pages in this column chunk.
This information can be used to determine if all data pages are dictionary encoded for example *
var ColumnChunk_MetaData_DEFAULT *ColumnMetaData
func NewColumnMetaData ¶
func NewColumnMetaData() *ColumnMetaData
func (*ColumnMetaData) GetCodec ¶
func (p *ColumnMetaData) GetCodec() CompressionCodec
func (*ColumnMetaData) GetDataPageOffset ¶
func (p *ColumnMetaData) GetDataPageOffset() int64
func (*ColumnMetaData) GetDictionaryPageOffset ¶
func (p *ColumnMetaData) GetDictionaryPageOffset() int64
func (*ColumnMetaData) GetEncodingStats ¶
func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats
func (*ColumnMetaData) GetEncodings ¶
func (p *ColumnMetaData) GetEncodings() []Encoding
func (*ColumnMetaData) GetIndexPageOffset ¶
func (p *ColumnMetaData) GetIndexPageOffset() int64
func (*ColumnMetaData) GetKeyValueMetadata ¶
func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue
func (*ColumnMetaData) GetNumValues ¶
func (p *ColumnMetaData) GetNumValues() int64
func (*ColumnMetaData) GetPathInSchema ¶
func (p *ColumnMetaData) GetPathInSchema() []string
func (*ColumnMetaData) GetStatistics ¶
func (p *ColumnMetaData) GetStatistics() *Statistics
func (*ColumnMetaData) GetTotalCompressedSize ¶
func (p *ColumnMetaData) GetTotalCompressedSize() int64
func (*ColumnMetaData) GetTotalUncompressedSize ¶
func (p *ColumnMetaData) GetTotalUncompressedSize() int64
func (*ColumnMetaData) GetType ¶
func (p *ColumnMetaData) GetType() Type
func (*ColumnMetaData) IsSetDictionaryPageOffset ¶
func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool
func (*ColumnMetaData) IsSetEncodingStats ¶
func (p *ColumnMetaData) IsSetEncodingStats() bool
func (*ColumnMetaData) IsSetIndexPageOffset ¶
func (p *ColumnMetaData) IsSetIndexPageOffset() bool
func (*ColumnMetaData) IsSetKeyValueMetadata ¶
func (p *ColumnMetaData) IsSetKeyValueMetadata() bool
func (*ColumnMetaData) IsSetStatistics ¶
func (p *ColumnMetaData) IsSetStatistics() bool
func (*ColumnMetaData) String ¶
func (p *ColumnMetaData) String() string
type ColumnOrder ¶
type ColumnOrder struct {
TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" json:"TYPE_ORDER,omitempty"`
}
Union to specify the order used for the min_value and max_value fields for a column. This union takes the role of an enhanced enum that allows rich elements (which will be needed for a collation-based ordering in the future).
Possible values are:
- TypeDefinedOrder - the column uses the order defined by its logical or physical type (if there is no logical type).
If the reader does not support the value of this union, min and max stats for this column should be ignored.
Attributes:
- TYPE_ORDER: The sort orders for logical types are: UTF8 - unsigned byte-wise comparison INT8 - signed comparison INT16 - signed comparison INT32 - signed comparison INT64 - signed comparison UINT8 - unsigned comparison UINT16 - unsigned comparison UINT32 - unsigned comparison UINT64 - unsigned comparison DECIMAL - signed comparison of the represented value DATE - signed comparison TIME_MILLIS - signed comparison TIME_MICROS - signed comparison TIMESTAMP_MILLIS - signed comparison TIMESTAMP_MICROS - signed comparison INTERVAL - unsigned comparison JSON - unsigned byte-wise comparison BSON - unsigned byte-wise comparison ENUM - unsigned byte-wise comparison LIST - undefined MAP - undefined
In the absence of logical types, the sort order is determined by the physical type:
BOOLEAN - false, true INT32 - signed comparison INT64 - signed comparison INT96 (only used for legacy timestamps) - undefined FLOAT - signed comparison of the represented value (*) DOUBLE - signed comparison of the represented value (*) BYTE_ARRAY - unsigned byte-wise comparison FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison
(*) Because the sorting order is not specified properly for floating
point values (relations vs. total ordering) the following compatibility rules should be applied when reading statistics: - If the min is a NaN, it should be ignored. - If the max is a NaN, it should be ignored. - If the min is +0, the row group may contain -0 values as well. - If the max is -0, the row group may contain +0 values as well. - When looking for NaN values, min and max should be ignored.
func NewColumnOrder ¶
func NewColumnOrder() *ColumnOrder
func (*ColumnOrder) CountSetFieldsColumnOrder ¶
func (p *ColumnOrder) CountSetFieldsColumnOrder() int
func (*ColumnOrder) GetTYPE_ORDER ¶
func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder
func (*ColumnOrder) IsSetTYPE_ORDER ¶
func (p *ColumnOrder) IsSetTYPE_ORDER() bool
func (*ColumnOrder) String ¶
func (p *ColumnOrder) String() string
type CompressionCodec ¶
type CompressionCodec int64
Supported compression algorithms.
Codecs added in 2.4 can be read by readers based on 2.4 and later. Codec support may vary between readers based on the format version and libraries available at runtime. Gzip, Snappy, and LZ4 codecs are widely available, while Zstd and Brotli require additional libraries.
const ( CompressionCodec_UNCOMPRESSED CompressionCodec = 0 CompressionCodec_SNAPPY CompressionCodec = 1 CompressionCodec_GZIP CompressionCodec = 2 CompressionCodec_LZO CompressionCodec = 3 CompressionCodec_BROTLI CompressionCodec = 4 CompressionCodec_LZ4 CompressionCodec = 5 CompressionCodec_ZSTD CompressionCodec = 6 )
func CompressionCodecFromString ¶
func CompressionCodecFromString(s string) (CompressionCodec, error)
func CompressionCodecPtr ¶
func CompressionCodecPtr(v CompressionCodec) *CompressionCodec
func (CompressionCodec) MarshalText ¶
func (p CompressionCodec) MarshalText() ([]byte, error)
func (CompressionCodec) String ¶
func (p CompressionCodec) String() string
func (*CompressionCodec) UnmarshalText ¶
func (p *CompressionCodec) UnmarshalText(text []byte) error
type ConvertedType ¶
type ConvertedType int64
Common types used by frameworks (e.g. hive, pig) using parquet. This helps map between types in those frameworks to the base types in parquet. This is only metadata and not needed to read or write the data.
const ( ConvertedType_UTF8 ConvertedType = 0 ConvertedType_MAP ConvertedType = 1 ConvertedType_MAP_KEY_VALUE ConvertedType = 2 ConvertedType_LIST ConvertedType = 3 ConvertedType_ENUM ConvertedType = 4 ConvertedType_DECIMAL ConvertedType = 5 ConvertedType_DATE ConvertedType = 6 ConvertedType_TIME_MILLIS ConvertedType = 7 ConvertedType_TIME_MICROS ConvertedType = 8 ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 ConvertedType_UINT_8 ConvertedType = 11 ConvertedType_UINT_16 ConvertedType = 12 ConvertedType_UINT_32 ConvertedType = 13 ConvertedType_UINT_64 ConvertedType = 14 ConvertedType_INT_8 ConvertedType = 15 ConvertedType_INT_16 ConvertedType = 16 ConvertedType_INT_32 ConvertedType = 17 ConvertedType_INT_64 ConvertedType = 18 ConvertedType_JSON ConvertedType = 19 ConvertedType_BSON ConvertedType = 20 ConvertedType_INTERVAL ConvertedType = 21 )
var SchemaElement_ConvertedType_DEFAULT ConvertedType
func ConvertedTypeFromString ¶
func ConvertedTypeFromString(s string) (ConvertedType, error)
func ConvertedTypePtr ¶
func ConvertedTypePtr(v ConvertedType) *ConvertedType
func (ConvertedType) MarshalText ¶
func (p ConvertedType) MarshalText() ([]byte, error)
func (ConvertedType) String ¶
func (p ConvertedType) String() string
func (*ConvertedType) UnmarshalText ¶
func (p *ConvertedType) UnmarshalText(text []byte) error
type DataPageHeader ¶
type DataPageHeader struct { NumValues int32 `thrift:"num_values,1,required" json:"num_values"` Encoding Encoding `thrift:"encoding,2,required" json:"encoding"` DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" json:"definition_level_encoding"` RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" json:"repetition_level_encoding"` Statistics *Statistics `thrift:"statistics,5" json:"statistics,omitempty"` }
Data page header
Attributes:
- NumValues: Number of values, including NULLs, in this data page. *
- Encoding: Encoding used for this data page *
- DefinitionLevelEncoding: Encoding used for definition levels *
- RepetitionLevelEncoding: Encoding used for repetition levels *
- Statistics: Optional statistics for the data in this page*
var PageHeader_DataPageHeader_DEFAULT *DataPageHeader
func NewDataPageHeader ¶
func NewDataPageHeader() *DataPageHeader
func (*DataPageHeader) GetDefinitionLevelEncoding ¶
func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding
func (*DataPageHeader) GetEncoding ¶
func (p *DataPageHeader) GetEncoding() Encoding
func (*DataPageHeader) GetNumValues ¶
func (p *DataPageHeader) GetNumValues() int32
func (*DataPageHeader) GetRepetitionLevelEncoding ¶
func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding
func (*DataPageHeader) GetStatistics ¶
func (p *DataPageHeader) GetStatistics() *Statistics
func (*DataPageHeader) IsSetStatistics ¶
func (p *DataPageHeader) IsSetStatistics() bool
func (*DataPageHeader) String ¶
func (p *DataPageHeader) String() string
type DataPageHeaderV2 ¶
type DataPageHeaderV2 struct { NumValues int32 `thrift:"num_values,1,required" json:"num_values"` NumNulls int32 `thrift:"num_nulls,2,required" json:"num_nulls"` NumRows int32 `thrift:"num_rows,3,required" json:"num_rows"` Encoding Encoding `thrift:"encoding,4,required" json:"encoding"` DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" json:"definition_levels_byte_length"` RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" json:"repetition_levels_byte_length"` IsCompressed bool `thrift:"is_compressed,7" json:"is_compressed,omitempty"` Statistics *Statistics `thrift:"statistics,8" json:"statistics,omitempty"` }
New page format allowing reading levels without decompressing the data. Repetition and definition levels are uncompressed. The remaining section containing the data is compressed if is_compressed is true.
Attributes:
- NumValues: Number of values, including NULLs, in this data page. *
- NumNulls: Number of NULL values, in this data page.
Number of non-null = num_values - num_nulls which is also the number of values in the data section *
- NumRows: Number of rows in this data page, which means pages change on record boundaries (r = 0) *
- Encoding: Encoding used for data in this page *
- DefinitionLevelsByteLength: length of the definition levels
- RepetitionLevelsByteLength: length of the repetition levels
- IsCompressed: whether the values are compressed.
Which means the section of the page between definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) is compressed with the compression_codec. If missing, it is considered compressed.
- Statistics: optional statistics for this column chunk
var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2
func NewDataPageHeaderV2 ¶
func NewDataPageHeaderV2() *DataPageHeaderV2
func (*DataPageHeaderV2) GetDefinitionLevelsByteLength ¶
func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32
func (*DataPageHeaderV2) GetEncoding ¶
func (p *DataPageHeaderV2) GetEncoding() Encoding
func (*DataPageHeaderV2) GetIsCompressed ¶
func (p *DataPageHeaderV2) GetIsCompressed() bool
func (*DataPageHeaderV2) GetNumNulls ¶
func (p *DataPageHeaderV2) GetNumNulls() int32
func (*DataPageHeaderV2) GetNumRows ¶
func (p *DataPageHeaderV2) GetNumRows() int32
func (*DataPageHeaderV2) GetNumValues ¶
func (p *DataPageHeaderV2) GetNumValues() int32
func (*DataPageHeaderV2) GetRepetitionLevelsByteLength ¶
func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32
func (*DataPageHeaderV2) GetStatistics ¶
func (p *DataPageHeaderV2) GetStatistics() *Statistics
func (*DataPageHeaderV2) IsSetIsCompressed ¶
func (p *DataPageHeaderV2) IsSetIsCompressed() bool
func (*DataPageHeaderV2) IsSetStatistics ¶
func (p *DataPageHeaderV2) IsSetStatistics() bool
func (*DataPageHeaderV2) String ¶
func (p *DataPageHeaderV2) String() string
type DateType ¶
type DateType struct { }
var LogicalType_DATE_DEFAULT *DateType
func NewDateType ¶
func NewDateType() *DateType
type DecimalType ¶
type DecimalType struct { Scale int32 `thrift:"scale,1,required" json:"scale"` Precision int32 `thrift:"precision,2,required" json:"precision"` }
Decimal logical type annotation
To maintain forward-compatibility in v1, implementations using this logical type must also set scale and precision on the annotated SchemaElement.
Allowed for physical types: INT32, INT64, FIXED, and BINARY
Attributes:
- Scale
- Precision
var LogicalType_DECIMAL_DEFAULT *DecimalType
func NewDecimalType ¶
func NewDecimalType() *DecimalType
func (*DecimalType) GetPrecision ¶
func (p *DecimalType) GetPrecision() int32
func (*DecimalType) GetScale ¶
func (p *DecimalType) GetScale() int32
func (*DecimalType) String ¶
func (p *DecimalType) String() string
type DictionaryPageHeader ¶
type DictionaryPageHeader struct { NumValues int32 `thrift:"num_values,1,required" json:"num_values"` Encoding Encoding `thrift:"encoding,2,required" json:"encoding"` IsSorted *bool `thrift:"is_sorted,3" json:"is_sorted,omitempty"` }
TODO: *
Attributes:
- NumValues: Number of values in the dictionary *
- Encoding: Encoding using this dictionary page *
- IsSorted: If true, the entries in the dictionary are sorted in ascending order *
var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader
func NewDictionaryPageHeader ¶
func NewDictionaryPageHeader() *DictionaryPageHeader
func (*DictionaryPageHeader) GetEncoding ¶
func (p *DictionaryPageHeader) GetEncoding() Encoding
func (*DictionaryPageHeader) GetIsSorted ¶
func (p *DictionaryPageHeader) GetIsSorted() bool
func (*DictionaryPageHeader) GetNumValues ¶
func (p *DictionaryPageHeader) GetNumValues() int32
func (*DictionaryPageHeader) IsSetIsSorted ¶
func (p *DictionaryPageHeader) IsSetIsSorted() bool
func (*DictionaryPageHeader) String ¶
func (p *DictionaryPageHeader) String() string
type Encoding ¶
type Encoding int64
Encodings supported by Parquet. Not all encodings are valid for all types. These enums are also used to specify the encoding of definition and repetition levels. See the accompanying doc for the details of the more complicated encodings.
func EncodingFromString ¶
func EncodingPtr ¶
func (Encoding) MarshalText ¶
func (*Encoding) UnmarshalText ¶
type EnumType ¶
type EnumType struct { }
var LogicalType_ENUM_DEFAULT *EnumType
func NewEnumType ¶
func NewEnumType() *EnumType
type FieldRepetitionType ¶
type FieldRepetitionType int64
Representation of Schemas
const ( FieldRepetitionType_REQUIRED FieldRepetitionType = 0 FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 FieldRepetitionType_REPEATED FieldRepetitionType = 2 )
var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType
func FieldRepetitionTypeFromString ¶
func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error)
func FieldRepetitionTypePtr ¶
func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType
func (FieldRepetitionType) MarshalText ¶
func (p FieldRepetitionType) MarshalText() ([]byte, error)
func (FieldRepetitionType) String ¶
func (p FieldRepetitionType) String() string
func (*FieldRepetitionType) UnmarshalText ¶
func (p *FieldRepetitionType) UnmarshalText(text []byte) error
type FileMetaData ¶
type FileMetaData struct { Version int32 `thrift:"version,1,required" json:"version"` Schema []*SchemaElement `thrift:"schema,2,required" json:"schema"` NumRows int64 `thrift:"num_rows,3,required" json:"num_rows"` RowGroups []*RowGroup `thrift:"row_groups,4,required" json:"row_groups"` KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" json:"key_value_metadata,omitempty"` CreatedBy *string `thrift:"created_by,6" json:"created_by,omitempty"` ColumnOrders []*ColumnOrder `thrift:"column_orders,7" json:"column_orders,omitempty"` }
Description for file metadata
Attributes:
- Version: Version of this file *
- Schema: Parquet schema for this file. This schema contains metadata for all the columns.
The schema is represented as a tree with a single root. The nodes of the tree are flattened to a list by doing a depth-first traversal. The column metadata contains the path in the schema for that column which can be used to map columns to nodes in the schema. The first element is the root *
- NumRows: Number of rows in this file *
- RowGroups: Row groups in this file *
- KeyValueMetadata: Optional key/value metadata *
- CreatedBy: String for application that wrote this file. This should be in the format
<Application> version <App Version> (build <App Build Hash>). e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
- ColumnOrders: Sort order used for the min_value and max_value fields of each column in
this file. Each sort order corresponds to one column, determined by its position in the list, matching the position of the column in the schema.
Without column_orders, the meaning of the min_value and max_value fields is undefined. To ensure well-defined behaviour, if min_value and max_value are written to a Parquet file, column_orders must be written as well.
The obsolete min and max fields are always sorted by signed comparison regardless of column_orders.
func NewFileMetaData ¶
func NewFileMetaData() *FileMetaData
func (*FileMetaData) GetColumnOrders ¶
func (p *FileMetaData) GetColumnOrders() []*ColumnOrder
func (*FileMetaData) GetCreatedBy ¶
func (p *FileMetaData) GetCreatedBy() string
func (*FileMetaData) GetKeyValueMetadata ¶
func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue
func (*FileMetaData) GetNumRows ¶
func (p *FileMetaData) GetNumRows() int64
func (*FileMetaData) GetRowGroups ¶
func (p *FileMetaData) GetRowGroups() []*RowGroup
func (*FileMetaData) GetSchema ¶
func (p *FileMetaData) GetSchema() []*SchemaElement
func (*FileMetaData) GetVersion ¶
func (p *FileMetaData) GetVersion() int32
func (*FileMetaData) IsSetColumnOrders ¶
func (p *FileMetaData) IsSetColumnOrders() bool
func (*FileMetaData) IsSetCreatedBy ¶
func (p *FileMetaData) IsSetCreatedBy() bool
func (*FileMetaData) IsSetKeyValueMetadata ¶
func (p *FileMetaData) IsSetKeyValueMetadata() bool
func (*FileMetaData) Read ¶
func (meta *FileMetaData) Read(r io.Reader) error
FileMetaData.Read reads the object from an io.Reader
func (*FileMetaData) String ¶
func (p *FileMetaData) String() string
type IndexPageHeader ¶
type IndexPageHeader struct { }
var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader
func NewIndexPageHeader ¶
func NewIndexPageHeader() *IndexPageHeader
func (*IndexPageHeader) String ¶
func (p *IndexPageHeader) String() string
type IntType ¶
type IntType struct { BitWidth int8 `thrift:"bitWidth,1,required" json:"bitWidth"` IsSigned bool `thrift:"isSigned,2,required" json:"isSigned"` }
Integer logical type annotation
bitWidth must be 8, 16, 32, or 64.
Allowed for physical types: INT32, INT64
Attributes:
- BitWidth
- IsSigned
var LogicalType_INTEGER_DEFAULT *IntType
func NewIntType ¶
func NewIntType() *IntType
func (*IntType) GetBitWidth ¶
func (*IntType) GetIsSigned ¶
type JsonType ¶
type JsonType struct { }
Embedded JSON logical type annotation
Allowed for physical types: BINARY
var LogicalType_JSON_DEFAULT *JsonType
func NewJsonType ¶
func NewJsonType() *JsonType
type KeyValue ¶
type KeyValue struct { Key string `thrift:"key,1,required" json:"key"` Value *string `thrift:"value,2" json:"value,omitempty"` }
Wrapper struct to store key values
Attributes:
- Key
- Value
func NewKeyValue ¶
func NewKeyValue() *KeyValue
func (*KeyValue) IsSetValue ¶
type ListType ¶
type ListType struct { }
var LogicalType_LIST_DEFAULT *ListType
func NewListType ¶
func NewListType() *ListType
type LogicalType ¶
type LogicalType struct { STRING *StringType `thrift:"STRING,1" json:"STRING,omitempty"` MAP *MapType `thrift:"MAP,2" json:"MAP,omitempty"` LIST *ListType `thrift:"LIST,3" json:"LIST,omitempty"` ENUM *EnumType `thrift:"ENUM,4" json:"ENUM,omitempty"` DECIMAL *DecimalType `thrift:"DECIMAL,5" json:"DECIMAL,omitempty"` DATE *DateType `thrift:"DATE,6" json:"DATE,omitempty"` TIME *TimeType `thrift:"TIME,7" json:"TIME,omitempty"` TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" json:"TIMESTAMP,omitempty"` // unused field # 9 INTEGER *IntType `thrift:"INTEGER,10" json:"INTEGER,omitempty"` UNKNOWN *NullType `thrift:"UNKNOWN,11" json:"UNKNOWN,omitempty"` JSON *JsonType `thrift:"JSON,12" json:"JSON,omitempty"` BSON *BsonType `thrift:"BSON,13" json:"BSON,omitempty"` UUID *UUIDType `thrift:"UUID,14" json:"UUID,omitempty"` }
LogicalType annotations to replace ConvertedType.
To maintain compatibility, implementations using LogicalType for a SchemaElement must also set the corresponding ConvertedType from the following table.
Attributes:
- STRING
- MAP
- LIST
- ENUM
- DECIMAL
- DATE
- TIME
- TIMESTAMP
- INTEGER
- UNKNOWN
- JSON
- BSON
- UUID
var SchemaElement_LogicalType_DEFAULT *LogicalType
func NewLogicalType ¶
func NewLogicalType() *LogicalType
func (*LogicalType) CountSetFieldsLogicalType ¶
func (p *LogicalType) CountSetFieldsLogicalType() int
func (*LogicalType) GetBSON ¶
func (p *LogicalType) GetBSON() *BsonType
func (*LogicalType) GetDATE ¶
func (p *LogicalType) GetDATE() *DateType
func (*LogicalType) GetDECIMAL ¶
func (p *LogicalType) GetDECIMAL() *DecimalType
func (*LogicalType) GetENUM ¶
func (p *LogicalType) GetENUM() *EnumType
func (*LogicalType) GetINTEGER ¶
func (p *LogicalType) GetINTEGER() *IntType
func (*LogicalType) GetJSON ¶
func (p *LogicalType) GetJSON() *JsonType
func (*LogicalType) GetLIST ¶
func (p *LogicalType) GetLIST() *ListType
func (*LogicalType) GetMAP ¶
func (p *LogicalType) GetMAP() *MapType
func (*LogicalType) GetSTRING ¶
func (p *LogicalType) GetSTRING() *StringType
func (*LogicalType) GetTIME ¶
func (p *LogicalType) GetTIME() *TimeType
func (*LogicalType) GetTIMESTAMP ¶
func (p *LogicalType) GetTIMESTAMP() *TimestampType
func (*LogicalType) GetUNKNOWN ¶
func (p *LogicalType) GetUNKNOWN() *NullType
func (*LogicalType) GetUUID ¶
func (p *LogicalType) GetUUID() *UUIDType
func (*LogicalType) IsSetBSON ¶
func (p *LogicalType) IsSetBSON() bool
func (*LogicalType) IsSetDATE ¶
func (p *LogicalType) IsSetDATE() bool
func (*LogicalType) IsSetDECIMAL ¶
func (p *LogicalType) IsSetDECIMAL() bool
func (*LogicalType) IsSetENUM ¶
func (p *LogicalType) IsSetENUM() bool
func (*LogicalType) IsSetINTEGER ¶
func (p *LogicalType) IsSetINTEGER() bool
func (*LogicalType) IsSetJSON ¶
func (p *LogicalType) IsSetJSON() bool
func (*LogicalType) IsSetLIST ¶
func (p *LogicalType) IsSetLIST() bool
func (*LogicalType) IsSetMAP ¶
func (p *LogicalType) IsSetMAP() bool
func (*LogicalType) IsSetSTRING ¶
func (p *LogicalType) IsSetSTRING() bool
func (*LogicalType) IsSetTIME ¶
func (p *LogicalType) IsSetTIME() bool
func (*LogicalType) IsSetTIMESTAMP ¶
func (p *LogicalType) IsSetTIMESTAMP() bool
func (*LogicalType) IsSetUNKNOWN ¶
func (p *LogicalType) IsSetUNKNOWN() bool
func (*LogicalType) IsSetUUID ¶
func (p *LogicalType) IsSetUUID() bool
func (*LogicalType) String ¶
func (p *LogicalType) String() string
type MapType ¶
type MapType struct { }
var LogicalType_MAP_DEFAULT *MapType
func NewMapType ¶
func NewMapType() *MapType
type MicroSeconds ¶
type MicroSeconds struct { }
var TimeUnit_MICROS_DEFAULT *MicroSeconds
func NewMicroSeconds ¶
func NewMicroSeconds() *MicroSeconds
func (*MicroSeconds) String ¶
func (p *MicroSeconds) String() string
type MilliSeconds ¶
type MilliSeconds struct { }
Time units for logical types
var TimeUnit_MILLIS_DEFAULT *MilliSeconds
func NewMilliSeconds ¶
func NewMilliSeconds() *MilliSeconds
func (*MilliSeconds) String ¶
func (p *MilliSeconds) String() string
type NullType ¶
type NullType struct { }
Logical type to annotate a column that is always null.
Sometimes when discovering the schema of existing data, values are always null and the physical type can't be determined. This annotation signals the case where the physical type was guessed from all null values.
var LogicalType_UNKNOWN_DEFAULT *NullType
func NewNullType ¶
func NewNullType() *NullType
type OffsetIndex ¶
type OffsetIndex struct {
PageLocations []*PageLocation `thrift:"page_locations,1,required" json:"page_locations"`
}
Attributes:
- PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required
that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
func NewOffsetIndex ¶
func NewOffsetIndex() *OffsetIndex
func (*OffsetIndex) GetPageLocations ¶
func (p *OffsetIndex) GetPageLocations() []*PageLocation
func (*OffsetIndex) String ¶
func (p *OffsetIndex) String() string
type PageEncodingStats ¶
type PageEncodingStats struct { PageType PageType `thrift:"page_type,1,required" json:"page_type"` Encoding Encoding `thrift:"encoding,2,required" json:"encoding"` Count int32 `thrift:"count,3,required" json:"count"` }
statistics of a given page type and encoding
Attributes:
- PageType: the page type (data/dic/...) *
- Encoding: encoding of the page *
- Count: number of pages of this type with this encoding *
func NewPageEncodingStats ¶
func NewPageEncodingStats() *PageEncodingStats
func (*PageEncodingStats) GetCount ¶
func (p *PageEncodingStats) GetCount() int32
func (*PageEncodingStats) GetEncoding ¶
func (p *PageEncodingStats) GetEncoding() Encoding
func (*PageEncodingStats) GetPageType ¶
func (p *PageEncodingStats) GetPageType() PageType
func (*PageEncodingStats) String ¶
func (p *PageEncodingStats) String() string
type PageHeader ¶
type PageHeader struct { Type PageType `thrift:"type,1,required" json:"type"` UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" json:"uncompressed_page_size"` CompressedPageSize int32 `thrift:"compressed_page_size,3,required" json:"compressed_page_size"` Crc *int32 `thrift:"crc,4" json:"crc,omitempty"` DataPageHeader *DataPageHeader `thrift:"data_page_header,5" json:"data_page_header,omitempty"` IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" json:"index_page_header,omitempty"` DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" json:"dictionary_page_header,omitempty"` DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" json:"data_page_header_v2,omitempty"` }
Attributes:
- Type: the type of the page: indicates which of the *_header fields is set *
- UncompressedPageSize: Uncompressed page size in bytes (not including this header) *
- CompressedPageSize: Compressed page size in bytes (not including this header) *
- Crc: 32bit crc for the data below. This allows for disabling checksumming in HDFS
if only a few pages need to be read
- DataPageHeader
- IndexPageHeader
- DictionaryPageHeader
- DataPageHeaderV2
func NewPageHeader ¶
func NewPageHeader() *PageHeader
func (*PageHeader) GetCompressedPageSize ¶
func (p *PageHeader) GetCompressedPageSize() int32
func (*PageHeader) GetCrc ¶
func (p *PageHeader) GetCrc() int32
func (*PageHeader) GetDataPageHeader ¶
func (p *PageHeader) GetDataPageHeader() *DataPageHeader
func (*PageHeader) GetDataPageHeaderV2 ¶
func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2
func (*PageHeader) GetDictionaryPageHeader ¶
func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader
func (*PageHeader) GetIndexPageHeader ¶
func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader
func (*PageHeader) GetType ¶
func (p *PageHeader) GetType() PageType
func (*PageHeader) GetUncompressedPageSize ¶
func (p *PageHeader) GetUncompressedPageSize() int32
func (*PageHeader) IsSetCrc ¶
func (p *PageHeader) IsSetCrc() bool
func (*PageHeader) IsSetDataPageHeader ¶
func (p *PageHeader) IsSetDataPageHeader() bool
func (*PageHeader) IsSetDataPageHeaderV2 ¶
func (p *PageHeader) IsSetDataPageHeaderV2() bool
func (*PageHeader) IsSetDictionaryPageHeader ¶
func (p *PageHeader) IsSetDictionaryPageHeader() bool
func (*PageHeader) IsSetIndexPageHeader ¶
func (p *PageHeader) IsSetIndexPageHeader() bool
func (*PageHeader) Read ¶
func (ph *PageHeader) Read(r io.Reader) error
PageHeader.Read reads the object from an io.Reader
func (*PageHeader) String ¶
func (p *PageHeader) String() string
type PageLocation ¶
type PageLocation struct { Offset int64 `thrift:"offset,1,required" json:"offset"` CompressedPageSize int32 `thrift:"compressed_page_size,2,required" json:"compressed_page_size"` FirstRowIndex int64 `thrift:"first_row_index,3,required" json:"first_row_index"` }
Attributes:
- Offset: Offset of the page in the file *
- CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header
length
- FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages
change on record boundaries (r = 0).
func NewPageLocation ¶
func NewPageLocation() *PageLocation
func (*PageLocation) GetCompressedPageSize ¶
func (p *PageLocation) GetCompressedPageSize() int32
func (*PageLocation) GetFirstRowIndex ¶
func (p *PageLocation) GetFirstRowIndex() int64
func (*PageLocation) GetOffset ¶
func (p *PageLocation) GetOffset() int64
func (*PageLocation) String ¶
func (p *PageLocation) String() string
type PageType ¶
type PageType int64
func PageTypeFromString ¶
func PageTypePtr ¶
func (PageType) MarshalText ¶
func (*PageType) UnmarshalText ¶
type RowGroup ¶
type RowGroup struct { Columns []*ColumnChunk `thrift:"columns,1,required" json:"columns"` TotalByteSize int64 `thrift:"total_byte_size,2,required" json:"total_byte_size"` NumRows int64 `thrift:"num_rows,3,required" json:"num_rows"` SortingColumns []*SortingColumn `thrift:"sorting_columns,4" json:"sorting_columns,omitempty"` }
Attributes:
- Columns: Metadata for each column chunk in this row group.
This list must have the same order as the SchemaElement list in FileMetaData.
- TotalByteSize: Total byte size of all the uncompressed column data in this row group *
- NumRows: Number of rows in this row group *
- SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup.
The sorting columns can be a subset of all the columns.
func NewRowGroup ¶
func NewRowGroup() *RowGroup
func (*RowGroup) GetColumns ¶
func (p *RowGroup) GetColumns() []*ColumnChunk
func (*RowGroup) GetNumRows ¶
func (*RowGroup) GetSortingColumns ¶
func (p *RowGroup) GetSortingColumns() []*SortingColumn
func (*RowGroup) GetTotalByteSize ¶
func (*RowGroup) IsSetSortingColumns ¶
type SchemaElement ¶
type SchemaElement struct { Type *Type `thrift:"type,1" json:"type,omitempty"` TypeLength *int32 `thrift:"type_length,2" json:"type_length,omitempty"` RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" json:"repetition_type,omitempty"` Name string `thrift:"name,4,required" json:"name"` NumChildren *int32 `thrift:"num_children,5" json:"num_children,omitempty"` ConvertedType *ConvertedType `thrift:"converted_type,6" json:"converted_type,omitempty"` Scale *int32 `thrift:"scale,7" json:"scale,omitempty"` Precision *int32 `thrift:"precision,8" json:"precision,omitempty"` FieldID *int32 `thrift:"field_id,9" json:"field_id,omitempty"` LogicalType *LogicalType `thrift:"logicalType,10" json:"logicalType,omitempty"` }
Represents an element inside a schema definition.
- if it is a group (inner node) then type is undefined and num_children is defined
- if it is a primitive type (leaf) then type is defined and num_children is undefined
the nodes are listed in depth first traversal order.
Attributes:
- Type: Data type for this field. Not set if the current element is a non-leaf node
- TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values.
Otherwise, if specified, this is the maximum bit length to store any of the values. (e.g. a low cardinality INT col could have this set to 3). Note that this is in the schema, and therefore fixed for the entire file.
- RepetitionType: repetition of the field. The root of the schema does not have a repetition_type.
All other nodes must have one
- Name: Name of the field in the schema
- NumChildren: Nested fields. Since thrift does not support nested fields,
the nesting is flattened to a single list by a depth-first traversal. The children count is used to construct the nested relationship. This field is not set when the element is a primitive type
- ConvertedType: When the schema is the result of a conversion from another model
Used to record the original type to help with cross conversion.
- Scale: Used when this column contains decimal data.
See the DECIMAL converted type for more details.
- Precision
- FieldID: When the original schema supports field ids, this will save the
original field id in the parquet schema
- LogicalType: The logical type of this SchemaElement
LogicalType replaces ConvertedType, but ConvertedType is still required for some logical types to ensure forward-compatibility in format v1.
func NewSchemaElement ¶
func NewSchemaElement() *SchemaElement
func (*SchemaElement) GetConvertedType ¶
func (p *SchemaElement) GetConvertedType() ConvertedType
func (*SchemaElement) GetFieldID ¶
func (p *SchemaElement) GetFieldID() int32
func (*SchemaElement) GetLogicalType ¶
func (p *SchemaElement) GetLogicalType() *LogicalType
func (*SchemaElement) GetName ¶
func (p *SchemaElement) GetName() string
func (*SchemaElement) GetNumChildren ¶
func (p *SchemaElement) GetNumChildren() int32
func (*SchemaElement) GetPrecision ¶
func (p *SchemaElement) GetPrecision() int32
func (*SchemaElement) GetRepetitionType ¶
func (p *SchemaElement) GetRepetitionType() FieldRepetitionType
func (*SchemaElement) GetScale ¶
func (p *SchemaElement) GetScale() int32
func (*SchemaElement) GetType ¶
func (p *SchemaElement) GetType() Type
func (*SchemaElement) GetTypeLength ¶
func (p *SchemaElement) GetTypeLength() int32
func (*SchemaElement) IsSetConvertedType ¶
func (p *SchemaElement) IsSetConvertedType() bool
func (*SchemaElement) IsSetFieldID ¶
func (p *SchemaElement) IsSetFieldID() bool
func (*SchemaElement) IsSetLogicalType ¶
func (p *SchemaElement) IsSetLogicalType() bool
func (*SchemaElement) IsSetNumChildren ¶
func (p *SchemaElement) IsSetNumChildren() bool
func (*SchemaElement) IsSetPrecision ¶
func (p *SchemaElement) IsSetPrecision() bool
func (*SchemaElement) IsSetRepetitionType ¶
func (p *SchemaElement) IsSetRepetitionType() bool
func (*SchemaElement) IsSetScale ¶
func (p *SchemaElement) IsSetScale() bool
func (*SchemaElement) IsSetType ¶
func (p *SchemaElement) IsSetType() bool
func (*SchemaElement) IsSetTypeLength ¶
func (p *SchemaElement) IsSetTypeLength() bool
func (*SchemaElement) String ¶
func (p *SchemaElement) String() string
type SortingColumn ¶
type SortingColumn struct { ColumnIdx int32 `thrift:"column_idx,1,required" json:"column_idx"` Descending bool `thrift:"descending,2,required" json:"descending"` NullsFirst bool `thrift:"nulls_first,3,required" json:"nulls_first"` }
Wrapper struct to specify sort order
Attributes:
- ColumnIdx: The column index (in this row group) *
- Descending: If true, indicates this column is sorted in descending order. *
- NullsFirst: If true, nulls will come before non-null values, otherwise,
nulls go at the end.
func NewSortingColumn ¶
func NewSortingColumn() *SortingColumn
func (*SortingColumn) GetColumnIdx ¶
func (p *SortingColumn) GetColumnIdx() int32
func (*SortingColumn) GetDescending ¶
func (p *SortingColumn) GetDescending() bool
func (*SortingColumn) GetNullsFirst ¶
func (p *SortingColumn) GetNullsFirst() bool
func (*SortingColumn) String ¶
func (p *SortingColumn) String() string
type Statistics ¶
type Statistics struct { Max []byte `thrift:"max,1" json:"max,omitempty"` Min []byte `thrift:"min,2" json:"min,omitempty"` NullCount *int64 `thrift:"null_count,3" json:"null_count,omitempty"` DistinctCount *int64 `thrift:"distinct_count,4" json:"distinct_count,omitempty"` MaxValue []byte `thrift:"max_value,5" json:"max_value,omitempty"` MinValue []byte `thrift:"min_value,6" json:"min_value,omitempty"` }
Statistics per row group and per page. All fields are optional.
Attributes:
- Max: DEPRECATED: min and max value of the column. Use min_value and max_value.
Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.
These fields encode min and max values determined by signed comparison only. New files should use the correct order for a column's logical type and store the values in the min_value and max_value fields.
To support older readers, these may be set when the column order is signed.
- Min
- NullCount: count of null values in the column
- DistinctCount: count of distinct values occurring
- MaxValue: Min and max values for the column, determined by its ColumnOrder.
Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.
- MinValue
var ColumnMetaData_Statistics_DEFAULT *Statistics
var DataPageHeaderV2_Statistics_DEFAULT *Statistics
var DataPageHeader_Statistics_DEFAULT *Statistics
func NewStatistics ¶
func NewStatistics() *Statistics
func (*Statistics) GetDistinctCount ¶
func (p *Statistics) GetDistinctCount() int64
func (*Statistics) GetMax ¶
func (p *Statistics) GetMax() []byte
func (*Statistics) GetMaxValue ¶
func (p *Statistics) GetMaxValue() []byte
func (*Statistics) GetMin ¶
func (p *Statistics) GetMin() []byte
func (*Statistics) GetMinValue ¶
func (p *Statistics) GetMinValue() []byte
func (*Statistics) GetNullCount ¶
func (p *Statistics) GetNullCount() int64
func (*Statistics) IsSetDistinctCount ¶
func (p *Statistics) IsSetDistinctCount() bool
func (*Statistics) IsSetMax ¶
func (p *Statistics) IsSetMax() bool
func (*Statistics) IsSetMaxValue ¶
func (p *Statistics) IsSetMaxValue() bool
func (*Statistics) IsSetMin ¶
func (p *Statistics) IsSetMin() bool
func (*Statistics) IsSetMinValue ¶
func (p *Statistics) IsSetMinValue() bool
func (*Statistics) IsSetNullCount ¶
func (p *Statistics) IsSetNullCount() bool
func (*Statistics) String ¶
func (p *Statistics) String() string
type StringType ¶
type StringType struct { }
Empty structs to use as logical type annotations
var LogicalType_STRING_DEFAULT *StringType
func NewStringType ¶
func NewStringType() *StringType
func (*StringType) String ¶
func (p *StringType) String() string
type TimeType ¶
type TimeType struct { IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" json:"isAdjustedToUTC"` Unit *TimeUnit `thrift:"unit,2,required" json:"unit"` }
Time logical type annotation
Allowed for physical types: INT32 (millis), INT64 (micros)
Attributes:
- IsAdjustedToUTC
- Unit
var LogicalType_TIME_DEFAULT *TimeType
func NewTimeType ¶
func NewTimeType() *TimeType
func (*TimeType) GetIsAdjustedToUTC ¶
type TimeUnit ¶
type TimeUnit struct { MILLIS *MilliSeconds `thrift:"MILLIS,1" json:"MILLIS,omitempty"` MICROS *MicroSeconds `thrift:"MICROS,2" json:"MICROS,omitempty"` }
Attributes:
- MILLIS
- MICROS
var TimeType_Unit_DEFAULT *TimeUnit
var TimestampType_Unit_DEFAULT *TimeUnit
func NewTimeUnit ¶
func NewTimeUnit() *TimeUnit
func (*TimeUnit) CountSetFieldsTimeUnit ¶
func (*TimeUnit) GetMICROS ¶
func (p *TimeUnit) GetMICROS() *MicroSeconds
func (*TimeUnit) GetMILLIS ¶
func (p *TimeUnit) GetMILLIS() *MilliSeconds
func (*TimeUnit) IsSetMICROS ¶
func (*TimeUnit) IsSetMILLIS ¶
type TimestampType ¶
type TimestampType struct { IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" json:"isAdjustedToUTC"` Unit *TimeUnit `thrift:"unit,2,required" json:"unit"` }
Timestamp logical type annotation
Allowed for physical types: INT64
Attributes:
- IsAdjustedToUTC
- Unit
var LogicalType_TIMESTAMP_DEFAULT *TimestampType
func NewTimestampType ¶
func NewTimestampType() *TimestampType
func (*TimestampType) GetIsAdjustedToUTC ¶
func (p *TimestampType) GetIsAdjustedToUTC() bool
func (*TimestampType) GetUnit ¶
func (p *TimestampType) GetUnit() *TimeUnit
func (*TimestampType) IsSetUnit ¶
func (p *TimestampType) IsSetUnit() bool
func (*TimestampType) String ¶
func (p *TimestampType) String() string
type Type ¶
type Type int64
Types supported by Parquet. These types are intended to be used in combination with the encodings to control the on disk storage format. For example INT16 is not included as a type since a good encoding of INT32 would handle this.
var SchemaElement_Type_DEFAULT Type
func TypeFromString ¶
func (Type) MarshalText ¶
func (*Type) UnmarshalText ¶
type TypeDefinedOrder ¶
type TypeDefinedOrder struct { }
Empty struct to signal the order defined by the physical or logical type
var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder
func NewTypeDefinedOrder ¶
func NewTypeDefinedOrder() *TypeDefinedOrder
func (*TypeDefinedOrder) String ¶
func (p *TypeDefinedOrder) String() string