parquet

package
v18.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 15, 2024 License: Apache-2.0, BSD-3-Clause Imports: 10 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var AesGcmCtrV1_AadFileUnique_DEFAULT []byte
View Source
var AesGcmCtrV1_AadPrefix_DEFAULT []byte
View Source
var AesGcmCtrV1_SupplyAadPrefix_DEFAULT bool
View Source
var AesGcmV1_AadFileUnique_DEFAULT []byte
View Source
var AesGcmV1_AadPrefix_DEFAULT []byte
View Source
var AesGcmV1_SupplyAadPrefix_DEFAULT bool
View Source
var ColumnChunk_ColumnIndexLength_DEFAULT int32
View Source
var ColumnChunk_ColumnIndexOffset_DEFAULT int64
View Source
var ColumnChunk_EncryptedColumnMetadata_DEFAULT []byte
View Source
var ColumnChunk_FilePath_DEFAULT string
View Source
var ColumnChunk_OffsetIndexLength_DEFAULT int32
View Source
var ColumnChunk_OffsetIndexOffset_DEFAULT int64
View Source
var ColumnIndex_DefinitionLevelHistograms_DEFAULT []int64
View Source
var ColumnIndex_NullCounts_DEFAULT []int64
View Source
var ColumnIndex_RepetitionLevelHistograms_DEFAULT []int64
View Source
var ColumnMetaData_BloomFilterLength_DEFAULT int32
View Source
var ColumnMetaData_BloomFilterOffset_DEFAULT int64
View Source
var ColumnMetaData_DictionaryPageOffset_DEFAULT int64
View Source
var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats
View Source
var ColumnMetaData_IndexPageOffset_DEFAULT int64
View Source
var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue
View Source
var DataPageHeaderV2_IsCompressed_DEFAULT bool = true
View Source
var DictionaryPageHeader_IsSorted_DEFAULT bool
View Source
var EncryptionWithColumnKey_KeyMetadata_DEFAULT []byte
View Source
var FileCryptoMetaData_KeyMetadata_DEFAULT []byte
View Source
var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder
View Source
var FileMetaData_CreatedBy_DEFAULT string
View Source
var FileMetaData_FooterSigningKeyMetadata_DEFAULT []byte
View Source
var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue
View Source
var GoUnusedProtection__ int
View Source
var KeyValue_Value_DEFAULT string
View Source
var OffsetIndex_UnencodedByteArrayDataBytes_DEFAULT []int64
View Source
var PageHeader_Crc_DEFAULT int32
View Source
var RowGroup_FileOffset_DEFAULT int64
View Source
var RowGroup_Ordinal_DEFAULT int16
View Source
var RowGroup_SortingColumns_DEFAULT []*SortingColumn
View Source
var RowGroup_TotalCompressedSize_DEFAULT int64
View Source
var SchemaElement_FieldID_DEFAULT int32
View Source
var SchemaElement_NumChildren_DEFAULT int32
View Source
var SchemaElement_Precision_DEFAULT int32
View Source
var SchemaElement_Scale_DEFAULT int32
View Source
var SchemaElement_TypeLength_DEFAULT int32
View Source
var SizeStatistics_DefinitionLevelHistogram_DEFAULT []int64
View Source
var SizeStatistics_RepetitionLevelHistogram_DEFAULT []int64
View Source
var SizeStatistics_UnencodedByteArrayDataBytes_DEFAULT int64
View Source
var Statistics_DistinctCount_DEFAULT int64
View Source
var Statistics_IsMaxValueExact_DEFAULT bool
View Source
var Statistics_IsMinValueExact_DEFAULT bool
View Source
var Statistics_MaxValue_DEFAULT []byte
View Source
var Statistics_Max_DEFAULT []byte
View Source
var Statistics_MinValue_DEFAULT []byte
View Source
var Statistics_Min_DEFAULT []byte
View Source
var Statistics_NullCount_DEFAULT int64

Functions

This section is empty.

Types

type AesGcmCtrV1

type AesGcmCtrV1 struct {
	AadPrefix       []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"`
	AadFileUnique   []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"`
	SupplyAadPrefix *bool  `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"`
}

Attributes:

  • AadPrefix: AAD prefix
  • AadFileUnique: Unique file identifier part of AAD suffix
  • SupplyAadPrefix: In files encrypted with AAD prefix without storing it, readers must supply the prefix

var EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT *AesGcmCtrV1

func NewAesGcmCtrV1

func NewAesGcmCtrV1() *AesGcmCtrV1

func (*AesGcmCtrV1) Equals

func (p *AesGcmCtrV1) Equals(other *AesGcmCtrV1) bool

func (*AesGcmCtrV1) GetAadFileUnique

func (p *AesGcmCtrV1) GetAadFileUnique() []byte

func (*AesGcmCtrV1) GetAadPrefix

func (p *AesGcmCtrV1) GetAadPrefix() []byte

func (*AesGcmCtrV1) GetSupplyAadPrefix

func (p *AesGcmCtrV1) GetSupplyAadPrefix() bool

func (*AesGcmCtrV1) IsSetAadFileUnique

func (p *AesGcmCtrV1) IsSetAadFileUnique() bool

func (*AesGcmCtrV1) IsSetAadPrefix

func (p *AesGcmCtrV1) IsSetAadPrefix() bool

func (*AesGcmCtrV1) IsSetSupplyAadPrefix

func (p *AesGcmCtrV1) IsSetSupplyAadPrefix() bool

func (*AesGcmCtrV1) LogValue

func (p *AesGcmCtrV1) LogValue() slog.Value

func (*AesGcmCtrV1) Read

func (p *AesGcmCtrV1) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmCtrV1) ReadField1

func (p *AesGcmCtrV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmCtrV1) ReadField2

func (p *AesGcmCtrV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmCtrV1) ReadField3

func (p *AesGcmCtrV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmCtrV1) String

func (p *AesGcmCtrV1) String() string

func (*AesGcmCtrV1) Validate

func (p *AesGcmCtrV1) Validate() error

func (*AesGcmCtrV1) Write

func (p *AesGcmCtrV1) Write(ctx context.Context, oprot thrift.TProtocol) error

type AesGcmV1

type AesGcmV1 struct {
	AadPrefix       []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"`
	AadFileUnique   []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"`
	SupplyAadPrefix *bool  `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"`
}

Attributes:

  • AadPrefix: AAD prefix
  • AadFileUnique: Unique file identifier part of AAD suffix
  • SupplyAadPrefix: In files encrypted with AAD prefix without storing it, readers must supply the prefix

var EncryptionAlgorithm_AES_GCM_V1_DEFAULT *AesGcmV1

func NewAesGcmV1

func NewAesGcmV1() *AesGcmV1

func (*AesGcmV1) Equals

func (p *AesGcmV1) Equals(other *AesGcmV1) bool

func (*AesGcmV1) GetAadFileUnique

func (p *AesGcmV1) GetAadFileUnique() []byte

func (*AesGcmV1) GetAadPrefix

func (p *AesGcmV1) GetAadPrefix() []byte

func (*AesGcmV1) GetSupplyAadPrefix

func (p *AesGcmV1) GetSupplyAadPrefix() bool

func (*AesGcmV1) IsSetAadFileUnique

func (p *AesGcmV1) IsSetAadFileUnique() bool

func (*AesGcmV1) IsSetAadPrefix

func (p *AesGcmV1) IsSetAadPrefix() bool

func (*AesGcmV1) IsSetSupplyAadPrefix

func (p *AesGcmV1) IsSetSupplyAadPrefix() bool

func (*AesGcmV1) LogValue

func (p *AesGcmV1) LogValue() slog.Value

func (*AesGcmV1) Read

func (p *AesGcmV1) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmV1) ReadField1

func (p *AesGcmV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmV1) ReadField2

func (p *AesGcmV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmV1) ReadField3

func (p *AesGcmV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*AesGcmV1) String

func (p *AesGcmV1) String() string

func (*AesGcmV1) Validate

func (p *AesGcmV1) Validate() error

func (*AesGcmV1) Write

func (p *AesGcmV1) Write(ctx context.Context, oprot thrift.TProtocol) error

type BloomFilterAlgorithm

type BloomFilterAlgorithm struct {
	BLOCK *SplitBlockAlgorithm `thrift:"BLOCK,1" db:"BLOCK" json:"BLOCK,omitempty"`
}

The algorithm used in Bloom filter. *

Attributes:

  • BLOCK: Block-based Bloom filter. *
var BloomFilterHeader_Algorithm_DEFAULT *BloomFilterAlgorithm

func NewBloomFilterAlgorithm

func NewBloomFilterAlgorithm() *BloomFilterAlgorithm

func (*BloomFilterAlgorithm) CountSetFieldsBloomFilterAlgorithm

func (p *BloomFilterAlgorithm) CountSetFieldsBloomFilterAlgorithm() int

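func (*BloomFilterAlgorithm) Equals

func (p *BloomFilterAlgorithm) Equals(other *BloomFilterAlgorithm) bool

func (*BloomFilterAlgorithm) GetBLOCK

func (p *BloomFilterAlgorithm) GetBLOCK() *SplitBlockAlgorithm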

func (*BloomFilterAlgorithm) IsSetBLOCK

func (p *BloomFilterAlgorithm) IsSetBLOCK() bool

func (*BloomFilterAlgorithm) LogValue

func (p *BloomFilterAlgorithm) LogValue() slog.Value

func (*BloomFilterAlgorithm) Read

func (p *BloomFilterAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterAlgorithm) ReadField1

func (p *BloomFilterAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterAlgorithm) String

func (p *BloomFilterAlgorithm) String() string

func (*BloomFilterAlgorithm) Validate

func (p *BloomFilterAlgorithm) Validate() error

func (*BloomFilterAlgorithm) Write

func (p *BloomFilterAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error

type BloomFilterCompression

type BloomFilterCompression struct {
	UNCOMPRESSED *Uncompressed `thrift:"UNCOMPRESSED,1" db:"UNCOMPRESSED" json:"UNCOMPRESSED,omitempty"`
}

Attributes:

  • UNCOMPRESSED
var BloomFilterHeader_Compression_DEFAULT *BloomFilterCompression

func NewBloomFilterCompression

func NewBloomFilterCompression() *BloomFilterCompression

func (*BloomFilterCompression) CountSetFieldsBloomFilterCompression

func (p *BloomFilterCompression) CountSetFieldsBloomFilterCompression() int

func (*BloomFilterCompression) Equals

func (p *BloomFilterCompression) Equals(other *BloomFilterCompression) bool

func (*BloomFilterCompression) GetUNCOMPRESSED

func (p *BloomFilterCompression) GetUNCOMPRESSED() *Uncompressed

func (*BloomFilterCompression) IsSetUNCOMPRESSED

func (p *BloomFilterCompression) IsSetUNCOMPRESSED() bool

func (*BloomFilterCompression) LogValue

func (p *BloomFilterCompression) LogValue() slog.Value

func (*BloomFilterCompression) Read

func (p *BloomFilterCompression) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterCompression) ReadField1

func (p *BloomFilterCompression) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterCompression) String

func (p *BloomFilterCompression) String() string

func (*BloomFilterCompression) Validate

func (p *BloomFilterCompression) Validate() error

func (*BloomFilterCompression) Write

func (p *BloomFilterCompression) Write(ctx context.Context, oprot thrift.TProtocol) error

type BloomFilterHash

type BloomFilterHash struct {
	XXHASH *XxHash `thrift:"XXHASH,1" db:"XXHASH" json:"XXHASH,omitempty"`
}

The hash function used in Bloom filter. This function takes the hash of a column value using plain encoding.

Attributes:

  • XXHASH: xxHash Strategy. *
var BloomFilterHeader_Hash_DEFAULT *BloomFilterHash

func NewBloomFilterHash

func NewBloomFilterHash() *BloomFilterHash

func (*BloomFilterHash) CountSetFieldsBloomFilterHash

func (p *BloomFilterHash) CountSetFieldsBloomFilterHash() int

func (*BloomFilterHash) Equals

func (p *BloomFilterHash) Equals(other *BloomFilterHash) bool

func (*BloomFilterHash) GetXXHASH

func (p *BloomFilterHash) GetXXHASH() *XxHash

func (*BloomFilterHash) IsSetXXHASH

func (p *BloomFilterHash) IsSetXXHASH() bool

func (*BloomFilterHash) LogValue

func (p *BloomFilterHash) LogValue() slog.Value

func (*BloomFilterHash) Read

func (p *BloomFilterHash) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterHash) ReadField1

func (p *BloomFilterHash) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterHash) String

func (p *BloomFilterHash) String() string

func (*BloomFilterHash) Validate

func (p *BloomFilterHash) Validate() error

func (*BloomFilterHash) Write

func (p *BloomFilterHash) Write(ctx context.Context, oprot thrift.TProtocol) error

type BloomFilterHeader

type BloomFilterHeader struct {
	NumBytes    int32                   `thrift:"numBytes,1,required" db:"numBytes" json:"numBytes"`
	Algorithm   *BloomFilterAlgorithm   `thrift:"algorithm,2,required" db:"algorithm" json:"algorithm"`
	Hash        *BloomFilterHash        `thrift:"hash,3,required" db:"hash" json:"hash"`
	Compression *BloomFilterCompression `thrift:"compression,4,required" db:"compression" json:"compression"`
}

Bloom filter header is stored at beginning of Bloom filter data of each column and followed by its bitset.

Attributes:

  • NumBytes: The size of bitset in bytes *
  • Algorithm: The algorithm for setting bits. *
  • Hash: The hash function used for Bloom filter. *
  • Compression: The compression used in the Bloom filter *

func NewBloomFilterHeader

func NewBloomFilterHeader() *BloomFilterHeader

func (*BloomFilterHeader) Equals

func (p *BloomFilterHeader) Equals(other *BloomFilterHeader) bool

func (*BloomFilterHeader) GetAlgorithm

func (p *BloomFilterHeader) GetAlgorithm() *BloomFilterAlgorithm

func (*BloomFilterHeader) GetCompression

func (p *BloomFilterHeader) GetCompression() *BloomFilterCompression

func (*BloomFilterHeader) GetHash

func (p *BloomFilterHeader) GetHash() *BloomFilterHash

func (*BloomFilterHeader) GetNumBytes

func (p *BloomFilterHeader) GetNumBytes() int32

func (*BloomFilterHeader) IsSetAlgorithm

func (p *BloomFilterHeader) IsSetAlgorithm() bool

func (*BloomFilterHeader) IsSetCompression

func (p *BloomFilterHeader) IsSetCompression() bool

func (*BloomFilterHeader) IsSetHash

func (p *BloomFilterHeader) IsSetHash() bool

func (*BloomFilterHeader) LogValue

func (p *BloomFilterHeader) LogValue() slog.Value

func (*BloomFilterHeader) Read

func (p *BloomFilterHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterHeader) ReadField1

func (p *BloomFilterHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterHeader) ReadField2

func (p *BloomFilterHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterHeader) ReadField3

func (p *BloomFilterHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterHeader) ReadField4

func (p *BloomFilterHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*BloomFilterHeader) String

func (p *BloomFilterHeader) String() string

func (*BloomFilterHeader) Validate

func (p *BloomFilterHeader) Validate() error

func (*BloomFilterHeader) Write

func (p *BloomFilterHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type BoundaryOrder

type BoundaryOrder int64

Enum to annotate whether lists of min/max elements inside ColumnIndex are ordered and if so, in which direction.

const (
	BoundaryOrder_UNORDERED  BoundaryOrder = 0
	BoundaryOrder_ASCENDING  BoundaryOrder = 1
	BoundaryOrder_DESCENDING BoundaryOrder = 2
)

func BoundaryOrderFromString

func BoundaryOrderFromString(s string) (BoundaryOrder, error)

func BoundaryOrderPtr

func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder

func (BoundaryOrder) MarshalText

func (p BoundaryOrder) MarshalText() ([]byte, error)

func (*BoundaryOrder) Scan

func (p *BoundaryOrder) Scan(value interface{}) error

func (BoundaryOrder) String

func (p BoundaryOrder) String() string

func (*BoundaryOrder) UnmarshalText

func (p *BoundaryOrder) UnmarshalText(text []byte) error

func (*BoundaryOrder) Value

func (p *BoundaryOrder) Value() (driver.Value, error)

type BsonType

type BsonType struct {
}

Embedded BSON logical type annotation

Allowed for physical types: BYTE_ARRAY

var LogicalType_BSON_DEFAULT *BsonType

func NewBsonType

func NewBsonType() *BsonType

func (*BsonType) Equals

func (p *BsonType) Equals(other *BsonType) bool

func (*BsonType) LogValue

func (p *BsonType) LogValue() slog.Value

func (*BsonType) Read

func (p *BsonType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*BsonType) String

func (p *BsonType) String() string

func (*BsonType) Validate

func (p *BsonType) Validate() error

func (*BsonType) Write

func (p *BsonType) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnChunk

type ColumnChunk struct {
	FilePath                *string               `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"`
	FileOffset              int64                 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"`
	MetaData                *ColumnMetaData       `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"`
	OffsetIndexOffset       *int64                `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"`
	OffsetIndexLength       *int32                `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"`
	ColumnIndexOffset       *int64                `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"`
	ColumnIndexLength       *int32                `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"`
	CryptoMetadata          *ColumnCryptoMetaData `thrift:"crypto_metadata,8" db:"crypto_metadata" json:"crypto_metadata,omitempty"`
	EncryptedColumnMetadata []byte                `thrift:"encrypted_column_metadata,9" db:"encrypted_column_metadata" json:"encrypted_column_metadata,omitempty"`
}

Attributes:

  • FilePath: File where column data is stored. If not set, assumed to be same file as metadata. This path is relative to the current file.

  • FileOffset: Deprecated: Byte offset in file_path to the ColumnMetaData

Past use of this field has been inconsistent, with some implementations using it to point to the ColumnMetaData and some using it to point to the first page in the column chunk. In many cases, the ColumnMetaData at this location is wrong. This field is now deprecated and should not be used. Writers should set this field to 0 if no ColumnMetaData has been written outside the footer.

  • MetaData: Column metadata for this chunk. Some writers may also replicate this at the location pointed to by file_path/file_offset. Note: while marked as optional, this field is in fact required by most major Parquet implementations. As such, writers MUST populate this field.

  • OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
  • OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
  • ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
  • ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *
  • CryptoMetadata: Crypto metadata of encrypted columns *
  • EncryptedColumnMetadata: Encrypted column metadata for this chunk *

func NewColumnChunk

func NewColumnChunk() *ColumnChunk

func (*ColumnChunk) Equals

func (p *ColumnChunk) Equals(other *ColumnChunk) bool

func (*ColumnChunk) GetColumnIndexLength

func (p *ColumnChunk) GetColumnIndexLength() int32

func (*ColumnChunk) GetColumnIndexOffset

func (p *ColumnChunk) GetColumnIndexOffset() int64

func (*ColumnChunk) GetCryptoMetadata

func (p *ColumnChunk) GetCryptoMetadata() *ColumnCryptoMetaData

func (*ColumnChunk) GetEncryptedColumnMetadata

func (p *ColumnChunk) GetEncryptedColumnMetadata() []byte

func (*ColumnChunk) GetFileOffset

func (p *ColumnChunk) GetFileOffset() int64

func (*ColumnChunk) GetFilePath

func (p *ColumnChunk) GetFilePath() string

func (*ColumnChunk) GetMetaData

func (p *ColumnChunk) GetMetaData() *ColumnMetaData

func (*ColumnChunk) GetOffsetIndexLength

func (p *ColumnChunk) GetOffsetIndexLength() int32

func (*ColumnChunk) GetOffsetIndexOffset

func (p *ColumnChunk) GetOffsetIndexOffset() int64

func (*ColumnChunk) IsSetColumnIndexLength

func (p *ColumnChunk) IsSetColumnIndexLength() bool

func (*ColumnChunk) IsSetColumnIndexOffset

func (p *ColumnChunk) IsSetColumnIndexOffset() bool

func (*ColumnChunk) IsSetCryptoMetadata

func (p *ColumnChunk) IsSetCryptoMetadata() bool

func (*ColumnChunk) IsSetEncryptedColumnMetadata

func (p *ColumnChunk) IsSetEncryptedColumnMetadata() bool

func (*ColumnChunk) IsSetFilePath

func (p *ColumnChunk) IsSetFilePath() bool

func (*ColumnChunk) IsSetMetaData

func (p *ColumnChunk) IsSetMetaData() bool

func (*ColumnChunk) IsSetOffsetIndexLength

func (p *ColumnChunk) IsSetOffsetIndexLength() bool

func (*ColumnChunk) IsSetOffsetIndexOffset

func (p *ColumnChunk) IsSetOffsetIndexOffset() bool

func (*ColumnChunk) LogValue

func (p *ColumnChunk) LogValue() slog.Value

func (*ColumnChunk) Read

func (p *ColumnChunk) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField1

func (p *ColumnChunk) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField2

func (p *ColumnChunk) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField3

func (p *ColumnChunk) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField4

func (p *ColumnChunk) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField5

func (p *ColumnChunk) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField6

func (p *ColumnChunk) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField7

func (p *ColumnChunk) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField8

func (p *ColumnChunk) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField9

func (p *ColumnChunk) ReadField9(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) String

func (p *ColumnChunk) String() string

func (*ColumnChunk) Validate

func (p *ColumnChunk) Validate() error

func (*ColumnChunk) Write

func (p *ColumnChunk) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnCryptoMetaData

type ColumnCryptoMetaData struct {
	ENCRYPTION_WITH_FOOTER_KEY *EncryptionWithFooterKey `thrift:"ENCRYPTION_WITH_FOOTER_KEY,1" db:"ENCRYPTION_WITH_FOOTER_KEY" json:"ENCRYPTION_WITH_FOOTER_KEY,omitempty"`
	ENCRYPTION_WITH_COLUMN_KEY *EncryptionWithColumnKey `thrift:"ENCRYPTION_WITH_COLUMN_KEY,2" db:"ENCRYPTION_WITH_COLUMN_KEY" json:"ENCRYPTION_WITH_COLUMN_KEY,omitempty"`
}

Attributes:

  • ENCRYPTION_WITH_FOOTER_KEY
  • ENCRYPTION_WITH_COLUMN_KEY
var ColumnChunk_CryptoMetadata_DEFAULT *ColumnCryptoMetaData

func NewColumnCryptoMetaData

func NewColumnCryptoMetaData() *ColumnCryptoMetaData

func (*ColumnCryptoMetaData) CountSetFieldsColumnCryptoMetaData

func (p *ColumnCryptoMetaData) CountSetFieldsColumnCryptoMetaData() int

func (*ColumnCryptoMetaData) Equals

func (p *ColumnCryptoMetaData) Equals(other *ColumnCryptoMetaData) bool

func (*ColumnCryptoMetaData) GetENCRYPTION_WITH_COLUMN_KEY

func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_COLUMN_KEY() *EncryptionWithColumnKey

func (*ColumnCryptoMetaData) GetENCRYPTION_WITH_FOOTER_KEY

func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_FOOTER_KEY() *EncryptionWithFooterKey

func (*ColumnCryptoMetaData) IsSetENCRYPTION_WITH_COLUMN_KEY

func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_COLUMN_KEY() bool

func (*ColumnCryptoMetaData) IsSetENCRYPTION_WITH_FOOTER_KEY

func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_FOOTER_KEY() bool

func (*ColumnCryptoMetaData) LogValue

func (p *ColumnCryptoMetaData) LogValue() slog.Value

func (*ColumnCryptoMetaData) Read

func (p *ColumnCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnCryptoMetaData) ReadField1

func (p *ColumnCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnCryptoMetaData) ReadField2

func (p *ColumnCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnCryptoMetaData) String

func (p *ColumnCryptoMetaData) String() string

func (*ColumnCryptoMetaData) Validate

func (p *ColumnCryptoMetaData) Validate() error

func (*ColumnCryptoMetaData) Write

func (p *ColumnCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnIndex

type ColumnIndex struct {
	NullPages                 []bool        `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"`
	MinValues                 [][]byte      `thrift:"min_values,2,required" db:"min_values" json:"min_values"`
	MaxValues                 [][]byte      `thrift:"max_values,3,required" db:"max_values" json:"max_values"`
	BoundaryOrder             BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"`
	NullCounts                []int64       `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"`
	RepetitionLevelHistograms []int64       `thrift:"repetition_level_histograms,6" db:"repetition_level_histograms" json:"repetition_level_histograms,omitempty"`
	DefinitionLevelHistograms []int64       `thrift:"definition_level_histograms,7" db:"definition_level_histograms" json:"definition_level_histograms,omitempty"`
}

Optional statistics for each data page in a ColumnChunk.

Forms part of the page index, along with OffsetIndex.

If this structure is present, OffsetIndex must also be present.

For each field in this structure, <field>[i] refers to the page at OffsetIndex.page_locations[i]

Attributes:

  • NullPages: A list of Boolean values to determine the validity of the corresponding min and max values. If true, a page contains only null values, and writers have to set the corresponding entries in min_values and max_values to byte[0], so that all lists have the same length. If false, the corresponding entries in min_values and max_values must be valid.

  • MinValues: Two lists containing lower and upper bounds for the values of each page determined by the ColumnOrder of the column. These may be the actual minimum and maximum values found on a page, but can also be (more compact) values that do not exist on a page. For example, instead of storing "Blart Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". Such more compact values must still be valid values within the column's logical type. Readers must make sure that list entries are populated before using them by inspecting null_pages.

  • MaxValues
  • BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in which direction. This allows readers to perform binary searches in both lists. Readers cannot assume that max_values[i] <= min_values[i+1], even if the lists are ordered.

  • NullCounts: A list containing the number of null values for each page

Writers SHOULD always write this field even if no null values are present or the column is not nullable. Readers MUST distinguish between null_counts not being present and null_count being 0. If null_counts are not present, readers MUST NOT assume all null counts are 0.

  • RepetitionLevelHistograms: Contains repetition level histograms for each page concatenated together. The repetition_level_histogram field on SizeStatistics contains more details.

When present the length should always be (number of pages * (max_repetition_level + 1)) elements.

Element 0 is the first element of the histogram for the first page. Element (max_repetition_level + 1) is the first element of the histogram for the second page.

  • DefinitionLevelHistograms: Same as repetition_level_histograms except for definition levels.

func NewColumnIndex

func NewColumnIndex() *ColumnIndex

func (*ColumnIndex) Equals

func (p *ColumnIndex) Equals(other *ColumnIndex) bool

func (*ColumnIndex) GetBoundaryOrder

func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder

func (*ColumnIndex) GetDefinitionLevelHistograms

func (p *ColumnIndex) GetDefinitionLevelHistograms() []int64

func (*ColumnIndex) GetMaxValues

func (p *ColumnIndex) GetMaxValues() [][]byte

func (*ColumnIndex) GetMinValues

func (p *ColumnIndex) GetMinValues() [][]byte

func (*ColumnIndex) GetNullCounts

func (p *ColumnIndex) GetNullCounts() []int64

func (*ColumnIndex) GetNullPages

func (p *ColumnIndex) GetNullPages() []bool

func (*ColumnIndex) GetRepetitionLevelHistograms

func (p *ColumnIndex) GetRepetitionLevelHistograms() []int64

func (*ColumnIndex) IsSetDefinitionLevelHistograms

func (p *ColumnIndex) IsSetDefinitionLevelHistograms() bool

func (*ColumnIndex) IsSetNullCounts

func (p *ColumnIndex) IsSetNullCounts() bool

func (*ColumnIndex) IsSetRepetitionLevelHistograms

func (p *ColumnIndex) IsSetRepetitionLevelHistograms() bool

func (*ColumnIndex) LogValue

func (p *ColumnIndex) LogValue() slog.Value

func (*ColumnIndex) Read

func (p *ColumnIndex) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField1

func (p *ColumnIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField2

func (p *ColumnIndex) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField3

func (p *ColumnIndex) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField4

func (p *ColumnIndex) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField5

func (p *ColumnIndex) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField6

func (p *ColumnIndex) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField7

func (p *ColumnIndex) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) String

func (p *ColumnIndex) String() string

func (*ColumnIndex) Validate

func (p *ColumnIndex) Validate() error

func (*ColumnIndex) Write

func (p *ColumnIndex) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnMetaData

type ColumnMetaData struct {
	Type                  Type                 `thrift:"type,1,required" db:"type" json:"type"`
	Encodings             []Encoding           `thrift:"encodings,2,required" db:"encodings" json:"encodings"`
	PathInSchema          []string             `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"`
	Codec                 CompressionCodec     `thrift:"codec,4,required" db:"codec" json:"codec"`
	NumValues             int64                `thrift:"num_values,5,required" db:"num_values" json:"num_values"`
	TotalUncompressedSize int64                `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"`
	TotalCompressedSize   int64                `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"`
	KeyValueMetadata      []*KeyValue          `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"`
	DataPageOffset        int64                `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"`
	IndexPageOffset       *int64               `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"`
	DictionaryPageOffset  *int64               `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"`
	Statistics            *Statistics          `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"`
	EncodingStats         []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"`
	BloomFilterOffset     *int64               `thrift:"bloom_filter_offset,14" db:"bloom_filter_offset" json:"bloom_filter_offset,omitempty"`
	BloomFilterLength     *int32               `thrift:"bloom_filter_length,15" db:"bloom_filter_length" json:"bloom_filter_length,omitempty"`
	SizeStatistics        *SizeStatistics      `thrift:"size_statistics,16" db:"size_statistics" json:"size_statistics,omitempty"`
}

Description for column metadata

Attributes:

  • Type: Type of this column *
  • Encodings: Set of all encodings used for this column. The purpose is to validate whether we can decode those pages.

  • PathInSchema: Path in schema *
  • Codec: Compression codec *
  • NumValues: Number of values in this column *
  • TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) *
  • TotalCompressedSize: total byte size of all compressed, and potentially encrypted, pages in this column chunk (including the headers)

  • KeyValueMetadata: Optional key/value metadata *
  • DataPageOffset: Byte offset from beginning of file to first data page *
  • IndexPageOffset: Byte offset from beginning of file to root index page *
  • DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page *
  • Statistics: optional statistics for this column chunk
  • EncodingStats: Set of all encodings used for pages in this column chunk. This information can be used to determine if all data pages are dictionary encoded for example

  • BloomFilterOffset: Byte offset from beginning of file to Bloom filter data. *
  • BloomFilterLength: Size of Bloom filter data including the serialized header, in bytes.

Added in 2.10 so readers may not read this field from old files and it can be obtained after the BloomFilterHeader has been deserialized. Writers should write this field so readers can read the bloom filter in a single I/O.

  • SizeStatistics: Optional statistics to help estimate total memory when converted to in-memory representations. The histograms contained in these statistics can also be useful in some cases for more fine-grained nullability/list length filter pushdown.

var ColumnChunk_MetaData_DEFAULT *ColumnMetaData

func NewColumnMetaData

func NewColumnMetaData() *ColumnMetaData

func (*ColumnMetaData) Equals

func (p *ColumnMetaData) Equals(other *ColumnMetaData) bool

func (*ColumnMetaData) GetBloomFilterLength

func (p *ColumnMetaData) GetBloomFilterLength() int32

func (*ColumnMetaData) GetBloomFilterOffset

func (p *ColumnMetaData) GetBloomFilterOffset() int64

func (*ColumnMetaData) GetCodec

func (p *ColumnMetaData) GetCodec() CompressionCodec

func (*ColumnMetaData) GetDataPageOffset

func (p *ColumnMetaData) GetDataPageOffset() int64

func (*ColumnMetaData) GetDictionaryPageOffset

func (p *ColumnMetaData) GetDictionaryPageOffset() int64

func (*ColumnMetaData) GetEncodingStats

func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats

func (*ColumnMetaData) GetEncodings

func (p *ColumnMetaData) GetEncodings() []Encoding

func (*ColumnMetaData) GetIndexPageOffset

func (p *ColumnMetaData) GetIndexPageOffset() int64

func (*ColumnMetaData) GetKeyValueMetadata

func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue

func (*ColumnMetaData) GetNumValues

func (p *ColumnMetaData) GetNumValues() int64

func (*ColumnMetaData) GetPathInSchema

func (p *ColumnMetaData) GetPathInSchema() []string

func (*ColumnMetaData) GetSizeStatistics

func (p *ColumnMetaData) GetSizeStatistics() *SizeStatistics

func (*ColumnMetaData) GetStatistics

func (p *ColumnMetaData) GetStatistics() *Statistics

func (*ColumnMetaData) GetTotalCompressedSize

func (p *ColumnMetaData) GetTotalCompressedSize() int64

func (*ColumnMetaData) GetTotalUncompressedSize

func (p *ColumnMetaData) GetTotalUncompressedSize() int64

func (*ColumnMetaData) GetType

func (p *ColumnMetaData) GetType() Type

func (*ColumnMetaData) IsSetBloomFilterLength

func (p *ColumnMetaData) IsSetBloomFilterLength() bool

func (*ColumnMetaData) IsSetBloomFilterOffset

func (p *ColumnMetaData) IsSetBloomFilterOffset() bool

func (*ColumnMetaData) IsSetDictionaryPageOffset

func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool

func (*ColumnMetaData) IsSetEncodingStats

func (p *ColumnMetaData) IsSetEncodingStats() bool

func (*ColumnMetaData) IsSetIndexPageOffset

func (p *ColumnMetaData) IsSetIndexPageOffset() bool

func (*ColumnMetaData) IsSetKeyValueMetadata

func (p *ColumnMetaData) IsSetKeyValueMetadata() bool

func (*ColumnMetaData) IsSetSizeStatistics

func (p *ColumnMetaData) IsSetSizeStatistics() bool

func (*ColumnMetaData) IsSetStatistics

func (p *ColumnMetaData) IsSetStatistics() bool

func (*ColumnMetaData) LogValue

func (p *ColumnMetaData) LogValue() slog.Value

func (*ColumnMetaData) Read

func (p *ColumnMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField1

func (p *ColumnMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField10

func (p *ColumnMetaData) ReadField10(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField11

func (p *ColumnMetaData) ReadField11(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField12

func (p *ColumnMetaData) ReadField12(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField13

func (p *ColumnMetaData) ReadField13(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField14

func (p *ColumnMetaData) ReadField14(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField15

func (p *ColumnMetaData) ReadField15(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField16

func (p *ColumnMetaData) ReadField16(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField2

func (p *ColumnMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField3

func (p *ColumnMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField4

func (p *ColumnMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField5

func (p *ColumnMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField6

func (p *ColumnMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField7

func (p *ColumnMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField8

func (p *ColumnMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField9

func (p *ColumnMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) String

func (p *ColumnMetaData) String() string

func (*ColumnMetaData) Validate

func (p *ColumnMetaData) Validate() error

func (*ColumnMetaData) Write

func (p *ColumnMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnOrder

type ColumnOrder struct {
	TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"`
}

Union to specify the order used for the min_value and max_value fields for a column. This union takes the role of an enhanced enum that allows rich elements (which will be needed for a collation-based ordering in the future).

Possible values are:

  • TypeDefinedOrder - the column uses the order defined by its logical or physical type (if there is no logical type).

If the reader does not support the value of this union, min and max stats for this column should be ignored.

Attributes:

  • TYPE_ORDER: The sort orders for logical types are:

UTF8 - unsigned byte-wise comparison
INT8 - signed comparison
INT16 - signed comparison
INT32 - signed comparison
INT64 - signed comparison
UINT8 - unsigned comparison
UINT16 - unsigned comparison
UINT32 - unsigned comparison
UINT64 - unsigned comparison
DECIMAL - signed comparison of the represented value
DATE - signed comparison
TIME_MILLIS - signed comparison
TIME_MICROS - signed comparison
TIMESTAMP_MILLIS - signed comparison
TIMESTAMP_MICROS - signed comparison
INTERVAL - undefined
JSON - unsigned byte-wise comparison
BSON - unsigned byte-wise comparison
ENUM - unsigned byte-wise comparison
LIST - undefined
MAP - undefined

In the absence of logical types, the sort order is determined by the physical type:

BOOLEAN - false, true
INT32 - signed comparison
INT64 - signed comparison
INT96 (only used for legacy timestamps) - undefined
FLOAT - signed comparison of the represented value (*)
DOUBLE - signed comparison of the represented value (*)
BYTE_ARRAY - unsigned byte-wise comparison
FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison

(*) Because the sorting order is not specified properly for floating point values (relations vs. total ordering), the following compatibility rules should be applied when reading statistics:
- If the min is a NaN, it should be ignored.
- If the max is a NaN, it should be ignored.
- If the min is +0, the row group may contain -0 values as well.
- If the max is -0, the row group may contain +0 values as well.
- When looking for NaN values, min and max should be ignored.

When writing statistics the following rules should be followed:
- NaNs should not be written to min or max statistics fields.
- If the computed max value is zero (whether negative or positive),
  `+0.0` should be written into the max statistics field.
- If the computed min value is zero (whether negative or positive),
  `-0.0` should be written into the min statistics field.

func NewColumnOrder

func NewColumnOrder() *ColumnOrder

func (*ColumnOrder) CountSetFieldsColumnOrder

func (p *ColumnOrder) CountSetFieldsColumnOrder() int

func (*ColumnOrder) Equals

func (p *ColumnOrder) Equals(other *ColumnOrder) bool

func (*ColumnOrder) GetTYPE_ORDER

func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder

func (*ColumnOrder) IsSetTYPE_ORDER

func (p *ColumnOrder) IsSetTYPE_ORDER() bool

func (*ColumnOrder) LogValue

func (p *ColumnOrder) LogValue() slog.Value

func (*ColumnOrder) Read

func (p *ColumnOrder) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnOrder) ReadField1

func (p *ColumnOrder) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnOrder) String

func (p *ColumnOrder) String() string

func (*ColumnOrder) Validate

func (p *ColumnOrder) Validate() error

func (*ColumnOrder) Write

func (p *ColumnOrder) Write(ctx context.Context, oprot thrift.TProtocol) error

type CompressionCodec

type CompressionCodec int64

Supported compression algorithms.

Codecs added in format version X.Y can be read by readers based on X.Y and later. Codec support may vary between readers based on the format version and libraries available at runtime.

See Compression.md for a detailed specification of these algorithms.

const (
	CompressionCodec_UNCOMPRESSED CompressionCodec = 0
	CompressionCodec_SNAPPY       CompressionCodec = 1
	CompressionCodec_GZIP         CompressionCodec = 2
	CompressionCodec_LZO          CompressionCodec = 3
	CompressionCodec_BROTLI       CompressionCodec = 4
	CompressionCodec_LZ4          CompressionCodec = 5
	CompressionCodec_ZSTD         CompressionCodec = 6
	CompressionCodec_LZ4_RAW      CompressionCodec = 7
)

func CompressionCodecFromString

func CompressionCodecFromString(s string) (CompressionCodec, error)

func CompressionCodecPtr

func CompressionCodecPtr(v CompressionCodec) *CompressionCodec

func (CompressionCodec) MarshalText

func (p CompressionCodec) MarshalText() ([]byte, error)

func (*CompressionCodec) Scan

func (p *CompressionCodec) Scan(value interface{}) error

func (CompressionCodec) String

func (p CompressionCodec) String() string

func (*CompressionCodec) UnmarshalText

func (p *CompressionCodec) UnmarshalText(text []byte) error

func (*CompressionCodec) Value

func (p *CompressionCodec) Value() (driver.Value, error)

type ConvertedType

type ConvertedType int64

DEPRECATED: Common types used by frameworks (e.g. Hive, Pig) using Parquet. ConvertedType is superseded by LogicalType. This enum should not be extended.

See LogicalTypes.md for conversion between ConvertedType and LogicalType.

const (
	ConvertedType_UTF8             ConvertedType = 0
	ConvertedType_MAP              ConvertedType = 1
	ConvertedType_MAP_KEY_VALUE    ConvertedType = 2
	ConvertedType_LIST             ConvertedType = 3
	ConvertedType_ENUM             ConvertedType = 4
	ConvertedType_DECIMAL          ConvertedType = 5
	ConvertedType_DATE             ConvertedType = 6
	ConvertedType_TIME_MILLIS      ConvertedType = 7
	ConvertedType_TIME_MICROS      ConvertedType = 8
	ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9
	ConvertedType_TIMESTAMP_MICROS ConvertedType = 10
	ConvertedType_UINT_8           ConvertedType = 11
	ConvertedType_UINT_16          ConvertedType = 12
	ConvertedType_UINT_32          ConvertedType = 13
	ConvertedType_UINT_64          ConvertedType = 14
	ConvertedType_INT_8            ConvertedType = 15
	ConvertedType_INT_16           ConvertedType = 16
	ConvertedType_INT_32           ConvertedType = 17
	ConvertedType_INT_64           ConvertedType = 18
	ConvertedType_JSON             ConvertedType = 19
	ConvertedType_BSON             ConvertedType = 20
	ConvertedType_INTERVAL         ConvertedType = 21
)
var SchemaElement_ConvertedType_DEFAULT ConvertedType

func ConvertedTypeFromString

func ConvertedTypeFromString(s string) (ConvertedType, error)

func ConvertedTypePtr

func ConvertedTypePtr(v ConvertedType) *ConvertedType

func (ConvertedType) MarshalText

func (p ConvertedType) MarshalText() ([]byte, error)

func (*ConvertedType) Scan

func (p *ConvertedType) Scan(value interface{}) error

func (ConvertedType) String

func (p ConvertedType) String() string

func (*ConvertedType) UnmarshalText

func (p *ConvertedType) UnmarshalText(text []byte) error

func (*ConvertedType) Value

func (p *ConvertedType) Value() (driver.Value, error)

type DataPageHeader

type DataPageHeader struct {
	NumValues               int32       `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
	Encoding                Encoding    `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
	DefinitionLevelEncoding Encoding    `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"`
	RepetitionLevelEncoding Encoding    `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"`
	Statistics              *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"`
}

Data page header

Attributes:

  • NumValues: Number of values, including NULLs, in this data page.

If an OffsetIndex is present, a page must begin at a row boundary (repetition_level = 0). Otherwise, pages may begin within a row (repetition_level > 0).

  • Encoding: Encoding used for this data page *
  • DefinitionLevelEncoding: Encoding used for definition levels *
  • RepetitionLevelEncoding: Encoding used for repetition levels *
  • Statistics: Optional statistics for the data in this page *
var PageHeader_DataPageHeader_DEFAULT *DataPageHeader

func NewDataPageHeader

func NewDataPageHeader() *DataPageHeader

func (*DataPageHeader) Equals

func (p *DataPageHeader) Equals(other *DataPageHeader) bool

func (*DataPageHeader) GetDefinitionLevelEncoding

func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding

func (*DataPageHeader) GetEncoding

func (p *DataPageHeader) GetEncoding() Encoding

func (*DataPageHeader) GetNumValues

func (p *DataPageHeader) GetNumValues() int32

func (*DataPageHeader) GetRepetitionLevelEncoding

func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding

func (*DataPageHeader) GetStatistics

func (p *DataPageHeader) GetStatistics() *Statistics

func (*DataPageHeader) IsSetStatistics

func (p *DataPageHeader) IsSetStatistics() bool

func (*DataPageHeader) LogValue

func (p *DataPageHeader) LogValue() slog.Value

func (*DataPageHeader) Read

func (p *DataPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField1

func (p *DataPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField2

func (p *DataPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField3

func (p *DataPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField4

func (p *DataPageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField5

func (p *DataPageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) String

func (p *DataPageHeader) String() string

func (*DataPageHeader) Validate

func (p *DataPageHeader) Validate() error

func (*DataPageHeader) Write

func (p *DataPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type DataPageHeaderV2

type DataPageHeaderV2 struct {
	NumValues                  int32       `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
	NumNulls                   int32       `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"`
	NumRows                    int32       `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
	Encoding                   Encoding    `thrift:"encoding,4,required" db:"encoding" json:"encoding"`
	DefinitionLevelsByteLength int32       `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"`
	RepetitionLevelsByteLength int32       `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"`
	IsCompressed               bool        `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed"`
	Statistics                 *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"`
}

New page format allowing reading levels without decompressing the data. Repetition and definition levels are uncompressed. The remaining section containing the data is compressed if is_compressed is true.

Attributes:

  • NumValues: Number of values, including NULLs, in this data page. *
  • NumNulls: Number of NULL values, in this data page.

The number of non-null values is num_values - num_nulls, which is also the number of values in the data section. *

  • NumRows: Number of rows in this data page. Every page must begin at a row boundary (repetition_level = 0): rows must **not** be split across page boundaries when using V2 data pages.

  • Encoding: Encoding used for data in this page *
  • DefinitionLevelsByteLength: Length of the definition levels
  • RepetitionLevelsByteLength: Length of the repetition levels
  • IsCompressed: Whether the values are compressed.

This means that the section of the page between definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) is compressed with the compression_codec. If missing, it is considered compressed.

  • Statistics: Optional statistics for the data in this page *
var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2

func NewDataPageHeaderV2

func NewDataPageHeaderV2() *DataPageHeaderV2

func (*DataPageHeaderV2) Equals

func (p *DataPageHeaderV2) Equals(other *DataPageHeaderV2) bool

func (*DataPageHeaderV2) GetDefinitionLevelsByteLength

func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32

func (*DataPageHeaderV2) GetEncoding

func (p *DataPageHeaderV2) GetEncoding() Encoding

func (*DataPageHeaderV2) GetIsCompressed

func (p *DataPageHeaderV2) GetIsCompressed() bool

func (*DataPageHeaderV2) GetNumNulls

func (p *DataPageHeaderV2) GetNumNulls() int32

func (*DataPageHeaderV2) GetNumRows

func (p *DataPageHeaderV2) GetNumRows() int32

func (*DataPageHeaderV2) GetNumValues

func (p *DataPageHeaderV2) GetNumValues() int32

func (*DataPageHeaderV2) GetRepetitionLevelsByteLength

func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32

func (*DataPageHeaderV2) GetStatistics

func (p *DataPageHeaderV2) GetStatistics() *Statistics

func (*DataPageHeaderV2) IsSetIsCompressed

func (p *DataPageHeaderV2) IsSetIsCompressed() bool

func (*DataPageHeaderV2) IsSetStatistics

func (p *DataPageHeaderV2) IsSetStatistics() bool

func (*DataPageHeaderV2) LogValue

func (p *DataPageHeaderV2) LogValue() slog.Value

func (*DataPageHeaderV2) Read

func (p *DataPageHeaderV2) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField1

func (p *DataPageHeaderV2) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField2

func (p *DataPageHeaderV2) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField3

func (p *DataPageHeaderV2) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField4

func (p *DataPageHeaderV2) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField5

func (p *DataPageHeaderV2) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField6

func (p *DataPageHeaderV2) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField7

func (p *DataPageHeaderV2) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField8

func (p *DataPageHeaderV2) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) String

func (p *DataPageHeaderV2) String() string

func (*DataPageHeaderV2) Validate

func (p *DataPageHeaderV2) Validate() error

func (*DataPageHeaderV2) Write

func (p *DataPageHeaderV2) Write(ctx context.Context, oprot thrift.TProtocol) error

type DateType

type DateType struct {
}
var LogicalType_DATE_DEFAULT *DateType

func NewDateType

func NewDateType() *DateType

func (*DateType) Equals

func (p *DateType) Equals(other *DateType) bool

func (*DateType) LogValue

func (p *DateType) LogValue() slog.Value

func (*DateType) Read

func (p *DateType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DateType) String

func (p *DateType) String() string

func (*DateType) Validate

func (p *DateType) Validate() error

func (*DateType) Write

func (p *DateType) Write(ctx context.Context, oprot thrift.TProtocol) error

type DecimalType

type DecimalType struct {
	Scale     int32 `thrift:"scale,1,required" db:"scale" json:"scale"`
	Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"`
}

Decimal logical type annotation

Scale must be zero or a positive integer less than or equal to the precision. Precision must be a non-zero positive integer.

To maintain forward-compatibility in v1, implementations using this logical type must also set scale and precision on the annotated SchemaElement.

Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY.

Attributes:

  • Scale
  • Precision
var LogicalType_DECIMAL_DEFAULT *DecimalType

func NewDecimalType

func NewDecimalType() *DecimalType

func (*DecimalType) Equals

func (p *DecimalType) Equals(other *DecimalType) bool

func (*DecimalType) GetPrecision

func (p *DecimalType) GetPrecision() int32

func (*DecimalType) GetScale

func (p *DecimalType) GetScale() int32

func (*DecimalType) LogValue

func (p *DecimalType) LogValue() slog.Value

func (*DecimalType) Read

func (p *DecimalType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DecimalType) ReadField1

func (p *DecimalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DecimalType) ReadField2

func (p *DecimalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DecimalType) String

func (p *DecimalType) String() string

func (*DecimalType) Validate

func (p *DecimalType) Validate() error

func (*DecimalType) Write

func (p *DecimalType) Write(ctx context.Context, oprot thrift.TProtocol) error

type DictionaryPageHeader

type DictionaryPageHeader struct {
	NumValues int32    `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
	Encoding  Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
	IsSorted  *bool    `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"`
}

The dictionary page must be placed at the first position of the column chunk if it is partly or completely dictionary encoded. At most one dictionary page can be placed in a column chunk.

Attributes:

  • NumValues: Number of values in the dictionary *
  • Encoding: Encoding using this dictionary page *
  • IsSorted: If true, the entries in the dictionary are sorted in ascending order *
var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader

func NewDictionaryPageHeader

func NewDictionaryPageHeader() *DictionaryPageHeader

func (*DictionaryPageHeader) Equals

func (*DictionaryPageHeader) GetEncoding

func (p *DictionaryPageHeader) GetEncoding() Encoding

func (*DictionaryPageHeader) GetIsSorted

func (p *DictionaryPageHeader) GetIsSorted() bool

func (*DictionaryPageHeader) GetNumValues

func (p *DictionaryPageHeader) GetNumValues() int32

func (*DictionaryPageHeader) IsSetIsSorted

func (p *DictionaryPageHeader) IsSetIsSorted() bool

func (*DictionaryPageHeader) LogValue

func (p *DictionaryPageHeader) LogValue() slog.Value

func (*DictionaryPageHeader) Read

func (*DictionaryPageHeader) ReadField1

func (p *DictionaryPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DictionaryPageHeader) ReadField2

func (p *DictionaryPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DictionaryPageHeader) ReadField3

func (p *DictionaryPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*DictionaryPageHeader) String

func (p *DictionaryPageHeader) String() string

func (*DictionaryPageHeader) Validate

func (p *DictionaryPageHeader) Validate() error

func (*DictionaryPageHeader) Write

type Encoding

type Encoding int64

Encodings supported by Parquet. Not all encodings are valid for all types. These enums are also used to specify the encoding of definition and repetition levels. See the accompanying doc for the details of the more complicated encodings.

const (
	Encoding_PLAIN                   Encoding = 0
	Encoding_PLAIN_DICTIONARY        Encoding = 2
	Encoding_RLE                     Encoding = 3
	Encoding_BIT_PACKED              Encoding = 4
	Encoding_DELTA_BINARY_PACKED     Encoding = 5
	Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6
	Encoding_DELTA_BYTE_ARRAY        Encoding = 7
	Encoding_RLE_DICTIONARY          Encoding = 8
	Encoding_BYTE_STREAM_SPLIT       Encoding = 9
)

func EncodingFromString

func EncodingFromString(s string) (Encoding, error)

func EncodingPtr

func EncodingPtr(v Encoding) *Encoding

func (Encoding) MarshalText

func (p Encoding) MarshalText() ([]byte, error)

func (*Encoding) Scan

func (p *Encoding) Scan(value interface{}) error

func (Encoding) String

func (p Encoding) String() string

func (*Encoding) UnmarshalText

func (p *Encoding) UnmarshalText(text []byte) error

func (*Encoding) Value

func (p *Encoding) Value() (driver.Value, error)

type EncryptionAlgorithm

type EncryptionAlgorithm struct {
	AES_GCM_V1     *AesGcmV1    `thrift:"AES_GCM_V1,1" db:"AES_GCM_V1" json:"AES_GCM_V1,omitempty"`
	AES_GCM_CTR_V1 *AesGcmCtrV1 `thrift:"AES_GCM_CTR_V1,2" db:"AES_GCM_CTR_V1" json:"AES_GCM_CTR_V1,omitempty"`
}

Attributes:

  • AES_GCM_V1
  • AES_GCM_CTR_V1
var FileCryptoMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm
var FileMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm

func NewEncryptionAlgorithm

func NewEncryptionAlgorithm() *EncryptionAlgorithm

func (*EncryptionAlgorithm) CountSetFieldsEncryptionAlgorithm

func (p *EncryptionAlgorithm) CountSetFieldsEncryptionAlgorithm() int

func (*EncryptionAlgorithm) Equals

func (p *EncryptionAlgorithm) Equals(other *EncryptionAlgorithm) bool

func (*EncryptionAlgorithm) GetAES_GCM_CTR_V1

func (p *EncryptionAlgorithm) GetAES_GCM_CTR_V1() *AesGcmCtrV1

func (*EncryptionAlgorithm) GetAES_GCM_V1

func (p *EncryptionAlgorithm) GetAES_GCM_V1() *AesGcmV1

func (*EncryptionAlgorithm) IsSetAES_GCM_CTR_V1

func (p *EncryptionAlgorithm) IsSetAES_GCM_CTR_V1() bool

func (*EncryptionAlgorithm) IsSetAES_GCM_V1

func (p *EncryptionAlgorithm) IsSetAES_GCM_V1() bool

func (*EncryptionAlgorithm) LogValue

func (p *EncryptionAlgorithm) LogValue() slog.Value

func (*EncryptionAlgorithm) Read

func (*EncryptionAlgorithm) ReadField1

func (p *EncryptionAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*EncryptionAlgorithm) ReadField2

func (p *EncryptionAlgorithm) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*EncryptionAlgorithm) String

func (p *EncryptionAlgorithm) String() string

func (*EncryptionAlgorithm) Validate

func (p *EncryptionAlgorithm) Validate() error

func (*EncryptionAlgorithm) Write

type EncryptionWithColumnKey

type EncryptionWithColumnKey struct {
	PathInSchema []string `thrift:"path_in_schema,1,required" db:"path_in_schema" json:"path_in_schema"`
	KeyMetadata  []byte   `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
}

Attributes:

  • PathInSchema: Column path in schema *
  • KeyMetadata: Retrieval metadata of column encryption key *
var ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT *EncryptionWithColumnKey

func NewEncryptionWithColumnKey

func NewEncryptionWithColumnKey() *EncryptionWithColumnKey

func (*EncryptionWithColumnKey) Equals

func (*EncryptionWithColumnKey) GetKeyMetadata

func (p *EncryptionWithColumnKey) GetKeyMetadata() []byte

func (*EncryptionWithColumnKey) GetPathInSchema

func (p *EncryptionWithColumnKey) GetPathInSchema() []string

func (*EncryptionWithColumnKey) IsSetKeyMetadata

func (p *EncryptionWithColumnKey) IsSetKeyMetadata() bool

func (*EncryptionWithColumnKey) LogValue

func (p *EncryptionWithColumnKey) LogValue() slog.Value

func (*EncryptionWithColumnKey) Read

func (*EncryptionWithColumnKey) ReadField1

func (p *EncryptionWithColumnKey) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*EncryptionWithColumnKey) ReadField2

func (p *EncryptionWithColumnKey) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*EncryptionWithColumnKey) String

func (p *EncryptionWithColumnKey) String() string

func (*EncryptionWithColumnKey) Validate

func (p *EncryptionWithColumnKey) Validate() error

func (*EncryptionWithColumnKey) Write

type EncryptionWithFooterKey

type EncryptionWithFooterKey struct {
}
var ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT *EncryptionWithFooterKey

func NewEncryptionWithFooterKey

func NewEncryptionWithFooterKey() *EncryptionWithFooterKey

func (*EncryptionWithFooterKey) Equals

func (*EncryptionWithFooterKey) LogValue

func (p *EncryptionWithFooterKey) LogValue() slog.Value

func (*EncryptionWithFooterKey) Read

func (*EncryptionWithFooterKey) String

func (p *EncryptionWithFooterKey) String() string

func (*EncryptionWithFooterKey) Validate

func (p *EncryptionWithFooterKey) Validate() error

func (*EncryptionWithFooterKey) Write

type EnumType

type EnumType struct {
}
var LogicalType_ENUM_DEFAULT *EnumType

func NewEnumType

func NewEnumType() *EnumType

func (*EnumType) Equals

func (p *EnumType) Equals(other *EnumType) bool

func (*EnumType) LogValue

func (p *EnumType) LogValue() slog.Value

func (*EnumType) Read

func (p *EnumType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*EnumType) String

func (p *EnumType) String() string

func (*EnumType) Validate

func (p *EnumType) Validate() error

func (*EnumType) Write

func (p *EnumType) Write(ctx context.Context, oprot thrift.TProtocol) error

type FieldRepetitionType

type FieldRepetitionType int64

Representation of Schemas

const (
	FieldRepetitionType_REQUIRED FieldRepetitionType = 0
	FieldRepetitionType_OPTIONAL FieldRepetitionType = 1
	FieldRepetitionType_REPEATED FieldRepetitionType = 2
)
var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType

func FieldRepetitionTypeFromString

func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error)

func FieldRepetitionTypePtr

func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType

func (FieldRepetitionType) MarshalText

func (p FieldRepetitionType) MarshalText() ([]byte, error)

func (*FieldRepetitionType) Scan

func (p *FieldRepetitionType) Scan(value interface{}) error

func (FieldRepetitionType) String

func (p FieldRepetitionType) String() string

func (*FieldRepetitionType) UnmarshalText

func (p *FieldRepetitionType) UnmarshalText(text []byte) error

func (*FieldRepetitionType) Value

func (p *FieldRepetitionType) Value() (driver.Value, error)

type FileCryptoMetaData

type FileCryptoMetaData struct {
	EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,1,required" db:"encryption_algorithm" json:"encryption_algorithm"`
	KeyMetadata         []byte               `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
}

Crypto metadata for files with encrypted footer *

Attributes:

  • EncryptionAlgorithm: Encryption algorithm. This field is only used for files with encrypted footer. Files with plaintext footer store algorithm id inside footer (FileMetaData structure).

  • KeyMetadata: Retrieval metadata of key used for encryption of footer, and (possibly) columns *

func NewFileCryptoMetaData

func NewFileCryptoMetaData() *FileCryptoMetaData

func (*FileCryptoMetaData) Equals

func (p *FileCryptoMetaData) Equals(other *FileCryptoMetaData) bool

func (*FileCryptoMetaData) GetEncryptionAlgorithm

func (p *FileCryptoMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm

func (*FileCryptoMetaData) GetKeyMetadata

func (p *FileCryptoMetaData) GetKeyMetadata() []byte

func (*FileCryptoMetaData) IsSetEncryptionAlgorithm

func (p *FileCryptoMetaData) IsSetEncryptionAlgorithm() bool

func (*FileCryptoMetaData) IsSetKeyMetadata

func (p *FileCryptoMetaData) IsSetKeyMetadata() bool

func (*FileCryptoMetaData) LogValue

func (p *FileCryptoMetaData) LogValue() slog.Value

func (*FileCryptoMetaData) Read

func (p *FileCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*FileCryptoMetaData) ReadField1

func (p *FileCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*FileCryptoMetaData) ReadField2

func (p *FileCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*FileCryptoMetaData) String

func (p *FileCryptoMetaData) String() string

func (*FileCryptoMetaData) Validate

func (p *FileCryptoMetaData) Validate() error

func (*FileCryptoMetaData) Write

func (p *FileCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error

type FileMetaData

// FileMetaData is the description for file metadata.
type FileMetaData struct {
	// Version is the version of this file. Thrift field 1, required.
	Version                  int32                `thrift:"version,1,required" db:"version" json:"version"`
	// Schema is the Parquet schema for this file, containing metadata for all the
	// columns. The schema tree (single root) is flattened to a list by a
	// depth-first traversal; the first element is the root. Thrift field 2, required.
	Schema                   []*SchemaElement     `thrift:"schema,2,required" db:"schema" json:"schema"`
	// NumRows is the number of rows in this file. Thrift field 3, required.
	NumRows                  int64                `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
	// RowGroups lists the row groups in this file. Thrift field 4, required.
	RowGroups                []*RowGroup          `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"`
	// KeyValueMetadata is optional key/value metadata. Thrift field 5.
	KeyValueMetadata         []*KeyValue          `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"`
	// CreatedBy is a string for the application that wrote this file, in the format
	// "<Application> version <App Version> (build <App Build Hash>)". Thrift field 6.
	CreatedBy                *string              `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"`
	// ColumnOrders is the sort order used for the min_value and max_value fields in
	// the Statistics objects and the min_values and max_values fields in the
	// ColumnIndex objects of each column in this file. Only leaf nodes of the
	// schema are represented in this list. Thrift field 7.
	ColumnOrders             []*ColumnOrder       `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"`
	// EncryptionAlgorithm is set only in encrypted files with a plaintext footer;
	// files with an encrypted footer store the algorithm id in FileCryptoMetaData.
	// Thrift field 8.
	EncryptionAlgorithm      *EncryptionAlgorithm `thrift:"encryption_algorithm,8" db:"encryption_algorithm" json:"encryption_algorithm,omitempty"`
	// FooterSigningKeyMetadata is the retrieval metadata of the key used for signing
	// the footer. Used only in encrypted files with a plaintext footer. Thrift field 9.
	FooterSigningKeyMetadata []byte               `thrift:"footer_signing_key_metadata,9" db:"footer_signing_key_metadata" json:"footer_signing_key_metadata,omitempty"`
}

Description for file metadata

Attributes:

  • Version: Version of this file.
  • Schema: Parquet schema for this file. This schema contains metadata for all the columns. The schema is represented as a tree with a single root. The nodes of the tree are flattened to a list by doing a depth-first traversal. The column metadata contains the path in the schema for that column, which can be used to map columns to nodes in the schema. The first element is the root.

  • NumRows: Number of rows in this file.
  • RowGroups: Row groups in this file.
  • KeyValueMetadata: Optional key/value metadata.
  • CreatedBy: String for application that wrote this file. This should be in the format <Application> version <App Version> (build <App Build Hash>), e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55).

  • ColumnOrders: Sort order used for the min_value and max_value fields in the Statistics objects and the min_values and max_values fields in the ColumnIndex objects of each column in this file. Sort orders are listed in the order matching the columns in the schema. The indexes are not necessarily the same though, because only leaf nodes of the schema are represented in the list of sort orders.

Without column_orders, the meaning of the min_value and max_value fields in the Statistics object and the ColumnIndex object is undefined. To ensure well-defined behaviour, if these fields are written to a Parquet file, column_orders must be written as well.

The obsolete min and max fields in the Statistics object are always sorted by signed comparison regardless of column_orders.

  • EncryptionAlgorithm: Encryption algorithm. This field is set only in encrypted files with plaintext footer. Files with encrypted footer store algorithm id in FileCryptoMetaData structure.

  • FooterSigningKeyMetadata: Retrieval metadata of key used for signing the footer. Used only in encrypted files with plaintext footer.

func NewFileMetaData

func NewFileMetaData() *FileMetaData

func (*FileMetaData) Equals

func (p *FileMetaData) Equals(other *FileMetaData) bool

func (*FileMetaData) GetColumnOrders

func (p *FileMetaData) GetColumnOrders() []*ColumnOrder

func (*FileMetaData) GetCreatedBy

func (p *FileMetaData) GetCreatedBy() string

func (*FileMetaData) GetEncryptionAlgorithm

func (p *FileMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm

func (*FileMetaData) GetFooterSigningKeyMetadata

func (p *FileMetaData) GetFooterSigningKeyMetadata() []byte

func (*FileMetaData) GetKeyValueMetadata

func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue

func (*FileMetaData) GetNumRows

func (p *FileMetaData) GetNumRows() int64

func (*FileMetaData) GetRowGroups

func (p *FileMetaData) GetRowGroups() []*RowGroup

func (*FileMetaData) GetSchema

func (p *FileMetaData) GetSchema() []*SchemaElement

func (*FileMetaData) GetVersion

func (p *FileMetaData) GetVersion() int32

func (*FileMetaData) IsSetColumnOrders

func (p *FileMetaData) IsSetColumnOrders() bool

func (*FileMetaData) IsSetCreatedBy

func (p *FileMetaData) IsSetCreatedBy() bool

func (*FileMetaData) IsSetEncryptionAlgorithm

func (p *FileMetaData) IsSetEncryptionAlgorithm() bool

func (*FileMetaData) IsSetFooterSigningKeyMetadata

func (p *FileMetaData) IsSetFooterSigningKeyMetadata() bool

func (*FileMetaData) IsSetKeyValueMetadata

func (p *FileMetaData) IsSetKeyValueMetadata() bool

func (*FileMetaData) LogValue

func (p *FileMetaData) LogValue() slog.Value

func (*FileMetaData) Read

func (p *FileMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField1

func (p *FileMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField2

func (p *FileMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField3

func (p *FileMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField4

func (p *FileMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField5

func (p *FileMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField6

func (p *FileMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField7

func (p *FileMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField8

func (p *FileMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField9

func (p *FileMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) String

func (p *FileMetaData) String() string

func (*FileMetaData) Validate

func (p *FileMetaData) Validate() error

func (*FileMetaData) Write

func (p *FileMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error

type Float16Type

// Float16Type is a struct with no fields.
type Float16Type struct {
}
// LogicalType_FLOAT16_DEFAULT is shown without an initializer, so it is a nil
// *Float16Type.
var LogicalType_FLOAT16_DEFAULT *Float16Type

func NewFloat16Type

func NewFloat16Type() *Float16Type

func (*Float16Type) Equals

func (p *Float16Type) Equals(other *Float16Type) bool

func (*Float16Type) LogValue

func (p *Float16Type) LogValue() slog.Value

func (*Float16Type) Read

func (p *Float16Type) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*Float16Type) String

func (p *Float16Type) String() string

func (*Float16Type) Validate

func (p *Float16Type) Validate() error

func (*Float16Type) Write

func (p *Float16Type) Write(ctx context.Context, oprot thrift.TProtocol) error

type IndexPageHeader

// IndexPageHeader is a struct with no fields.
type IndexPageHeader struct {
}
// PageHeader_IndexPageHeader_DEFAULT is shown without an initializer, so it is a
// nil *IndexPageHeader.
var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader

func NewIndexPageHeader

func NewIndexPageHeader() *IndexPageHeader

func (*IndexPageHeader) Equals

func (p *IndexPageHeader) Equals(other *IndexPageHeader) bool

func (*IndexPageHeader) LogValue

func (p *IndexPageHeader) LogValue() slog.Value

func (*IndexPageHeader) Read

func (p *IndexPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*IndexPageHeader) String

func (p *IndexPageHeader) String() string

func (*IndexPageHeader) Validate

func (p *IndexPageHeader) Validate() error

func (*IndexPageHeader) Write

func (p *IndexPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type IntType

// IntType is the integer logical type annotation.
//
// Allowed for physical types: INT32, INT64.
type IntType struct {
	// BitWidth must be 8, 16, 32, or 64. Thrift field 1, required.
	BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"`
	// IsSigned is Thrift field 2, required.
	// NOTE(review): presumably selects signed vs. unsigned interpretation — confirm.
	IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"`
}

Integer logical type annotation

bitWidth must be 8, 16, 32, or 64.

Allowed for physical types: INT32, INT64

Attributes:

  • BitWidth
  • IsSigned
var LogicalType_INTEGER_DEFAULT *IntType

func NewIntType

func NewIntType() *IntType

func (*IntType) Equals

func (p *IntType) Equals(other *IntType) bool

func (*IntType) GetBitWidth

func (p *IntType) GetBitWidth() int8

func (*IntType) GetIsSigned

func (p *IntType) GetIsSigned() bool

func (*IntType) LogValue

func (p *IntType) LogValue() slog.Value

func (*IntType) Read

func (p *IntType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*IntType) ReadField1

func (p *IntType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*IntType) ReadField2

func (p *IntType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*IntType) String

func (p *IntType) String() string

func (*IntType) Validate

func (p *IntType) Validate() error

func (*IntType) Write

func (p *IntType) Write(ctx context.Context, oprot thrift.TProtocol) error

type JsonType

// JsonType is the embedded JSON logical type annotation. It has no fields.
//
// Allowed for physical types: BYTE_ARRAY.
type JsonType struct {
}

Embedded JSON logical type annotation

Allowed for physical types: BYTE_ARRAY

var LogicalType_JSON_DEFAULT *JsonType

func NewJsonType

func NewJsonType() *JsonType

func (*JsonType) Equals

func (p *JsonType) Equals(other *JsonType) bool

func (*JsonType) LogValue

func (p *JsonType) LogValue() slog.Value

func (*JsonType) Read

func (p *JsonType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*JsonType) String

func (p *JsonType) String() string

func (*JsonType) Validate

func (p *JsonType) Validate() error

func (*JsonType) Write

func (p *JsonType) Write(ctx context.Context, oprot thrift.TProtocol) error

type KeyValue

// KeyValue is a wrapper struct to store key values.
type KeyValue struct {
	// Key is Thrift field 1, required.
	Key   string  `thrift:"key,1,required" db:"key" json:"key"`
	// Value is Thrift field 2; it is a pointer and is tagged omitempty for JSON.
	Value *string `thrift:"value,2" db:"value" json:"value,omitempty"`
}

Wrapper struct to store key values

Attributes:

  • Key
  • Value

func NewKeyValue

func NewKeyValue() *KeyValue

func (*KeyValue) Equals

func (p *KeyValue) Equals(other *KeyValue) bool

func (*KeyValue) GetKey

func (p *KeyValue) GetKey() string

func (*KeyValue) GetValue

func (p *KeyValue) GetValue() string

func (*KeyValue) IsSetValue

func (p *KeyValue) IsSetValue() bool

func (*KeyValue) LogValue

func (p *KeyValue) LogValue() slog.Value

func (*KeyValue) Read

func (p *KeyValue) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*KeyValue) ReadField1

func (p *KeyValue) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*KeyValue) ReadField2

func (p *KeyValue) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*KeyValue) String

func (p *KeyValue) String() string

func (*KeyValue) Validate

func (p *KeyValue) Validate() error

func (*KeyValue) Write

func (p *KeyValue) Write(ctx context.Context, oprot thrift.TProtocol) error

type ListType

// ListType is a struct with no fields.
type ListType struct {
}
// LogicalType_LIST_DEFAULT is shown without an initializer, so it is a nil
// *ListType.
var LogicalType_LIST_DEFAULT *ListType

func NewListType

func NewListType() *ListType

func (*ListType) Equals

func (p *ListType) Equals(other *ListType) bool

func (*ListType) LogValue

func (p *ListType) LogValue() slog.Value

func (*ListType) Read

func (p *ListType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ListType) String

func (p *ListType) String() string

func (*ListType) Validate

func (p *ListType) Validate() error

func (*ListType) Write

func (p *ListType) Write(ctx context.Context, oprot thrift.TProtocol) error

type LogicalType

// LogicalType holds the LogicalType annotations that replace ConvertedType.
//
// To maintain compatibility, implementations using LogicalType for a
// SchemaElement must also set the corresponding ConvertedType (if any).
//
// Every member is a pointer tagged omitempty for JSON; the Thrift field ids run
// from 1 to 15 with id 9 unused.
// NOTE(review): presumably at most one member is set at a time (see
// CountSetFieldsLogicalType) — confirm against the generated Write/Read code.
type LogicalType struct {
	STRING    *StringType    `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"`
	MAP       *MapType       `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"`
	LIST      *ListType      `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"`
	ENUM      *EnumType      `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"`
	DECIMAL   *DecimalType   `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"`
	DATE      *DateType      `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"`
	TIME      *TimeType      `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"`
	TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"`
	// unused field # 9
	INTEGER *IntType     `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"`
	UNKNOWN *NullType    `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"`
	JSON    *JsonType    `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"`
	BSON    *BsonType    `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"`
	UUID    *UUIDType    `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"`
	FLOAT16 *Float16Type `thrift:"FLOAT16,15" db:"FLOAT16" json:"FLOAT16,omitempty"`
}

LogicalType annotations to replace ConvertedType.

To maintain compatibility, implementations using LogicalType for a SchemaElement must also set the corresponding ConvertedType (if any) from the following table.

Attributes:

  • STRING
  • MAP
  • LIST
  • ENUM
  • DECIMAL
  • DATE
  • TIME
  • TIMESTAMP
  • INTEGER
  • UNKNOWN
  • JSON
  • BSON
  • UUID
  • FLOAT16
var SchemaElement_LogicalType_DEFAULT *LogicalType

func NewLogicalType

func NewLogicalType() *LogicalType

func (*LogicalType) CountSetFieldsLogicalType

func (p *LogicalType) CountSetFieldsLogicalType() int

func (*LogicalType) Equals

func (p *LogicalType) Equals(other *LogicalType) bool

func (*LogicalType) GetBSON

func (p *LogicalType) GetBSON() *BsonType

func (*LogicalType) GetDATE

func (p *LogicalType) GetDATE() *DateType

func (*LogicalType) GetDECIMAL

func (p *LogicalType) GetDECIMAL() *DecimalType

func (*LogicalType) GetENUM

func (p *LogicalType) GetENUM() *EnumType

func (*LogicalType) GetFLOAT16

func (p *LogicalType) GetFLOAT16() *Float16Type

func (*LogicalType) GetINTEGER

func (p *LogicalType) GetINTEGER() *IntType

func (*LogicalType) GetJSON

func (p *LogicalType) GetJSON() *JsonType

func (*LogicalType) GetLIST

func (p *LogicalType) GetLIST() *ListType

func (*LogicalType) GetMAP

func (p *LogicalType) GetMAP() *MapType

func (*LogicalType) GetSTRING

func (p *LogicalType) GetSTRING() *StringType

func (*LogicalType) GetTIME

func (p *LogicalType) GetTIME() *TimeType

func (*LogicalType) GetTIMESTAMP

func (p *LogicalType) GetTIMESTAMP() *TimestampType

func (*LogicalType) GetUNKNOWN

func (p *LogicalType) GetUNKNOWN() *NullType

func (*LogicalType) GetUUID

func (p *LogicalType) GetUUID() *UUIDType

func (*LogicalType) IsSetBSON

func (p *LogicalType) IsSetBSON() bool

func (*LogicalType) IsSetDATE

func (p *LogicalType) IsSetDATE() bool

func (*LogicalType) IsSetDECIMAL

func (p *LogicalType) IsSetDECIMAL() bool

func (*LogicalType) IsSetENUM

func (p *LogicalType) IsSetENUM() bool

func (*LogicalType) IsSetFLOAT16

func (p *LogicalType) IsSetFLOAT16() bool

func (*LogicalType) IsSetINTEGER

func (p *LogicalType) IsSetINTEGER() bool

func (*LogicalType) IsSetJSON

func (p *LogicalType) IsSetJSON() bool

func (*LogicalType) IsSetLIST

func (p *LogicalType) IsSetLIST() bool

func (*LogicalType) IsSetMAP

func (p *LogicalType) IsSetMAP() bool

func (*LogicalType) IsSetSTRING

func (p *LogicalType) IsSetSTRING() bool

func (*LogicalType) IsSetTIME

func (p *LogicalType) IsSetTIME() bool

func (*LogicalType) IsSetTIMESTAMP

func (p *LogicalType) IsSetTIMESTAMP() bool

func (*LogicalType) IsSetUNKNOWN

func (p *LogicalType) IsSetUNKNOWN() bool

func (*LogicalType) IsSetUUID

func (p *LogicalType) IsSetUUID() bool

func (*LogicalType) LogValue

func (p *LogicalType) LogValue() slog.Value

func (*LogicalType) Read

func (p *LogicalType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField1

func (p *LogicalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField10

func (p *LogicalType) ReadField10(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField11

func (p *LogicalType) ReadField11(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField12

func (p *LogicalType) ReadField12(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField13

func (p *LogicalType) ReadField13(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField14

func (p *LogicalType) ReadField14(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField15

func (p *LogicalType) ReadField15(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField2

func (p *LogicalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField3

func (p *LogicalType) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField4

func (p *LogicalType) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField5

func (p *LogicalType) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField6

func (p *LogicalType) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField7

func (p *LogicalType) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField8

func (p *LogicalType) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) String

func (p *LogicalType) String() string

func (*LogicalType) Validate

func (p *LogicalType) Validate() error

func (*LogicalType) Write

func (p *LogicalType) Write(ctx context.Context, oprot thrift.TProtocol) error

type MapType

// MapType is a struct with no fields.
type MapType struct {
}
// LogicalType_MAP_DEFAULT is shown without an initializer, so it is a nil
// *MapType.
var LogicalType_MAP_DEFAULT *MapType

func NewMapType

func NewMapType() *MapType

func (*MapType) Equals

func (p *MapType) Equals(other *MapType) bool

func (*MapType) LogValue

func (p *MapType) LogValue() slog.Value

func (*MapType) Read

func (p *MapType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*MapType) String

func (p *MapType) String() string

func (*MapType) Validate

func (p *MapType) Validate() error

func (*MapType) Write

func (p *MapType) Write(ctx context.Context, oprot thrift.TProtocol) error

type MicroSeconds

// MicroSeconds is a struct with no fields.
type MicroSeconds struct {
}
// TimeUnit_MICROS_DEFAULT is shown without an initializer, so it is a nil
// *MicroSeconds.
var TimeUnit_MICROS_DEFAULT *MicroSeconds

func NewMicroSeconds

func NewMicroSeconds() *MicroSeconds

func (*MicroSeconds) Equals

func (p *MicroSeconds) Equals(other *MicroSeconds) bool

func (*MicroSeconds) LogValue

func (p *MicroSeconds) LogValue() slog.Value

func (*MicroSeconds) Read

func (p *MicroSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*MicroSeconds) String

func (p *MicroSeconds) String() string

func (*MicroSeconds) Validate

func (p *MicroSeconds) Validate() error

func (*MicroSeconds) Write

func (p *MicroSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error

type MilliSeconds

// MilliSeconds is one of the time units for logical types. It has no fields.
type MilliSeconds struct {
}

Time units for logical types

var TimeUnit_MILLIS_DEFAULT *MilliSeconds

func NewMilliSeconds

func NewMilliSeconds() *MilliSeconds

func (*MilliSeconds) Equals

func (p *MilliSeconds) Equals(other *MilliSeconds) bool

func (*MilliSeconds) LogValue

func (p *MilliSeconds) LogValue() slog.Value

func (*MilliSeconds) Read

func (p *MilliSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*MilliSeconds) String

func (p *MilliSeconds) String() string

func (*MilliSeconds) Validate

func (p *MilliSeconds) Validate() error

func (*MilliSeconds) Write

func (p *MilliSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error

type NanoSeconds

// NanoSeconds is a struct with no fields.
type NanoSeconds struct {
}
// TimeUnit_NANOS_DEFAULT is shown without an initializer, so it is a nil
// *NanoSeconds.
var TimeUnit_NANOS_DEFAULT *NanoSeconds

func NewNanoSeconds

func NewNanoSeconds() *NanoSeconds

func (*NanoSeconds) Equals

func (p *NanoSeconds) Equals(other *NanoSeconds) bool

func (*NanoSeconds) LogValue

func (p *NanoSeconds) LogValue() slog.Value

func (*NanoSeconds) Read

func (p *NanoSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*NanoSeconds) String

func (p *NanoSeconds) String() string

func (*NanoSeconds) Validate

func (p *NanoSeconds) Validate() error

func (*NanoSeconds) Write

func (p *NanoSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error

type NullType

// NullType is the logical type to annotate a column that is always null.
//
// Sometimes when discovering the schema of existing data, values are always
// null and the physical type can't be determined. This annotation signals the
// case where the physical type was guessed from all null values.
type NullType struct {
}

Logical type to annotate a column that is always null.

Sometimes when discovering the schema of existing data, values are always null and the physical type can't be determined. This annotation signals the case where the physical type was guessed from all null values.

var LogicalType_UNKNOWN_DEFAULT *NullType

func NewNullType

func NewNullType() *NullType

func (*NullType) Equals

func (p *NullType) Equals(other *NullType) bool

func (*NullType) LogValue

func (p *NullType) LogValue() slog.Value

func (*NullType) Read

func (p *NullType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*NullType) String

func (p *NullType) String() string

func (*NullType) Validate

func (p *NullType) Validate() error

func (*NullType) Write

func (p *NullType) Write(ctx context.Context, oprot thrift.TProtocol) error

type OffsetIndex

// OffsetIndex holds optional offsets for each data page in a ColumnChunk.
//
// It forms part of the page index, along with ColumnIndex, and may be present
// even if ColumnIndex is not.
type OffsetIndex struct {
	// PageLocations is ordered by increasing PageLocation.offset. It is required
	// that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
	// Thrift field 1, required.
	PageLocations               []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"`
	// UnencodedByteArrayDataBytes is the unencoded/uncompressed size for BYTE_ARRAY
	// types. The struct tag is elided in this listing.
	UnencodedByteArrayDataBytes []int64         `` /* 128-byte string literal not displayed */
}

Optional offsets for each data page in a ColumnChunk.

Forms part of the page index, along with ColumnIndex.

OffsetIndex may be present even if ColumnIndex is not.

Attributes:

  • PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required that page_locations[i].first_row_index < page_locations[i+1].first_row_index.

  • UnencodedByteArrayDataBytes: Unencoded/uncompressed size for BYTE_ARRAY types. See documentation for unencoded_byte_array_data_bytes in SizeStatistics for more details on this field.

func NewOffsetIndex

func NewOffsetIndex() *OffsetIndex

func (*OffsetIndex) Equals

func (p *OffsetIndex) Equals(other *OffsetIndex) bool

func (*OffsetIndex) GetPageLocations

func (p *OffsetIndex) GetPageLocations() []*PageLocation

func (*OffsetIndex) GetUnencodedByteArrayDataBytes

func (p *OffsetIndex) GetUnencodedByteArrayDataBytes() []int64

func (*OffsetIndex) IsSetUnencodedByteArrayDataBytes

func (p *OffsetIndex) IsSetUnencodedByteArrayDataBytes() bool

func (*OffsetIndex) LogValue

func (p *OffsetIndex) LogValue() slog.Value

func (*OffsetIndex) Read

func (p *OffsetIndex) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*OffsetIndex) ReadField1

func (p *OffsetIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*OffsetIndex) ReadField2

func (p *OffsetIndex) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*OffsetIndex) String

func (p *OffsetIndex) String() string

func (*OffsetIndex) Validate

func (p *OffsetIndex) Validate() error

func (*OffsetIndex) Write

func (p *OffsetIndex) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageEncodingStats

// PageEncodingStats holds statistics of a given page type and encoding.
type PageEncodingStats struct {
	// PageType is the page type (data/dic/...). Thrift field 1, required.
	PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"`
	// Encoding is the encoding of the page. Thrift field 2, required.
	Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
	// Count is the number of pages of this type with this encoding. Thrift field 3,
	// required.
	Count    int32    `thrift:"count,3,required" db:"count" json:"count"`
}

Statistics of a given page type and encoding.

Attributes:

  • PageType: The page type (data/dic/...).
  • Encoding: Encoding of the page.
  • Count: Number of pages of this type with this encoding.

func NewPageEncodingStats

func NewPageEncodingStats() *PageEncodingStats

func (*PageEncodingStats) Equals

func (p *PageEncodingStats) Equals(other *PageEncodingStats) bool

func (*PageEncodingStats) GetCount

func (p *PageEncodingStats) GetCount() int32

func (*PageEncodingStats) GetEncoding

func (p *PageEncodingStats) GetEncoding() Encoding

func (*PageEncodingStats) GetPageType

func (p *PageEncodingStats) GetPageType() PageType

func (*PageEncodingStats) LogValue

func (p *PageEncodingStats) LogValue() slog.Value

func (*PageEncodingStats) Read

func (p *PageEncodingStats) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) ReadField1

func (p *PageEncodingStats) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) ReadField2

func (p *PageEncodingStats) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) ReadField3

func (p *PageEncodingStats) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) String

func (p *PageEncodingStats) String() string

func (*PageEncodingStats) Validate

func (p *PageEncodingStats) Validate() error

func (*PageEncodingStats) Write

func (p *PageEncodingStats) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageHeader

// PageHeader describes a single page: its type, its sizes, an optional CRC, and
// optional type-specific header structs.
type PageHeader struct {
	// Type is the type of the page; it indicates which of the *_header fields is
	// set. Thrift field 1, required.
	Type                 PageType              `thrift:"type,1,required" db:"type" json:"type"`
	// UncompressedPageSize is the uncompressed page size in bytes (not including
	// this header). Thrift field 2, required.
	UncompressedPageSize int32                 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"`
	// CompressedPageSize is the compressed (and potentially encrypted) page size in
	// bytes, not including this header. Thrift field 3, required.
	CompressedPageSize   int32                 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"`
	// Crc is the 32-bit CRC checksum for the page. It uses the standard CRC32
	// algorithm (polynomial 0x04C11DB7) and is computed on the serialized binary
	// representation of the page as written to disk, excluding the page header,
	// i.e. after any compression and encryption. Thrift field 4.
	Crc                  *int32                `thrift:"crc,4" db:"crc" json:"crc,omitempty"`
	// DataPageHeader is Thrift field 5.
	DataPageHeader       *DataPageHeader       `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"`
	// IndexPageHeader is Thrift field 6.
	IndexPageHeader      *IndexPageHeader      `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"`
	// DictionaryPageHeader is Thrift field 7.
	DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"`
	// DataPageHeaderV2 is Thrift field 8.
	DataPageHeaderV2     *DataPageHeaderV2     `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"`
}

Attributes:

  • Type: The type of the page; indicates which of the *_header fields is set.
  • UncompressedPageSize: Uncompressed page size in bytes (not including this header).
  • CompressedPageSize: Compressed (and potentially encrypted) page size in bytes, not including this header.
  • Crc: The 32-bit CRC checksum for the page, to be calculated as follows:
      - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, the same as in e.g. GZip).
      - All page types can have a CRC (v1 and v2 data pages, dictionary pages, etc.).
      - The CRC is computed on the serialization binary representation of the page (as written to disk), excluding the page header. For example, for v1 data pages, the CRC is computed on the concatenation of repetition levels, definition levels and column values (optionally compressed, optionally encrypted).
      - The CRC computation therefore takes place after any compression and encryption steps, if any.

If enabled, this allows for disabling checksumming in HDFS if only a few pages need to be read.

  • DataPageHeader
  • IndexPageHeader
  • DictionaryPageHeader
  • DataPageHeaderV2

func NewPageHeader

func NewPageHeader() *PageHeader

func (*PageHeader) Equals

func (p *PageHeader) Equals(other *PageHeader) bool

func (*PageHeader) GetCompressedPageSize

func (p *PageHeader) GetCompressedPageSize() int32

func (*PageHeader) GetCrc

func (p *PageHeader) GetCrc() int32

func (*PageHeader) GetDataPageHeader

func (p *PageHeader) GetDataPageHeader() *DataPageHeader

func (*PageHeader) GetDataPageHeaderV2

func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2

func (*PageHeader) GetDictionaryPageHeader

func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader

func (*PageHeader) GetIndexPageHeader

func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader

func (*PageHeader) GetType

func (p *PageHeader) GetType() PageType

func (*PageHeader) GetUncompressedPageSize

func (p *PageHeader) GetUncompressedPageSize() int32

func (*PageHeader) IsSetCrc

func (p *PageHeader) IsSetCrc() bool

func (*PageHeader) IsSetDataPageHeader

func (p *PageHeader) IsSetDataPageHeader() bool

func (*PageHeader) IsSetDataPageHeaderV2

func (p *PageHeader) IsSetDataPageHeaderV2() bool

func (*PageHeader) IsSetDictionaryPageHeader

func (p *PageHeader) IsSetDictionaryPageHeader() bool

func (*PageHeader) IsSetIndexPageHeader

func (p *PageHeader) IsSetIndexPageHeader() bool

func (*PageHeader) LogValue

func (p *PageHeader) LogValue() slog.Value

func (*PageHeader) Read

func (p *PageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField1

func (p *PageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField2

func (p *PageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField3

func (p *PageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField4

func (p *PageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField5

func (p *PageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField6

func (p *PageHeader) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField7

func (p *PageHeader) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField8

func (p *PageHeader) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) String

func (p *PageHeader) String() string

func (*PageHeader) Validate

func (p *PageHeader) Validate() error

func (*PageHeader) Write

func (p *PageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageLocation

// PageLocation records where a page sits in the file and which row it starts at.
type PageLocation struct {
	// Offset is the offset of the page in the file. Thrift field 1, required.
	Offset             int64 `thrift:"offset,1,required" db:"offset" json:"offset"`
	// CompressedPageSize is the size of the page, including header: the sum of
	// compressed_page_size and header length. Thrift field 2, required.
	CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"`
	// FirstRowIndex is the index within the RowGroup of the first row of the page.
	// When an OffsetIndex is present, pages must begin on row boundaries
	// (repetition_level = 0). Thrift field 3, required.
	FirstRowIndex      int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"`
}

Attributes:

  • Offset: Offset of the page in the file.
  • CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header length.
  • FirstRowIndex: Index within the RowGroup of the first row of the page. When an OffsetIndex is present, pages must begin on row boundaries (repetition_level = 0).

func NewPageLocation

func NewPageLocation() *PageLocation

func (*PageLocation) Equals

func (p *PageLocation) Equals(other *PageLocation) bool

func (*PageLocation) GetCompressedPageSize

func (p *PageLocation) GetCompressedPageSize() int32

func (*PageLocation) GetFirstRowIndex

func (p *PageLocation) GetFirstRowIndex() int64

func (*PageLocation) GetOffset

func (p *PageLocation) GetOffset() int64

func (*PageLocation) LogValue

func (p *PageLocation) LogValue() slog.Value

func (*PageLocation) Read

func (p *PageLocation) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) ReadField1

func (p *PageLocation) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) ReadField2

func (p *PageLocation) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) ReadField3

func (p *PageLocation) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) String

func (p *PageLocation) String() string

func (*PageLocation) Validate

func (p *PageLocation) Validate() error

func (*PageLocation) Write

func (p *PageLocation) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageType

type PageType int64
const (
	PageType_DATA_PAGE       PageType = 0
	PageType_INDEX_PAGE      PageType = 1
	PageType_DICTIONARY_PAGE PageType = 2
	PageType_DATA_PAGE_V2    PageType = 3
)

func PageTypeFromString

func PageTypeFromString(s string) (PageType, error)

func PageTypePtr

func PageTypePtr(v PageType) *PageType

func (PageType) MarshalText

func (p PageType) MarshalText() ([]byte, error)

func (*PageType) Scan

func (p *PageType) Scan(value interface{}) error

func (PageType) String

func (p PageType) String() string

func (*PageType) UnmarshalText

func (p *PageType) UnmarshalText(text []byte) error

func (*PageType) Value

func (p *PageType) Value() (driver.Value, error)

type RowGroup

type RowGroup struct {
	Columns             []*ColumnChunk   `thrift:"columns,1,required" db:"columns" json:"columns"`
	TotalByteSize       int64            `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"`
	NumRows             int64            `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
	SortingColumns      []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"`
	FileOffset          *int64           `thrift:"file_offset,5" db:"file_offset" json:"file_offset,omitempty"`
	TotalCompressedSize *int64           `thrift:"total_compressed_size,6" db:"total_compressed_size" json:"total_compressed_size,omitempty"`
	Ordinal             *int16           `thrift:"ordinal,7" db:"ordinal" json:"ordinal,omitempty"`
}

Attributes:

  • Columns: Metadata for each column chunk in this row group. This list must have the same order as the SchemaElement list in FileMetaData.
  • TotalByteSize: Total byte size of all the uncompressed column data in this row group.
  • NumRows: Number of rows in this row group.
  • SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup. The sorting columns can be a subset of all the columns.
  • FileOffset: Byte offset from beginning of file to first page (data or dictionary) in this row group.
  • TotalCompressedSize: Total byte size of all compressed (and potentially encrypted) column data in this row group.
  • Ordinal: Row group ordinal in the file.

func NewRowGroup

func NewRowGroup() *RowGroup

func (*RowGroup) Equals

func (p *RowGroup) Equals(other *RowGroup) bool

func (*RowGroup) GetColumns

func (p *RowGroup) GetColumns() []*ColumnChunk

func (*RowGroup) GetFileOffset

func (p *RowGroup) GetFileOffset() int64

func (*RowGroup) GetNumRows

func (p *RowGroup) GetNumRows() int64

func (*RowGroup) GetOrdinal

func (p *RowGroup) GetOrdinal() int16

func (*RowGroup) GetSortingColumns

func (p *RowGroup) GetSortingColumns() []*SortingColumn

func (*RowGroup) GetTotalByteSize

func (p *RowGroup) GetTotalByteSize() int64

func (*RowGroup) GetTotalCompressedSize

func (p *RowGroup) GetTotalCompressedSize() int64

func (*RowGroup) IsSetFileOffset

func (p *RowGroup) IsSetFileOffset() bool

func (*RowGroup) IsSetOrdinal

func (p *RowGroup) IsSetOrdinal() bool

func (*RowGroup) IsSetSortingColumns

func (p *RowGroup) IsSetSortingColumns() bool

func (*RowGroup) IsSetTotalCompressedSize

func (p *RowGroup) IsSetTotalCompressedSize() bool

func (*RowGroup) LogValue

func (p *RowGroup) LogValue() slog.Value

func (*RowGroup) Read

func (p *RowGroup) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField1

func (p *RowGroup) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField2

func (p *RowGroup) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField3

func (p *RowGroup) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField4

func (p *RowGroup) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField5

func (p *RowGroup) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField6

func (p *RowGroup) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField7

func (p *RowGroup) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) String

func (p *RowGroup) String() string

func (*RowGroup) Validate

func (p *RowGroup) Validate() error

func (*RowGroup) Write

func (p *RowGroup) Write(ctx context.Context, oprot thrift.TProtocol) error

type SchemaElement

type SchemaElement struct {
	Type           *Type                `thrift:"type,1" db:"type" json:"type,omitempty"`
	TypeLength     *int32               `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"`
	RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"`
	Name           string               `thrift:"name,4,required" db:"name" json:"name"`
	NumChildren    *int32               `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"`
	ConvertedType  *ConvertedType       `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"`
	Scale          *int32               `thrift:"scale,7" db:"scale" json:"scale,omitempty"`
	Precision      *int32               `thrift:"precision,8" db:"precision" json:"precision,omitempty"`
	FieldID        *int32               `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"`
	LogicalType    *LogicalType         `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"`
}

Represents an element inside a schema definition.

  • if it is a group (inner node) then type is undefined and num_children is defined
  • if it is a primitive type (leaf) then type is defined and num_children is undefined

The nodes are listed in depth-first traversal order.

Attributes:

  • Type: Data type for this field. Not set if the current element is a non-leaf node
  • TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. Otherwise, if specified, this is the maximum bit length to store any of the values (e.g. a low-cardinality INT column could have this set to 3). Note that this is in the schema, and therefore fixed for the entire file.

  • RepetitionType: repetition of the field. The root of the schema does not have a repetition_type.

All other nodes must have one

  • Name: Name of the field in the schema
  • NumChildren: Nested fields. Since thrift does not support nested fields, the nesting is flattened to a single list by a depth-first traversal. The children count is used to construct the nested relationship. This field is not set when the element is a primitive type.

  • ConvertedType: DEPRECATED: When the schema is the result of a conversion from another model.

Used to record the original type to help with cross conversion.

This is superseded by logicalType.

  • Scale: DEPRECATED: Used when this column contains decimal data.

See the DECIMAL converted type for more details.

This is superseded by using the DecimalType annotation in logicalType.

  • Precision: DEPRECATED: See Scale; the decimal description given there applies to this field as well.
  • FieldID: When the original schema supports field ids, this will save the

original field id in the parquet schema

  • LogicalType: The logical type of this SchemaElement

LogicalType replaces ConvertedType, but ConvertedType is still required for some logical types to ensure forward-compatibility in format v1.

func NewSchemaElement

func NewSchemaElement() *SchemaElement

func (*SchemaElement) Equals

func (p *SchemaElement) Equals(other *SchemaElement) bool

func (*SchemaElement) GetConvertedType

func (p *SchemaElement) GetConvertedType() ConvertedType

func (*SchemaElement) GetFieldID

func (p *SchemaElement) GetFieldID() int32

func (*SchemaElement) GetLogicalType

func (p *SchemaElement) GetLogicalType() *LogicalType

func (*SchemaElement) GetName

func (p *SchemaElement) GetName() string

func (*SchemaElement) GetNumChildren

func (p *SchemaElement) GetNumChildren() int32

func (*SchemaElement) GetPrecision

func (p *SchemaElement) GetPrecision() int32

func (*SchemaElement) GetRepetitionType

func (p *SchemaElement) GetRepetitionType() FieldRepetitionType

func (*SchemaElement) GetScale

func (p *SchemaElement) GetScale() int32

func (*SchemaElement) GetType

func (p *SchemaElement) GetType() Type

func (*SchemaElement) GetTypeLength

func (p *SchemaElement) GetTypeLength() int32

func (*SchemaElement) IsSetConvertedType

func (p *SchemaElement) IsSetConvertedType() bool

func (*SchemaElement) IsSetFieldID

func (p *SchemaElement) IsSetFieldID() bool

func (*SchemaElement) IsSetLogicalType

func (p *SchemaElement) IsSetLogicalType() bool

func (*SchemaElement) IsSetNumChildren

func (p *SchemaElement) IsSetNumChildren() bool

func (*SchemaElement) IsSetPrecision

func (p *SchemaElement) IsSetPrecision() bool

func (*SchemaElement) IsSetRepetitionType

func (p *SchemaElement) IsSetRepetitionType() bool

func (*SchemaElement) IsSetScale

func (p *SchemaElement) IsSetScale() bool

func (*SchemaElement) IsSetType

func (p *SchemaElement) IsSetType() bool

func (*SchemaElement) IsSetTypeLength

func (p *SchemaElement) IsSetTypeLength() bool

func (*SchemaElement) LogValue

func (p *SchemaElement) LogValue() slog.Value

func (*SchemaElement) Read

func (p *SchemaElement) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField1

func (p *SchemaElement) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField10

func (p *SchemaElement) ReadField10(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField2

func (p *SchemaElement) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField3

func (p *SchemaElement) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField4

func (p *SchemaElement) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField5

func (p *SchemaElement) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField6

func (p *SchemaElement) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField7

func (p *SchemaElement) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField8

func (p *SchemaElement) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField9

func (p *SchemaElement) ReadField9(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) String

func (p *SchemaElement) String() string

func (*SchemaElement) Validate

func (p *SchemaElement) Validate() error

func (*SchemaElement) Write

func (p *SchemaElement) Write(ctx context.Context, oprot thrift.TProtocol) error

type SizeStatistics

type SizeStatistics struct {
	UnencodedByteArrayDataBytes *int64  `` /* 128-byte string literal not displayed */
	RepetitionLevelHistogram    []int64 `thrift:"repetition_level_histogram,2" db:"repetition_level_histogram" json:"repetition_level_histogram,omitempty"`
	DefinitionLevelHistogram    []int64 `thrift:"definition_level_histogram,3" db:"definition_level_histogram" json:"definition_level_histogram,omitempty"`
}

A structure for capturing metadata for estimating the unencoded, uncompressed size of data written. This is useful for readers to estimate how much memory is needed to reconstruct data in their memory model and for fine grained filter pushdown on nested structures (the histograms contained in this structure can help determine the number of nulls at a particular nesting level and maximum length of lists).

Attributes:

  • UnencodedByteArrayDataBytes: The number of physical bytes stored for BYTE_ARRAY data values assuming no encoding. This is exclusive of the bytes needed to store the length of each byte array. In other words, this field is equivalent to `(size of PLAIN-ENCODING the byte array values) - (4 bytes * number of values written)`. To determine unencoded sizes of other types, readers can use schema information multiplied by the number of non-null and null values. The number of null/non-null values can be inferred from the histograms below.

For example, if a column chunk is dictionary-encoded with dictionary ["a", "bc", "cde"], and a data page contains the indices [0, 0, 1, 2], then this value for that data page should be 7 (1 + 1 + 2 + 3).

This field should only be set for types that use BYTE_ARRAY as their physical type.

  • RepetitionLevelHistogram: When present, there is expected to be one element corresponding to each repetition level (i.e. size=max repetition_level+1), where each element represents the number of times the repetition level was observed in the data.

This field may be omitted if max_repetition_level is 0 without loss of information.

  • DefinitionLevelHistogram: Same as repetition_level_histogram except for definition levels.

This field may be omitted if max_definition_level is 0 or 1 without loss of information.

var ColumnMetaData_SizeStatistics_DEFAULT *SizeStatistics

func NewSizeStatistics

func NewSizeStatistics() *SizeStatistics

func (*SizeStatistics) Equals

func (p *SizeStatistics) Equals(other *SizeStatistics) bool

func (*SizeStatistics) GetDefinitionLevelHistogram

func (p *SizeStatistics) GetDefinitionLevelHistogram() []int64

func (*SizeStatistics) GetRepetitionLevelHistogram

func (p *SizeStatistics) GetRepetitionLevelHistogram() []int64

func (*SizeStatistics) GetUnencodedByteArrayDataBytes

func (p *SizeStatistics) GetUnencodedByteArrayDataBytes() int64

func (*SizeStatistics) IsSetDefinitionLevelHistogram

func (p *SizeStatistics) IsSetDefinitionLevelHistogram() bool

func (*SizeStatistics) IsSetRepetitionLevelHistogram

func (p *SizeStatistics) IsSetRepetitionLevelHistogram() bool

func (*SizeStatistics) IsSetUnencodedByteArrayDataBytes

func (p *SizeStatistics) IsSetUnencodedByteArrayDataBytes() bool

func (*SizeStatistics) LogValue

func (p *SizeStatistics) LogValue() slog.Value

func (*SizeStatistics) Read

func (p *SizeStatistics) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*SizeStatistics) ReadField1

func (p *SizeStatistics) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*SizeStatistics) ReadField2

func (p *SizeStatistics) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*SizeStatistics) ReadField3

func (p *SizeStatistics) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*SizeStatistics) String

func (p *SizeStatistics) String() string

func (*SizeStatistics) Validate

func (p *SizeStatistics) Validate() error

func (*SizeStatistics) Write

func (p *SizeStatistics) Write(ctx context.Context, oprot thrift.TProtocol) error

type SortingColumn

type SortingColumn struct {
	ColumnIdx  int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"`
	Descending bool  `thrift:"descending,2,required" db:"descending" json:"descending"`
	NullsFirst bool  `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"`
}

Sort order within a RowGroup of a leaf column

Attributes:

  • ColumnIdx: The ordinal position of the column (in this row group).
  • Descending: If true, indicates this column is sorted in descending order.
  • NullsFirst: If true, nulls will come before non-null values; otherwise, nulls go at the end.

func NewSortingColumn

func NewSortingColumn() *SortingColumn

func (*SortingColumn) Equals

func (p *SortingColumn) Equals(other *SortingColumn) bool

func (*SortingColumn) GetColumnIdx

func (p *SortingColumn) GetColumnIdx() int32

func (*SortingColumn) GetDescending

func (p *SortingColumn) GetDescending() bool

func (*SortingColumn) GetNullsFirst

func (p *SortingColumn) GetNullsFirst() bool

func (*SortingColumn) LogValue

func (p *SortingColumn) LogValue() slog.Value

func (*SortingColumn) Read

func (p *SortingColumn) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) ReadField1

func (p *SortingColumn) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) ReadField2

func (p *SortingColumn) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) ReadField3

func (p *SortingColumn) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) String

func (p *SortingColumn) String() string

func (*SortingColumn) Validate

func (p *SortingColumn) Validate() error

func (*SortingColumn) Write

func (p *SortingColumn) Write(ctx context.Context, oprot thrift.TProtocol) error

type SplitBlockAlgorithm

type SplitBlockAlgorithm struct {
}

Block-based algorithm type annotation.

var BloomFilterAlgorithm_BLOCK_DEFAULT *SplitBlockAlgorithm

func NewSplitBlockAlgorithm

func NewSplitBlockAlgorithm() *SplitBlockAlgorithm

func (*SplitBlockAlgorithm) Equals

func (p *SplitBlockAlgorithm) Equals(other *SplitBlockAlgorithm) bool

func (*SplitBlockAlgorithm) LogValue

func (p *SplitBlockAlgorithm) LogValue() slog.Value

func (*SplitBlockAlgorithm) Read

func (p *SplitBlockAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*SplitBlockAlgorithm) String

func (p *SplitBlockAlgorithm) String() string

func (*SplitBlockAlgorithm) Validate

func (p *SplitBlockAlgorithm) Validate() error

func (*SplitBlockAlgorithm) Write

func (p *SplitBlockAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error

type Statistics

type Statistics struct {
	Max             []byte `thrift:"max,1" db:"max" json:"max,omitempty"`
	Min             []byte `thrift:"min,2" db:"min" json:"min,omitempty"`
	NullCount       *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"`
	DistinctCount   *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"`
	MaxValue        []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"`
	MinValue        []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"`
	IsMaxValueExact *bool  `thrift:"is_max_value_exact,7" db:"is_max_value_exact" json:"is_max_value_exact,omitempty"`
	IsMinValueExact *bool  `thrift:"is_min_value_exact,8" db:"is_min_value_exact" json:"is_min_value_exact,omitempty"`
}

Statistics per row group and per page. All fields are optional.

Attributes:

  • Max: DEPRECATED: min and max value of the column. Use min_value and max_value.

Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.

These fields encode min and max values determined by signed comparison only. New files should use the correct order for a column's logical type and store the values in the min_value and max_value fields.

To support older readers, these may be set when the column order is signed.

  • Min: DEPRECATED: See Max; the description given there covers both the min and max fields.
  • NullCount: Count of null values in the column.

Writers SHOULD always write this field even if it is zero (i.e. no null value) or the column is not nullable. Readers MUST distinguish between null_count not being present and null_count == 0. If null_count is not present, readers MUST NOT assume null_count == 0.

  • DistinctCount: Count of distinct values occurring.
  • MaxValue: Lower and upper bound values for the column, determined by its ColumnOrder.

These may be the actual minimum and maximum values found on a page or column chunk, but can also be (more compact) values that do not exist on a page or column chunk. For example, instead of storing "Blart Versenwald III", a writer may set min_value="B", max_value="C". Such more compact values must still be valid values within the column's logical type.

Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.

  • MinValue: See MaxValue; the description given there covers both the lower and upper bound values.
  • IsMaxValueExact: If true, max_value is the actual maximum value for a column
  • IsMinValueExact: If true, min_value is the actual minimum value for a column
var ColumnMetaData_Statistics_DEFAULT *Statistics
var DataPageHeaderV2_Statistics_DEFAULT *Statistics
var DataPageHeader_Statistics_DEFAULT *Statistics

func NewStatistics

func NewStatistics() *Statistics

func (*Statistics) Equals

func (p *Statistics) Equals(other *Statistics) bool

func (*Statistics) GetDistinctCount

func (p *Statistics) GetDistinctCount() int64

func (*Statistics) GetIsMaxValueExact

func (p *Statistics) GetIsMaxValueExact() bool

func (*Statistics) GetIsMinValueExact

func (p *Statistics) GetIsMinValueExact() bool

func (*Statistics) GetMax

func (p *Statistics) GetMax() []byte

func (*Statistics) GetMaxValue

func (p *Statistics) GetMaxValue() []byte

func (*Statistics) GetMin

func (p *Statistics) GetMin() []byte

func (*Statistics) GetMinValue

func (p *Statistics) GetMinValue() []byte

func (*Statistics) GetNullCount

func (p *Statistics) GetNullCount() int64

func (*Statistics) IsSetDistinctCount

func (p *Statistics) IsSetDistinctCount() bool

func (*Statistics) IsSetIsMaxValueExact

func (p *Statistics) IsSetIsMaxValueExact() bool

func (*Statistics) IsSetIsMinValueExact

func (p *Statistics) IsSetIsMinValueExact() bool

func (*Statistics) IsSetMax

func (p *Statistics) IsSetMax() bool

func (*Statistics) IsSetMaxValue

func (p *Statistics) IsSetMaxValue() bool

func (*Statistics) IsSetMin

func (p *Statistics) IsSetMin() bool

func (*Statistics) IsSetMinValue

func (p *Statistics) IsSetMinValue() bool

func (*Statistics) IsSetNullCount

func (p *Statistics) IsSetNullCount() bool

func (*Statistics) LogValue

func (p *Statistics) LogValue() slog.Value

func (*Statistics) Read

func (p *Statistics) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField1

func (p *Statistics) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField2

func (p *Statistics) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField3

func (p *Statistics) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField4

func (p *Statistics) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField5

func (p *Statistics) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField6

func (p *Statistics) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField7

func (p *Statistics) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField8

func (p *Statistics) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) String

func (p *Statistics) String() string

func (*Statistics) Validate

func (p *Statistics) Validate() error

func (*Statistics) Write

func (p *Statistics) Write(ctx context.Context, oprot thrift.TProtocol) error

type StringType

type StringType struct {
}

Empty structs to use as logical type annotations

var LogicalType_STRING_DEFAULT *StringType

func NewStringType

func NewStringType() *StringType

func (*StringType) Equals

func (p *StringType) Equals(other *StringType) bool

func (*StringType) LogValue

func (p *StringType) LogValue() slog.Value

func (*StringType) Read

func (p *StringType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*StringType) String

func (p *StringType) String() string

func (*StringType) Validate

func (p *StringType) Validate() error

func (*StringType) Write

func (p *StringType) Write(ctx context.Context, oprot thrift.TProtocol) error

type TimeType

type TimeType struct {
	IsAdjustedToUTC bool      `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"`
	Unit            *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"`
}

Time logical type annotation

Allowed for physical types: INT32 (millis), INT64 (micros, nanos)

Attributes:

  • IsAdjustedToUTC
  • Unit
var LogicalType_TIME_DEFAULT *TimeType

func NewTimeType

func NewTimeType() *TimeType

func (*TimeType) Equals

func (p *TimeType) Equals(other *TimeType) bool

func (*TimeType) GetIsAdjustedToUTC

func (p *TimeType) GetIsAdjustedToUTC() bool

func (*TimeType) GetUnit

func (p *TimeType) GetUnit() *TimeUnit

func (*TimeType) IsSetUnit

func (p *TimeType) IsSetUnit() bool

func (*TimeType) LogValue

func (p *TimeType) LogValue() slog.Value

func (*TimeType) Read

func (p *TimeType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeType) ReadField1

func (p *TimeType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeType) ReadField2

func (p *TimeType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeType) String

func (p *TimeType) String() string

func (*TimeType) Validate

func (p *TimeType) Validate() error

func (*TimeType) Write

func (p *TimeType) Write(ctx context.Context, oprot thrift.TProtocol) error

type TimeUnit

type TimeUnit struct {
	MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"`
	MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"`
	NANOS  *NanoSeconds  `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"`
}

Attributes:

  • MILLIS
  • MICROS
  • NANOS
var TimeType_Unit_DEFAULT *TimeUnit
var TimestampType_Unit_DEFAULT *TimeUnit

func NewTimeUnit

func NewTimeUnit() *TimeUnit

func (*TimeUnit) CountSetFieldsTimeUnit

func (p *TimeUnit) CountSetFieldsTimeUnit() int

func (*TimeUnit) Equals

func (p *TimeUnit) Equals(other *TimeUnit) bool

func (*TimeUnit) GetMICROS

func (p *TimeUnit) GetMICROS() *MicroSeconds

func (*TimeUnit) GetMILLIS

func (p *TimeUnit) GetMILLIS() *MilliSeconds

func (*TimeUnit) GetNANOS

func (p *TimeUnit) GetNANOS() *NanoSeconds

func (*TimeUnit) IsSetMICROS

func (p *TimeUnit) IsSetMICROS() bool

func (*TimeUnit) IsSetMILLIS

func (p *TimeUnit) IsSetMILLIS() bool

func (*TimeUnit) IsSetNANOS

func (p *TimeUnit) IsSetNANOS() bool

func (*TimeUnit) LogValue

func (p *TimeUnit) LogValue() slog.Value

func (*TimeUnit) Read

func (p *TimeUnit) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) ReadField1

func (p *TimeUnit) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) ReadField2

func (p *TimeUnit) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) ReadField3

func (p *TimeUnit) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) String

func (p *TimeUnit) String() string

func (*TimeUnit) Validate

func (p *TimeUnit) Validate() error

func (*TimeUnit) Write

func (p *TimeUnit) Write(ctx context.Context, oprot thrift.TProtocol) error

type TimestampType

type TimestampType struct {
	IsAdjustedToUTC bool      `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"`
	Unit            *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"`
}

Timestamp logical type annotation

Allowed for physical types: INT64

Attributes:

  • IsAdjustedToUTC
  • Unit
var LogicalType_TIMESTAMP_DEFAULT *TimestampType

func NewTimestampType

func NewTimestampType() *TimestampType

func (*TimestampType) Equals

func (p *TimestampType) Equals(other *TimestampType) bool

func (*TimestampType) GetIsAdjustedToUTC

func (p *TimestampType) GetIsAdjustedToUTC() bool

func (*TimestampType) GetUnit

func (p *TimestampType) GetUnit() *TimeUnit

func (*TimestampType) IsSetUnit

func (p *TimestampType) IsSetUnit() bool

func (*TimestampType) LogValue

func (p *TimestampType) LogValue() slog.Value

func (*TimestampType) Read

func (p *TimestampType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TimestampType) ReadField1

func (p *TimestampType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*TimestampType) ReadField2

func (p *TimestampType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*TimestampType) String

func (p *TimestampType) String() string

func (*TimestampType) Validate

func (p *TimestampType) Validate() error

func (*TimestampType) Write

func (p *TimestampType) Write(ctx context.Context, oprot thrift.TProtocol) error

type Type

type Type int64

Types supported by Parquet. These types are intended to be used in combination with the encodings to control the on disk storage format. For example INT16 is not included as a type since a good encoding of INT32 would handle this.

const (
	Type_BOOLEAN              Type = 0
	Type_INT32                Type = 1
	Type_INT64                Type = 2
	Type_INT96                Type = 3
	Type_FLOAT                Type = 4
	Type_DOUBLE               Type = 5
	Type_BYTE_ARRAY           Type = 6
	Type_FIXED_LEN_BYTE_ARRAY Type = 7
)
var SchemaElement_Type_DEFAULT Type

func TypeFromString

func TypeFromString(s string) (Type, error)

func TypePtr

func TypePtr(v Type) *Type

func (Type) MarshalText

func (p Type) MarshalText() ([]byte, error)

func (*Type) Scan

func (p *Type) Scan(value interface{}) error

func (Type) String

func (p Type) String() string

func (*Type) UnmarshalText

func (p *Type) UnmarshalText(text []byte) error

func (*Type) Value

func (p *Type) Value() (driver.Value, error)

type TypeDefinedOrder

type TypeDefinedOrder struct {
}

Empty struct to signal the order defined by the physical or logical type

var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder

func NewTypeDefinedOrder

func NewTypeDefinedOrder() *TypeDefinedOrder

func (*TypeDefinedOrder) Equals

func (p *TypeDefinedOrder) Equals(other *TypeDefinedOrder) bool

func (*TypeDefinedOrder) LogValue

func (p *TypeDefinedOrder) LogValue() slog.Value

func (*TypeDefinedOrder) Read

func (p *TypeDefinedOrder) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TypeDefinedOrder) String

func (p *TypeDefinedOrder) String() string

func (*TypeDefinedOrder) Validate

func (p *TypeDefinedOrder) Validate() error

func (*TypeDefinedOrder) Write

func (p *TypeDefinedOrder) Write(ctx context.Context, oprot thrift.TProtocol) error

type UUIDType

type UUIDType struct {
}
var LogicalType_UUID_DEFAULT *UUIDType

func NewUUIDType

func NewUUIDType() *UUIDType

func (*UUIDType) Equals

func (p *UUIDType) Equals(other *UUIDType) bool

func (*UUIDType) LogValue

func (p *UUIDType) LogValue() slog.Value

func (*UUIDType) Read

func (p *UUIDType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*UUIDType) String

func (p *UUIDType) String() string

func (*UUIDType) Validate

func (p *UUIDType) Validate() error

func (*UUIDType) Write

func (p *UUIDType) Write(ctx context.Context, oprot thrift.TProtocol) error

type Uncompressed

type Uncompressed struct {
}

The compression used in the Bloom filter.

var BloomFilterCompression_UNCOMPRESSED_DEFAULT *Uncompressed

func NewUncompressed

func NewUncompressed() *Uncompressed

func (*Uncompressed) Equals

func (p *Uncompressed) Equals(other *Uncompressed) bool

func (*Uncompressed) LogValue

func (p *Uncompressed) LogValue() slog.Value

func (*Uncompressed) Read

func (p *Uncompressed) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*Uncompressed) String

func (p *Uncompressed) String() string

func (*Uncompressed) Validate

func (p *Uncompressed) Validate() error

func (*Uncompressed) Write

func (p *Uncompressed) Write(ctx context.Context, oprot thrift.TProtocol) error

type XxHash

type XxHash struct {
}

Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash algorithm. It uses the 64-bit version of xxHash.

var BloomFilterHash_XXHASH_DEFAULT *XxHash

func NewXxHash

func NewXxHash() *XxHash

func (*XxHash) Equals

func (p *XxHash) Equals(other *XxHash) bool

func (*XxHash) LogValue

func (p *XxHash) LogValue() slog.Value

func (*XxHash) Read

func (p *XxHash) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*XxHash) String

func (p *XxHash) String() string

func (*XxHash) Validate

func (p *XxHash) Validate() error

func (*XxHash) Write

func (p *XxHash) Write(ctx context.Context, oprot thrift.TProtocol) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL