parquet

package
v1.5.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 10, 2020 License: Apache-2.0 Imports: 7 Imported by: 352

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ColumnChunk_ColumnIndexLength_DEFAULT int32
View Source
var ColumnChunk_ColumnIndexOffset_DEFAULT int64
View Source
var ColumnChunk_FilePath_DEFAULT string
View Source
var ColumnChunk_OffsetIndexLength_DEFAULT int32
View Source
var ColumnChunk_OffsetIndexOffset_DEFAULT int64
View Source
var ColumnIndex_NullCounts_DEFAULT []int64
View Source
var ColumnMetaData_DictionaryPageOffset_DEFAULT int64
View Source
var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats
View Source
var ColumnMetaData_IndexPageOffset_DEFAULT int64
View Source
var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue
View Source
var DataPageHeaderV2_IsCompressed_DEFAULT bool = true
View Source
var DictionaryPageHeader_IsSorted_DEFAULT bool
View Source
var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder
View Source
var FileMetaData_CreatedBy_DEFAULT string
View Source
var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue
View Source
var GoUnusedProtection__ int
View Source
var KeyValue_Value_DEFAULT string
View Source
var PageHeader_Crc_DEFAULT int32
View Source
var RowGroup_SortingColumns_DEFAULT []*SortingColumn
View Source
var SchemaElement_FieldID_DEFAULT int32
View Source
var SchemaElement_NumChildren_DEFAULT int32
View Source
var SchemaElement_Precision_DEFAULT int32
View Source
var SchemaElement_Scale_DEFAULT int32
View Source
var SchemaElement_TypeLength_DEFAULT int32
View Source
var Statistics_DistinctCount_DEFAULT int64
View Source
var Statistics_MaxValue_DEFAULT []byte
View Source
var Statistics_Max_DEFAULT []byte
View Source
var Statistics_MinValue_DEFAULT []byte
View Source
var Statistics_Min_DEFAULT []byte
View Source
var Statistics_NullCount_DEFAULT int64

Functions

This section is empty.

Types

type BoundaryOrder added in v1.4.0

type BoundaryOrder int64

Enum to annotate whether lists of min/max elements inside ColumnIndex are ordered and if so, in which direction.

const (
	BoundaryOrder_UNORDERED  BoundaryOrder = 0
	BoundaryOrder_ASCENDING  BoundaryOrder = 1
	BoundaryOrder_DESCENDING BoundaryOrder = 2
)

func BoundaryOrderFromString added in v1.4.0

func BoundaryOrderFromString(s string) (BoundaryOrder, error)

func BoundaryOrderPtr added in v1.4.0

func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder

func (BoundaryOrder) MarshalText added in v1.4.0

func (p BoundaryOrder) MarshalText() ([]byte, error)

func (*BoundaryOrder) Scan added in v1.4.0

func (p *BoundaryOrder) Scan(value interface{}) error

func (BoundaryOrder) String added in v1.4.0

func (p BoundaryOrder) String() string

func (*BoundaryOrder) UnmarshalText added in v1.4.0

func (p *BoundaryOrder) UnmarshalText(text []byte) error

func (*BoundaryOrder) Value added in v1.4.0

func (p *BoundaryOrder) Value() (driver.Value, error)

type BsonType added in v1.4.0

type BsonType struct {
}

Embedded BSON logical type annotation

Allowed for physical types: BINARY

var LogicalType_BSON_DEFAULT *BsonType

func NewBsonType added in v1.4.0

func NewBsonType() *BsonType

func (*BsonType) Read added in v1.4.0

func (p *BsonType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*BsonType) String added in v1.4.0

func (p *BsonType) String() string

func (*BsonType) Write added in v1.4.0

func (p *BsonType) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnChunk

type ColumnChunk struct {
	FilePath          *string         `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"`
	FileOffset        int64           `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"`
	MetaData          *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"`
	OffsetIndexOffset *int64          `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"`
	OffsetIndexLength *int32          `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"`
	ColumnIndexOffset *int64          `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"`
	ColumnIndexLength *int32          `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"`
}

Attributes:

  • FilePath: File where column data is stored. If not set, assumed to be same file as

metadata. This path is relative to the current file.

  • FileOffset: Byte offset in file_path to the ColumnMetaData *
  • MetaData: Column metadata for this chunk. This is the same content as what is at

file_path/file_offset. Having it here has it replicated in the file metadata.

  • OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
  • OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
  • ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
  • ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *

func NewColumnChunk

func NewColumnChunk() *ColumnChunk

func (*ColumnChunk) GetColumnIndexLength added in v1.4.0

func (p *ColumnChunk) GetColumnIndexLength() int32

func (*ColumnChunk) GetColumnIndexOffset added in v1.4.0

func (p *ColumnChunk) GetColumnIndexOffset() int64

func (*ColumnChunk) GetFileOffset

func (p *ColumnChunk) GetFileOffset() int64

func (*ColumnChunk) GetFilePath

func (p *ColumnChunk) GetFilePath() string

func (*ColumnChunk) GetMetaData

func (p *ColumnChunk) GetMetaData() *ColumnMetaData

func (*ColumnChunk) GetOffsetIndexLength added in v1.4.0

func (p *ColumnChunk) GetOffsetIndexLength() int32

func (*ColumnChunk) GetOffsetIndexOffset added in v1.4.0

func (p *ColumnChunk) GetOffsetIndexOffset() int64

func (*ColumnChunk) IsSetColumnIndexLength added in v1.4.0

func (p *ColumnChunk) IsSetColumnIndexLength() bool

func (*ColumnChunk) IsSetColumnIndexOffset added in v1.4.0

func (p *ColumnChunk) IsSetColumnIndexOffset() bool

func (*ColumnChunk) IsSetFilePath

func (p *ColumnChunk) IsSetFilePath() bool

func (*ColumnChunk) IsSetMetaData

func (p *ColumnChunk) IsSetMetaData() bool

func (*ColumnChunk) IsSetOffsetIndexLength added in v1.4.0

func (p *ColumnChunk) IsSetOffsetIndexLength() bool

func (*ColumnChunk) IsSetOffsetIndexOffset added in v1.4.0

func (p *ColumnChunk) IsSetOffsetIndexOffset() bool

func (*ColumnChunk) Read

func (p *ColumnChunk) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField1

func (p *ColumnChunk) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField2

func (p *ColumnChunk) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField3

func (p *ColumnChunk) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField4 added in v1.4.0

func (p *ColumnChunk) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField5 added in v1.4.0

func (p *ColumnChunk) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField6 added in v1.4.0

func (p *ColumnChunk) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) ReadField7 added in v1.4.0

func (p *ColumnChunk) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnChunk) String

func (p *ColumnChunk) String() string

func (*ColumnChunk) Write

func (p *ColumnChunk) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnIndex added in v1.4.0

type ColumnIndex struct {
	NullPages     []bool        `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"`
	MinValues     [][]byte      `thrift:"min_values,2,required" db:"min_values" json:"min_values"`
	MaxValues     [][]byte      `thrift:"max_values,3,required" db:"max_values" json:"max_values"`
	BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"`
	NullCounts    []int64       `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"`
}

Description for ColumnIndex. Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]

Attributes:

  • NullPages: A list of Boolean values to determine the validity of the corresponding

min and max values. If true, a page contains only null values, and writers have to set the corresponding entries in min_values and max_values to byte[0], so that all lists have the same length. If false, the corresponding entries in min_values and max_values must be valid.

  • MinValues: Two lists containing lower and upper bounds for the values of each page.

These may be the actual minimum and maximum values found on a page, but can also be (more compact) values that do not exist on a page. For example, instead of storing "Blart Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". Such more compact values must still be valid values within the column's logical type. Readers must make sure that list entries are populated before using them by inspecting null_pages.

  • MaxValues
  • BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in

which direction. This allows readers to perform binary searches in both lists. Readers cannot assume that max_values[i] <= min_values[i+1], even if the lists are ordered.

  • NullCounts: A list containing the number of null values for each page *

func NewColumnIndex added in v1.4.0

func NewColumnIndex() *ColumnIndex

func (*ColumnIndex) GetBoundaryOrder added in v1.4.0

func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder

func (*ColumnIndex) GetMaxValues added in v1.4.0

func (p *ColumnIndex) GetMaxValues() [][]byte

func (*ColumnIndex) GetMinValues added in v1.4.0

func (p *ColumnIndex) GetMinValues() [][]byte

func (*ColumnIndex) GetNullCounts added in v1.4.0

func (p *ColumnIndex) GetNullCounts() []int64

func (*ColumnIndex) GetNullPages added in v1.4.0

func (p *ColumnIndex) GetNullPages() []bool

func (*ColumnIndex) IsSetNullCounts added in v1.4.0

func (p *ColumnIndex) IsSetNullCounts() bool

func (*ColumnIndex) Read added in v1.4.0

func (p *ColumnIndex) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField1 added in v1.4.0

func (p *ColumnIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField2 added in v1.4.0

func (p *ColumnIndex) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField3 added in v1.4.0

func (p *ColumnIndex) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField4 added in v1.4.0

func (p *ColumnIndex) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) ReadField5 added in v1.4.0

func (p *ColumnIndex) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnIndex) String added in v1.4.0

func (p *ColumnIndex) String() string

func (*ColumnIndex) Write added in v1.4.0

func (p *ColumnIndex) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnMetaData

type ColumnMetaData struct {
	Type                  Type                 `thrift:"type,1,required" db:"type" json:"type"`
	Encodings             []Encoding           `thrift:"encodings,2,required" db:"encodings" json:"encodings"`
	PathInSchema          []string             `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"`
	Codec                 CompressionCodec     `thrift:"codec,4,required" db:"codec" json:"codec"`
	NumValues             int64                `thrift:"num_values,5,required" db:"num_values" json:"num_values"`
	TotalUncompressedSize int64                `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"`
	TotalCompressedSize   int64                `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"`
	KeyValueMetadata      []*KeyValue          `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"`
	DataPageOffset        int64                `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"`
	IndexPageOffset       *int64               `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"`
	DictionaryPageOffset  *int64               `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"`
	Statistics            *Statistics          `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"`
	EncodingStats         []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"`
}

Description for column metadata

Attributes:

  • Type: Type of this column *
  • Encodings: Set of all encodings used for this column. The purpose is to validate

whether we can decode those pages. *

  • PathInSchema: Path in schema *
  • Codec: Compression codec *
  • NumValues: Number of values in this column *
  • TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) *
  • TotalCompressedSize: total byte size of all compressed pages in this column chunk (including the headers) *
  • KeyValueMetadata: Optional key/value metadata *
  • DataPageOffset: Byte offset from beginning of file to first data page *
  • IndexPageOffset: Byte offset from beginning of file to root index page *
  • DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page *
  • Statistics: optional statistics for this column chunk
  • EncodingStats: Set of all encodings used for pages in this column chunk.

This information can be used to determine if all data pages are dictionary encoded for example *

var ColumnChunk_MetaData_DEFAULT *ColumnMetaData

func NewColumnMetaData

func NewColumnMetaData() *ColumnMetaData

func (*ColumnMetaData) GetCodec

func (p *ColumnMetaData) GetCodec() CompressionCodec

func (*ColumnMetaData) GetDataPageOffset

func (p *ColumnMetaData) GetDataPageOffset() int64

func (*ColumnMetaData) GetDictionaryPageOffset

func (p *ColumnMetaData) GetDictionaryPageOffset() int64

func (*ColumnMetaData) GetEncodingStats

func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats

func (*ColumnMetaData) GetEncodings

func (p *ColumnMetaData) GetEncodings() []Encoding

func (*ColumnMetaData) GetIndexPageOffset

func (p *ColumnMetaData) GetIndexPageOffset() int64

func (*ColumnMetaData) GetKeyValueMetadata

func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue

func (*ColumnMetaData) GetNumValues

func (p *ColumnMetaData) GetNumValues() int64

func (*ColumnMetaData) GetPathInSchema

func (p *ColumnMetaData) GetPathInSchema() []string

func (*ColumnMetaData) GetStatistics

func (p *ColumnMetaData) GetStatistics() *Statistics

func (*ColumnMetaData) GetTotalCompressedSize

func (p *ColumnMetaData) GetTotalCompressedSize() int64

func (*ColumnMetaData) GetTotalUncompressedSize

func (p *ColumnMetaData) GetTotalUncompressedSize() int64

func (*ColumnMetaData) GetType

func (p *ColumnMetaData) GetType() Type

func (*ColumnMetaData) IsSetDictionaryPageOffset

func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool

func (*ColumnMetaData) IsSetEncodingStats

func (p *ColumnMetaData) IsSetEncodingStats() bool

func (*ColumnMetaData) IsSetIndexPageOffset

func (p *ColumnMetaData) IsSetIndexPageOffset() bool

func (*ColumnMetaData) IsSetKeyValueMetadata

func (p *ColumnMetaData) IsSetKeyValueMetadata() bool

func (*ColumnMetaData) IsSetStatistics

func (p *ColumnMetaData) IsSetStatistics() bool

func (*ColumnMetaData) Read

func (p *ColumnMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField1

func (p *ColumnMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField10

func (p *ColumnMetaData) ReadField10(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField11

func (p *ColumnMetaData) ReadField11(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField12

func (p *ColumnMetaData) ReadField12(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField13

func (p *ColumnMetaData) ReadField13(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField2

func (p *ColumnMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField3

func (p *ColumnMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField4

func (p *ColumnMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField5

func (p *ColumnMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField6

func (p *ColumnMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField7

func (p *ColumnMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField8

func (p *ColumnMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) ReadField9

func (p *ColumnMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnMetaData) String

func (p *ColumnMetaData) String() string

func (*ColumnMetaData) Write

func (p *ColumnMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error

type ColumnOrder added in v1.4.0

type ColumnOrder struct {
	TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"`
}

Union to specify the order used for the min_value and max_value fields for a column. This union takes the role of an enhanced enum that allows rich elements (which will be needed for a collation-based ordering in the future).

Possible values are:

  • TypeDefinedOrder - the column uses the order defined by its logical or physical type (if there is no logical type).

If the reader does not support the value of this union, min and max stats for this column should be ignored.

Attributes:

  • TYPE_ORDER: The sort orders for logical types are: UTF8 - unsigned byte-wise comparison; INT8 - signed comparison; INT16 - signed comparison; INT32 - signed comparison; INT64 - signed comparison; UINT8 - unsigned comparison; UINT16 - unsigned comparison; UINT32 - unsigned comparison; UINT64 - unsigned comparison; DECIMAL - signed comparison of the represented value; DATE - signed comparison; TIME_MILLIS - signed comparison; TIME_MICROS - signed comparison; TIMESTAMP_MILLIS - signed comparison; TIMESTAMP_MICROS - signed comparison; INTERVAL - unsigned comparison; JSON - unsigned byte-wise comparison; BSON - unsigned byte-wise comparison; ENUM - unsigned byte-wise comparison; LIST - undefined; MAP - undefined.

In the absence of logical types, the sort order is determined by the physical type:

BOOLEAN - false, true
INT32 - signed comparison
INT64 - signed comparison
INT96 (only used for legacy timestamps) - undefined
FLOAT - signed comparison of the represented value (*)
DOUBLE - signed comparison of the represented value (*)
BYTE_ARRAY - unsigned byte-wise comparison
FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison

(*) Because the sorting order is not specified properly for floating point values (relations vs. total ordering), the following compatibility rules should be applied when reading statistics:
- If the min is a NaN, it should be ignored.
- If the max is a NaN, it should be ignored.
- If the min is +0, the row group may contain -0 values as well.
- If the max is -0, the row group may contain +0 values as well.
- When looking for NaN values, min and max should be ignored.

func NewColumnOrder added in v1.4.0

func NewColumnOrder() *ColumnOrder

func (*ColumnOrder) CountSetFieldsColumnOrder added in v1.4.0

func (p *ColumnOrder) CountSetFieldsColumnOrder() int

func (*ColumnOrder) GetTYPE_ORDER added in v1.4.0

func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder

func (*ColumnOrder) IsSetTYPE_ORDER added in v1.4.0

func (p *ColumnOrder) IsSetTYPE_ORDER() bool

func (*ColumnOrder) Read added in v1.4.0

func (p *ColumnOrder) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnOrder) ReadField1 added in v1.4.0

func (p *ColumnOrder) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*ColumnOrder) String added in v1.4.0

func (p *ColumnOrder) String() string

func (*ColumnOrder) Write added in v1.4.0

func (p *ColumnOrder) Write(ctx context.Context, oprot thrift.TProtocol) error

type CompressionCodec

type CompressionCodec int64

Supported compression algorithms.

Codecs added in 2.4 can be read by readers based on 2.4 and later. Codec support may vary between readers based on the format version and libraries available at runtime. Gzip, Snappy, and LZ4 codecs are widely available, while Zstd and Brotli require additional libraries.

const (
	CompressionCodec_UNCOMPRESSED CompressionCodec = 0
	CompressionCodec_SNAPPY       CompressionCodec = 1
	CompressionCodec_GZIP         CompressionCodec = 2
	CompressionCodec_LZO          CompressionCodec = 3
	CompressionCodec_BROTLI       CompressionCodec = 4
	CompressionCodec_LZ4          CompressionCodec = 5
	CompressionCodec_ZSTD         CompressionCodec = 6
)

func CompressionCodecFromString

func CompressionCodecFromString(s string) (CompressionCodec, error)

func CompressionCodecPtr

func CompressionCodecPtr(v CompressionCodec) *CompressionCodec

func (CompressionCodec) MarshalText

func (p CompressionCodec) MarshalText() ([]byte, error)

func (*CompressionCodec) Scan

func (p *CompressionCodec) Scan(value interface{}) error

func (CompressionCodec) String

func (p CompressionCodec) String() string

func (*CompressionCodec) UnmarshalText

func (p *CompressionCodec) UnmarshalText(text []byte) error

func (*CompressionCodec) Value

func (p *CompressionCodec) Value() (driver.Value, error)

type ConvertedType

type ConvertedType int64

Common types used by frameworks (e.g. Hive, Pig) using Parquet. This helps map between the types in those frameworks and the base types in Parquet. This is only metadata and not needed to read or write the data.

const (
	ConvertedType_UTF8             ConvertedType = 0
	ConvertedType_MAP              ConvertedType = 1
	ConvertedType_MAP_KEY_VALUE    ConvertedType = 2
	ConvertedType_LIST             ConvertedType = 3
	ConvertedType_ENUM             ConvertedType = 4
	ConvertedType_DECIMAL          ConvertedType = 5
	ConvertedType_DATE             ConvertedType = 6
	ConvertedType_TIME_MILLIS      ConvertedType = 7
	ConvertedType_TIME_MICROS      ConvertedType = 8
	ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9
	ConvertedType_TIMESTAMP_MICROS ConvertedType = 10
	ConvertedType_UINT_8           ConvertedType = 11
	ConvertedType_UINT_16          ConvertedType = 12
	ConvertedType_UINT_32          ConvertedType = 13
	ConvertedType_UINT_64          ConvertedType = 14
	ConvertedType_INT_8            ConvertedType = 15
	ConvertedType_INT_16           ConvertedType = 16
	ConvertedType_INT_32           ConvertedType = 17
	ConvertedType_INT_64           ConvertedType = 18
	ConvertedType_JSON             ConvertedType = 19
	ConvertedType_BSON             ConvertedType = 20
	ConvertedType_INTERVAL         ConvertedType = 21
)
var SchemaElement_ConvertedType_DEFAULT ConvertedType

func ConvertedTypeFromString

func ConvertedTypeFromString(s string) (ConvertedType, error)

func ConvertedTypePtr

func ConvertedTypePtr(v ConvertedType) *ConvertedType

func (ConvertedType) MarshalText

func (p ConvertedType) MarshalText() ([]byte, error)

func (*ConvertedType) Scan

func (p *ConvertedType) Scan(value interface{}) error

func (ConvertedType) String

func (p ConvertedType) String() string

func (*ConvertedType) UnmarshalText

func (p *ConvertedType) UnmarshalText(text []byte) error

func (*ConvertedType) Value

func (p *ConvertedType) Value() (driver.Value, error)

type DataPageHeader

type DataPageHeader struct {
	NumValues               int32       `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
	Encoding                Encoding    `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
	DefinitionLevelEncoding Encoding    `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"`
	RepetitionLevelEncoding Encoding    `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"`
	Statistics              *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"`
}

Data page header

Attributes:

  • NumValues: Number of values, including NULLs, in this data page. *
  • Encoding: Encoding used for this data page *
  • DefinitionLevelEncoding: Encoding used for definition levels *
  • RepetitionLevelEncoding: Encoding used for repetition levels *
  • Statistics: Optional statistics for the data in this page*
var PageHeader_DataPageHeader_DEFAULT *DataPageHeader

func NewDataPageHeader

func NewDataPageHeader() *DataPageHeader

func (*DataPageHeader) GetDefinitionLevelEncoding

func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding

func (*DataPageHeader) GetEncoding

func (p *DataPageHeader) GetEncoding() Encoding

func (*DataPageHeader) GetNumValues

func (p *DataPageHeader) GetNumValues() int32

func (*DataPageHeader) GetRepetitionLevelEncoding

func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding

func (*DataPageHeader) GetStatistics

func (p *DataPageHeader) GetStatistics() *Statistics

func (*DataPageHeader) IsSetStatistics

func (p *DataPageHeader) IsSetStatistics() bool

func (*DataPageHeader) Read

func (p *DataPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField1

func (p *DataPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField2

func (p *DataPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField3

func (p *DataPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField4

func (p *DataPageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) ReadField5

func (p *DataPageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeader) String

func (p *DataPageHeader) String() string

func (*DataPageHeader) Write

func (p *DataPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type DataPageHeaderV2

type DataPageHeaderV2 struct {
	NumValues                  int32       `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
	NumNulls                   int32       `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"`
	NumRows                    int32       `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
	Encoding                   Encoding    `thrift:"encoding,4,required" db:"encoding" json:"encoding"`
	DefinitionLevelsByteLength int32       `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"`
	RepetitionLevelsByteLength int32       `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"`
	IsCompressed               bool        `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed,omitempty"`
	Statistics                 *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"`
}

New page format allowing reading levels without decompressing the data. Repetition and definition levels are uncompressed. The remaining section containing the data is compressed if is_compressed is true.

Attributes:

  • NumValues: Number of values, including NULLs, in this data page. *
  • NumNulls: Number of NULL values, in this data page.

Number of non-null = num_values - num_nulls which is also the number of values in the data section *

  • NumRows: Number of rows in this data page, which means pages change on record boundaries (r = 0) *
  • Encoding: Encoding used for data in this page *
  • DefinitionLevelsByteLength: length of the definition levels
  • RepetitionLevelsByteLength: length of the repetition levels
  • IsCompressed: whether the values are compressed.

This means the section of the page between definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) is compressed with the compression_codec. If missing, it is considered compressed.

  • Statistics: optional statistics for this column chunk
var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2

func NewDataPageHeaderV2

func NewDataPageHeaderV2() *DataPageHeaderV2

func (*DataPageHeaderV2) GetDefinitionLevelsByteLength

func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32

func (*DataPageHeaderV2) GetEncoding

func (p *DataPageHeaderV2) GetEncoding() Encoding

func (*DataPageHeaderV2) GetIsCompressed

func (p *DataPageHeaderV2) GetIsCompressed() bool

func (*DataPageHeaderV2) GetNumNulls

func (p *DataPageHeaderV2) GetNumNulls() int32

func (*DataPageHeaderV2) GetNumRows

func (p *DataPageHeaderV2) GetNumRows() int32

func (*DataPageHeaderV2) GetNumValues

func (p *DataPageHeaderV2) GetNumValues() int32

func (*DataPageHeaderV2) GetRepetitionLevelsByteLength

func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32

func (*DataPageHeaderV2) GetStatistics

func (p *DataPageHeaderV2) GetStatistics() *Statistics

func (*DataPageHeaderV2) IsSetIsCompressed

func (p *DataPageHeaderV2) IsSetIsCompressed() bool

func (*DataPageHeaderV2) IsSetStatistics

func (p *DataPageHeaderV2) IsSetStatistics() bool

func (*DataPageHeaderV2) Read

func (p *DataPageHeaderV2) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField1

func (p *DataPageHeaderV2) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField2

func (p *DataPageHeaderV2) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField3

func (p *DataPageHeaderV2) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField4

func (p *DataPageHeaderV2) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField5

func (p *DataPageHeaderV2) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField6

func (p *DataPageHeaderV2) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField7

func (p *DataPageHeaderV2) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) ReadField8

func (p *DataPageHeaderV2) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*DataPageHeaderV2) String

func (p *DataPageHeaderV2) String() string

func (*DataPageHeaderV2) Write

func (p *DataPageHeaderV2) Write(ctx context.Context, oprot thrift.TProtocol) error

type DateType added in v1.4.0

type DateType struct {
}
var LogicalType_DATE_DEFAULT *DateType

func NewDateType added in v1.4.0

func NewDateType() *DateType

func (*DateType) Read added in v1.4.0

func (p *DateType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DateType) String added in v1.4.0

func (p *DateType) String() string

func (*DateType) Write added in v1.4.0

func (p *DateType) Write(ctx context.Context, oprot thrift.TProtocol) error

type DecimalType added in v1.4.0

type DecimalType struct {
	Scale     int32 `thrift:"scale,1,required" db:"scale" json:"scale"`
	Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"`
}

Decimal logical type annotation

To maintain forward-compatibility in v1, implementations using this logical type must also set scale and precision on the annotated SchemaElement.

Allowed for physical types: INT32, INT64, FIXED, and BINARY

Attributes:

  • Scale
  • Precision
var LogicalType_DECIMAL_DEFAULT *DecimalType

func NewDecimalType added in v1.4.0

func NewDecimalType() *DecimalType

func (*DecimalType) GetPrecision added in v1.4.0

func (p *DecimalType) GetPrecision() int32

func (*DecimalType) GetScale added in v1.4.0

func (p *DecimalType) GetScale() int32

func (*DecimalType) Read added in v1.4.0

func (p *DecimalType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DecimalType) ReadField1 added in v1.4.0

func (p *DecimalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DecimalType) ReadField2 added in v1.4.0

func (p *DecimalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DecimalType) String added in v1.4.0

func (p *DecimalType) String() string

func (*DecimalType) Write added in v1.4.0

func (p *DecimalType) Write(ctx context.Context, oprot thrift.TProtocol) error

type DictionaryPageHeader

type DictionaryPageHeader struct {
	NumValues int32    `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
	Encoding  Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
	IsSorted  *bool    `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"`
}

TODO: *

Attributes:

  • NumValues: Number of values in the dictionary *
  • Encoding: Encoding using this dictionary page *
  • IsSorted: If true, the entries in the dictionary are sorted in ascending order *
var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader

func NewDictionaryPageHeader

func NewDictionaryPageHeader() *DictionaryPageHeader

func (*DictionaryPageHeader) GetEncoding

func (p *DictionaryPageHeader) GetEncoding() Encoding

func (*DictionaryPageHeader) GetIsSorted

func (p *DictionaryPageHeader) GetIsSorted() bool

func (*DictionaryPageHeader) GetNumValues

func (p *DictionaryPageHeader) GetNumValues() int32

func (*DictionaryPageHeader) IsSetIsSorted

func (p *DictionaryPageHeader) IsSetIsSorted() bool

func (*DictionaryPageHeader) Read

func (p *DictionaryPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*DictionaryPageHeader) ReadField1

func (p *DictionaryPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*DictionaryPageHeader) ReadField2

func (p *DictionaryPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*DictionaryPageHeader) ReadField3

func (p *DictionaryPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*DictionaryPageHeader) String

func (p *DictionaryPageHeader) String() string

func (*DictionaryPageHeader) Write

func (p *DictionaryPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type Encoding

type Encoding int64

Encodings supported by Parquet. Not all encodings are valid for all types. These enums are also used to specify the encoding of definition and repetition levels. See the accompanying doc for the details of the more complicated encodings.

const (
	Encoding_PLAIN                   Encoding = 0
	Encoding_PLAIN_DICTIONARY        Encoding = 2
	Encoding_RLE                     Encoding = 3
	Encoding_BIT_PACKED              Encoding = 4
	Encoding_DELTA_BINARY_PACKED     Encoding = 5
	Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6
	Encoding_DELTA_BYTE_ARRAY        Encoding = 7
	Encoding_RLE_DICTIONARY          Encoding = 8
)

func EncodingFromString

func EncodingFromString(s string) (Encoding, error)

func EncodingPtr

func EncodingPtr(v Encoding) *Encoding

func (Encoding) MarshalText

func (p Encoding) MarshalText() ([]byte, error)

func (*Encoding) Scan

func (p *Encoding) Scan(value interface{}) error

func (Encoding) String

func (p Encoding) String() string

func (*Encoding) UnmarshalText

func (p *Encoding) UnmarshalText(text []byte) error

func (*Encoding) Value

func (p *Encoding) Value() (driver.Value, error)

type EnumType added in v1.4.0

type EnumType struct {
}
var LogicalType_ENUM_DEFAULT *EnumType

func NewEnumType added in v1.4.0

func NewEnumType() *EnumType

func (*EnumType) Read added in v1.4.0

func (p *EnumType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*EnumType) String added in v1.4.0

func (p *EnumType) String() string

func (*EnumType) Write added in v1.4.0

func (p *EnumType) Write(ctx context.Context, oprot thrift.TProtocol) error

type FieldRepetitionType

type FieldRepetitionType int64

Representation of Schemas

const (
	FieldRepetitionType_REQUIRED FieldRepetitionType = 0
	FieldRepetitionType_OPTIONAL FieldRepetitionType = 1
	FieldRepetitionType_REPEATED FieldRepetitionType = 2
)
var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType

func FieldRepetitionTypeFromString

func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error)

func FieldRepetitionTypePtr

func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType

func (FieldRepetitionType) MarshalText

func (p FieldRepetitionType) MarshalText() ([]byte, error)

func (*FieldRepetitionType) Scan

func (p *FieldRepetitionType) Scan(value interface{}) error

func (FieldRepetitionType) String

func (p FieldRepetitionType) String() string

func (*FieldRepetitionType) UnmarshalText

func (p *FieldRepetitionType) UnmarshalText(text []byte) error

func (*FieldRepetitionType) Value

func (p *FieldRepetitionType) Value() (driver.Value, error)

type FileMetaData

type FileMetaData struct {
	Version          int32            `thrift:"version,1,required" db:"version" json:"version"`
	Schema           []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"`
	NumRows          int64            `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
	RowGroups        []*RowGroup      `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"`
	KeyValueMetadata []*KeyValue      `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"`
	CreatedBy        *string          `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"`
	ColumnOrders     []*ColumnOrder   `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"`
}

Description for file metadata

Attributes:

  • Version: Version of this file *
  • Schema: Parquet schema for this file. This schema contains metadata for all the columns.

The schema is represented as a tree with a single root. The nodes of the tree are flattened to a list by doing a depth-first traversal. The column metadata contains the path in the schema for that column which can be used to map columns to nodes in the schema. The first element is the root *

  • NumRows: Number of rows in this file *
  • RowGroups: Row groups in this file *
  • KeyValueMetadata: Optional key/value metadata *
  • CreatedBy: String for application that wrote this file. This should be in the format

<Application> version <App Version> (build <App Build Hash>). e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)

  • ColumnOrders: Sort order used for the min_value and max_value fields of each column in

this file. Each sort order corresponds to one column, determined by its position in the list, matching the position of the column in the schema.

Without column_orders, the meaning of the min_value and max_value fields is undefined. To ensure well-defined behaviour, if min_value and max_value are written to a Parquet file, column_orders must be written as well.

The obsolete min and max fields are always sorted by signed comparison regardless of column_orders.

func NewFileMetaData

func NewFileMetaData() *FileMetaData

func (*FileMetaData) GetColumnOrders added in v1.4.0

func (p *FileMetaData) GetColumnOrders() []*ColumnOrder

func (*FileMetaData) GetCreatedBy

func (p *FileMetaData) GetCreatedBy() string

func (*FileMetaData) GetKeyValueMetadata

func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue

func (*FileMetaData) GetNumRows

func (p *FileMetaData) GetNumRows() int64

func (*FileMetaData) GetRowGroups

func (p *FileMetaData) GetRowGroups() []*RowGroup

func (*FileMetaData) GetSchema

func (p *FileMetaData) GetSchema() []*SchemaElement

func (*FileMetaData) GetVersion

func (p *FileMetaData) GetVersion() int32

func (*FileMetaData) IsSetColumnOrders added in v1.4.0

func (p *FileMetaData) IsSetColumnOrders() bool

func (*FileMetaData) IsSetCreatedBy

func (p *FileMetaData) IsSetCreatedBy() bool

func (*FileMetaData) IsSetKeyValueMetadata

func (p *FileMetaData) IsSetKeyValueMetadata() bool

func (*FileMetaData) Read

func (p *FileMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField1

func (p *FileMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField2

func (p *FileMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField3

func (p *FileMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField4

func (p *FileMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField5

func (p *FileMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField6

func (p *FileMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) ReadField7 added in v1.4.0

func (p *FileMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*FileMetaData) String

func (p *FileMetaData) String() string

func (*FileMetaData) Write

func (p *FileMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error

type IndexPageHeader

type IndexPageHeader struct {
}
var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader

func NewIndexPageHeader

func NewIndexPageHeader() *IndexPageHeader

func (*IndexPageHeader) Read

func (p *IndexPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*IndexPageHeader) String

func (p *IndexPageHeader) String() string

func (*IndexPageHeader) Write

func (p *IndexPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type IntType added in v1.4.0

type IntType struct {
	BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"`
	IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"`
}

Integer logical type annotation

bitWidth must be 8, 16, 32, or 64.

Allowed for physical types: INT32, INT64

Attributes:

  • BitWidth
  • IsSigned
var LogicalType_INTEGER_DEFAULT *IntType

func NewIntType added in v1.4.0

func NewIntType() *IntType

func (*IntType) GetBitWidth added in v1.4.0

func (p *IntType) GetBitWidth() int8

func (*IntType) GetIsSigned added in v1.4.0

func (p *IntType) GetIsSigned() bool

func (*IntType) Read added in v1.4.0

func (p *IntType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*IntType) ReadField1 added in v1.4.0

func (p *IntType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*IntType) ReadField2 added in v1.4.0

func (p *IntType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*IntType) String added in v1.4.0

func (p *IntType) String() string

func (*IntType) Write added in v1.4.0

func (p *IntType) Write(ctx context.Context, oprot thrift.TProtocol) error

type JsonType added in v1.4.0

type JsonType struct {
}

Embedded JSON logical type annotation

Allowed for physical types: BINARY

var LogicalType_JSON_DEFAULT *JsonType

func NewJsonType added in v1.4.0

func NewJsonType() *JsonType

func (*JsonType) Read added in v1.4.0

func (p *JsonType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*JsonType) String added in v1.4.0

func (p *JsonType) String() string

func (*JsonType) Write added in v1.4.0

func (p *JsonType) Write(ctx context.Context, oprot thrift.TProtocol) error

type KeyValue

type KeyValue struct {
	Key   string  `thrift:"key,1,required" db:"key" json:"key"`
	Value *string `thrift:"value,2" db:"value" json:"value,omitempty"`
}

Wrapper struct to store key values

Attributes:

  • Key
  • Value

func NewKeyValue

func NewKeyValue() *KeyValue

func (*KeyValue) GetKey

func (p *KeyValue) GetKey() string

func (*KeyValue) GetValue

func (p *KeyValue) GetValue() string

func (*KeyValue) IsSetValue

func (p *KeyValue) IsSetValue() bool

func (*KeyValue) Read

func (p *KeyValue) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*KeyValue) ReadField1

func (p *KeyValue) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*KeyValue) ReadField2

func (p *KeyValue) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*KeyValue) String

func (p *KeyValue) String() string

func (*KeyValue) Write

func (p *KeyValue) Write(ctx context.Context, oprot thrift.TProtocol) error

type ListType added in v1.4.0

type ListType struct {
}
var LogicalType_LIST_DEFAULT *ListType

func NewListType added in v1.4.0

func NewListType() *ListType

func (*ListType) Read added in v1.4.0

func (p *ListType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*ListType) String added in v1.4.0

func (p *ListType) String() string

func (*ListType) Write added in v1.4.0

func (p *ListType) Write(ctx context.Context, oprot thrift.TProtocol) error

type LogicalType added in v1.4.0

type LogicalType struct {
	STRING    *StringType    `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"`
	MAP       *MapType       `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"`
	LIST      *ListType      `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"`
	ENUM      *EnumType      `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"`
	DECIMAL   *DecimalType   `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"`
	DATE      *DateType      `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"`
	TIME      *TimeType      `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"`
	TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"`
	// unused field # 9
	INTEGER *IntType  `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"`
	UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"`
	JSON    *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"`
	BSON    *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"`
	UUID    *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"`
}

LogicalType annotations to replace ConvertedType.

To maintain compatibility, implementations using LogicalType for a SchemaElement must also set the corresponding ConvertedType from the following table.

Attributes:

  • STRING
  • MAP
  • LIST
  • ENUM
  • DECIMAL
  • DATE
  • TIME
  • TIMESTAMP
  • INTEGER
  • UNKNOWN
  • JSON
  • BSON
  • UUID
var SchemaElement_LogicalType_DEFAULT *LogicalType

func NewLogicalType added in v1.4.0

func NewLogicalType() *LogicalType

func (*LogicalType) CountSetFieldsLogicalType added in v1.4.0

func (p *LogicalType) CountSetFieldsLogicalType() int

func (*LogicalType) GetBSON added in v1.4.0

func (p *LogicalType) GetBSON() *BsonType

func (*LogicalType) GetDATE added in v1.4.0

func (p *LogicalType) GetDATE() *DateType

func (*LogicalType) GetDECIMAL added in v1.4.0

func (p *LogicalType) GetDECIMAL() *DecimalType

func (*LogicalType) GetENUM added in v1.4.0

func (p *LogicalType) GetENUM() *EnumType

func (*LogicalType) GetINTEGER added in v1.4.0

func (p *LogicalType) GetINTEGER() *IntType

func (*LogicalType) GetJSON added in v1.4.0

func (p *LogicalType) GetJSON() *JsonType

func (*LogicalType) GetLIST added in v1.4.0

func (p *LogicalType) GetLIST() *ListType

func (*LogicalType) GetMAP added in v1.4.0

func (p *LogicalType) GetMAP() *MapType

func (*LogicalType) GetSTRING added in v1.4.0

func (p *LogicalType) GetSTRING() *StringType

func (*LogicalType) GetTIME added in v1.4.0

func (p *LogicalType) GetTIME() *TimeType

func (*LogicalType) GetTIMESTAMP added in v1.4.0

func (p *LogicalType) GetTIMESTAMP() *TimestampType

func (*LogicalType) GetUNKNOWN added in v1.4.0

func (p *LogicalType) GetUNKNOWN() *NullType

func (*LogicalType) GetUUID added in v1.4.0

func (p *LogicalType) GetUUID() *UUIDType

func (*LogicalType) IsSetBSON added in v1.4.0

func (p *LogicalType) IsSetBSON() bool

func (*LogicalType) IsSetDATE added in v1.4.0

func (p *LogicalType) IsSetDATE() bool

func (*LogicalType) IsSetDECIMAL added in v1.4.0

func (p *LogicalType) IsSetDECIMAL() bool

func (*LogicalType) IsSetENUM added in v1.4.0

func (p *LogicalType) IsSetENUM() bool

func (*LogicalType) IsSetINTEGER added in v1.4.0

func (p *LogicalType) IsSetINTEGER() bool

func (*LogicalType) IsSetJSON added in v1.4.0

func (p *LogicalType) IsSetJSON() bool

func (*LogicalType) IsSetLIST added in v1.4.0

func (p *LogicalType) IsSetLIST() bool

func (*LogicalType) IsSetMAP added in v1.4.0

func (p *LogicalType) IsSetMAP() bool

func (*LogicalType) IsSetSTRING added in v1.4.0

func (p *LogicalType) IsSetSTRING() bool

func (*LogicalType) IsSetTIME added in v1.4.0

func (p *LogicalType) IsSetTIME() bool

func (*LogicalType) IsSetTIMESTAMP added in v1.4.0

func (p *LogicalType) IsSetTIMESTAMP() bool

func (*LogicalType) IsSetUNKNOWN added in v1.4.0

func (p *LogicalType) IsSetUNKNOWN() bool

func (*LogicalType) IsSetUUID added in v1.4.0

func (p *LogicalType) IsSetUUID() bool

func (*LogicalType) Read added in v1.4.0

func (p *LogicalType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField1 added in v1.4.0

func (p *LogicalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField10 added in v1.4.0

func (p *LogicalType) ReadField10(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField11 added in v1.4.0

func (p *LogicalType) ReadField11(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField12 added in v1.4.0

func (p *LogicalType) ReadField12(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField13 added in v1.4.0

func (p *LogicalType) ReadField13(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField14 added in v1.4.0

func (p *LogicalType) ReadField14(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField2 added in v1.4.0

func (p *LogicalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField3 added in v1.4.0

func (p *LogicalType) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField4 added in v1.4.0

func (p *LogicalType) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField5 added in v1.4.0

func (p *LogicalType) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField6 added in v1.4.0

func (p *LogicalType) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField7 added in v1.4.0

func (p *LogicalType) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) ReadField8 added in v1.4.0

func (p *LogicalType) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*LogicalType) String added in v1.4.0

func (p *LogicalType) String() string

func (*LogicalType) Write added in v1.4.0

func (p *LogicalType) Write(ctx context.Context, oprot thrift.TProtocol) error

type MapType added in v1.4.0

type MapType struct {
}
var LogicalType_MAP_DEFAULT *MapType

func NewMapType added in v1.4.0

func NewMapType() *MapType

func (*MapType) Read added in v1.4.0

func (p *MapType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*MapType) String added in v1.4.0

func (p *MapType) String() string

func (*MapType) Write added in v1.4.0

func (p *MapType) Write(ctx context.Context, oprot thrift.TProtocol) error

type MicroSeconds added in v1.4.0

type MicroSeconds struct {
}
var TimeUnit_MICROS_DEFAULT *MicroSeconds

func NewMicroSeconds added in v1.4.0

func NewMicroSeconds() *MicroSeconds

func (*MicroSeconds) Read added in v1.4.0

func (p *MicroSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*MicroSeconds) String added in v1.4.0

func (p *MicroSeconds) String() string

func (*MicroSeconds) Write added in v1.4.0

func (p *MicroSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error

type MilliSeconds added in v1.4.0

type MilliSeconds struct {
}

Time units for logical types

var TimeUnit_MILLIS_DEFAULT *MilliSeconds

func NewMilliSeconds added in v1.4.0

func NewMilliSeconds() *MilliSeconds

func (*MilliSeconds) Read added in v1.4.0

func (p *MilliSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*MilliSeconds) String added in v1.4.0

func (p *MilliSeconds) String() string

func (*MilliSeconds) Write added in v1.4.0

func (p *MilliSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error

type NanoSeconds added in v1.4.0

type NanoSeconds struct {
}
var TimeUnit_NANOS_DEFAULT *NanoSeconds

func NewNanoSeconds added in v1.4.0

func NewNanoSeconds() *NanoSeconds

func (*NanoSeconds) Read added in v1.4.0

func (p *NanoSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*NanoSeconds) String added in v1.4.0

func (p *NanoSeconds) String() string

func (*NanoSeconds) Write added in v1.4.0

func (p *NanoSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error

type NullType added in v1.4.0

type NullType struct {
}

Logical type to annotate a column that is always null.

Sometimes when discovering the schema of existing data, values are always null and the physical type can't be determined. This annotation signals the case where the physical type was guessed from all null values.

var LogicalType_UNKNOWN_DEFAULT *NullType

func NewNullType added in v1.4.0

func NewNullType() *NullType

func (*NullType) Read added in v1.4.0

func (p *NullType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*NullType) String added in v1.4.0

func (p *NullType) String() string

func (*NullType) Write added in v1.4.0

func (p *NullType) Write(ctx context.Context, oprot thrift.TProtocol) error

type OffsetIndex added in v1.4.0

type OffsetIndex struct {
	PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"`
}

Attributes:

  • PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required

that page_locations[i].first_row_index < page_locations[i+1].first_row_index.

func NewOffsetIndex added in v1.4.0

func NewOffsetIndex() *OffsetIndex

func (*OffsetIndex) GetPageLocations added in v1.4.0

func (p *OffsetIndex) GetPageLocations() []*PageLocation

func (*OffsetIndex) Read added in v1.4.0

func (p *OffsetIndex) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*OffsetIndex) ReadField1 added in v1.4.0

func (p *OffsetIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*OffsetIndex) String added in v1.4.0

func (p *OffsetIndex) String() string

func (*OffsetIndex) Write added in v1.4.0

func (p *OffsetIndex) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageEncodingStats

type PageEncodingStats struct {
	PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"`
	Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
	Count    int32    `thrift:"count,3,required" db:"count" json:"count"`
}

statistics of a given page type and encoding

Attributes:

  • PageType: the page type (data/dic/...) *
  • Encoding: encoding of the page *
  • Count: number of pages of this type with this encoding *

func NewPageEncodingStats

func NewPageEncodingStats() *PageEncodingStats

func (*PageEncodingStats) GetCount

func (p *PageEncodingStats) GetCount() int32

func (*PageEncodingStats) GetEncoding

func (p *PageEncodingStats) GetEncoding() Encoding

func (*PageEncodingStats) GetPageType

func (p *PageEncodingStats) GetPageType() PageType

func (*PageEncodingStats) Read

func (p *PageEncodingStats) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) ReadField1

func (p *PageEncodingStats) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) ReadField2

func (p *PageEncodingStats) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) ReadField3

func (p *PageEncodingStats) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*PageEncodingStats) String

func (p *PageEncodingStats) String() string

func (*PageEncodingStats) Write

func (p *PageEncodingStats) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageHeader

type PageHeader struct {
	Type                 PageType              `thrift:"type,1,required" db:"type" json:"type"`
	UncompressedPageSize int32                 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"`
	CompressedPageSize   int32                 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"`
	Crc                  *int32                `thrift:"crc,4" db:"crc" json:"crc,omitempty"`
	DataPageHeader       *DataPageHeader       `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"`
	IndexPageHeader      *IndexPageHeader      `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"`
	DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"`
	DataPageHeaderV2     *DataPageHeaderV2     `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"`
}

Attributes:

  • Type: the type of the page: indicates which of the *_header fields is set *
  • UncompressedPageSize: Uncompressed page size in bytes (not including this header) *
  • CompressedPageSize: Compressed page size in bytes (not including this header) *
  • Crc: 32bit crc for the data below. This allows for disabling checksumming in HDFS

if only a few pages need to be read

  • DataPageHeader
  • IndexPageHeader
  • DictionaryPageHeader
  • DataPageHeaderV2

func NewPageHeader

func NewPageHeader() *PageHeader

func (*PageHeader) GetCompressedPageSize

func (p *PageHeader) GetCompressedPageSize() int32

func (*PageHeader) GetCrc

func (p *PageHeader) GetCrc() int32

func (*PageHeader) GetDataPageHeader

func (p *PageHeader) GetDataPageHeader() *DataPageHeader

func (*PageHeader) GetDataPageHeaderV2

func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2

func (*PageHeader) GetDictionaryPageHeader

func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader

func (*PageHeader) GetIndexPageHeader

func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader

func (*PageHeader) GetType

func (p *PageHeader) GetType() PageType

func (*PageHeader) GetUncompressedPageSize

func (p *PageHeader) GetUncompressedPageSize() int32

func (*PageHeader) IsSetCrc

func (p *PageHeader) IsSetCrc() bool

func (*PageHeader) IsSetDataPageHeader

func (p *PageHeader) IsSetDataPageHeader() bool

func (*PageHeader) IsSetDataPageHeaderV2

func (p *PageHeader) IsSetDataPageHeaderV2() bool

func (*PageHeader) IsSetDictionaryPageHeader

func (p *PageHeader) IsSetDictionaryPageHeader() bool

func (*PageHeader) IsSetIndexPageHeader

func (p *PageHeader) IsSetIndexPageHeader() bool

func (*PageHeader) Read

func (p *PageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField1

func (p *PageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField2

func (p *PageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField3

func (p *PageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField4

func (p *PageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField5

func (p *PageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField6

func (p *PageHeader) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField7

func (p *PageHeader) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) ReadField8

func (p *PageHeader) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*PageHeader) String

func (p *PageHeader) String() string

func (*PageHeader) Write

func (p *PageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageLocation added in v1.4.0

type PageLocation struct {
	Offset             int64 `thrift:"offset,1,required" db:"offset" json:"offset"`
	CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"`
	FirstRowIndex      int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"`
}

Attributes:

  • Offset: Offset of the page in the file *
  • CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header

length

  • FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages

change on record boundaries (r = 0).

func NewPageLocation added in v1.4.0

func NewPageLocation() *PageLocation

func (*PageLocation) GetCompressedPageSize added in v1.4.0

func (p *PageLocation) GetCompressedPageSize() int32

func (*PageLocation) GetFirstRowIndex added in v1.4.0

func (p *PageLocation) GetFirstRowIndex() int64

func (*PageLocation) GetOffset added in v1.4.0

func (p *PageLocation) GetOffset() int64

func (*PageLocation) Read added in v1.4.0

func (p *PageLocation) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) ReadField1 added in v1.4.0

func (p *PageLocation) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) ReadField2 added in v1.4.0

func (p *PageLocation) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) ReadField3 added in v1.4.0

func (p *PageLocation) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*PageLocation) String added in v1.4.0

func (p *PageLocation) String() string

func (*PageLocation) Write added in v1.4.0

func (p *PageLocation) Write(ctx context.Context, oprot thrift.TProtocol) error

type PageType

type PageType int64
const (
	PageType_DATA_PAGE       PageType = 0
	PageType_INDEX_PAGE      PageType = 1
	PageType_DICTIONARY_PAGE PageType = 2
	PageType_DATA_PAGE_V2    PageType = 3
)

func PageTypeFromString

func PageTypeFromString(s string) (PageType, error)

func PageTypePtr

func PageTypePtr(v PageType) *PageType

func (PageType) MarshalText

func (p PageType) MarshalText() ([]byte, error)

func (*PageType) Scan

func (p *PageType) Scan(value interface{}) error

func (PageType) String

func (p PageType) String() string

func (*PageType) UnmarshalText

func (p *PageType) UnmarshalText(text []byte) error

func (*PageType) Value

func (p *PageType) Value() (driver.Value, error)

type RowGroup

type RowGroup struct {
	Columns        []*ColumnChunk   `thrift:"columns,1,required" db:"columns" json:"columns"`
	TotalByteSize  int64            `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"`
	NumRows        int64            `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
	SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"`
}

Attributes:

  • Columns: Metadata for each column chunk in this row group.

This list must have the same order as the SchemaElement list in FileMetaData.

  • TotalByteSize: Total byte size of all the uncompressed column data in this row group *
  • NumRows: Number of rows in this row group *
  • SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup.

The sorting columns can be a subset of all the columns.

func NewRowGroup

func NewRowGroup() *RowGroup

func (*RowGroup) GetColumns

func (p *RowGroup) GetColumns() []*ColumnChunk

func (*RowGroup) GetNumRows

func (p *RowGroup) GetNumRows() int64

func (*RowGroup) GetSortingColumns

func (p *RowGroup) GetSortingColumns() []*SortingColumn

func (*RowGroup) GetTotalByteSize

func (p *RowGroup) GetTotalByteSize() int64

func (*RowGroup) IsSetSortingColumns

func (p *RowGroup) IsSetSortingColumns() bool

func (*RowGroup) Read

func (p *RowGroup) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField1

func (p *RowGroup) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField2

func (p *RowGroup) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField3

func (p *RowGroup) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) ReadField4

func (p *RowGroup) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*RowGroup) String

func (p *RowGroup) String() string

func (*RowGroup) Write

func (p *RowGroup) Write(ctx context.Context, oprot thrift.TProtocol) error

type SchemaElement

type SchemaElement struct {
	Type           *Type                `thrift:"type,1" db:"type" json:"type,omitempty"`
	TypeLength     *int32               `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"`
	RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"`
	Name           string               `thrift:"name,4,required" db:"name" json:"name"`
	NumChildren    *int32               `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"`
	ConvertedType  *ConvertedType       `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"`
	Scale          *int32               `thrift:"scale,7" db:"scale" json:"scale,omitempty"`
	Precision      *int32               `thrift:"precision,8" db:"precision" json:"precision,omitempty"`
	FieldID        *int32               `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"`
	LogicalType    *LogicalType         `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"`
}

Represents an element inside a schema definition.

  • if it is a group (inner node) then type is undefined and num_children is defined
  • if it is a primitive type (leaf) then type is defined and num_children is undefined

the nodes are listed in depth first traversal order.

Attributes:

  • Type: Data type for this field. Not set if the current element is a non-leaf node
  • TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values.

Otherwise, if specified, this is the maximum bit length to store any of the values. (e.g. a low cardinality INT col could have this set to 3). Note that this is in the schema, and therefore fixed for the entire file.

  • RepetitionType: repetition of the field. The root of the schema does not have a repetition_type.

All other nodes must have one

  • Name: Name of the field in the schema
  • NumChildren: Nested fields. Since thrift does not support nested fields,

the nesting is flattened to a single list by a depth-first traversal. The children count is used to construct the nested relationship. This field is not set when the element is a primitive type

  • ConvertedType: When the schema is the result of a conversion from another model

Used to record the original type to help with cross conversion.

  • Scale: Used when this column contains decimal data.

See the DECIMAL converted type for more details.

  • Precision
  • FieldID: When the original schema supports field ids, this will save the

original field id in the parquet schema

  • LogicalType: The logical type of this SchemaElement

LogicalType replaces ConvertedType, but ConvertedType is still required for some logical types to ensure forward-compatibility in format v1.

func NewSchemaElement

func NewSchemaElement() *SchemaElement

func (*SchemaElement) GetConvertedType

func (p *SchemaElement) GetConvertedType() ConvertedType

func (*SchemaElement) GetFieldID

func (p *SchemaElement) GetFieldID() int32

func (*SchemaElement) GetLogicalType added in v1.4.0

func (p *SchemaElement) GetLogicalType() *LogicalType

func (*SchemaElement) GetName

func (p *SchemaElement) GetName() string

func (*SchemaElement) GetNumChildren

func (p *SchemaElement) GetNumChildren() int32

func (*SchemaElement) GetPrecision

func (p *SchemaElement) GetPrecision() int32

func (*SchemaElement) GetRepetitionType

func (p *SchemaElement) GetRepetitionType() FieldRepetitionType

func (*SchemaElement) GetScale

func (p *SchemaElement) GetScale() int32

func (*SchemaElement) GetType

func (p *SchemaElement) GetType() Type

func (*SchemaElement) GetTypeLength

func (p *SchemaElement) GetTypeLength() int32

func (*SchemaElement) IsSetConvertedType

func (p *SchemaElement) IsSetConvertedType() bool

func (*SchemaElement) IsSetFieldID

func (p *SchemaElement) IsSetFieldID() bool

func (*SchemaElement) IsSetLogicalType added in v1.4.0

func (p *SchemaElement) IsSetLogicalType() bool

func (*SchemaElement) IsSetNumChildren

func (p *SchemaElement) IsSetNumChildren() bool

func (*SchemaElement) IsSetPrecision

func (p *SchemaElement) IsSetPrecision() bool

func (*SchemaElement) IsSetRepetitionType

func (p *SchemaElement) IsSetRepetitionType() bool

func (*SchemaElement) IsSetScale

func (p *SchemaElement) IsSetScale() bool

func (*SchemaElement) IsSetType

func (p *SchemaElement) IsSetType() bool

func (*SchemaElement) IsSetTypeLength

func (p *SchemaElement) IsSetTypeLength() bool

func (*SchemaElement) Read

func (p *SchemaElement) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField1

func (p *SchemaElement) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField10 added in v1.4.0

func (p *SchemaElement) ReadField10(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField2

func (p *SchemaElement) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField3

func (p *SchemaElement) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField4

func (p *SchemaElement) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField5

func (p *SchemaElement) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField6

func (p *SchemaElement) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField7

func (p *SchemaElement) ReadField7(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField8

func (p *SchemaElement) ReadField8(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) ReadField9

func (p *SchemaElement) ReadField9(ctx context.Context, iprot thrift.TProtocol) error

func (*SchemaElement) String

func (p *SchemaElement) String() string

func (*SchemaElement) Write

func (p *SchemaElement) Write(ctx context.Context, oprot thrift.TProtocol) error

type SortingColumn

type SortingColumn struct {
	ColumnIdx  int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"`
	Descending bool  `thrift:"descending,2,required" db:"descending" json:"descending"`
	NullsFirst bool  `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"`
}

Wrapper struct to specify sort order

Attributes:

  • ColumnIdx: The column index (in this row group)
  • Descending: If true, indicates this column is sorted in descending order.
  • NullsFirst: If true, nulls will come before non-null values, otherwise,

nulls go at the end.

func NewSortingColumn

func NewSortingColumn() *SortingColumn

func (*SortingColumn) GetColumnIdx

func (p *SortingColumn) GetColumnIdx() int32

func (*SortingColumn) GetDescending

func (p *SortingColumn) GetDescending() bool

func (*SortingColumn) GetNullsFirst

func (p *SortingColumn) GetNullsFirst() bool

func (*SortingColumn) Read

func (p *SortingColumn) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) ReadField1

func (p *SortingColumn) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) ReadField2

func (p *SortingColumn) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) ReadField3

func (p *SortingColumn) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*SortingColumn) String

func (p *SortingColumn) String() string

func (*SortingColumn) Write

func (p *SortingColumn) Write(ctx context.Context, oprot thrift.TProtocol) error

type Statistics

type Statistics struct {
	Max           []byte `thrift:"max,1" db:"max" json:"max,omitempty"`
	Min           []byte `thrift:"min,2" db:"min" json:"min,omitempty"`
	NullCount     *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"`
	DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"`
	MaxValue      []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"`
	MinValue      []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"`
}

Statistics per row group and per page. All fields are optional.

Attributes:

  • Max: DEPRECATED: min and max value of the column. Use min_value and max_value.

Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.

These fields encode min and max values determined by signed comparison only. New files should use the correct order for a column's logical type and store the values in the min_value and max_value fields.

To support older readers, these may be set when the column order is signed.

  • Min
  • NullCount: count of null values in the column
  • DistinctCount: count of distinct values occurring
  • MaxValue: Min and max values for the column, determined by its ColumnOrder.

Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.

  • MinValue
var ColumnMetaData_Statistics_DEFAULT *Statistics
var DataPageHeaderV2_Statistics_DEFAULT *Statistics
var DataPageHeader_Statistics_DEFAULT *Statistics

func NewStatistics

func NewStatistics() *Statistics

func (*Statistics) GetDistinctCount

func (p *Statistics) GetDistinctCount() int64

func (*Statistics) GetMax

func (p *Statistics) GetMax() []byte

func (*Statistics) GetMaxValue added in v1.4.0

func (p *Statistics) GetMaxValue() []byte

func (*Statistics) GetMin

func (p *Statistics) GetMin() []byte

func (*Statistics) GetMinValue added in v1.4.0

func (p *Statistics) GetMinValue() []byte

func (*Statistics) GetNullCount

func (p *Statistics) GetNullCount() int64

func (*Statistics) IsSetDistinctCount

func (p *Statistics) IsSetDistinctCount() bool

func (*Statistics) IsSetMax

func (p *Statistics) IsSetMax() bool

func (*Statistics) IsSetMaxValue added in v1.4.0

func (p *Statistics) IsSetMaxValue() bool

func (*Statistics) IsSetMin

func (p *Statistics) IsSetMin() bool

func (*Statistics) IsSetMinValue added in v1.4.0

func (p *Statistics) IsSetMinValue() bool

func (*Statistics) IsSetNullCount

func (p *Statistics) IsSetNullCount() bool

func (*Statistics) Read

func (p *Statistics) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField1

func (p *Statistics) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField2

func (p *Statistics) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField3

func (p *Statistics) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField4

func (p *Statistics) ReadField4(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField5 added in v1.4.0

func (p *Statistics) ReadField5(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) ReadField6 added in v1.4.0

func (p *Statistics) ReadField6(ctx context.Context, iprot thrift.TProtocol) error

func (*Statistics) String

func (p *Statistics) String() string

func (*Statistics) Write

func (p *Statistics) Write(ctx context.Context, oprot thrift.TProtocol) error

type StringType added in v1.4.0

type StringType struct {
}

Empty structs to use as logical type annotations

var LogicalType_STRING_DEFAULT *StringType

func NewStringType added in v1.4.0

func NewStringType() *StringType

func (*StringType) Read added in v1.4.0

func (p *StringType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*StringType) String added in v1.4.0

func (p *StringType) String() string

func (*StringType) Write added in v1.4.0

func (p *StringType) Write(ctx context.Context, oprot thrift.TProtocol) error

type TimeType added in v1.4.0

type TimeType struct {
	IsAdjustedToUTC bool      `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"`
	Unit            *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"`
}

Time logical type annotation

Allowed for physical types: INT32 (millis), INT64 (micros, nanos)

Attributes:

  • IsAdjustedToUTC
  • Unit
var LogicalType_TIME_DEFAULT *TimeType

func NewTimeType added in v1.4.0

func NewTimeType() *TimeType

func (*TimeType) GetIsAdjustedToUTC added in v1.4.0

func (p *TimeType) GetIsAdjustedToUTC() bool

func (*TimeType) GetUnit added in v1.4.0

func (p *TimeType) GetUnit() *TimeUnit

func (*TimeType) IsSetUnit added in v1.4.0

func (p *TimeType) IsSetUnit() bool

func (*TimeType) Read added in v1.4.0

func (p *TimeType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeType) ReadField1 added in v1.4.0

func (p *TimeType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeType) ReadField2 added in v1.4.0

func (p *TimeType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeType) String added in v1.4.0

func (p *TimeType) String() string

func (*TimeType) Write added in v1.4.0

func (p *TimeType) Write(ctx context.Context, oprot thrift.TProtocol) error

type TimeUnit added in v1.4.0

type TimeUnit struct {
	MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"`
	MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"`
	NANOS  *NanoSeconds  `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"`
}

Attributes:

  • MILLIS
  • MICROS
  • NANOS
var TimeType_Unit_DEFAULT *TimeUnit
var TimestampType_Unit_DEFAULT *TimeUnit

func NewTimeUnit added in v1.4.0

func NewTimeUnit() *TimeUnit

func (*TimeUnit) CountSetFieldsTimeUnit added in v1.4.0

func (p *TimeUnit) CountSetFieldsTimeUnit() int

func (*TimeUnit) GetMICROS added in v1.4.0

func (p *TimeUnit) GetMICROS() *MicroSeconds

func (*TimeUnit) GetMILLIS added in v1.4.0

func (p *TimeUnit) GetMILLIS() *MilliSeconds

func (*TimeUnit) GetNANOS added in v1.4.0

func (p *TimeUnit) GetNANOS() *NanoSeconds

func (*TimeUnit) IsSetMICROS added in v1.4.0

func (p *TimeUnit) IsSetMICROS() bool

func (*TimeUnit) IsSetMILLIS added in v1.4.0

func (p *TimeUnit) IsSetMILLIS() bool

func (*TimeUnit) IsSetNANOS added in v1.4.0

func (p *TimeUnit) IsSetNANOS() bool

func (*TimeUnit) Read added in v1.4.0

func (p *TimeUnit) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) ReadField1 added in v1.4.0

func (p *TimeUnit) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) ReadField2 added in v1.4.0

func (p *TimeUnit) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) ReadField3 added in v1.4.0

func (p *TimeUnit) ReadField3(ctx context.Context, iprot thrift.TProtocol) error

func (*TimeUnit) String added in v1.4.0

func (p *TimeUnit) String() string

func (*TimeUnit) Write added in v1.4.0

func (p *TimeUnit) Write(ctx context.Context, oprot thrift.TProtocol) error

type TimestampType added in v1.4.0

type TimestampType struct {
	IsAdjustedToUTC bool      `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"`
	Unit            *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"`
}

Timestamp logical type annotation

Allowed for physical types: INT64

Attributes:

  • IsAdjustedToUTC
  • Unit
var LogicalType_TIMESTAMP_DEFAULT *TimestampType

func NewTimestampType added in v1.4.0

func NewTimestampType() *TimestampType

func (*TimestampType) GetIsAdjustedToUTC added in v1.4.0

func (p *TimestampType) GetIsAdjustedToUTC() bool

func (*TimestampType) GetUnit added in v1.4.0

func (p *TimestampType) GetUnit() *TimeUnit

func (*TimestampType) IsSetUnit added in v1.4.0

func (p *TimestampType) IsSetUnit() bool

func (*TimestampType) Read added in v1.4.0

func (p *TimestampType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TimestampType) ReadField1 added in v1.4.0

func (p *TimestampType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error

func (*TimestampType) ReadField2 added in v1.4.0

func (p *TimestampType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error

func (*TimestampType) String added in v1.4.0

func (p *TimestampType) String() string

func (*TimestampType) Write added in v1.4.0

func (p *TimestampType) Write(ctx context.Context, oprot thrift.TProtocol) error

type Type

type Type int64

Types supported by Parquet. These types are intended to be used in combination with the encodings to control the on-disk storage format. For example, INT16 is not included as a type since a good encoding of INT32 would handle this.

const (
	Type_BOOLEAN              Type = 0
	Type_INT32                Type = 1
	Type_INT64                Type = 2
	Type_INT96                Type = 3
	Type_FLOAT                Type = 4
	Type_DOUBLE               Type = 5
	Type_BYTE_ARRAY           Type = 6
	Type_FIXED_LEN_BYTE_ARRAY Type = 7
)
var SchemaElement_Type_DEFAULT Type

func TypeFromString

func TypeFromString(s string) (Type, error)

func TypePtr

func TypePtr(v Type) *Type

func (Type) MarshalText

func (p Type) MarshalText() ([]byte, error)

func (*Type) Scan

func (p *Type) Scan(value interface{}) error

func (Type) String

func (p Type) String() string

func (*Type) UnmarshalText

func (p *Type) UnmarshalText(text []byte) error

func (*Type) Value

func (p *Type) Value() (driver.Value, error)

type TypeDefinedOrder added in v1.4.0

type TypeDefinedOrder struct {
}

Empty struct to signal the order defined by the physical or logical type

var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder

func NewTypeDefinedOrder added in v1.4.0

func NewTypeDefinedOrder() *TypeDefinedOrder

func (*TypeDefinedOrder) Read added in v1.4.0

func (p *TypeDefinedOrder) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*TypeDefinedOrder) String added in v1.4.0

func (p *TypeDefinedOrder) String() string

func (*TypeDefinedOrder) Write added in v1.4.0

func (p *TypeDefinedOrder) Write(ctx context.Context, oprot thrift.TProtocol) error

type UUIDType added in v1.4.0

type UUIDType struct {
}
var LogicalType_UUID_DEFAULT *UUIDType

func NewUUIDType added in v1.4.0

func NewUUIDType() *UUIDType

func (*UUIDType) Read added in v1.4.0

func (p *UUIDType) Read(ctx context.Context, iprot thrift.TProtocol) error

func (*UUIDType) String added in v1.4.0

func (p *UUIDType) String() string

func (*UUIDType) Write added in v1.4.0

func (p *UUIDType) Write(ctx context.Context, oprot thrift.TProtocol) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL