parquetformat

package
v0.0.0-...-7924348 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 4, 2020 License: MIT Imports: 4 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ColumnChunk_ColumnIndexLength_DEFAULT int32
View Source
var ColumnChunk_ColumnIndexOffset_DEFAULT int64
View Source
var ColumnChunk_FilePath_DEFAULT string
View Source
var ColumnChunk_OffsetIndexLength_DEFAULT int32
View Source
var ColumnChunk_OffsetIndexOffset_DEFAULT int64
View Source
var ColumnIndex_NullCounts_DEFAULT []int64
View Source
var ColumnMetaData_DictionaryPageOffset_DEFAULT int64
View Source
var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats
View Source
var ColumnMetaData_IndexPageOffset_DEFAULT int64
View Source
var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue
View Source
var DataPageHeaderV2_IsCompressed_DEFAULT bool = true
View Source
var DictionaryPageHeader_IsSorted_DEFAULT bool
View Source
var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder
View Source
var FileMetaData_CreatedBy_DEFAULT string
View Source
var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue
View Source
var GoUnusedProtection__ int
View Source
var KeyValue_Value_DEFAULT string
View Source
var PageHeader_Crc_DEFAULT int32
View Source
var RowGroup_SortingColumns_DEFAULT []*SortingColumn
View Source
var SchemaElement_FieldID_DEFAULT int32
View Source
var SchemaElement_NumChildren_DEFAULT int32
View Source
var SchemaElement_Precision_DEFAULT int32
View Source
var SchemaElement_Scale_DEFAULT int32
View Source
var SchemaElement_TypeLength_DEFAULT int32
View Source
var Statistics_DistinctCount_DEFAULT int64
View Source
var Statistics_MaxValue_DEFAULT []byte
View Source
var Statistics_Max_DEFAULT []byte
View Source
var Statistics_MinValue_DEFAULT []byte
View Source
var Statistics_Min_DEFAULT []byte
View Source
var Statistics_NullCount_DEFAULT int64

Functions

This section is empty.

Types

type BoundaryOrder

type BoundaryOrder int64

Enum to annotate whether lists of min/max elements inside ColumnIndex are ordered and if so, in which direction.

const (
	BoundaryOrder_UNORDERED  BoundaryOrder = 0
	BoundaryOrder_ASCENDING  BoundaryOrder = 1
	BoundaryOrder_DESCENDING BoundaryOrder = 2
)

func BoundaryOrderFromString

func BoundaryOrderFromString(s string) (BoundaryOrder, error)

func BoundaryOrderPtr

func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder

func (BoundaryOrder) MarshalText

func (p BoundaryOrder) MarshalText() ([]byte, error)

func (BoundaryOrder) String

func (p BoundaryOrder) String() string

func (*BoundaryOrder) UnmarshalText

func (p *BoundaryOrder) UnmarshalText(text []byte) error

type BsonType

type BsonType struct {
}

Embedded BSON logical type annotation

Allowed for physical types: BINARY

var LogicalType_BSON_DEFAULT *BsonType

func NewBsonType

func NewBsonType() *BsonType

func (*BsonType) String

func (p *BsonType) String() string

type ColumnChunk

type ColumnChunk struct {
	FilePath          *string         `thrift:"file_path,1" json:"file_path,omitempty"`
	FileOffset        int64           `thrift:"file_offset,2,required" json:"file_offset"`
	MetaData          *ColumnMetaData `thrift:"meta_data,3" json:"meta_data,omitempty"`
	OffsetIndexOffset *int64          `thrift:"offset_index_offset,4" json:"offset_index_offset,omitempty"`
	OffsetIndexLength *int32          `thrift:"offset_index_length,5" json:"offset_index_length,omitempty"`
	ColumnIndexOffset *int64          `thrift:"column_index_offset,6" json:"column_index_offset,omitempty"`
	ColumnIndexLength *int32          `thrift:"column_index_length,7" json:"column_index_length,omitempty"`
}

Attributes:

  • FilePath: File where column data is stored. If not set, assumed to be the same file as the metadata. This path is relative to the current file.

  • FileOffset: Byte offset in file_path to the ColumnMetaData *
  • MetaData: Column metadata for this chunk. This is the same content as what is at file_path/file_offset. Having it here has it replicated in the file metadata.

  • OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
  • OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
  • ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
  • ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *

func NewColumnChunk

func NewColumnChunk() *ColumnChunk

func (*ColumnChunk) GetColumnIndexLength

func (p *ColumnChunk) GetColumnIndexLength() int32

func (*ColumnChunk) GetColumnIndexOffset

func (p *ColumnChunk) GetColumnIndexOffset() int64

func (*ColumnChunk) GetFileOffset

func (p *ColumnChunk) GetFileOffset() int64

func (*ColumnChunk) GetFilePath

func (p *ColumnChunk) GetFilePath() string

func (*ColumnChunk) GetMetaData

func (p *ColumnChunk) GetMetaData() *ColumnMetaData

func (*ColumnChunk) GetOffsetIndexLength

func (p *ColumnChunk) GetOffsetIndexLength() int32

func (*ColumnChunk) GetOffsetIndexOffset

func (p *ColumnChunk) GetOffsetIndexOffset() int64

func (*ColumnChunk) IsSetColumnIndexLength

func (p *ColumnChunk) IsSetColumnIndexLength() bool

func (*ColumnChunk) IsSetColumnIndexOffset

func (p *ColumnChunk) IsSetColumnIndexOffset() bool

func (*ColumnChunk) IsSetFilePath

func (p *ColumnChunk) IsSetFilePath() bool

func (*ColumnChunk) IsSetMetaData

func (p *ColumnChunk) IsSetMetaData() bool

func (*ColumnChunk) IsSetOffsetIndexLength

func (p *ColumnChunk) IsSetOffsetIndexLength() bool

func (*ColumnChunk) IsSetOffsetIndexOffset

func (p *ColumnChunk) IsSetOffsetIndexOffset() bool

func (*ColumnChunk) String

func (p *ColumnChunk) String() string

type ColumnIndex

type ColumnIndex struct {
	NullPages     []bool        `thrift:"null_pages,1,required" json:"null_pages"`
	MinValues     [][]byte      `thrift:"min_values,2,required" json:"min_values"`
	MaxValues     [][]byte      `thrift:"max_values,3,required" json:"max_values"`
	BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" json:"boundary_order"`
	NullCounts    []int64       `thrift:"null_counts,5" json:"null_counts,omitempty"`
}

Description for ColumnIndex. Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]

Attributes:

  • NullPages: A list of Boolean values to determine the validity of the corresponding

min and max values. If true, a page contains only null values, and writers have to set the corresponding entries in min_values and max_values to byte[0], so that all lists have the same length. If false, the corresponding entries in min_values and max_values must be valid.

  • MinValues: Two lists containing lower and upper bounds for the values of each page. These may be the actual minimum and maximum values found on a page, but can also be (more compact) values that do not exist on a page. For example, instead of storing "Blart Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". Such more compact values must still be valid values within the column's logical type. Readers must make sure that list entries are populated before using them by inspecting null_pages.

  • MaxValues
  • BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in which direction. This allows readers to perform binary searches in both lists. Readers cannot assume that max_values[i] <= min_values[i+1], even if the lists are ordered.

  • NullCounts: A list containing the number of null values for each page *

func NewColumnIndex

func NewColumnIndex() *ColumnIndex

func (*ColumnIndex) GetBoundaryOrder

func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder

func (*ColumnIndex) GetMaxValues

func (p *ColumnIndex) GetMaxValues() [][]byte

func (*ColumnIndex) GetMinValues

func (p *ColumnIndex) GetMinValues() [][]byte

func (*ColumnIndex) GetNullCounts

func (p *ColumnIndex) GetNullCounts() []int64

func (*ColumnIndex) GetNullPages

func (p *ColumnIndex) GetNullPages() []bool

func (*ColumnIndex) IsSetNullCounts

func (p *ColumnIndex) IsSetNullCounts() bool

func (*ColumnIndex) String

func (p *ColumnIndex) String() string

type ColumnMetaData

type ColumnMetaData struct {
	Type                  Type                 `thrift:"type,1,required" json:"type"`
	Encodings             []Encoding           `thrift:"encodings,2,required" json:"encodings"`
	PathInSchema          []string             `thrift:"path_in_schema,3,required" json:"path_in_schema"`
	Codec                 CompressionCodec     `thrift:"codec,4,required" json:"codec"`
	NumValues             int64                `thrift:"num_values,5,required" json:"num_values"`
	TotalUncompressedSize int64                `thrift:"total_uncompressed_size,6,required" json:"total_uncompressed_size"`
	TotalCompressedSize   int64                `thrift:"total_compressed_size,7,required" json:"total_compressed_size"`
	KeyValueMetadata      []*KeyValue          `thrift:"key_value_metadata,8" json:"key_value_metadata,omitempty"`
	DataPageOffset        int64                `thrift:"data_page_offset,9,required" json:"data_page_offset"`
	IndexPageOffset       *int64               `thrift:"index_page_offset,10" json:"index_page_offset,omitempty"`
	DictionaryPageOffset  *int64               `thrift:"dictionary_page_offset,11" json:"dictionary_page_offset,omitempty"`
	Statistics            *Statistics          `thrift:"statistics,12" json:"statistics,omitempty"`
	EncodingStats         []*PageEncodingStats `thrift:"encoding_stats,13" json:"encoding_stats,omitempty"`
}

Description for column metadata

Attributes:

  • Type: Type of this column *
  • Encodings: Set of all encodings used for this column. The purpose is to validate whether we can decode those pages. *

  • PathInSchema: Path in schema *
  • Codec: Compression codec *
  • NumValues: Number of values in this column *
  • TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) *
  • TotalCompressedSize: total byte size of all compressed pages in this column chunk (including the headers) *
  • KeyValueMetadata: Optional key/value metadata *
  • DataPageOffset: Byte offset from beginning of file to first data page *
  • IndexPageOffset: Byte offset from beginning of file to root index page *
  • DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page *
  • Statistics: optional statistics for this column chunk
  • EncodingStats: Set of all encodings used for pages in this column chunk.

This information can be used to determine if all data pages are dictionary encoded for example *

var ColumnChunk_MetaData_DEFAULT *ColumnMetaData

func NewColumnMetaData

func NewColumnMetaData() *ColumnMetaData

func (*ColumnMetaData) GetCodec

func (p *ColumnMetaData) GetCodec() CompressionCodec

func (*ColumnMetaData) GetDataPageOffset

func (p *ColumnMetaData) GetDataPageOffset() int64

func (*ColumnMetaData) GetDictionaryPageOffset

func (p *ColumnMetaData) GetDictionaryPageOffset() int64

func (*ColumnMetaData) GetEncodingStats

func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats

func (*ColumnMetaData) GetEncodings

func (p *ColumnMetaData) GetEncodings() []Encoding

func (*ColumnMetaData) GetIndexPageOffset

func (p *ColumnMetaData) GetIndexPageOffset() int64

func (*ColumnMetaData) GetKeyValueMetadata

func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue

func (*ColumnMetaData) GetNumValues

func (p *ColumnMetaData) GetNumValues() int64

func (*ColumnMetaData) GetPathInSchema

func (p *ColumnMetaData) GetPathInSchema() []string

func (*ColumnMetaData) GetStatistics

func (p *ColumnMetaData) GetStatistics() *Statistics

func (*ColumnMetaData) GetTotalCompressedSize

func (p *ColumnMetaData) GetTotalCompressedSize() int64

func (*ColumnMetaData) GetTotalUncompressedSize

func (p *ColumnMetaData) GetTotalUncompressedSize() int64

func (*ColumnMetaData) GetType

func (p *ColumnMetaData) GetType() Type

func (*ColumnMetaData) IsSetDictionaryPageOffset

func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool

func (*ColumnMetaData) IsSetEncodingStats

func (p *ColumnMetaData) IsSetEncodingStats() bool

func (*ColumnMetaData) IsSetIndexPageOffset

func (p *ColumnMetaData) IsSetIndexPageOffset() bool

func (*ColumnMetaData) IsSetKeyValueMetadata

func (p *ColumnMetaData) IsSetKeyValueMetadata() bool

func (*ColumnMetaData) IsSetStatistics

func (p *ColumnMetaData) IsSetStatistics() bool

func (*ColumnMetaData) String

func (p *ColumnMetaData) String() string

type ColumnOrder

type ColumnOrder struct {
	TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" json:"TYPE_ORDER,omitempty"`
}

Union to specify the order used for the min_value and max_value fields for a column. This union takes the role of an enhanced enum that allows rich elements (which will be needed for a collation-based ordering in the future).

Possible values are:

  • TypeDefinedOrder - the column uses the order defined by its logical or physical type (if there is no logical type).

If the reader does not support the value of this union, min and max stats for this column should be ignored.

Attributes:

  • TYPE_ORDER: The sort orders for logical types are:

UTF8 - unsigned byte-wise comparison
INT8 - signed comparison
INT16 - signed comparison
INT32 - signed comparison
INT64 - signed comparison
UINT8 - unsigned comparison
UINT16 - unsigned comparison
UINT32 - unsigned comparison
UINT64 - unsigned comparison
DECIMAL - signed comparison of the represented value
DATE - signed comparison
TIME_MILLIS - signed comparison
TIME_MICROS - signed comparison
TIMESTAMP_MILLIS - signed comparison
TIMESTAMP_MICROS - signed comparison
INTERVAL - unsigned comparison
JSON - unsigned byte-wise comparison
BSON - unsigned byte-wise comparison
ENUM - unsigned byte-wise comparison
LIST - undefined
MAP - undefined

In the absence of logical types, the sort order is determined by the physical type:

BOOLEAN - false, true
INT32 - signed comparison
INT64 - signed comparison
INT96 (only used for legacy timestamps) - undefined
FLOAT - signed comparison of the represented value (*)
DOUBLE - signed comparison of the represented value (*)
BYTE_ARRAY - unsigned byte-wise comparison
FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison

(*) Because the sorting order is not specified properly for floating point values (relations vs. total ordering), the following compatibility rules should be applied when reading statistics:
- If the min is a NaN, it should be ignored.
- If the max is a NaN, it should be ignored.
- If the min is +0, the row group may contain -0 values as well.
- If the max is -0, the row group may contain +0 values as well.
- When looking for NaN values, min and max should be ignored.

func NewColumnOrder

func NewColumnOrder() *ColumnOrder

func (*ColumnOrder) CountSetFieldsColumnOrder

func (p *ColumnOrder) CountSetFieldsColumnOrder() int

func (*ColumnOrder) GetTYPE_ORDER

func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder

func (*ColumnOrder) IsSetTYPE_ORDER

func (p *ColumnOrder) IsSetTYPE_ORDER() bool

func (*ColumnOrder) String

func (p *ColumnOrder) String() string

type CompressionCodec

type CompressionCodec int64

Supported compression algorithms.

Codecs added in 2.4 can be read by readers based on 2.4 and later. Codec support may vary between readers based on the format version and libraries available at runtime. Gzip, Snappy, and LZ4 codecs are widely available, while Zstd and Brotli require additional libraries.

const (
	CompressionCodec_UNCOMPRESSED CompressionCodec = 0
	CompressionCodec_SNAPPY       CompressionCodec = 1
	CompressionCodec_GZIP         CompressionCodec = 2
	CompressionCodec_LZO          CompressionCodec = 3
	CompressionCodec_BROTLI       CompressionCodec = 4
	CompressionCodec_LZ4          CompressionCodec = 5
	CompressionCodec_ZSTD         CompressionCodec = 6
)

func CompressionCodecFromString

func CompressionCodecFromString(s string) (CompressionCodec, error)

func CompressionCodecPtr

func CompressionCodecPtr(v CompressionCodec) *CompressionCodec

func (CompressionCodec) MarshalText

func (p CompressionCodec) MarshalText() ([]byte, error)

func (CompressionCodec) String

func (p CompressionCodec) String() string

func (*CompressionCodec) UnmarshalText

func (p *CompressionCodec) UnmarshalText(text []byte) error

type ConvertedType

type ConvertedType int64

Common types used by frameworks (e.g. hive, pig) using parquet. This helps map between types in those frameworks and the base types in parquet. This is only metadata and not needed to read or write the data.

const (
	ConvertedType_UTF8             ConvertedType = 0
	ConvertedType_MAP              ConvertedType = 1
	ConvertedType_MAP_KEY_VALUE    ConvertedType = 2
	ConvertedType_LIST             ConvertedType = 3
	ConvertedType_ENUM             ConvertedType = 4
	ConvertedType_DECIMAL          ConvertedType = 5
	ConvertedType_DATE             ConvertedType = 6
	ConvertedType_TIME_MILLIS      ConvertedType = 7
	ConvertedType_TIME_MICROS      ConvertedType = 8
	ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9
	ConvertedType_TIMESTAMP_MICROS ConvertedType = 10
	ConvertedType_UINT_8           ConvertedType = 11
	ConvertedType_UINT_16          ConvertedType = 12
	ConvertedType_UINT_32          ConvertedType = 13
	ConvertedType_UINT_64          ConvertedType = 14
	ConvertedType_INT_8            ConvertedType = 15
	ConvertedType_INT_16           ConvertedType = 16
	ConvertedType_INT_32           ConvertedType = 17
	ConvertedType_INT_64           ConvertedType = 18
	ConvertedType_JSON             ConvertedType = 19
	ConvertedType_BSON             ConvertedType = 20
	ConvertedType_INTERVAL         ConvertedType = 21
)
var SchemaElement_ConvertedType_DEFAULT ConvertedType

func ConvertedTypeFromString

func ConvertedTypeFromString(s string) (ConvertedType, error)

func ConvertedTypePtr

func ConvertedTypePtr(v ConvertedType) *ConvertedType

func (ConvertedType) MarshalText

func (p ConvertedType) MarshalText() ([]byte, error)

func (ConvertedType) String

func (p ConvertedType) String() string

func (*ConvertedType) UnmarshalText

func (p *ConvertedType) UnmarshalText(text []byte) error

type DataPageHeader

type DataPageHeader struct {
	NumValues               int32       `thrift:"num_values,1,required" json:"num_values"`
	Encoding                Encoding    `thrift:"encoding,2,required" json:"encoding"`
	DefinitionLevelEncoding Encoding    `thrift:"definition_level_encoding,3,required" json:"definition_level_encoding"`
	RepetitionLevelEncoding Encoding    `thrift:"repetition_level_encoding,4,required" json:"repetition_level_encoding"`
	Statistics              *Statistics `thrift:"statistics,5" json:"statistics,omitempty"`
}

Data page header

Attributes:

  • NumValues: Number of values, including NULLs, in this data page. *
  • Encoding: Encoding used for this data page *
  • DefinitionLevelEncoding: Encoding used for definition levels *
  • RepetitionLevelEncoding: Encoding used for repetition levels *
  • Statistics: Optional statistics for the data in this page*
var PageHeader_DataPageHeader_DEFAULT *DataPageHeader

func NewDataPageHeader

func NewDataPageHeader() *DataPageHeader

func (*DataPageHeader) GetDefinitionLevelEncoding

func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding

func (*DataPageHeader) GetEncoding

func (p *DataPageHeader) GetEncoding() Encoding

func (*DataPageHeader) GetNumValues

func (p *DataPageHeader) GetNumValues() int32

func (*DataPageHeader) GetRepetitionLevelEncoding

func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding

func (*DataPageHeader) GetStatistics

func (p *DataPageHeader) GetStatistics() *Statistics

func (*DataPageHeader) IsSetStatistics

func (p *DataPageHeader) IsSetStatistics() bool

func (*DataPageHeader) String

func (p *DataPageHeader) String() string

type DataPageHeaderV2

type DataPageHeaderV2 struct {
	NumValues                  int32       `thrift:"num_values,1,required" json:"num_values"`
	NumNulls                   int32       `thrift:"num_nulls,2,required" json:"num_nulls"`
	NumRows                    int32       `thrift:"num_rows,3,required" json:"num_rows"`
	Encoding                   Encoding    `thrift:"encoding,4,required" json:"encoding"`
	DefinitionLevelsByteLength int32       `thrift:"definition_levels_byte_length,5,required" json:"definition_levels_byte_length"`
	RepetitionLevelsByteLength int32       `thrift:"repetition_levels_byte_length,6,required" json:"repetition_levels_byte_length"`
	IsCompressed               bool        `thrift:"is_compressed,7" json:"is_compressed,omitempty"`
	Statistics                 *Statistics `thrift:"statistics,8" json:"statistics,omitempty"`
}

New page format allowing reading levels without decompressing the data. Repetition and definition levels are uncompressed. The remaining section containing the data is compressed if is_compressed is true.

Attributes:

  • NumValues: Number of values, including NULLs, in this data page. *
  • NumNulls: Number of NULL values, in this data page.

Number of non-null = num_values - num_nulls which is also the number of values in the data section *

  • NumRows: Number of rows in this data page, which means pages change on record boundaries (r = 0) *
  • Encoding: Encoding used for data in this page *
  • DefinitionLevelsByteLength: length of the definition levels
  • RepetitionLevelsByteLength: length of the repetition levels
  • IsCompressed: whether the values are compressed.

Which means the section of the page between definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) is compressed with the compression_codec. If missing, it is considered compressed.

  • Statistics: optional statistics for this column chunk
var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2

func NewDataPageHeaderV2

func NewDataPageHeaderV2() *DataPageHeaderV2

func (*DataPageHeaderV2) GetDefinitionLevelsByteLength

func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32

func (*DataPageHeaderV2) GetEncoding

func (p *DataPageHeaderV2) GetEncoding() Encoding

func (*DataPageHeaderV2) GetIsCompressed

func (p *DataPageHeaderV2) GetIsCompressed() bool

func (*DataPageHeaderV2) GetNumNulls

func (p *DataPageHeaderV2) GetNumNulls() int32

func (*DataPageHeaderV2) GetNumRows

func (p *DataPageHeaderV2) GetNumRows() int32

func (*DataPageHeaderV2) GetNumValues

func (p *DataPageHeaderV2) GetNumValues() int32

func (*DataPageHeaderV2) GetRepetitionLevelsByteLength

func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32

func (*DataPageHeaderV2) GetStatistics

func (p *DataPageHeaderV2) GetStatistics() *Statistics

func (*DataPageHeaderV2) IsSetIsCompressed

func (p *DataPageHeaderV2) IsSetIsCompressed() bool

func (*DataPageHeaderV2) IsSetStatistics

func (p *DataPageHeaderV2) IsSetStatistics() bool

func (*DataPageHeaderV2) String

func (p *DataPageHeaderV2) String() string

type DateType

type DateType struct {
}
var LogicalType_DATE_DEFAULT *DateType

func NewDateType

func NewDateType() *DateType

func (*DateType) String

func (p *DateType) String() string

type DecimalType

type DecimalType struct {
	Scale     int32 `thrift:"scale,1,required" json:"scale"`
	Precision int32 `thrift:"precision,2,required" json:"precision"`
}

Decimal logical type annotation

To maintain forward-compatibility in v1, implementations using this logical type must also set scale and precision on the annotated SchemaElement.

Allowed for physical types: INT32, INT64, FIXED, and BINARY

Attributes:

  • Scale
  • Precision
var LogicalType_DECIMAL_DEFAULT *DecimalType

func NewDecimalType

func NewDecimalType() *DecimalType

func (*DecimalType) GetPrecision

func (p *DecimalType) GetPrecision() int32

func (*DecimalType) GetScale

func (p *DecimalType) GetScale() int32

func (*DecimalType) String

func (p *DecimalType) String() string

type DictionaryPageHeader

type DictionaryPageHeader struct {
	NumValues int32    `thrift:"num_values,1,required" json:"num_values"`
	Encoding  Encoding `thrift:"encoding,2,required" json:"encoding"`
	IsSorted  *bool    `thrift:"is_sorted,3" json:"is_sorted,omitempty"`
}

TODO: *

Attributes:

  • NumValues: Number of values in the dictionary *
  • Encoding: Encoding using this dictionary page *
  • IsSorted: If true, the entries in the dictionary are sorted in ascending order *
var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader

func NewDictionaryPageHeader

func NewDictionaryPageHeader() *DictionaryPageHeader

func (*DictionaryPageHeader) GetEncoding

func (p *DictionaryPageHeader) GetEncoding() Encoding

func (*DictionaryPageHeader) GetIsSorted

func (p *DictionaryPageHeader) GetIsSorted() bool

func (*DictionaryPageHeader) GetNumValues

func (p *DictionaryPageHeader) GetNumValues() int32

func (*DictionaryPageHeader) IsSetIsSorted

func (p *DictionaryPageHeader) IsSetIsSorted() bool

func (*DictionaryPageHeader) String

func (p *DictionaryPageHeader) String() string

type Encoding

type Encoding int64

Encodings supported by Parquet. Not all encodings are valid for all types. These enums are also used to specify the encoding of definition and repetition levels. See the accompanying doc for the details of the more complicated encodings.

const (
	Encoding_PLAIN                   Encoding = 0
	Encoding_PLAIN_DICTIONARY        Encoding = 2
	Encoding_RLE                     Encoding = 3
	Encoding_BIT_PACKED              Encoding = 4
	Encoding_DELTA_BINARY_PACKED     Encoding = 5
	Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6
	Encoding_DELTA_BYTE_ARRAY        Encoding = 7
	Encoding_RLE_DICTIONARY          Encoding = 8
)

func EncodingFromString

func EncodingFromString(s string) (Encoding, error)

func EncodingPtr

func EncodingPtr(v Encoding) *Encoding

func (Encoding) MarshalText

func (p Encoding) MarshalText() ([]byte, error)

func (Encoding) String

func (p Encoding) String() string

func (*Encoding) UnmarshalText

func (p *Encoding) UnmarshalText(text []byte) error

type EnumType

type EnumType struct {
}
var LogicalType_ENUM_DEFAULT *EnumType

func NewEnumType

func NewEnumType() *EnumType

func (*EnumType) String

func (p *EnumType) String() string

type FieldRepetitionType

type FieldRepetitionType int64

Representation of Schemas

const (
	FieldRepetitionType_REQUIRED FieldRepetitionType = 0
	FieldRepetitionType_OPTIONAL FieldRepetitionType = 1
	FieldRepetitionType_REPEATED FieldRepetitionType = 2
)
var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType

func FieldRepetitionTypeFromString

func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error)

func FieldRepetitionTypePtr

func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType

func (FieldRepetitionType) MarshalText

func (p FieldRepetitionType) MarshalText() ([]byte, error)

func (FieldRepetitionType) String

func (p FieldRepetitionType) String() string

func (*FieldRepetitionType) UnmarshalText

func (p *FieldRepetitionType) UnmarshalText(text []byte) error

type FileMetaData

type FileMetaData struct {
	Version          int32            `thrift:"version,1,required" json:"version"`
	Schema           []*SchemaElement `thrift:"schema,2,required" json:"schema"`
	NumRows          int64            `thrift:"num_rows,3,required" json:"num_rows"`
	RowGroups        []*RowGroup      `thrift:"row_groups,4,required" json:"row_groups"`
	KeyValueMetadata []*KeyValue      `thrift:"key_value_metadata,5" json:"key_value_metadata,omitempty"`
	CreatedBy        *string          `thrift:"created_by,6" json:"created_by,omitempty"`
	ColumnOrders     []*ColumnOrder   `thrift:"column_orders,7" json:"column_orders,omitempty"`
}

Description for file metadata

Attributes:

  • Version: Version of this file *
  • Schema: Parquet schema for this file. This schema contains metadata for all the columns.

The schema is represented as a tree with a single root. The nodes of the tree are flattened to a list by doing a depth-first traversal. The column metadata contains the path in the schema for that column which can be used to map columns to nodes in the schema. The first element is the root *

  • NumRows: Number of rows in this file *
  • RowGroups: Row groups in this file *
  • KeyValueMetadata: Optional key/value metadata *
  • CreatedBy: String for application that wrote this file. This should be in the format

<Application> version <App Version> (build <App Build Hash>). e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)

  • ColumnOrders: Sort order used for the min_value and max_value fields of each column in

this file. Each sort order corresponds to one column, determined by its position in the list, matching the position of the column in the schema.

Without column_orders, the meaning of the min_value and max_value fields is undefined. To ensure well-defined behaviour, if min_value and max_value are written to a Parquet file, column_orders must be written as well.

The obsolete min and max fields are always sorted by signed comparison regardless of column_orders.

func NewFileMetaData

func NewFileMetaData() *FileMetaData

func (*FileMetaData) GetColumnOrders

func (p *FileMetaData) GetColumnOrders() []*ColumnOrder

func (*FileMetaData) GetCreatedBy

func (p *FileMetaData) GetCreatedBy() string

func (*FileMetaData) GetKeyValueMetadata

func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue

func (*FileMetaData) GetNumRows

func (p *FileMetaData) GetNumRows() int64

func (*FileMetaData) GetRowGroups

func (p *FileMetaData) GetRowGroups() []*RowGroup

func (*FileMetaData) GetSchema

func (p *FileMetaData) GetSchema() []*SchemaElement

func (*FileMetaData) GetVersion

func (p *FileMetaData) GetVersion() int32

func (*FileMetaData) IsSetColumnOrders

func (p *FileMetaData) IsSetColumnOrders() bool

func (*FileMetaData) IsSetCreatedBy

func (p *FileMetaData) IsSetCreatedBy() bool

func (*FileMetaData) IsSetKeyValueMetadata

func (p *FileMetaData) IsSetKeyValueMetadata() bool

func (*FileMetaData) Read

func (meta *FileMetaData) Read(r io.Reader) error

FileMetaData.Read reads the object from an io.Reader

func (*FileMetaData) String

func (p *FileMetaData) String() string

type IndexPageHeader

type IndexPageHeader struct {
}
var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader

func NewIndexPageHeader

func NewIndexPageHeader() *IndexPageHeader

func (*IndexPageHeader) String

func (p *IndexPageHeader) String() string

type IntType

type IntType struct {
	BitWidth int8 `thrift:"bitWidth,1,required" json:"bitWidth"`
	IsSigned bool `thrift:"isSigned,2,required" json:"isSigned"`
}

Integer logical type annotation

bitWidth must be 8, 16, 32, or 64.

Allowed for physical types: INT32, INT64

Attributes:

  • BitWidth
  • IsSigned
var LogicalType_INTEGER_DEFAULT *IntType

func NewIntType

func NewIntType() *IntType

func (*IntType) GetBitWidth

func (p *IntType) GetBitWidth() int8

func (*IntType) GetIsSigned

func (p *IntType) GetIsSigned() bool

func (*IntType) String

func (p *IntType) String() string

type JsonType

type JsonType struct {
}

Embedded JSON logical type annotation

Allowed for physical types: BINARY

var LogicalType_JSON_DEFAULT *JsonType

func NewJsonType

func NewJsonType() *JsonType

func (*JsonType) String

func (p *JsonType) String() string

type KeyValue

type KeyValue struct {
	Key   string  `thrift:"key,1,required" json:"key"`
	Value *string `thrift:"value,2" json:"value,omitempty"`
}

Wrapper struct to store key values

Attributes:

  • Key
  • Value

func NewKeyValue

func NewKeyValue() *KeyValue

func (*KeyValue) GetKey

func (p *KeyValue) GetKey() string

func (*KeyValue) GetValue

func (p *KeyValue) GetValue() string

func (*KeyValue) IsSetValue

func (p *KeyValue) IsSetValue() bool

func (*KeyValue) String

func (p *KeyValue) String() string

type ListType

type ListType struct {
}
var LogicalType_LIST_DEFAULT *ListType

func NewListType

func NewListType() *ListType

func (*ListType) String

func (p *ListType) String() string

type LogicalType

type LogicalType struct {
	STRING    *StringType    `thrift:"STRING,1" json:"STRING,omitempty"`
	MAP       *MapType       `thrift:"MAP,2" json:"MAP,omitempty"`
	LIST      *ListType      `thrift:"LIST,3" json:"LIST,omitempty"`
	ENUM      *EnumType      `thrift:"ENUM,4" json:"ENUM,omitempty"`
	DECIMAL   *DecimalType   `thrift:"DECIMAL,5" json:"DECIMAL,omitempty"`
	DATE      *DateType      `thrift:"DATE,6" json:"DATE,omitempty"`
	TIME      *TimeType      `thrift:"TIME,7" json:"TIME,omitempty"`
	TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" json:"TIMESTAMP,omitempty"`
	// unused field # 9
	INTEGER *IntType  `thrift:"INTEGER,10" json:"INTEGER,omitempty"`
	UNKNOWN *NullType `thrift:"UNKNOWN,11" json:"UNKNOWN,omitempty"`
	JSON    *JsonType `thrift:"JSON,12" json:"JSON,omitempty"`
	BSON    *BsonType `thrift:"BSON,13" json:"BSON,omitempty"`
	UUID    *UUIDType `thrift:"UUID,14" json:"UUID,omitempty"`
}

LogicalType annotations to replace ConvertedType.

To maintain compatibility, implementations using LogicalType for a SchemaElement must also set the corresponding ConvertedType from the following table.

Attributes:

  • STRING
  • MAP
  • LIST
  • ENUM
  • DECIMAL
  • DATE
  • TIME
  • TIMESTAMP
  • INTEGER
  • UNKNOWN
  • JSON
  • BSON
  • UUID
var SchemaElement_LogicalType_DEFAULT *LogicalType

func NewLogicalType

func NewLogicalType() *LogicalType

func (*LogicalType) CountSetFieldsLogicalType

func (p *LogicalType) CountSetFieldsLogicalType() int

func (*LogicalType) GetBSON

func (p *LogicalType) GetBSON() *BsonType

func (*LogicalType) GetDATE

func (p *LogicalType) GetDATE() *DateType

func (*LogicalType) GetDECIMAL

func (p *LogicalType) GetDECIMAL() *DecimalType

func (*LogicalType) GetENUM

func (p *LogicalType) GetENUM() *EnumType

func (*LogicalType) GetINTEGER

func (p *LogicalType) GetINTEGER() *IntType

func (*LogicalType) GetJSON

func (p *LogicalType) GetJSON() *JsonType

func (*LogicalType) GetLIST

func (p *LogicalType) GetLIST() *ListType

func (*LogicalType) GetMAP

func (p *LogicalType) GetMAP() *MapType

func (*LogicalType) GetSTRING

func (p *LogicalType) GetSTRING() *StringType

func (*LogicalType) GetTIME

func (p *LogicalType) GetTIME() *TimeType

func (*LogicalType) GetTIMESTAMP

func (p *LogicalType) GetTIMESTAMP() *TimestampType

func (*LogicalType) GetUNKNOWN

func (p *LogicalType) GetUNKNOWN() *NullType

func (*LogicalType) GetUUID

func (p *LogicalType) GetUUID() *UUIDType

func (*LogicalType) IsSetBSON

func (p *LogicalType) IsSetBSON() bool

func (*LogicalType) IsSetDATE

func (p *LogicalType) IsSetDATE() bool

func (*LogicalType) IsSetDECIMAL

func (p *LogicalType) IsSetDECIMAL() bool

func (*LogicalType) IsSetENUM

func (p *LogicalType) IsSetENUM() bool

func (*LogicalType) IsSetINTEGER

func (p *LogicalType) IsSetINTEGER() bool

func (*LogicalType) IsSetJSON

func (p *LogicalType) IsSetJSON() bool

func (*LogicalType) IsSetLIST

func (p *LogicalType) IsSetLIST() bool

func (*LogicalType) IsSetMAP

func (p *LogicalType) IsSetMAP() bool

func (*LogicalType) IsSetSTRING

func (p *LogicalType) IsSetSTRING() bool

func (*LogicalType) IsSetTIME

func (p *LogicalType) IsSetTIME() bool

func (*LogicalType) IsSetTIMESTAMP

func (p *LogicalType) IsSetTIMESTAMP() bool

func (*LogicalType) IsSetUNKNOWN

func (p *LogicalType) IsSetUNKNOWN() bool

func (*LogicalType) IsSetUUID

func (p *LogicalType) IsSetUUID() bool

func (*LogicalType) String

func (p *LogicalType) String() string

type MapType

type MapType struct {
}
var LogicalType_MAP_DEFAULT *MapType

func NewMapType

func NewMapType() *MapType

func (*MapType) String

func (p *MapType) String() string

type MicroSeconds

type MicroSeconds struct {
}
var TimeUnit_MICROS_DEFAULT *MicroSeconds

func NewMicroSeconds

func NewMicroSeconds() *MicroSeconds

func (*MicroSeconds) String

func (p *MicroSeconds) String() string

type MilliSeconds

type MilliSeconds struct {
}

Time units for logical types

var TimeUnit_MILLIS_DEFAULT *MilliSeconds

func NewMilliSeconds

func NewMilliSeconds() *MilliSeconds

func (*MilliSeconds) String

func (p *MilliSeconds) String() string

type NullType

type NullType struct {
}

Logical type to annotate a column that is always null.

Sometimes when discovering the schema of existing data, values are always null and the physical type can't be determined. This annotation signals the case where the physical type was guessed from all null values.

var LogicalType_UNKNOWN_DEFAULT *NullType

func NewNullType

func NewNullType() *NullType

func (*NullType) String

func (p *NullType) String() string

type OffsetIndex

type OffsetIndex struct {
	PageLocations []*PageLocation `thrift:"page_locations,1,required" json:"page_locations"`
}

Attributes:

  • PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required that page_locations[i].first_row_index < page_locations[i+1].first_row_index.

func NewOffsetIndex

func NewOffsetIndex() *OffsetIndex

func (*OffsetIndex) GetPageLocations

func (p *OffsetIndex) GetPageLocations() []*PageLocation

func (*OffsetIndex) String

func (p *OffsetIndex) String() string

type PageEncodingStats

type PageEncodingStats struct {
	PageType PageType `thrift:"page_type,1,required" json:"page_type"`
	Encoding Encoding `thrift:"encoding,2,required" json:"encoding"`
	Count    int32    `thrift:"count,3,required" json:"count"`
}

statistics of a given page type and encoding

Attributes:

  • PageType: the page type (data/dic/...)
  • Encoding: encoding of the page
  • Count: number of pages of this type with this encoding

func NewPageEncodingStats

func NewPageEncodingStats() *PageEncodingStats

func (*PageEncodingStats) GetCount

func (p *PageEncodingStats) GetCount() int32

func (*PageEncodingStats) GetEncoding

func (p *PageEncodingStats) GetEncoding() Encoding

func (*PageEncodingStats) GetPageType

func (p *PageEncodingStats) GetPageType() PageType

func (*PageEncodingStats) String

func (p *PageEncodingStats) String() string
type PageHeader struct {
	Type                 PageType              `thrift:"type,1,required" json:"type"`
	UncompressedPageSize int32                 `thrift:"uncompressed_page_size,2,required" json:"uncompressed_page_size"`
	CompressedPageSize   int32                 `thrift:"compressed_page_size,3,required" json:"compressed_page_size"`
	Crc                  *int32                `thrift:"crc,4" json:"crc,omitempty"`
	DataPageHeader       *DataPageHeader       `thrift:"data_page_header,5" json:"data_page_header,omitempty"`
	IndexPageHeader      *IndexPageHeader      `thrift:"index_page_header,6" json:"index_page_header,omitempty"`
	DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" json:"dictionary_page_header,omitempty"`
	DataPageHeaderV2     *DataPageHeaderV2     `thrift:"data_page_header_v2,8" json:"data_page_header_v2,omitempty"`
}

Attributes:

  • Type: the type of the page: indicates which of the *_header fields is set
  • UncompressedPageSize: Uncompressed page size in bytes (not including this header)
  • CompressedPageSize: Compressed page size in bytes (not including this header)
  • Crc: 32bit crc for the data below. This allows for disabling checksumming in HDFS if only a few pages need to be read

  • DataPageHeader
  • IndexPageHeader
  • DictionaryPageHeader
  • DataPageHeaderV2

func NewPageHeader

func NewPageHeader() *PageHeader

func (*PageHeader) GetCompressedPageSize

func (p *PageHeader) GetCompressedPageSize() int32

func (*PageHeader) GetCrc

func (p *PageHeader) GetCrc() int32

func (*PageHeader) GetDataPageHeader

func (p *PageHeader) GetDataPageHeader() *DataPageHeader

func (*PageHeader) GetDataPageHeaderV2

func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2

func (*PageHeader) GetDictionaryPageHeader

func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader

func (*PageHeader) GetIndexPageHeader

func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader

func (*PageHeader) GetType

func (p *PageHeader) GetType() PageType

func (*PageHeader) GetUncompressedPageSize

func (p *PageHeader) GetUncompressedPageSize() int32

func (*PageHeader) IsSetCrc

func (p *PageHeader) IsSetCrc() bool

func (*PageHeader) IsSetDataPageHeader

func (p *PageHeader) IsSetDataPageHeader() bool

func (*PageHeader) IsSetDataPageHeaderV2

func (p *PageHeader) IsSetDataPageHeaderV2() bool

func (*PageHeader) IsSetDictionaryPageHeader

func (p *PageHeader) IsSetDictionaryPageHeader() bool

func (*PageHeader) IsSetIndexPageHeader

func (p *PageHeader) IsSetIndexPageHeader() bool

func (*PageHeader) Read

func (ph *PageHeader) Read(r io.Reader) error

PageHeader.Read reads the object from an io.Reader.

func (*PageHeader) String

func (p *PageHeader) String() string

type PageLocation

type PageLocation struct {
	Offset             int64 `thrift:"offset,1,required" json:"offset"`
	CompressedPageSize int32 `thrift:"compressed_page_size,2,required" json:"compressed_page_size"`
	FirstRowIndex      int64 `thrift:"first_row_index,3,required" json:"first_row_index"`
}

Attributes:

  • Offset: Offset of the page in the file
  • CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header length

  • FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages change on record boundaries (r = 0).

func NewPageLocation

func NewPageLocation() *PageLocation

func (*PageLocation) GetCompressedPageSize

func (p *PageLocation) GetCompressedPageSize() int32

func (*PageLocation) GetFirstRowIndex

func (p *PageLocation) GetFirstRowIndex() int64

func (*PageLocation) GetOffset

func (p *PageLocation) GetOffset() int64

func (*PageLocation) String

func (p *PageLocation) String() string

type PageType

type PageType int64
const (
	PageType_DATA_PAGE       PageType = 0
	PageType_INDEX_PAGE      PageType = 1
	PageType_DICTIONARY_PAGE PageType = 2
	PageType_DATA_PAGE_V2    PageType = 3
)

func PageTypeFromString

func PageTypeFromString(s string) (PageType, error)

func PageTypePtr

func PageTypePtr(v PageType) *PageType

func (PageType) MarshalText

func (p PageType) MarshalText() ([]byte, error)

func (PageType) String

func (p PageType) String() string

func (*PageType) UnmarshalText

func (p *PageType) UnmarshalText(text []byte) error

type RowGroup

type RowGroup struct {
	Columns        []*ColumnChunk   `thrift:"columns,1,required" json:"columns"`
	TotalByteSize  int64            `thrift:"total_byte_size,2,required" json:"total_byte_size"`
	NumRows        int64            `thrift:"num_rows,3,required" json:"num_rows"`
	SortingColumns []*SortingColumn `thrift:"sorting_columns,4" json:"sorting_columns,omitempty"`
}

Attributes:

  • Columns: Metadata for each column chunk in this row group.

This list must have the same order as the SchemaElement list in FileMetaData.

  • TotalByteSize: Total byte size of all the uncompressed column data in this row group
  • NumRows: Number of rows in this row group
  • SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup. The sorting columns can be a subset of all the columns.

func NewRowGroup

func NewRowGroup() *RowGroup

func (*RowGroup) GetColumns

func (p *RowGroup) GetColumns() []*ColumnChunk

func (*RowGroup) GetNumRows

func (p *RowGroup) GetNumRows() int64

func (*RowGroup) GetSortingColumns

func (p *RowGroup) GetSortingColumns() []*SortingColumn

func (*RowGroup) GetTotalByteSize

func (p *RowGroup) GetTotalByteSize() int64

func (*RowGroup) IsSetSortingColumns

func (p *RowGroup) IsSetSortingColumns() bool

func (*RowGroup) String

func (p *RowGroup) String() string

type SchemaElement

type SchemaElement struct {
	Type           *Type                `thrift:"type,1" json:"type,omitempty"`
	TypeLength     *int32               `thrift:"type_length,2" json:"type_length,omitempty"`
	RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" json:"repetition_type,omitempty"`
	Name           string               `thrift:"name,4,required" json:"name"`
	NumChildren    *int32               `thrift:"num_children,5" json:"num_children,omitempty"`
	ConvertedType  *ConvertedType       `thrift:"converted_type,6" json:"converted_type,omitempty"`
	Scale          *int32               `thrift:"scale,7" json:"scale,omitempty"`
	Precision      *int32               `thrift:"precision,8" json:"precision,omitempty"`
	FieldID        *int32               `thrift:"field_id,9" json:"field_id,omitempty"`
	LogicalType    *LogicalType         `thrift:"logicalType,10" json:"logicalType,omitempty"`
}

Represents an element inside a schema definition.

  • if it is a group (inner node) then type is undefined and num_children is defined
  • if it is a primitive type (leaf) then type is defined and num_children is undefined

the nodes are listed in depth first traversal order.

Attributes:

  • Type: Data type for this field. Not set if the current element is a non-leaf node
  • TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values.

Otherwise, if specified, this is the maximum bit length to store any of the values. (e.g. a low cardinality INT col could have this set to 3). Note that this is in the schema, and therefore fixed for the entire file.

  • RepetitionType: repetition of the field. The root of the schema does not have a repetition_type.

All other nodes must have one

  • Name: Name of the field in the schema
  • NumChildren: Nested fields. Since thrift does not support nested fields,

the nesting is flattened to a single list by a depth-first traversal. The children count is used to construct the nested relationship. This field is not set when the element is a primitive type

  • ConvertedType: When the schema is the result of a conversion from another model

Used to record the original type to help with cross conversion.

  • Scale: Used when this column contains decimal data.

See the DECIMAL converted type for more details.

  • Precision
  • FieldID: When the original schema supports field ids, this will save the

original field id in the parquet schema

  • LogicalType: The logical type of this SchemaElement

LogicalType replaces ConvertedType, but ConvertedType is still required for some logical types to ensure forward-compatibility in format v1.

func NewSchemaElement

func NewSchemaElement() *SchemaElement

func (*SchemaElement) GetConvertedType

func (p *SchemaElement) GetConvertedType() ConvertedType

func (*SchemaElement) GetFieldID

func (p *SchemaElement) GetFieldID() int32

func (*SchemaElement) GetLogicalType

func (p *SchemaElement) GetLogicalType() *LogicalType

func (*SchemaElement) GetName

func (p *SchemaElement) GetName() string

func (*SchemaElement) GetNumChildren

func (p *SchemaElement) GetNumChildren() int32

func (*SchemaElement) GetPrecision

func (p *SchemaElement) GetPrecision() int32

func (*SchemaElement) GetRepetitionType

func (p *SchemaElement) GetRepetitionType() FieldRepetitionType

func (*SchemaElement) GetScale

func (p *SchemaElement) GetScale() int32

func (*SchemaElement) GetType

func (p *SchemaElement) GetType() Type

func (*SchemaElement) GetTypeLength

func (p *SchemaElement) GetTypeLength() int32

func (*SchemaElement) IsSetConvertedType

func (p *SchemaElement) IsSetConvertedType() bool

func (*SchemaElement) IsSetFieldID

func (p *SchemaElement) IsSetFieldID() bool

func (*SchemaElement) IsSetLogicalType

func (p *SchemaElement) IsSetLogicalType() bool

func (*SchemaElement) IsSetNumChildren

func (p *SchemaElement) IsSetNumChildren() bool

func (*SchemaElement) IsSetPrecision

func (p *SchemaElement) IsSetPrecision() bool

func (*SchemaElement) IsSetRepetitionType

func (p *SchemaElement) IsSetRepetitionType() bool

func (*SchemaElement) IsSetScale

func (p *SchemaElement) IsSetScale() bool

func (*SchemaElement) IsSetType

func (p *SchemaElement) IsSetType() bool

func (*SchemaElement) IsSetTypeLength

func (p *SchemaElement) IsSetTypeLength() bool

func (*SchemaElement) String

func (p *SchemaElement) String() string

type SortingColumn

type SortingColumn struct {
	ColumnIdx  int32 `thrift:"column_idx,1,required" json:"column_idx"`
	Descending bool  `thrift:"descending,2,required" json:"descending"`
	NullsFirst bool  `thrift:"nulls_first,3,required" json:"nulls_first"`
}

Wrapper struct to specify sort order

Attributes:

  • ColumnIdx: The column index (in this row group)
  • Descending: If true, indicates this column is sorted in descending order.
  • NullsFirst: If true, nulls will come before non-null values, otherwise, nulls go at the end.

func NewSortingColumn

func NewSortingColumn() *SortingColumn

func (*SortingColumn) GetColumnIdx

func (p *SortingColumn) GetColumnIdx() int32

func (*SortingColumn) GetDescending

func (p *SortingColumn) GetDescending() bool

func (*SortingColumn) GetNullsFirst

func (p *SortingColumn) GetNullsFirst() bool

func (*SortingColumn) String

func (p *SortingColumn) String() string

type Statistics

type Statistics struct {
	Max           []byte `thrift:"max,1" json:"max,omitempty"`
	Min           []byte `thrift:"min,2" json:"min,omitempty"`
	NullCount     *int64 `thrift:"null_count,3" json:"null_count,omitempty"`
	DistinctCount *int64 `thrift:"distinct_count,4" json:"distinct_count,omitempty"`
	MaxValue      []byte `thrift:"max_value,5" json:"max_value,omitempty"`
	MinValue      []byte `thrift:"min_value,6" json:"min_value,omitempty"`
}

Statistics per row group and per page. All fields are optional.

Attributes:

  • Max: DEPRECATED: min and max value of the column. Use min_value and max_value.

Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.

These fields encode min and max values determined by signed comparison only. New files should use the correct order for a column's logical type and store the values in the min_value and max_value fields.

To support older readers, these may be set when the column order is signed.

  • Min
  • NullCount: count of null values in the column
  • DistinctCount: count of distinct values occurring
  • MaxValue: Min and max values for the column, determined by its ColumnOrder.

Values are encoded using PLAIN encoding, except that variable-length byte arrays do not include a length prefix.

  • MinValue
var ColumnMetaData_Statistics_DEFAULT *Statistics
var DataPageHeaderV2_Statistics_DEFAULT *Statistics
var DataPageHeader_Statistics_DEFAULT *Statistics

func NewStatistics

func NewStatistics() *Statistics

func (*Statistics) GetDistinctCount

func (p *Statistics) GetDistinctCount() int64

func (*Statistics) GetMax

func (p *Statistics) GetMax() []byte

func (*Statistics) GetMaxValue

func (p *Statistics) GetMaxValue() []byte

func (*Statistics) GetMin

func (p *Statistics) GetMin() []byte

func (*Statistics) GetMinValue

func (p *Statistics) GetMinValue() []byte

func (*Statistics) GetNullCount

func (p *Statistics) GetNullCount() int64

func (*Statistics) IsSetDistinctCount

func (p *Statistics) IsSetDistinctCount() bool

func (*Statistics) IsSetMax

func (p *Statistics) IsSetMax() bool

func (*Statistics) IsSetMaxValue

func (p *Statistics) IsSetMaxValue() bool

func (*Statistics) IsSetMin

func (p *Statistics) IsSetMin() bool

func (*Statistics) IsSetMinValue

func (p *Statistics) IsSetMinValue() bool

func (*Statistics) IsSetNullCount

func (p *Statistics) IsSetNullCount() bool

func (*Statistics) String

func (p *Statistics) String() string

type StringType

type StringType struct {
}

Empty structs to use as logical type annotations

var LogicalType_STRING_DEFAULT *StringType

func NewStringType

func NewStringType() *StringType

func (*StringType) String

func (p *StringType) String() string

type TimeType

type TimeType struct {
	IsAdjustedToUTC bool      `thrift:"isAdjustedToUTC,1,required" json:"isAdjustedToUTC"`
	Unit            *TimeUnit `thrift:"unit,2,required" json:"unit"`
}

Time logical type annotation

Allowed for physical types: INT32 (millis), INT64 (micros)

Attributes:

  • IsAdjustedToUTC
  • Unit
var LogicalType_TIME_DEFAULT *TimeType

func NewTimeType

func NewTimeType() *TimeType

func (*TimeType) GetIsAdjustedToUTC

func (p *TimeType) GetIsAdjustedToUTC() bool

func (*TimeType) GetUnit

func (p *TimeType) GetUnit() *TimeUnit

func (*TimeType) IsSetUnit

func (p *TimeType) IsSetUnit() bool

func (*TimeType) String

func (p *TimeType) String() string

type TimeUnit

type TimeUnit struct {
	MILLIS *MilliSeconds `thrift:"MILLIS,1" json:"MILLIS,omitempty"`
	MICROS *MicroSeconds `thrift:"MICROS,2" json:"MICROS,omitempty"`
}

Attributes:

  • MILLIS
  • MICROS
var TimeType_Unit_DEFAULT *TimeUnit
var TimestampType_Unit_DEFAULT *TimeUnit

func NewTimeUnit

func NewTimeUnit() *TimeUnit

func (*TimeUnit) CountSetFieldsTimeUnit

func (p *TimeUnit) CountSetFieldsTimeUnit() int

func (*TimeUnit) GetMICROS

func (p *TimeUnit) GetMICROS() *MicroSeconds

func (*TimeUnit) GetMILLIS

func (p *TimeUnit) GetMILLIS() *MilliSeconds

func (*TimeUnit) IsSetMICROS

func (p *TimeUnit) IsSetMICROS() bool

func (*TimeUnit) IsSetMILLIS

func (p *TimeUnit) IsSetMILLIS() bool

func (*TimeUnit) String

func (p *TimeUnit) String() string

type TimestampType

type TimestampType struct {
	IsAdjustedToUTC bool      `thrift:"isAdjustedToUTC,1,required" json:"isAdjustedToUTC"`
	Unit            *TimeUnit `thrift:"unit,2,required" json:"unit"`
}

Timestamp logical type annotation

Allowed for physical types: INT64

Attributes:

  • IsAdjustedToUTC
  • Unit
var LogicalType_TIMESTAMP_DEFAULT *TimestampType

func NewTimestampType

func NewTimestampType() *TimestampType

func (*TimestampType) GetIsAdjustedToUTC

func (p *TimestampType) GetIsAdjustedToUTC() bool

func (*TimestampType) GetUnit

func (p *TimestampType) GetUnit() *TimeUnit

func (*TimestampType) IsSetUnit

func (p *TimestampType) IsSetUnit() bool

func (*TimestampType) String

func (p *TimestampType) String() string

type Type

type Type int64

Types supported by Parquet. These types are intended to be used in combination with the encodings to control the on disk storage format. For example INT16 is not included as a type since a good encoding of INT32 would handle this.

const (
	Type_BOOLEAN              Type = 0
	Type_INT32                Type = 1
	Type_INT64                Type = 2
	Type_INT96                Type = 3
	Type_FLOAT                Type = 4
	Type_DOUBLE               Type = 5
	Type_BYTE_ARRAY           Type = 6
	Type_FIXED_LEN_BYTE_ARRAY Type = 7
)
var SchemaElement_Type_DEFAULT Type

func TypeFromString

func TypeFromString(s string) (Type, error)

func TypePtr

func TypePtr(v Type) *Type

func (Type) MarshalText

func (p Type) MarshalText() ([]byte, error)

func (Type) String

func (p Type) String() string

func (*Type) UnmarshalText

func (p *Type) UnmarshalText(text []byte) error

type TypeDefinedOrder

type TypeDefinedOrder struct {
}

Empty struct to signal the order defined by the physical or logical type

var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder

func NewTypeDefinedOrder

func NewTypeDefinedOrder() *TypeDefinedOrder

func (*TypeDefinedOrder) String

func (p *TypeDefinedOrder) String() string

type UUIDType

type UUIDType struct {
}
var LogicalType_UUID_DEFAULT *UUIDType

func NewUUIDType

func NewUUIDType() *UUIDType

func (*UUIDType) String

func (p *UUIDType) String() string

Directories

Path Synopsis
internal

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL