orc

package module

v0.0.0-...-06dddf1 Latest Latest Go to latest Published: May 13, 2021 License: MIT Imports: 22 Imported by: 21

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/scritchley/orc

Links

Open Source Insights

README ¶

orc

Project Status

This project is still a work in progress.

Current Support

Column Encoding	Read	Go Type
SmallInt, Int, BigInt	✓	int64
Float, Double	✓	float32, float64
String, Char, and VarChar	✓	string
Boolean	✓	bool
TinyInt	✓	byte
Binary	✓	[]byte
Decimal	✓	orc.Decimal
Date	✓	orc.Date (time.Time)
Timestamp	✓	time.Time
Struct	✓	orc.Struct (map[string]interface{})
List	✓	[]interface{}
Map	✓	[]orc.MapEntry
Union	✓	interface{}

Writer support is in its late stages; however, I do not recommend using it yet.

Example

r, err := Open("./examples/demo-12-zlib.orc")
if err != nil {
    log.Fatal(err)
}
defer r.Close()

// Create a new Cursor reading the provided columns.
c := r.Select("_col0", "_col1", "_col2")

// Iterate over each stripe in the file.
for c.Stripes() {
    
    // Iterate over each row in the stripe.
    for c.Next() {
          
        // Retrieve a slice of interface values for the current row.
        log.Println(c.Row())
        
    }
   
}

if err := c.Err(); err != nil {
    log.Fatal(err)
}

Documentation ¶

Index ¶

Constants
Variables
func NewStringPosition(value string) *stringPosition
type BaseStatistics
- func NewBaseStatistics() BaseStatistics
- func (b BaseStatistics) Add(value interface{})
- func (b BaseStatistics) Merge(other ColumnStatistics)
- func (b BaseStatistics) Statistics() *proto.ColumnStatistics
type BaseTreeReader
- func NewBaseTreeReader(r io.Reader) BaseTreeReader
- func (b BaseTreeReader) Err() error
- func (b BaseTreeReader) IsPresent() bool
- func (b BaseTreeReader) Next() bool
type BaseTreeWriter
- func NewBaseTreeWriter(category Category, codec CompressionCodec) BaseTreeWriter
- func (b *BaseTreeWriter) AddPositionRecorder(recorder PositionRecorder)
- func (b *BaseTreeWriter) AddStream(kind *proto.Stream_Kind) Stream
- func (b *BaseTreeWriter) Close() error
- func (b *BaseTreeWriter) Flush() error
- func (b *BaseTreeWriter) RecordPositions()
- func (b *BaseTreeWriter) RowIndex() *proto.RowIndex
- func (b *BaseTreeWriter) Statistics() ColumnStatistics
- func (b *BaseTreeWriter) Streams() []Stream
- func (b *BaseTreeWriter) Write(i interface{}) error
type BinaryTreeReader
- func NewBinaryTreeReader(present, data, length io.Reader, encoding *proto.ColumnEncoding) (*BinaryTreeReader, error)
- func (r *BinaryTreeReader) Binary() []byte
- func (r *BinaryTreeReader) Err() error
- func (r *BinaryTreeReader) Next() bool
- func (r *BinaryTreeReader) Value() interface{}
type BooleanReader
- func NewBooleanReader(r io.ByteReader) *BooleanReader
- func (b *BooleanReader) Bool() bool
- func (b *BooleanReader) Err() error
- func (b *BooleanReader) Next() bool
- func (b *BooleanReader) Value() interface{}
type BooleanTreeReader
- func NewBooleanTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*BooleanTreeReader, error)
- func (b *BooleanTreeReader) Err() error
- func (b *BooleanTreeReader) Next() bool
- func (b *BooleanTreeReader) Value() interface{}
type BooleanTreeWriter
- func NewBooleanTreeWriter(category Category, codec CompressionCodec) (*BooleanTreeWriter, error)
- func (b *BooleanTreeWriter) Close() error
- func (b *BooleanTreeWriter) Encoding() *proto.ColumnEncoding
- func (b *BooleanTreeWriter) Flush() error
- func (b *BooleanTreeWriter) Write(value interface{}) error
type BooleanWriter
- func NewBooleanWriter(w io.ByteWriter) *BooleanWriter
- func (b *BooleanWriter) Close() error
- func (b *BooleanWriter) Flush() error
- func (b *BooleanWriter) WriteBool(t bool) error
type BucketStatistics
- func NewBucketStatistics() *BucketStatistics
type BufferedWriter
- func NewBufferedWriter(codec CompressionCodec) *BufferedWriter
- func (b *BufferedWriter) Close() error
- func (b *BufferedWriter) Flush() error
- func (b *BufferedWriter) Len() int
- func (b *BufferedWriter) Positions() []uint64
- func (b *BufferedWriter) Read(p []byte) (int, error)
- func (b *BufferedWriter) Reset()
- func (b *BufferedWriter) Write(p []byte) (int, error)
- func (b *BufferedWriter) WriteByte(c byte) error
type ByteTreeReader
- func NewByteTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*ByteTreeReader, error)
- func (b *ByteTreeReader) Err() error
- func (b *ByteTreeReader) Next() bool
- func (b *ByteTreeReader) Value() interface{}
type Category
- func (c Category) String() string
type ColumnStatistics
- func NewColumnStatistics(category Category) ColumnStatistics
type CompressionCodec
type CompressionNone
- func (c CompressionNone) Decoder(r io.Reader) io.Reader
- func (c CompressionNone) Encoder(w io.Writer) io.WriteCloser
type CompressionNoneEncoder
- func (c CompressionNoneEncoder) Close() error
- func (c CompressionNoneEncoder) Flush() error
- func (c CompressionNoneEncoder) Write(p []byte) (int, error)
type CompressionSnappy
- func (c CompressionSnappy) Decoder(r io.Reader) io.Reader
- func (c CompressionSnappy) Encoder(w io.Writer) io.WriteCloser
type CompressionSnappyDecoder
- func (c *CompressionSnappyDecoder) Read(p []byte) (int, error)
type CompressionSnappyEncoder
- func (c *CompressionSnappyEncoder) Close() error
- func (c *CompressionSnappyEncoder) Flush() error
- func (c *CompressionSnappyEncoder) Write(p []byte) (int, error)
type CompressionZlib
- func (c CompressionZlib) Decoder(r io.Reader) io.Reader
- func (c CompressionZlib) Encoder(w io.Writer) io.WriteCloser
type CompressionZlibDecoder
- func (c *CompressionZlibDecoder) Read(p []byte) (int, error)
type CompressionZlibEncoder
- func (c *CompressionZlibEncoder) Close() error
- func (c *CompressionZlibEncoder) Write(p []byte) (int, error)
type Cursor
- func (c *Cursor) Err() error
- func (c *Cursor) Next() bool
- func (c *Cursor) Row() []interface{}
- func (c *Cursor) RowIndex(column string) (*proto.RowIndex, error)
- func (c *Cursor) Scan(dest ...interface{}) error
- func (c *Cursor) Select(fields ...string) *Cursor
- func (c *Cursor) SelectStripe(n int) error
- func (c *Cursor) Stripes() bool
type Date
type DateTreeReader
- func NewDateTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*DateTreeReader, error)
- func (d *DateTreeReader) Date() Date
- func (d *DateTreeReader) Value() interface{}
type DateTreeWriter
- func NewDateTreeWriter(category Category, codec CompressionCodec) (*DateTreeWriter, error)
- func (w *DateTreeWriter) Close() error
- func (w *DateTreeWriter) Encoding() *proto.ColumnEncoding
- func (w *DateTreeWriter) Flush() error
- func (w *DateTreeWriter) Write(value interface{}) error
- func (w *DateTreeWriter) WriteDate(date time.Time) error
type Decimal
- func NewDecimal(mant *big.Int, scale int64) Decimal
- func (d Decimal) Float32() float32
- func (d Decimal) Float64() float64
- func (d Decimal) MarshalJSON() ([]byte, error)
- func (d Decimal) String() string
type DecimalTreeReader
- func NewDecimalTreeReader(present, data, secondary io.Reader, encoding *proto.ColumnEncoding, ...) (*DecimalTreeReader, error)
- func (d *DecimalTreeReader) Decimal() Decimal
- func (d *DecimalTreeReader) Err() error
- func (d *DecimalTreeReader) Next() bool
- func (d *DecimalTreeReader) Value() interface{}
type Dictionary
- func NewDictionary(initialCapacity int) *Dictionary
- func (d *Dictionary) Size() int
type DictionaryV2
- func NewDictionaryV2() *DictionaryV2
type Double
type Float
- func (f Float) MarshalJSON() ([]byte, error)
type FloatTreeReader
- func NewFloatTreeReader(bytesPerValue int, present, data io.Reader, encoding *proto.ColumnEncoding) (*FloatTreeReader, error)
- func (r *FloatTreeReader) Double() Double
- func (r *FloatTreeReader) Err() error
- func (r *FloatTreeReader) Float() Float
- func (r *FloatTreeReader) Next() bool
- func (r *FloatTreeReader) Value() interface{}
type FloatTreeWriter
- func NewFloatTreeWriter(category Category, codec CompressionCodec, bytesPerValue int) (*FloatTreeWriter, error)
- func (f *FloatTreeWriter) Close() error
- func (f *FloatTreeWriter) Encoding() *proto.ColumnEncoding
- func (f *FloatTreeWriter) Flush() error
- func (f *FloatTreeWriter) Write(value interface{}) error
- func (f *FloatTreeWriter) WriteDouble(value interface{}) error
- func (f *FloatTreeWriter) WriteFloat(value interface{}) error
type IntegerReader
type IntegerStatistics
- func NewIntegerStatistics() *IntegerStatistics
- func (i *IntegerStatistics) Add(value interface{})
- func (i *IntegerStatistics) Merge(other ColumnStatistics)
- func (i *IntegerStatistics) Reset()
- func (i *IntegerStatistics) Statistics() *proto.ColumnStatistics
type IntegerTreeReader
- func NewIntegerTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*IntegerTreeReader, error)
- func (i *IntegerTreeReader) Err() error
- func (i *IntegerTreeReader) Next() bool
- func (i *IntegerTreeReader) Value() interface{}
type IntegerTreeWriter
- func NewIntegerTreeWriter(category Category, codec CompressionCodec) (*IntegerTreeWriter, error)
- func (w *IntegerTreeWriter) Close() error
- func (w *IntegerTreeWriter) Encoding() *proto.ColumnEncoding
- func (w *IntegerTreeWriter) Flush() error
- func (w *IntegerTreeWriter) Write(value interface{}) error
- func (w *IntegerTreeWriter) WriteInt(value int64) error
type IntegerWriter
type ListTreeReader
- func NewListTreeReader(present, length io.Reader, value TreeReader, encoding *proto.ColumnEncoding) (*ListTreeReader, error)
- func (r *ListTreeReader) Err() error
- func (r *ListTreeReader) List() []interface{}
- func (r *ListTreeReader) Next() bool
- func (r *ListTreeReader) Value() interface{}
type ListTreeWriter
- func NewListTreeWriter(category Category, codec CompressionCodec, child TreeWriter) (*ListTreeWriter, error)
- func (l *ListTreeWriter) Close() error
- func (l *ListTreeWriter) Encoding() *proto.ColumnEncoding
- func (l *ListTreeWriter) Flush() error
- func (l *ListTreeWriter) Write(value interface{}) error
type MapEntry
type MapTreeReader
- func NewMapTreeReader(present, length io.Reader, key, value TreeReader, ...) (*MapTreeReader, error)
- func (m *MapTreeReader) Map() []MapEntry
- func (m *MapTreeReader) Next() bool
- func (m *MapTreeReader) Value() interface{}
type MapTreeWriter
- func NewMapTreeWriter(category Category, codec CompressionCodec, keyWriter, valueWriter TreeWriter) (*MapTreeWriter, error)
- func (m *MapTreeWriter) Close() error
- func (m *MapTreeWriter) Encoding() *proto.ColumnEncoding
- func (m *MapTreeWriter) Flush() error
- func (m *MapTreeWriter) Write(value interface{}) error
type PositionRecorder
type PositionRecorders
- func NewPositionRecorders(recorders ...PositionRecorder) PositionRecorders
type RLEEncodingType
- func (i RLEEncodingType) String() string
type Reader
- func NewReader(r SizedReaderAt) (*Reader, error)
- func Open(filepath string) (*Reader, error)
- func (r *Reader) Close() error
- func (r *Reader) Metadata() *proto.Metadata
- func (r *Reader) NumRows() int
- func (r *Reader) NumStripes() (int, error)
- func (r *Reader) Schema() *TypeDescription
- func (r *Reader) Select(fields ...string) *Cursor
type RunLengthByteReader
- func NewRunLengthByteReader(r io.ByteReader) *RunLengthByteReader
- func (b *RunLengthByteReader) Byte() byte
- func (b *RunLengthByteReader) Err() error
- func (b *RunLengthByteReader) Next() bool
- func (b *RunLengthByteReader) ReadByte() (byte, error)
- func (b *RunLengthByteReader) Value() interface{}
type RunLengthByteWriter
- func NewRunLengthByteWriter(w io.ByteWriter) *RunLengthByteWriter
- func (b *RunLengthByteWriter) Close() error
- func (b *RunLengthByteWriter) Flush() error
- func (b *RunLengthByteWriter) WriteByte(value byte) error
type RunLengthIntegerReader
- func NewRunLengthIntegerReader(r io.ByteReader, signed bool) *RunLengthIntegerReader
- func (r *RunLengthIntegerReader) Err() error
- func (r *RunLengthIntegerReader) Int() int64
- func (r *RunLengthIntegerReader) Next() bool
- func (r *RunLengthIntegerReader) ReadByte() (byte, error)
- func (r *RunLengthIntegerReader) Value() interface{}
type RunLengthIntegerReaderV2
- func NewRunLengthIntegerReaderV2(r io.ByteReader, signed bool, skipCorrupt bool) *RunLengthIntegerReaderV2
- func (r *RunLengthIntegerReaderV2) Err() error
- func (r *RunLengthIntegerReaderV2) Int() int64
- func (r *RunLengthIntegerReaderV2) Next() bool
- func (r *RunLengthIntegerReaderV2) ReadByte() (byte, error)
- func (r *RunLengthIntegerReaderV2) Value() interface{}
type RunLengthIntegerWriter
- func NewRunLengthIntegerWriter(w io.ByteWriter, signed bool) *RunLengthIntegerWriter
- func (w *RunLengthIntegerWriter) Close() error
- func (w *RunLengthIntegerWriter) Flush() error
- func (w *RunLengthIntegerWriter) WriteInt(value int64) error
type RunLengthIntegerWriterV2
- func NewRunLengthIntegerWriterV2(w io.ByteWriter, signed bool) *RunLengthIntegerWriterV2
- func (i *RunLengthIntegerWriterV2) Close() error
- func (i *RunLengthIntegerWriterV2) Flush() error
- func (i *RunLengthIntegerWriterV2) WriteInt(val int64) error
type SizedReaderAt
type Stream
- func (s Stream) Positions() []uint64
type StringDictionaryTreeReader
- func NewStringDictionaryTreeReader(present, data, length, dictionary io.Reader, encoding *proto.ColumnEncoding) (*StringDictionaryTreeReader, error)
- func (s *StringDictionaryTreeReader) Err() error
- func (s *StringDictionaryTreeReader) Next() bool
- func (s *StringDictionaryTreeReader) String() string
- func (s *StringDictionaryTreeReader) Value() interface{}
type StringDirectTreeReader
- func NewStringDirectTreeReader(present, data, length io.Reader, kind proto.ColumnEncoding_Kind) (*StringDirectTreeReader, error)
- func (s *StringDirectTreeReader) Err() error
- func (s *StringDirectTreeReader) Next() bool
- func (s *StringDirectTreeReader) String() string
- func (s *StringDirectTreeReader) Value() interface{}
type StringStatistics
- func NewStringStatistics() *StringStatistics
- func (s *StringStatistics) Add(value interface{})
- func (s *StringStatistics) Merge(other ColumnStatistics)
- func (s *StringStatistics) Reset()
- func (s *StringStatistics) Statistics() *proto.ColumnStatistics
type StringTreeReader
- func NewStringTreeReader(present, data, length, dictionary io.Reader, encoding *proto.ColumnEncoding) (StringTreeReader, error)
type StringTreeWriter
- func NewStringTreeWriter(category Category, codec CompressionCodec) (*StringTreeWriter, error)
- func (s *StringTreeWriter) Close() error
- func (s *StringTreeWriter) Encoding() *proto.ColumnEncoding
- func (s *StringTreeWriter) Flush() error
- func (s *StringTreeWriter) Write(value interface{}) error
- func (s *StringTreeWriter) WriteString(value string) error
type Stripe
- func NewStripe(info *proto.StripeInformation, included ...int) *Stripe
- func (s *Stripe) FromReader(r *Reader) error
type Struct
type StructTreeReader
- func NewStructTreeReader(present io.Reader, children map[string]TreeReader) (*StructTreeReader, error)
- func (s *StructTreeReader) Err() error
- func (s *StructTreeReader) Next() bool
- func (s *StructTreeReader) Struct() Struct
- func (s *StructTreeReader) Value() interface{}
type StructTreeWriter
- func NewStructTreeWriter(category Category, codec CompressionCodec, children []TreeWriter) (*StructTreeWriter, error)
- func (s *StructTreeWriter) Close() error
- func (s *StructTreeWriter) Encoding() *proto.ColumnEncoding
- func (s *StructTreeWriter) Flush() error
- func (s *StructTreeWriter) RecordPositions()
- func (s *StructTreeWriter) Write(value interface{}) error
type TimestampStatistics
- func NewTimestampStatistics() *TimestampStatistics
- func (i *TimestampStatistics) Add(value interface{})
- func (i *TimestampStatistics) Merge(other ColumnStatistics)
- func (i *TimestampStatistics) Reset()
- func (i *TimestampStatistics) Statistics() *proto.ColumnStatistics
type TimestampTreeReader
- func NewTimestampTreeReader(present, data, secondary io.Reader, encoding *proto.ColumnEncoding) (*TimestampTreeReader, error)
- func (t *TimestampTreeReader) Err() error
- func (t *TimestampTreeReader) Next() bool
- func (t *TimestampTreeReader) Timestamp() time.Time
- func (t *TimestampTreeReader) Value() interface{}
type TimestampTreeWriter
- func NewTimestampTreeWriter(category Category, codec CompressionCodec) (*TimestampTreeWriter, error)
- func (w *TimestampTreeWriter) Close() error
- func (w *TimestampTreeWriter) Encoding() *proto.ColumnEncoding
- func (w *TimestampTreeWriter) Flush() error
- func (w *TimestampTreeWriter) Write(value interface{}) error
- func (w *TimestampTreeWriter) WriteTimestamp(value time.Time) error
type TimestampWriter
type TreeReader
type TreeWriter
type TypeDescription
- func NewTypeDescription(fns ...TypeDescriptionTransformFunc) (*TypeDescription, error)
- func ParseSchema(schema string) (*TypeDescription, error)
- func (t *TypeDescription) Columns() []string
- func (t *TypeDescription) GetField(fieldName string) (*TypeDescription, error)
- func (t *TypeDescription) MarshalJSON() ([]byte, error)
- func (t *TypeDescription) String() string
- func (t *TypeDescription) ToJSON() string
- func (t *TypeDescription) Type() *proto.Type
- func (t *TypeDescription) Types() []*proto.Type
type TypeDescriptionTransformFunc
- func AddChild(fns ...TypeDescriptionTransformFunc) TypeDescriptionTransformFunc
- func AddField(field string, fns ...TypeDescriptionTransformFunc) TypeDescriptionTransformFunc
- func AddUnionChild(fns ...TypeDescriptionTransformFunc) TypeDescriptionTransformFunc
- func SetCategory(category Category) TypeDescriptionTransformFunc
type UnionTreeReader
- func NewUnionTreeReader(present, data io.Reader, children []TreeReader) (*UnionTreeReader, error)
- func (u *UnionTreeReader) Err() error
- func (u *UnionTreeReader) Next() bool
- func (u *UnionTreeReader) Value() interface{}
type UnionTreeWriter
- func NewUnionTreeWriter(category Category, codec CompressionCodec, children []TreeWriter) (*UnionTreeWriter, error)
- func (s *UnionTreeWriter) Close() error
- func (s *UnionTreeWriter) Encoding() *proto.ColumnEncoding
- func (s *UnionTreeWriter) Flush() error
- func (s *UnionTreeWriter) RecordPositions()
- func (s *UnionTreeWriter) Write(value interface{}) error
- func (s *UnionTreeWriter) WriteUnion(value UnionValue) error
type UnionValue
type Version
type Writer
- func NewWriter(w io.Writer, fns ...WriterConfigFunc) (*Writer, error)
- func (w *Writer) Close() error
- func (w *Writer) Flush() error
- func (w *Writer) Schema() *TypeDescription
- func (w *Writer) Write(values ...interface{}) error
type WriterConfigFunc
- func AddUserMetadata(name string, value []byte) WriterConfigFunc
- func SetCompression(codec CompressionCodec) WriterConfigFunc
- func SetSchema(schema *TypeDescription) WriterConfigFunc
- func SetStripeTargetSize(stripeTargetSize int64) WriterConfigFunc

Constants ¶

View Source

const (
	// MinRepeatSize is the minimum number of repeated values required to use run length encoding.
	MinRepeatSize = 3
	// MaxShortRepeatLength is the maximum run length used for RLEV2IntShortRepeat sequences.
	MaxShortRepeatLength = 10
	// MaxScope is the maximum number of values that can be buffered before being flushed.
	MaxScope = 512
)

View Source

const (
	MaxRepeatSize = 127 + MinRepeatSize
	MinDelta      = -128
	MaxDelta      = 127
)

View Source

const (
	// InitialDictionarySize is the initial size used when creating the dictionary.
	InitialDictionarySize = 4096
	// DictionaryEncodingThreshold is the threshold ratio of unique items to the total count of items.
	DictionaryEncodingThreshold = 0.49
)

View Source

const (
	BufferSize = 64
)

View Source

const (
	MaxLiteralSize = 128
)

View Source

const (
	// TimestampBaseSeconds is 1 January 2015, the base value for all timestamp values.
	TimestampBaseSeconds int64 = 1420070400
)

Variables ¶

View Source

var (
	// Version0_11 is an ORC file version compatible with Hive 0.11.
	Version0_11 = Version{"0.11", 0, 11}
	// Version0_12 is an ORC file version compatible with Hive 0.12.
	Version0_12 = Version{"0.12", 0, 12}
)

View Source

var (
	ErrEOFUnsignedVInt = errors.New("EOF while reading unsigned vint")
	ErrCorrupt         = errors.New("ORC file is corrupt")
)

View Source

var (
	CategoryBoolean   = Category{"boolean", true, proto.Type_BOOLEAN.Enum()}
	CategoryByte      = Category{"tinyint", true, proto.Type_BYTE.Enum()}
	CategoryShort     = Category{"smallint", true, proto.Type_SHORT.Enum()}
	CategoryInt       = Category{"int", true, proto.Type_INT.Enum()}
	CategoryLong      = Category{"bigint", true, proto.Type_LONG.Enum()}
	CategoryFloat     = Category{"float", true, proto.Type_FLOAT.Enum()}
	CategoryDouble    = Category{"double", true, proto.Type_DOUBLE.Enum()}
	CategoryString    = Category{"string", true, proto.Type_STRING.Enum()}
	CategoryDate      = Category{"date", true, proto.Type_DATE.Enum()}
	CategoryTimestamp = Category{"timestamp", true, proto.Type_TIMESTAMP.Enum()}
	CategoryBinary    = Category{"binary", true, proto.Type_BINARY.Enum()}
	CategoryDecimal   = Category{"decimal", true, proto.Type_DECIMAL.Enum()}
	CategoryVarchar   = Category{"varchar", true, proto.Type_VARCHAR.Enum()}
	CategoryChar      = Category{"char", true, proto.Type_CHAR.Enum()}
	CategoryList      = Category{"array", false, proto.Type_LIST.Enum()}
	CategoryMap       = Category{"map", false, proto.Type_MAP.Enum()}
	CategoryStruct    = Category{"struct", false, proto.Type_STRUCT.Enum()}
	CategoryUnion     = Category{"uniontype", false, proto.Type_UNION.Enum()}
	Categories        = []Category{
		CategoryBoolean,
		CategoryByte,
		CategoryShort,
		CategoryInt,
		CategoryLong,
		CategoryFloat,
		CategoryDouble,
		CategoryString,
		CategoryDate,
		CategoryTimestamp,
		CategoryBinary,
		CategoryDecimal,
		CategoryVarchar,
		CategoryChar,
		CategoryList,
		CategoryMap,
		CategoryStruct,
		CategoryUnion,
	}
)

View Source

var (

	// WriterImplementation identifies the writer implementation.
	WriterImplementation = uint32(3)
	// WriterVersion identifies the writer version being used.
	WriterVersion = uint32(6)
	// DefaultStripeTargetSize is the size in bytes over which a stripe should be written to the underlying file.
	DefaultStripeTargetSize int64 = 200 * 1024 * 1024
	// DefaultStripeTargetRowCount is the number of rows over which a stripe should be written to the underlying file.
	DefaultStripeTargetRowCount int64 = 1024 * 1024
	// DefaultStripeWriterTimezone is the timezone that writer adds into the stripe footer.
	DefaultStripeWriterTimezone string = "GMT"
	// DefaultCompressionChunkSize is the default size of compression chunks within each stream.
	DefaultCompressionChunkSize uint64 = 256 * 1024
	// DefaultRowIndexStride is the default number of rows between indexes.
	DefaultRowIndexStride uint32 = 10000
)

Functions ¶

func NewStringPosition ¶

func NewStringPosition(value string) *stringPosition

Types ¶

type BaseStatistics ¶

type BaseStatistics struct {
	*proto.ColumnStatistics
}

func NewBaseStatistics ¶

func NewBaseStatistics() BaseStatistics

func (BaseStatistics) Add ¶

func (b BaseStatistics) Add(value interface{})

func (BaseStatistics) Merge ¶

func (b BaseStatistics) Merge(other ColumnStatistics)

func (BaseStatistics) Statistics ¶

func (b BaseStatistics) Statistics() *proto.ColumnStatistics

type BaseTreeReader ¶

type BaseTreeReader struct {
	*BooleanReader
}

BaseTreeReader wraps a *BooleanReader and is used for reading the Present stream in all TreeReader implementations.

func NewBaseTreeReader ¶

func NewBaseTreeReader(r io.Reader) BaseTreeReader

NewBaseTreeReader returns a new BaseTreeReader from the provided io.Reader.

func (BaseTreeReader) Err ¶

func (b BaseTreeReader) Err() error

Err returns the last error to occur.

func (BaseTreeReader) IsPresent ¶

func (b BaseTreeReader) IsPresent() bool

IsPresent returns true if a value is available and is present in the stream.

func (BaseTreeReader) Next ¶

func (b BaseTreeReader) Next() bool

Next returns the next available value.

type BaseTreeWriter ¶

type BaseTreeWriter struct {
	// contains filtered or unexported fields
}

BaseTreeWriter is a TreeWriter implementation that writes to the present stream. It is the basis for all other TreeWriter implementations.

func NewBaseTreeWriter ¶

func NewBaseTreeWriter(category Category, codec CompressionCodec) BaseTreeWriter

NewBaseTreeWriter returns a new BaseTreeWriter, the TreeWriter that is embedded in all other TreeWriter implementations.

func (*BaseTreeWriter) AddPositionRecorder ¶

func (b *BaseTreeWriter) AddPositionRecorder(recorder PositionRecorder)

func (*BaseTreeWriter) AddStream ¶

func (b *BaseTreeWriter) AddStream(kind *proto.Stream_Kind) Stream

func (*BaseTreeWriter) Close ¶

func (b *BaseTreeWriter) Close() error

Close flushes the underlying BufferedWriter, returning an error if one occurs.

func (*BaseTreeWriter) Flush ¶

func (b *BaseTreeWriter) Flush() error

Flush flushes the underlying BufferedWriter, returning an error if one occurs.

func (*BaseTreeWriter) RecordPositions ¶

func (b *BaseTreeWriter) RecordPositions()

func (*BaseTreeWriter) RowIndex ¶

func (b *BaseTreeWriter) RowIndex() *proto.RowIndex

func (*BaseTreeWriter) Statistics ¶

func (b *BaseTreeWriter) Statistics() ColumnStatistics

func (*BaseTreeWriter) Streams ¶

func (b *BaseTreeWriter) Streams() []Stream

func (*BaseTreeWriter) Write ¶

func (b *BaseTreeWriter) Write(i interface{}) error

Write checks whether i is nil and writes an appropriate true or false value to the underlying isPresent stream.

type BinaryTreeReader ¶

type BinaryTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

BinaryTreeReader is a TreeReader that reads a Binary type column.

func NewBinaryTreeReader ¶

func NewBinaryTreeReader(present, data, length io.Reader, encoding *proto.ColumnEncoding) (*BinaryTreeReader, error)

func (*BinaryTreeReader) Binary ¶

func (r *BinaryTreeReader) Binary() []byte

func (*BinaryTreeReader) Err ¶

func (r *BinaryTreeReader) Err() error

func (*BinaryTreeReader) Next ¶

func (r *BinaryTreeReader) Next() bool

func (*BinaryTreeReader) Value ¶

func (r *BinaryTreeReader) Value() interface{}

type BooleanReader ¶

type BooleanReader struct {
	*RunLengthByteReader
	// contains filtered or unexported fields
}

func NewBooleanReader ¶

func NewBooleanReader(r io.ByteReader) *BooleanReader

func (*BooleanReader) Bool ¶

func (b *BooleanReader) Bool() bool

func (*BooleanReader) Err ¶

func (b *BooleanReader) Err() error

func (*BooleanReader) Next ¶

func (b *BooleanReader) Next() bool

func (*BooleanReader) Value ¶

func (b *BooleanReader) Value() interface{}

type BooleanTreeReader ¶

type BooleanTreeReader struct {
	BaseTreeReader
	*BooleanReader
}

func NewBooleanTreeReader ¶

func NewBooleanTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*BooleanTreeReader, error)

func (*BooleanTreeReader) Err ¶

func (b *BooleanTreeReader) Err() error

func (*BooleanTreeReader) Next ¶

func (b *BooleanTreeReader) Next() bool

func (*BooleanTreeReader) Value ¶

func (b *BooleanTreeReader) Value() interface{}

type BooleanTreeWriter ¶

type BooleanTreeWriter struct {
	BaseTreeWriter
	*BooleanWriter
	*BufferedWriter
}

func NewBooleanTreeWriter ¶

func NewBooleanTreeWriter(category Category, codec CompressionCodec) (*BooleanTreeWriter, error)

func (*BooleanTreeWriter) Close ¶

func (b *BooleanTreeWriter) Close() error

func (*BooleanTreeWriter) Encoding ¶

func (b *BooleanTreeWriter) Encoding() *proto.ColumnEncoding

func (*BooleanTreeWriter) Flush ¶

func (b *BooleanTreeWriter) Flush() error

func (*BooleanTreeWriter) Write ¶

func (b *BooleanTreeWriter) Write(value interface{}) error

type BooleanWriter ¶

type BooleanWriter struct {
	*RunLengthByteWriter
	// contains filtered or unexported fields
}

func NewBooleanWriter ¶

func NewBooleanWriter(w io.ByteWriter) *BooleanWriter

func (*BooleanWriter) Close ¶

func (b *BooleanWriter) Close() error

func (*BooleanWriter) Flush ¶

func (b *BooleanWriter) Flush() error

func (*BooleanWriter) WriteBool ¶

func (b *BooleanWriter) WriteBool(t bool) error

type BucketStatistics ¶

type BucketStatistics struct {
	BaseStatistics
}

func NewBucketStatistics ¶

func NewBucketStatistics() *BucketStatistics

type BufferedWriter ¶

type BufferedWriter struct {
	sync.Mutex
	// contains filtered or unexported fields
}

func NewBufferedWriter ¶

func NewBufferedWriter(codec CompressionCodec) *BufferedWriter

NewBufferedWriter returns a new BufferedWriter using the provided CompressionCodec.

func (*BufferedWriter) Close ¶

func (b *BufferedWriter) Close() error

Close flushes any buffered bytes to the underlying writer.

func (*BufferedWriter) Flush ¶

func (b *BufferedWriter) Flush() error

func (*BufferedWriter) Len ¶

func (b *BufferedWriter) Len() int

func (*BufferedWriter) Positions ¶

func (b *BufferedWriter) Positions() []uint64

func (*BufferedWriter) Read ¶

func (b *BufferedWriter) Read(p []byte) (int, error)

func (*BufferedWriter) Reset ¶

func (b *BufferedWriter) Reset()

Reset resets the underlying encoded buffer.

func (*BufferedWriter) Write ¶

func (b *BufferedWriter) Write(p []byte) (int, error)

Write writes the provided byte slice to the underlying buffer. If the desired chunk size is reached, the buffer is compressed.

func (*BufferedWriter) WriteByte ¶

func (b *BufferedWriter) WriteByte(c byte) error

WriteByte writes a byte to the underlying buffer. If the desired chunk size is reached, the buffer is compressed.

type ByteTreeReader ¶

type ByteTreeReader struct {
	BaseTreeReader
	*RunLengthByteReader
}

func NewByteTreeReader ¶

func NewByteTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*ByteTreeReader, error)

func (*ByteTreeReader) Err ¶

func (b *ByteTreeReader) Err() error

func (*ByteTreeReader) Next ¶

func (b *ByteTreeReader) Next() bool

func (*ByteTreeReader) Value ¶

func (b *ByteTreeReader) Value() interface{}

type Category ¶

type Category struct {
	// contains filtered or unexported fields
}

func (Category) String ¶

func (c Category) String() string

type ColumnStatistics ¶

type ColumnStatistics interface {
	Statistics() *proto.ColumnStatistics
	Add(value interface{})
	Merge(other ColumnStatistics)
	Reset()
}

func NewColumnStatistics ¶

func NewColumnStatistics(category Category) ColumnStatistics

type CompressionCodec ¶

type CompressionCodec interface {
	Encoder(w io.Writer) io.WriteCloser
	Decoder(r io.Reader) io.Reader
}

CompressionCodec is an interface that provides methods for creating an Encoder or Decoder of the CompressionCodec implementation.

type CompressionNone ¶

type CompressionNone struct{}

CompressionNone is a CompressionCodec that implements no compression.

func (CompressionNone) Decoder ¶

func (c CompressionNone) Decoder(r io.Reader) io.Reader

Decoder implements the CompressionCodec interface.

func (CompressionNone) Encoder ¶

func (c CompressionNone) Encoder(w io.Writer) io.WriteCloser

Encoder implements the CompressionCodec interface.

type CompressionNoneEncoder ¶

type CompressionNoneEncoder struct {
	// contains filtered or unexported fields
}

func (CompressionNoneEncoder) Close ¶

func (c CompressionNoneEncoder) Close() error

func (CompressionNoneEncoder) Flush ¶

func (c CompressionNoneEncoder) Flush() error

func (CompressionNoneEncoder) Write ¶

func (c CompressionNoneEncoder) Write(p []byte) (int, error)

type CompressionSnappy ¶

type CompressionSnappy struct{}

CompressionSnappy implements the CompressionCodec for Snappy compression.

func (CompressionSnappy) Decoder ¶

func (c CompressionSnappy) Decoder(r io.Reader) io.Reader

Decoder implements the CompressionCodec interface.

func (CompressionSnappy) Encoder ¶

func (c CompressionSnappy) Encoder(w io.Writer) io.WriteCloser

Encoder implements the CompressionCodec interface. This is currently not implemented.

type CompressionSnappyDecoder ¶

type CompressionSnappyDecoder struct {
	// contains filtered or unexported fields
}

CompressionSnappyDecoder implements the decoder for CompressionSnappy.

func (*CompressionSnappyDecoder) Read ¶

func (c *CompressionSnappyDecoder) Read(p []byte) (int, error)

type CompressionSnappyEncoder ¶

type CompressionSnappyEncoder struct {
	// contains filtered or unexported fields
}

func (*CompressionSnappyEncoder) Close ¶

func (c *CompressionSnappyEncoder) Close() error

func (*CompressionSnappyEncoder) Flush ¶

func (c *CompressionSnappyEncoder) Flush() error

func (*CompressionSnappyEncoder) Write ¶

func (c *CompressionSnappyEncoder) Write(p []byte) (int, error)

type CompressionZlib ¶

type CompressionZlib struct {
	Level    int
	Strategy int
}

func (CompressionZlib) Decoder ¶

func (c CompressionZlib) Decoder(r io.Reader) io.Reader

Decoder implements the CompressionCodec interface.

func (CompressionZlib) Encoder ¶

func (c CompressionZlib) Encoder(w io.Writer) io.WriteCloser

Encoder implements the CompressionCodec interface. This is currently not implemented.

type CompressionZlibDecoder ¶

type CompressionZlibDecoder struct {
	// contains filtered or unexported fields
}

CompressionZlibDecoder implements the decoder for CompressionZlib.

func (*CompressionZlibDecoder) Read ¶

func (c *CompressionZlibDecoder) Read(p []byte) (int, error)

type CompressionZlibEncoder ¶

type CompressionZlibEncoder struct {
	Level int
	// contains filtered or unexported fields
}

CompressionZlibEncoder implements the encoder for CompressionZlib.

func (*CompressionZlibEncoder) Close ¶

func (c *CompressionZlibEncoder) Close() error

func (*CompressionZlibEncoder) Write ¶

func (c *CompressionZlibEncoder) Write(p []byte) (int, error)

type Cursor ¶

type Cursor struct {
	*Reader
	*Stripe
	// contains filtered or unexported fields
}

Cursor is used for iterating through the stripes and rows within the ORC file.

func (*Cursor) Err ¶

func (c *Cursor) Err() error

Err returns the last error to have occurred.

func (*Cursor) Next ¶

func (c *Cursor) Next() bool

Next returns true if another set of records is available.

func (*Cursor) Row ¶

func (c *Cursor) Row() []interface{}

Row returns the next row of values.

func (*Cursor) RowIndex ¶

func (c *Cursor) RowIndex(column string) (*proto.RowIndex, error)

RowIndex returns the row index for the provided column from the current stripe.

func (*Cursor) Scan ¶

func (c *Cursor) Scan(dest ...interface{}) error

Scan assigns the values returned by the readers to the destination slice.

func (*Cursor) Select ¶

func (c *Cursor) Select(fields ...string) *Cursor

Select determines the columns that will be read from the ORC file. Only streams for the selected columns will be loaded into memory.

func (*Cursor) SelectStripe ¶

func (c *Cursor) SelectStripe(n int) error

SelectStripe retrieves the stream information for the specified stripe.

func (*Cursor) Stripes ¶

func (c *Cursor) Stripes() bool

Stripes prepares the next stripe for reading, returning true once it is ready. It returns false if an error occurs whilst preparing the stripe.

type Date ¶

type Date struct {
	time.Time
}

Date is a date value represented by an underlying time.Time.

type DateTreeReader ¶

type DateTreeReader struct {
	*IntegerTreeReader
}

DateTreeReader is a TreeReader implementation that can read date column types.

func NewDateTreeReader ¶

func NewDateTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*DateTreeReader, error)

NewDateTreeReader returns a new DateTreeReader along with any error that occurs.

func (*DateTreeReader) Date ¶

func (d *DateTreeReader) Date() Date

Date returns the next date value as a time.Time.

func (*DateTreeReader) Value ¶

func (d *DateTreeReader) Value() interface{}

Value implements the TreeReader interface.

type DateTreeWriter ¶

type DateTreeWriter struct {
	BaseTreeWriter
	// contains filtered or unexported fields
}

DateTreeWriter is a TreeWriter implementation that writes a Date type column.

func NewDateTreeWriter ¶

func NewDateTreeWriter(category Category, codec CompressionCodec) (*DateTreeWriter, error)

NewDateTreeWriter returns a new DateTreeWriter.

func (*DateTreeWriter) Close ¶

func (w *DateTreeWriter) Close() error

Close closes the underlying writers returning an error if one occurs.

func (*DateTreeWriter) Encoding ¶

func (w *DateTreeWriter) Encoding() *proto.ColumnEncoding

Encoding returns the column encoding used for the DateTreeWriter.

func (*DateTreeWriter) Flush ¶

func (w *DateTreeWriter) Flush() error

Flush flushes the underlying writers returning an error if one occurs.

func (*DateTreeWriter) Write ¶

func (w *DateTreeWriter) Write(value interface{}) error

Write writes a value returning an error if one occurs. It accepts a time.Time or a nil value for writing nulls to the stream. Any other types will return an error.

func (*DateTreeWriter) WriteDate ¶

func (w *DateTreeWriter) WriteDate(date time.Time) error

WriteDate writes a Date value returning an error if one occurs.

func NewDecimal ¶

func NewDecimal(mant *big.Int, scale int64) Decimal

func (Decimal) Float32 ¶

func (d Decimal) Float32() float32

func (Decimal) Float64 ¶

func (d Decimal) Float64() float64

func (Decimal) MarshalJSON ¶

func (d Decimal) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (Decimal) String ¶

func (d Decimal) String() string

type DecimalTreeReader ¶

type DecimalTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

DecimalTreeReader is a TreeReader that reads a Decimal type column.

func NewDecimalTreeReader ¶

func NewDecimalTreeReader(present, data, secondary io.Reader, encoding *proto.ColumnEncoding, precision, scale int) (*DecimalTreeReader, error)

NewDecimalTreeReader returns a new instance of a DecimalTreeReader or an error if one occurs.

func (*DecimalTreeReader) Decimal ¶

func (d *DecimalTreeReader) Decimal() Decimal

Decimal returns the next decimal value as a Decimal.

func (*DecimalTreeReader) Err ¶

func (d *DecimalTreeReader) Err() error

Err returns the last error to have occurred.

func (*DecimalTreeReader) Next ¶

func (d *DecimalTreeReader) Next() bool

Next returns true if a value is available.

func (*DecimalTreeReader) Value ¶

func (d *DecimalTreeReader) Value() interface{}

Value returns the next decimal value as an interface{}.

type Dictionary ¶

type Dictionary struct {
	// contains filtered or unexported fields
}

Dictionary is a data structure that holds a distinct set of string values.

func NewDictionary ¶

func NewDictionary(initialCapacity int) *Dictionary

NewDictionary returns a new Dictionary initialised with the provided initialCapacity.

func (*Dictionary) Size ¶

func (d *Dictionary) Size() int

Size returns the number of values stored in the dictionary.

type DictionaryV2 ¶

type DictionaryV2 struct {
	// contains filtered or unexported fields
}

DictionaryV2 is a data structure that holds a distinct set of string values.

func NewDictionaryV2 ¶

func NewDictionaryV2() *DictionaryV2

NewDictionaryV2 returns a new DictionaryV2.

type Double ¶

type Double float64

Double is ORC double type i.e. a float64.

type Float ¶

type Float float32

func (Float) MarshalJSON ¶

func (f Float) MarshalJSON() ([]byte, error)

type FloatTreeReader ¶

type FloatTreeReader struct {
	BaseTreeReader
	io.Reader
	// contains filtered or unexported fields
}

func NewFloatTreeReader ¶

func NewFloatTreeReader(bytesPerValue int, present, data io.Reader, encoding *proto.ColumnEncoding) (*FloatTreeReader, error)

func (*FloatTreeReader) Double ¶

func (r *FloatTreeReader) Double() Double

Double returns the next Double value.

func (*FloatTreeReader) Err ¶

func (r *FloatTreeReader) Err() error

func (*FloatTreeReader) Float ¶

func (r *FloatTreeReader) Float() Float

func (*FloatTreeReader) Next ¶

func (r *FloatTreeReader) Next() bool

func (*FloatTreeReader) Value ¶

func (r *FloatTreeReader) Value() interface{}

type FloatTreeWriter ¶

type FloatTreeWriter struct {
	BaseTreeWriter
	*BufferedWriter
	// contains filtered or unexported fields
}

FloatTreeWriter is a TreeWriter that writes to a Float or Double column type.

func NewFloatTreeWriter ¶

func NewFloatTreeWriter(category Category, codec CompressionCodec, bytesPerValue int) (*FloatTreeWriter, error)

NewFloatTreeWriter returns a new FloatTreeWriter or an error if one occurs.

func (*FloatTreeWriter) Close ¶

func (f *FloatTreeWriter) Close() error

func (*FloatTreeWriter) Encoding ¶

func (f *FloatTreeWriter) Encoding() *proto.ColumnEncoding

func (*FloatTreeWriter) Flush ¶

func (f *FloatTreeWriter) Flush() error

func (*FloatTreeWriter) Write ¶

func (f *FloatTreeWriter) Write(value interface{}) error

Write writes a float or double value returning an error if one occurs.

func (*FloatTreeWriter) WriteDouble ¶

func (f *FloatTreeWriter) WriteDouble(value interface{}) error

func (*FloatTreeWriter) WriteFloat ¶

func (f *FloatTreeWriter) WriteFloat(value interface{}) error

type IntegerReader ¶

type IntegerReader interface {
	TreeReader
	Int() int64
}

IntegerReader is an interface that provides methods for reading an integer stream that uses V1 or V2 encoding methods.

type IntegerStatistics ¶

type IntegerStatistics struct {
	BaseStatistics
	// contains filtered or unexported fields
}

func NewIntegerStatistics ¶

func NewIntegerStatistics() *IntegerStatistics

func (*IntegerStatistics) Add ¶

func (i *IntegerStatistics) Add(value interface{})

func (*IntegerStatistics) Merge ¶

func (i *IntegerStatistics) Merge(other ColumnStatistics)

func (*IntegerStatistics) Reset ¶

func (i *IntegerStatistics) Reset()

func (*IntegerStatistics) Statistics ¶

func (i *IntegerStatistics) Statistics() *proto.ColumnStatistics

type IntegerTreeReader ¶

type IntegerTreeReader struct {
	BaseTreeReader
	IntegerReader
}

IntegerTreeReader is a TreeReader that can read Integer type streams.

func NewIntegerTreeReader ¶

func NewIntegerTreeReader(present, data io.Reader, encoding *proto.ColumnEncoding) (*IntegerTreeReader, error)

NewIntegerTreeReader returns a new IntegerTreeReader or an error if one occurs.

func (*IntegerTreeReader) Err ¶

func (i *IntegerTreeReader) Err() error

Err implements the TreeReader interface.

func (*IntegerTreeReader) Next ¶

func (i *IntegerTreeReader) Next() bool

Next implements the TreeReader interface.

func (*IntegerTreeReader) Value ¶

func (i *IntegerTreeReader) Value() interface{}

Value implements the TreeReader interface.

type IntegerTreeWriter ¶

type IntegerTreeWriter struct {
	BaseTreeWriter
	IntegerWriter
	*BufferedWriter
	// contains filtered or unexported fields
}

IntegerTreeWriter is a TreeWriter implementation that writes an integer type column.

func NewIntegerTreeWriter ¶

func NewIntegerTreeWriter(category Category, codec CompressionCodec) (*IntegerTreeWriter, error)

NewIntegerTreeWriter returns a new IntegerTreeWriter.

func (*IntegerTreeWriter) Close ¶

func (w *IntegerTreeWriter) Close() error

Close closes the underlying writers returning an error if one occurs.

func (*IntegerTreeWriter) Encoding ¶

func (w *IntegerTreeWriter) Encoding() *proto.ColumnEncoding

Encoding returns the column encoding used for the IntegerTreeWriter.

func (*IntegerTreeWriter) Flush ¶

func (w *IntegerTreeWriter) Flush() error

Flush flushes the underlying writers returning an error if one occurs.

func (*IntegerTreeWriter) Write ¶

func (w *IntegerTreeWriter) Write(value interface{}) error

Write writes a value returning an error if one occurs. It accepts any form of integer or a nil value for writing nulls to the stream. Any other types will return an error.

func (*IntegerTreeWriter) WriteInt ¶

func (w *IntegerTreeWriter) WriteInt(value int64) error

WriteInt writes an integer value returning an error if one occurs.

type IntegerWriter ¶

type IntegerWriter interface {
	WriteInt(value int64) error
	Close() error
	Flush() error
}

IntegerWriter is an interface implemented by all integer type writers.

type ListTreeReader ¶

type ListTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

func NewListTreeReader ¶

func NewListTreeReader(present, length io.Reader, value TreeReader, encoding *proto.ColumnEncoding) (*ListTreeReader, error)

func (*ListTreeReader) Err ¶

func (r *ListTreeReader) Err() error

func (*ListTreeReader) List ¶

func (r *ListTreeReader) List() []interface{}

func (*ListTreeReader) Next ¶

func (r *ListTreeReader) Next() bool

func (*ListTreeReader) Value ¶

func (r *ListTreeReader) Value() interface{}

type ListTreeWriter ¶

type ListTreeWriter struct {
	BaseTreeWriter
	// contains filtered or unexported fields
}

func NewListTreeWriter ¶

func NewListTreeWriter(category Category, codec CompressionCodec, child TreeWriter) (*ListTreeWriter, error)

func (*ListTreeWriter) Close ¶

func (l *ListTreeWriter) Close() error

func (*ListTreeWriter) Encoding ¶

func (l *ListTreeWriter) Encoding() *proto.ColumnEncoding

func (*ListTreeWriter) Flush ¶

func (l *ListTreeWriter) Flush() error

func (*ListTreeWriter) Write ¶

func (l *ListTreeWriter) Write(value interface{}) error

type MapEntry ¶

type MapEntry struct {
	Key   interface{} `json:"key"`
	Value interface{} `json:"value"`
}

MapEntry is an individual entry in a Map.

type MapTreeReader ¶

type MapTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

MapTreeReader is a TreeReader that reads from map encoded columns.

func NewMapTreeReader ¶

func NewMapTreeReader(present, length io.Reader, key, value TreeReader, encoding *proto.ColumnEncoding) (*MapTreeReader, error)

NewMapTreeReader returns a new instance of a MapTreeReader.

func (*MapTreeReader) Map ¶

func (m *MapTreeReader) Map() []MapEntry

Map returns the next available row of MapEntries.

func (*MapTreeReader) Next ¶

func (m *MapTreeReader) Next() bool

Next returns true if another row is available.

func (*MapTreeReader) Value ¶

func (m *MapTreeReader) Value() interface{}

Value implements the TreeReader interface, returning the next available row.

type MapTreeWriter ¶

type MapTreeWriter struct {
	BaseTreeWriter
	// contains filtered or unexported fields
}

func NewMapTreeWriter ¶

func NewMapTreeWriter(category Category, codec CompressionCodec, keyWriter, valueWriter TreeWriter) (*MapTreeWriter, error)

func (*MapTreeWriter) Close ¶

func (m *MapTreeWriter) Close() error

func (*MapTreeWriter) Encoding ¶

func (m *MapTreeWriter) Encoding() *proto.ColumnEncoding

func (*MapTreeWriter) Flush ¶

func (m *MapTreeWriter) Flush() error

func (*MapTreeWriter) Write ¶

func (m *MapTreeWriter) Write(value interface{}) error

type PositionRecorder ¶

type PositionRecorder interface {
	Positions() []uint64
}

type PositionRecorders ¶

type PositionRecorders []PositionRecorder

func NewPositionRecorders ¶

func NewPositionRecorders(recorders ...PositionRecorder) PositionRecorders

type RLEEncodingType ¶

type RLEEncodingType int

RLEEncodingType is a run length encoding type specified within the Apache ORC file documentation: https://orc.apache.org/docs/run-length.html

const (
	RLEV2IntShortRepeat RLEEncodingType = 0
	RLEV2IntDirect      RLEEncodingType = 1
	RLEV2IntPatchedBase RLEEncodingType = 2
	RLEV2IntDelta       RLEEncodingType = 3
)

func (RLEEncodingType) String ¶

func (i RLEEncodingType) String() string

type Reader ¶

type Reader struct {
	// contains filtered or unexported fields
}

func NewReader ¶

func NewReader(r SizedReaderAt) (*Reader, error)

func Open ¶

func Open(filepath string) (*Reader, error)

Open opens the file at the provided filepath.

func (*Reader) Close ¶

func (r *Reader) Close() error

func (*Reader) Metadata ¶

func (r *Reader) Metadata() *proto.Metadata

func (*Reader) NumRows ¶

func (r *Reader) NumRows() int

func (*Reader) NumStripes ¶

func (r *Reader) NumStripes() (int, error)

func (*Reader) Schema ¶

func (r *Reader) Schema() *TypeDescription

func (*Reader) Select ¶

func (r *Reader) Select(fields ...string) *Cursor

type RunLengthByteReader ¶

type RunLengthByteReader struct {
	// contains filtered or unexported fields
}

RunLengthByteReader reads a byte run length encoded stream from ByteReader r.

func NewRunLengthByteReader ¶

func NewRunLengthByteReader(r io.ByteReader) *RunLengthByteReader

func (*RunLengthByteReader) Byte ¶

func (b *RunLengthByteReader) Byte() byte

func (*RunLengthByteReader) Err ¶

func (b *RunLengthByteReader) Err() error

func (*RunLengthByteReader) Next ¶

func (b *RunLengthByteReader) Next() bool

func (*RunLengthByteReader) ReadByte ¶

func (b *RunLengthByteReader) ReadByte() (byte, error)

func (*RunLengthByteReader) Value ¶

func (b *RunLengthByteReader) Value() interface{}

type RunLengthByteWriter ¶

type RunLengthByteWriter struct {
	io.ByteWriter
	// contains filtered or unexported fields
}

func NewRunLengthByteWriter ¶

func NewRunLengthByteWriter(w io.ByteWriter) *RunLengthByteWriter

func (*RunLengthByteWriter) Close ¶

func (b *RunLengthByteWriter) Close() error

func (*RunLengthByteWriter) Flush ¶

func (b *RunLengthByteWriter) Flush() error

func (*RunLengthByteWriter) WriteByte ¶

func (b *RunLengthByteWriter) WriteByte(value byte) error

type RunLengthIntegerReader ¶

type RunLengthIntegerReader struct {
	// contains filtered or unexported fields
}

func NewRunLengthIntegerReader ¶

func NewRunLengthIntegerReader(r io.ByteReader, signed bool) *RunLengthIntegerReader

func (*RunLengthIntegerReader) Err ¶

func (r *RunLengthIntegerReader) Err() error

func (*RunLengthIntegerReader) Int ¶

func (r *RunLengthIntegerReader) Int() int64

func (*RunLengthIntegerReader) Next ¶

func (r *RunLengthIntegerReader) Next() bool

func (*RunLengthIntegerReader) ReadByte ¶

func (r *RunLengthIntegerReader) ReadByte() (byte, error)

func (*RunLengthIntegerReader) Value ¶

func (r *RunLengthIntegerReader) Value() interface{}

type RunLengthIntegerReaderV2 ¶

type RunLengthIntegerReaderV2 struct {
	// contains filtered or unexported fields
}

func NewRunLengthIntegerReaderV2 ¶

func NewRunLengthIntegerReaderV2(r io.ByteReader, signed bool, skipCorrupt bool) *RunLengthIntegerReaderV2

func (*RunLengthIntegerReaderV2) Err ¶

func (r *RunLengthIntegerReaderV2) Err() error

func (*RunLengthIntegerReaderV2) Int ¶

func (r *RunLengthIntegerReaderV2) Int() int64

func (*RunLengthIntegerReaderV2) Next ¶

func (r *RunLengthIntegerReaderV2) Next() bool

func (*RunLengthIntegerReaderV2) ReadByte ¶

func (r *RunLengthIntegerReaderV2) ReadByte() (byte, error)

func (*RunLengthIntegerReaderV2) Value ¶

func (r *RunLengthIntegerReaderV2) Value() interface{}

type RunLengthIntegerWriter ¶

type RunLengthIntegerWriter struct {
	// contains filtered or unexported fields
}

func NewRunLengthIntegerWriter ¶

func NewRunLengthIntegerWriter(w io.ByteWriter, signed bool) *RunLengthIntegerWriter

func (*RunLengthIntegerWriter) Close ¶

func (w *RunLengthIntegerWriter) Close() error

func (*RunLengthIntegerWriter) Flush ¶

func (w *RunLengthIntegerWriter) Flush() error

func (*RunLengthIntegerWriter) WriteInt ¶

func (w *RunLengthIntegerWriter) WriteInt(value int64) error

type RunLengthIntegerWriterV2 ¶

type RunLengthIntegerWriterV2 struct {
	// contains filtered or unexported fields
}

func NewRunLengthIntegerWriterV2 ¶

func NewRunLengthIntegerWriterV2(w io.ByteWriter, signed bool) *RunLengthIntegerWriterV2

func (*RunLengthIntegerWriterV2) Close ¶

func (i *RunLengthIntegerWriterV2) Close() error

func (*RunLengthIntegerWriterV2) Flush ¶

func (i *RunLengthIntegerWriterV2) Flush() error

func (*RunLengthIntegerWriterV2) WriteInt ¶

func (i *RunLengthIntegerWriterV2) WriteInt(val int64) error

type SizedReaderAt ¶

type SizedReaderAt interface {
	io.ReaderAt
	Size() int64
}

type Stream ¶

type Stream struct {
	// contains filtered or unexported fields
}

Stream is an individual stream for the TreeWriter.

func (Stream) Positions ¶

func (s Stream) Positions() []uint64

type StringDictionaryTreeReader ¶

type StringDictionaryTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

func NewStringDictionaryTreeReader ¶

func NewStringDictionaryTreeReader(present, data, length, dictionary io.Reader, encoding *proto.ColumnEncoding) (*StringDictionaryTreeReader, error)

func (*StringDictionaryTreeReader) Err ¶

func (s *StringDictionaryTreeReader) Err() error

func (*StringDictionaryTreeReader) Next ¶

func (s *StringDictionaryTreeReader) Next() bool

func (*StringDictionaryTreeReader) String ¶

func (s *StringDictionaryTreeReader) String() string

func (*StringDictionaryTreeReader) Value ¶

func (s *StringDictionaryTreeReader) Value() interface{}

type StringDirectTreeReader ¶

type StringDirectTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

StringDirectTreeReader is a StringTreeReader implementation that can read direct encoded string type columns.

func NewStringDirectTreeReader ¶

func NewStringDirectTreeReader(present, data, length io.Reader, kind proto.ColumnEncoding_Kind) (*StringDirectTreeReader, error)

func (*StringDirectTreeReader) Err ¶

func (s *StringDirectTreeReader) Err() error

func (*StringDirectTreeReader) Next ¶

func (s *StringDirectTreeReader) Next() bool

func (*StringDirectTreeReader) String ¶

func (s *StringDirectTreeReader) String() string

func (*StringDirectTreeReader) Value ¶

func (s *StringDirectTreeReader) Value() interface{}

type StringStatistics ¶

type StringStatistics struct {
	BaseStatistics
	// contains filtered or unexported fields
}

func NewStringStatistics ¶

func NewStringStatistics() *StringStatistics

func (*StringStatistics) Add ¶

func (s *StringStatistics) Add(value interface{})

func (*StringStatistics) Merge ¶

func (s *StringStatistics) Merge(other ColumnStatistics)

func (*StringStatistics) Reset ¶

func (s *StringStatistics) Reset()

func (*StringStatistics) Statistics ¶

func (s *StringStatistics) Statistics() *proto.ColumnStatistics

type StringTreeReader ¶

type StringTreeReader interface {
	TreeReader
	String() string
}

StringTreeReader is an interface that provides methods for reading a string stream.

func NewStringTreeReader ¶

func NewStringTreeReader(present, data, length, dictionary io.Reader, encoding *proto.ColumnEncoding) (StringTreeReader, error)

NewStringTreeReader returns a StringTreeReader implementation along with any error that occurs.

type StringTreeWriter ¶

type StringTreeWriter struct {
	BaseTreeWriter
	// contains filtered or unexported fields
}

StringTreeWriter is a TreeWriter implementation that writes to a string type column. It dynamically selects the most appropriate encoding format between direct and dictionary encoding based on the cardinality of the values up to the first call to Flush.

func NewStringTreeWriter ¶

func NewStringTreeWriter(category Category, codec CompressionCodec) (*StringTreeWriter, error)

NewStringTreeWriter returns a new StringTreeWriter or an error if one occurs.

func (*StringTreeWriter) Close ¶

func (s *StringTreeWriter) Close() error

Close closes the underlying writers returning an error if one occurs.

func (*StringTreeWriter) Encoding ¶

func (s *StringTreeWriter) Encoding() *proto.ColumnEncoding

Encoding returns the column encoding for the writer, either DICTIONARY_V2 or DIRECT_V2.

func (*StringTreeWriter) Flush ¶

func (s *StringTreeWriter) Flush() error

func (*StringTreeWriter) Write ¶

func (s *StringTreeWriter) Write(value interface{}) error

Write writes the provided value to the underlying writers. It returns an error if the value is not a string type or if an error occurs during writing.

func (*StringTreeWriter) WriteString ¶

func (s *StringTreeWriter) WriteString(value string) error

WriteString writes a string value to the StringTreeWriter returning an error if one occurs.

type Stripe ¶

type Stripe struct {
	*proto.StripeInformation
	// contains filtered or unexported fields
}

func NewStripe ¶

func NewStripe(info *proto.StripeInformation, included ...int) *Stripe

func (*Stripe) FromReader ¶

func (s *Stripe) FromReader(r *Reader) error

type Struct ¶

type Struct map[string]interface{}

type StructTreeReader ¶

type StructTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

func NewStructTreeReader ¶

func NewStructTreeReader(present io.Reader, children map[string]TreeReader) (*StructTreeReader, error)

func (*StructTreeReader) Err ¶

func (s *StructTreeReader) Err() error

func (*StructTreeReader) Next ¶

func (s *StructTreeReader) Next() bool

func (*StructTreeReader) Struct ¶

func (s *StructTreeReader) Struct() Struct

func (*StructTreeReader) Value ¶

func (s *StructTreeReader) Value() interface{}

type StructTreeWriter ¶

type StructTreeWriter struct {
	BaseTreeWriter
	// contains filtered or unexported fields
}

StructTreeWriter is a TreeWriter implementation that can write a struct column type.

func NewStructTreeWriter ¶

func NewStructTreeWriter(category Category, codec CompressionCodec, children []TreeWriter) (*StructTreeWriter, error)

NewStructTreeWriter returns a StructTreeWriter using the provided category, codec and children TreeWriters. It additionally returns an error if one occurs.

func (*StructTreeWriter) Close ¶

func (s *StructTreeWriter) Close() error

Close closes the StructTreeWriter and its child TreeWriters returning an error if one occurs.

func (*StructTreeWriter) Encoding ¶

func (s *StructTreeWriter) Encoding() *proto.ColumnEncoding

Encoding returns the column encoding for the StructTreeWriter.

func (*StructTreeWriter) Flush ¶

func (s *StructTreeWriter) Flush() error

Flush flushes the StructTreeWriter and its child TreeWriters returning an error if one occurs.

func (*StructTreeWriter) RecordPositions ¶

func (s *StructTreeWriter) RecordPositions()

func (*StructTreeWriter) Write ¶

func (s *StructTreeWriter) Write(value interface{}) error

Write writes a value to the underlying child TreeWriters. It returns an error if one occurs.

type TimestampStatistics ¶

type TimestampStatistics struct {
	BaseStatistics
	// contains filtered or unexported fields
}

func NewTimestampStatistics ¶

func NewTimestampStatistics() *TimestampStatistics

func (*TimestampStatistics) Add ¶

func (i *TimestampStatistics) Add(value interface{})

func (*TimestampStatistics) Merge ¶

func (i *TimestampStatistics) Merge(other ColumnStatistics)

func (*TimestampStatistics) Reset ¶

func (i *TimestampStatistics) Reset()

func (*TimestampStatistics) Statistics ¶

func (i *TimestampStatistics) Statistics() *proto.ColumnStatistics

type TimestampTreeReader ¶

type TimestampTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

TimestampTreeReader is a TreeReader implementation that reads timestamp type columns.

func NewTimestampTreeReader ¶

func NewTimestampTreeReader(present, data, secondary io.Reader, encoding *proto.ColumnEncoding) (*TimestampTreeReader, error)

NewTimestampTreeReader returns a new TimestampTreeReader along with any error that occurs.

func (*TimestampTreeReader) Err ¶

func (t *TimestampTreeReader) Err() error

Err implements the TreeReader interface.

func (*TimestampTreeReader) Next ¶

func (t *TimestampTreeReader) Next() bool

Next implements the TreeReader interface.

func (*TimestampTreeReader) Timestamp ¶

func (t *TimestampTreeReader) Timestamp() time.Time

Timestamp returns the next timestamp value.

func (*TimestampTreeReader) Value ¶

func (t *TimestampTreeReader) Value() interface{}

Value implements the TreeReader interface.

type TimestampTreeWriter ¶

type TimestampTreeWriter struct {
	BaseTreeWriter
	// contains filtered or unexported fields
}

TimestampTreeWriter is a TreeWriter implementation that writes a Timestamp type column.

func NewTimestampTreeWriter ¶

func NewTimestampTreeWriter(category Category, codec CompressionCodec) (*TimestampTreeWriter, error)

NewTimestampTreeWriter returns a new TimestampTreeWriter.

func (*TimestampTreeWriter) Close ¶

func (w *TimestampTreeWriter) Close() error

Close closes the underlying writers returning an error if one occurs.

func (*TimestampTreeWriter) Encoding ¶

func (w *TimestampTreeWriter) Encoding() *proto.ColumnEncoding

Encoding returns the column encoding used for the TimestampTreeWriter.

func (*TimestampTreeWriter) Flush ¶

func (w *TimestampTreeWriter) Flush() error

Flush flushes the underlying writers returning an error if one occurs.

func (*TimestampTreeWriter) Write ¶

func (w *TimestampTreeWriter) Write(value interface{}) error

Write writes a value returning an error if one occurs. It accepts any form of Timestamp or a nil value for writing nulls to the stream. Any other types will return an error.

func (*TimestampTreeWriter) WriteTimestamp ¶

func (w *TimestampTreeWriter) WriteTimestamp(value time.Time) error

WriteTimestamp writes a Timestamp value returning an error if one occurs.

type TimestampWriter ¶

type TimestampWriter interface {
	WriteTimestamp(value time.Time) error
	Close() error
	Flush() error
}

TimestampWriter is an interface implemented by all Timestamp type writers.

type TreeReader ¶

type TreeReader interface {
	Next() bool
	Value() interface{}
	Err() error
}

TreeReader is an interface that provides methods for reading an individual stream.

type TreeWriter ¶

type TreeWriter interface {
	// Encoding returns the column encoding used for the TreeWriter.
	Encoding() *proto.ColumnEncoding
	// Write writes the interface value i to the TreeWriter, it returns an error
	// if i is of an unexpected type or if an error occurs whilst writing to
	// the underlying stream.
	Write(i interface{}) error
	// Close flushes the remaining data and closes the writer.
	Close() error
	// Flush flushes any outstanding data to the underlying writer.
	Flush() error
	// Streams returns a slice of streams for the TreeWriter.
	Streams() []Stream
	// RowIndex returns the RowIndex for the writer.
	RowIndex() *proto.RowIndex
	// RecordPositions
	RecordPositions()
	// Statistics
	Statistics() ColumnStatistics
}

TreeWriter is an interface for writing to a stream.

type TypeDescription ¶

type TypeDescription struct {
	// contains filtered or unexported fields
}

func NewTypeDescription ¶

func NewTypeDescription(fns ...TypeDescriptionTransformFunc) (*TypeDescription, error)

func ParseSchema ¶

func ParseSchema(schema string) (*TypeDescription, error)

func (*TypeDescription) Columns ¶

func (t *TypeDescription) Columns() []string

func (*TypeDescription) GetField ¶

func (t *TypeDescription) GetField(fieldName string) (*TypeDescription, error)

func (*TypeDescription) MarshalJSON ¶

func (t *TypeDescription) MarshalJSON() ([]byte, error)

MarshalJSON returns a json encoded byte slice of t.

func (*TypeDescription) String ¶

func (t *TypeDescription) String() string

func (*TypeDescription) ToJSON ¶

func (t *TypeDescription) ToJSON() string

ToJSON returns a json encoded string of t.

func (*TypeDescription) Type ¶

func (t *TypeDescription) Type() *proto.Type

func (*TypeDescription) Types ¶

func (t *TypeDescription) Types() []*proto.Type

type TypeDescriptionTransformFunc ¶

type TypeDescriptionTransformFunc func(t *TypeDescription) error

func AddChild ¶

func AddChild(fns ...TypeDescriptionTransformFunc) TypeDescriptionTransformFunc

func AddField ¶

func AddField(field string, fns ...TypeDescriptionTransformFunc) TypeDescriptionTransformFunc

func AddUnionChild ¶

func AddUnionChild(fns ...TypeDescriptionTransformFunc) TypeDescriptionTransformFunc

func SetCategory ¶

func SetCategory(category Category) TypeDescriptionTransformFunc

type UnionTreeReader ¶

type UnionTreeReader struct {
	BaseTreeReader
	// contains filtered or unexported fields
}

UnionTreeReader is a TreeReader that reads a Union type column.

func NewUnionTreeReader ¶

func NewUnionTreeReader(present, data io.Reader, children []TreeReader) (*UnionTreeReader, error)

NewUnionTreeReader returns a new instance of a UnionTreeReader or an error if one occurs.

func (*UnionTreeReader) Err ¶

func (u *UnionTreeReader) Err() error

Err returns the last error to have occurred.

func (*UnionTreeReader) Next ¶

func (u *UnionTreeReader) Next() bool

Next returns true if another value is available.

func (*UnionTreeReader) Value ¶

func (u *UnionTreeReader) Value() interface{}

Value returns the next value as an interface{}.

type UnionTreeWriter ¶

type UnionTreeWriter struct {
	BaseTreeWriter
	// contains filtered or unexported fields
}

UnionTreeWriter is a TreeWriter implementation that can write a union column type.

func NewUnionTreeWriter ¶

func NewUnionTreeWriter(category Category, codec CompressionCodec, children []TreeWriter) (*UnionTreeWriter, error)

NewUnionTreeWriter returns a UnionTreeWriter using the provided category, compression codec, and child TreeWriters. It additionally returns an error if one occurs.

func (*UnionTreeWriter) Close ¶

func (s *UnionTreeWriter) Close() error

Close closes the UnionTreeWriter and its child TreeWriters returning an error if one occurs.

func (*UnionTreeWriter) Encoding ¶

func (s *UnionTreeWriter) Encoding() *proto.ColumnEncoding

Encoding returns the column encoding for the UnionTreeWriter.

func (*UnionTreeWriter) Flush ¶

func (s *UnionTreeWriter) Flush() error

Flush flushes the UnionTreeWriter and its child TreeWriters returning an error if one occurs.

func (*UnionTreeWriter) RecordPositions ¶

func (s *UnionTreeWriter) RecordPositions()

func (*UnionTreeWriter) Write ¶

func (s *UnionTreeWriter) Write(value interface{}) error

func (*UnionTreeWriter) WriteUnion ¶

func (s *UnionTreeWriter) WriteUnion(value UnionValue) error

WriteUnion writes a value to the underlying child TreeWriters. It returns an error if one occurs.

type UnionValue ¶

type UnionValue struct {
	Tag   int         `json:"tag"`
	Value interface{} `json:"value"`
}

type Version ¶

type Version struct {
	// contains filtered or unexported fields
}

Version is the version of the ORC file.

type Writer ¶

type Writer struct {
	// contains filtered or unexported fields
}

func NewWriter ¶

func NewWriter(w io.Writer, fns ...WriterConfigFunc) (*Writer, error)

NewWriter returns a new ORC file writer that writes to the provided io.Writer.

func (*Writer) Close ¶

func (w *Writer) Close() error

func (*Writer) Flush ¶

func (w *Writer) Flush() error

Flush flushes the current stripe to the underlying Writer.

func (*Writer) Schema ¶

func (w *Writer) Schema() *TypeDescription

func (*Writer) Write ¶

func (w *Writer) Write(values ...interface{}) error

type WriterConfigFunc ¶

type WriterConfigFunc func(w *Writer) error

func AddUserMetadata ¶

func AddUserMetadata(name string, value []byte) WriterConfigFunc

func SetCompression ¶

func SetCompression(codec CompressionCodec) WriterConfigFunc

func SetSchema ¶

func SetSchema(schema *TypeDescription) WriterConfigFunc

func SetStripeTargetSize ¶

func SetStripeTargetSize(stripeTargetSize int64) WriterConfigFunc

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
proto	Package proto is a generated protocol buffer package.
tools
orc2csv

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL