parquet

package module
v0.0.12 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 15, 2019 License: MIT Imports: 10 Imported by: 0

README

Parquet

Parquet generates a parquet reader and writer for a single struct.

Installation

go get -u github.com/parsyl/parquet/...

Usage

First define a struct for the data to be written to parquet:

// Person is the example record that is written to and read from parquet.
// The `parquet` struct tag on each field carries the name used for that
// field in parquet.
type Person struct {
	// ID is a plain (non-pointer) int32.
	ID int32 `parquet:"id"`
	// Age is a pointer, which marks the value as optional; nil means the
	// value is absent.
	Age *int32 `parquet:"age"`
}

Next, add a go:generate comment somewhere (in this example all code lives in main.go):

//go:generate parquetgen -input main.go -type Person -package main

Generate the code for the reader and writer:

$ go generate

A new file (parquet.go) has now been written that defines ParquetWriter and ParquetReader. Next, make use of the writer and reader:

package main

import (
	"bytes"
	"encoding/json"
	"log"
	"os"
)

// main round-trips two Person records through an in-memory parquet file:
// it writes them with the generated ParquetWriter, reads them back with the
// generated ParquetReader, and prints each record to stdout as JSON.
// Any error terminates the program via log.Fatal.
func main() {
	var buf bytes.Buffer
	// MaxPageSize optionally defines the number of rows in each column chunk (default is 1000)
	w, err := NewParquetWriter(&buf, MaxPageSize(10000))
	if err != nil {
		log.Fatal(err)
	}

	w.Add(Person{ID: 1, Age: getAge(30)})
	// Age is left nil here, i.e. the optional value is absent.
	w.Add(Person{ID: 2})

	// Each call to Write creates a new parquet row group.
	if err := w.Write(); err != nil {
		log.Fatal(err)
	}

	// Close must be called when you are done.  It writes
	// the parquet metadata at the end of the file.
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}

	// Read back exactly the bytes that were just written.
	r, err := NewParquetReader(bytes.NewReader(buf.Bytes()))
	if err != nil {
		log.Fatal(err)
	}

	enc := json.NewEncoder(os.Stdout)
	for r.Next() {
		var p Person
		r.Scan(&p)
		// Encode returns an error (e.g. when stdout cannot be written);
		// report it instead of silently dropping the record.
		if err := enc.Encode(p); err != nil {
			log.Fatal(err)
		}
	}

	// Check the reader's error once the loop has ended.
	if err := r.Error(); err != nil {
		log.Fatal(err)
	}
}

// getAge returns a pointer to a fresh copy of a, which is convenient for
// filling optional (*int32) fields such as Person.Age from a literal.
func getAge(a int32) *int32 {
	age := a
	return &age
}

See the examples directory for a complete example of how to use it.

Supported Types

The struct used to define the parquet data can have the following types:

int32
uint32
int64
uint64
float32
float64
string
bool

Each of these types may be a pointer to indicate that the data is optional. The struct can also embed another struct:

type (
	// Being holds the fields shared by embedding structs.
	Being struct {
		ID  int32  `parquet:"id"`
		Age *int32 `parquet:"age"` // pointer: the value is optional
	}

	// Person embeds Being, so Being's fields are part of Person alongside
	// Username.
	Person struct {
		Being
		Username string `parquet:"username"`
	}
)

Nested structs, however, are not supported at this time. If you want a field to be excluded from parquet, you can tag it with a dash or make it unexported, like so:

// Being shows the two ways of keeping a field out of parquet: tagging it
// with `parquet:"-"` or leaving it unexported.
type Being struct {
	ID       int32  `parquet:"id"`
	Password string `parquet:"-"` // will not be written to parquet
	age      int32                // will not be written to parquet
}

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BoolType

func BoolType(se *sch.SchemaElement)

func Float32Type

func Float32Type(se *sch.SchemaElement)

func Float64Type

func Float64Type(se *sch.SchemaElement)

func GetBools

func GetBools(r io.Reader, n int, pageSizes []int) ([]bool, error)

func Int32Type

func Int32Type(se *sch.SchemaElement)

func Int64Type

func Int64Type(se *sch.SchemaElement)

func ReadLevels

func ReadLevels(r io.Reader) ([]int64, int, error)

ReadLevels reads the RLE encoded definition levels

func RepetitionOptional

func RepetitionOptional(se *sch.SchemaElement)

func RepetitionRequired

func RepetitionRequired(se *sch.SchemaElement)

func StringType

func StringType(se *sch.SchemaElement)

func Uint32Type

func Uint32Type(se *sch.SchemaElement)

func Uint64Type

func Uint64Type(se *sch.SchemaElement)

func WriteLevels

func WriteLevels(w io.Writer, levels []int64) error

WriteLevels writes levels to w as RLE-encoded data.

Types

type Field

// Field groups a name with two FieldFunc values, one for the type and one
// for the repetition type.
// NOTE(review): presumably each Field describes one parquet column and is
// what New and StartRowGroup use to build the schema — confirm.
type Field struct {
	// Name is the field's name.
	Name           string
	// Type is a FieldFunc; the package's Int32Type, StringType, BoolType,
	// etc. all have this signature.
	Type           FieldFunc
	// RepetitionType is a FieldFunc; RepetitionOptional and
	// RepetitionRequired have this signature.
	RepetitionType FieldFunc
}

type FieldFunc

// FieldFunc is a function that receives a *sch.SchemaElement and returns
// nothing.
// NOTE(review): presumably it configures the element in place (e.g. sets
// its type or repetition type) — confirm against the implementations.
type FieldFunc func(*sch.SchemaElement)

type Metadata

type Metadata struct {
	// contains filtered or unexported fields
}

func New

func New(fields ...Field) *Metadata

func (*Metadata) Footer

func (m *Metadata) Footer(w io.Writer) error

func (*Metadata) Offsets

func (m *Metadata) Offsets() (map[string][]Position, error)

func (*Metadata) PageHeader

func (m *Metadata) PageHeader(r io.ReadSeeker) (*sch.PageHeader, error)

func (*Metadata) ReadFooter

func (m *Metadata) ReadFooter(r io.ReadSeeker) error

func (*Metadata) RowGroups added in v0.0.3

func (m *Metadata) RowGroups() []RowGroup

func (*Metadata) Rows

func (m *Metadata) Rows() int64

func (*Metadata) StartRowGroup

func (m *Metadata) StartRowGroup(fields ...Field)

func (*Metadata) WritePageHeader

func (m *Metadata) WritePageHeader(w io.Writer, col string, dataLen, compressedLen, count int) error

WritePageHeader indicates you are done writing this column's chunk.

type OptionalField added in v0.0.8

type OptionalField struct {
	Defs []int64
	// contains filtered or unexported fields
}

func NewOptionalField added in v0.0.8

func NewOptionalField(col string) OptionalField

func (*OptionalField) DoRead added in v0.0.8

func (f *OptionalField) DoRead(r io.ReadSeeker, meta *Metadata, pos Position) (io.Reader, []int, error)

func (*OptionalField) DoWrite added in v0.0.8

func (f *OptionalField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int) error

func (*OptionalField) Name added in v0.0.8

func (f *OptionalField) Name() string

func (*OptionalField) Values added in v0.0.8

func (f *OptionalField) Values() int

type Position

// Position is the value type of the map returned by Metadata.Offsets and
// is the pos argument of the DoRead methods.
// NOTE(review): presumably it locates one chunk of column data in the file,
// with N the number of values, Size the length in bytes and Offset the byte
// offset from the start — confirm against Metadata.Offsets.
type Position struct {
	N      int
	Size   int
	Offset int64
}

type ReadCounter added in v0.0.8

type ReadCounter struct {
	// contains filtered or unexported fields
}

func (*ReadCounter) Read added in v0.0.8

func (r *ReadCounter) Read(p []byte) (int, error)

func (*ReadCounter) Seek added in v0.0.8

func (r *ReadCounter) Seek(o int64, w int) (int64, error)

type RequiredField added in v0.0.8

type RequiredField struct {
	// contains filtered or unexported fields
}

func NewRequiredField added in v0.0.8

func NewRequiredField(col string) RequiredField

func (*RequiredField) DoRead added in v0.0.8

func (f *RequiredField) DoRead(r io.ReadSeeker, meta *Metadata, pos Position) (io.Reader, []int, error)

func (*RequiredField) DoWrite added in v0.0.8

func (f *RequiredField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int) error

func (*RequiredField) Name added in v0.0.8

func (f *RequiredField) Name() string

type RowGroup added in v0.0.8

type RowGroup struct {
	Rows int64
	// contains filtered or unexported fields
}

func (*RowGroup) Columns added in v0.0.8

func (r *RowGroup) Columns() []*sch.ColumnChunk

type WriteCounter added in v0.0.8

type WriteCounter struct {
	// contains filtered or unexported fields
}

func NewWriteCounter added in v0.0.8

func NewWriteCounter(w io.Writer) *WriteCounter

func (*WriteCounter) Write added in v0.0.8

func (w *WriteCounter) Write(p []byte) (int, error)

Directories

Path Synopsis
cmd
examples
internal

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL