writer

package
v1.6.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 5, 2021 License: Apache-2.0 Imports: 18 Imported by: 186

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ArrowWriter added in v1.6.2

type ArrowWriter struct {
	ParquetWriter
}

ArrowWriter extends the base ParquetWriter.

func NewArrowWriter added in v1.6.2

func NewArrowWriter(arrowSchema *arrow.Schema, pfile source.ParquetFile,
	np int64) (*ArrowWriter, error)

NewArrowWriter creates a parquet writer for an Arrow schema. It takes the native Arrow schema, the parquet file writer that holds the parquet file to which the records will be written, and the number of parallel threads that will write to the file.

func (*ArrowWriter) WriteArrow added in v1.6.2

func (w *ArrowWriter) WriteArrow(record array.Record) error

WriteArrow wraps the base Write function provided by writer.ParquetWriter. The function transforms the data in the record, which the Go Arrow library provides as an array of columns, into an array of rows that the parquet-go library can understand, because parquet-go does not accept data by columns, only by rows.

type CSVWriter

type CSVWriter struct {
	ParquetWriter
}

func NewCSVWriter

func NewCSVWriter(md []string, pfile source.ParquetFile, np int64) (*CSVWriter, error)

Create CSV writer

func NewCSVWriterFromWriter added in v1.5.4

func NewCSVWriterFromWriter(md []string, w io.Writer, np int64) (*CSVWriter, error)

func (*CSVWriter) WriteString

func (w *CSVWriter) WriteString(recsi interface{}) error

Write string values to parquet file

type JSONWriter

type JSONWriter struct {
	ParquetWriter
}

func NewJSONWriter

func NewJSONWriter(jsonSchema string, pfile source.ParquetFile, np int64) (*JSONWriter, error)

Create JSON writer

func NewJSONWriterFromWriter added in v1.5.4

func NewJSONWriterFromWriter(jsonSchema string, w io.Writer, np int64) (*JSONWriter, error)

type ParquetWriter

type ParquetWriter struct {
	SchemaHandler *schema.SchemaHandler
	NP            int64 //parallel number
	Footer        *parquet.FileMetaData
	PFile         source.ParquetFile

	PageSize        int64
	RowGroupSize    int64
	CompressionType parquet.CompressionCodec
	Offset          int64

	Objs              []interface{}
	ObjsSize          int64
	ObjSize           int64
	CheckSizeCritical int64

	PagesMapBuf map[string][]*layout.Page
	Size        int64
	NumRows     int64

	DictRecs map[string]*layout.DictRecType

	ColumnIndexes []*parquet.ColumnIndex
	OffsetIndexes []*parquet.OffsetIndex

	MarshalFunc func(src []interface{}, sh *schema.SchemaHandler) (*map[string]*layout.Table, error)
}

ParquetWriter is a writer for parquet files.

func NewParquetWriter

func NewParquetWriter(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetWriter, error)

Create a parquet handler. The obj argument is an object with tags or a JSON schema string.

func NewParquetWriterFromWriter added in v1.5.4

func NewParquetWriterFromWriter(w io.Writer, obj interface{}, np int64) (*ParquetWriter, error)

func (*ParquetWriter) Flush

func (pw *ParquetWriter) Flush(flag bool) error

Flush the write buffer to parquet file

func (*ParquetWriter) RenameSchema

func (pw *ParquetWriter) RenameSchema()

Rename schema name to exname in tags

func (*ParquetWriter) SetSchemaHandlerFromJSON

func (pw *ParquetWriter) SetSchemaHandlerFromJSON(jsonSchema string) error

func (*ParquetWriter) Write

func (pw *ParquetWriter) Write(src interface{}) error

Write one object to parquet file

func (*ParquetWriter) WriteStop

func (pw *ParquetWriter) WriteStop() error

Write the footer and stop writing

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL