iop

package

v0.3.144 Latest Latest Go to latest Published: Dec 2, 2022 License: GPL-3.0 Imports: 40 Imported by: 1

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/flarco/dbio

Links

Open Source Insights

README ¶

Input-Process-Output (ipo)

Documentation ¶

Index ¶

Variables
func AutoDecompress(reader io.Reader) (gReader io.Reader, err error)
func CleanHeaderRow(header []string) []string
func CompareColumns(columns1 Columns, columns2 Columns) (reshape bool, err error)
func CreateDummyFields(numCols int) (cols []string)
func GetISO8601DateMap(t time.Time) map[string]interface{}
func IsDummy(columns []Column) bool
func Iso8601ToGoLayout(dateFormat string) (goDateFormat string)
func MakeRowsChan() chan []interface{}
func NewJSONStream(ds *Datastream, decoder decoderLike, flatten bool) *jsonStream
func Row(vals ...interface{}) []interface{}
func ScanCarrRet(data []byte, atEOF bool) (advance int, token []byte, err error)
func Unzip(src string, dest string) ([]string, error)
type CSV
- func (c *CSV) InferSchema() error
- func (c *CSV) NewReader() (*io.PipeReader, error)
- func (c *CSV) Read() (data Dataset, err error)
- func (c *CSV) ReadStream() (ds *Datastream, err error)
- func (c *CSV) Sample(n int) (Dataset, error)
- func (c *CSV) SetFields(fields []string)
- func (c *CSV) WriteStream(ds *Datastream) (cnt uint64, err error)
type Column
- func InferFromStats(columns []Column, safe bool, noTrace bool) []Column
- func SyncColumns(columns1 []Column, columns2 []Column) (columns []Column, err error)
- func (col *Column) IsBool() bool
- func (col *Column) IsDatetime() bool
- func (col *Column) IsDecimal() bool
- func (col *Column) IsInteger() bool
- func (col *Column) IsNumber() bool
- func (col *Column) IsString() bool
- func (col *Column) IsUnique() bool
- func (col *Column) Key() string
type ColumnStats
- func (cs *ColumnStats) DistinctPercent() float64
- func (cs *ColumnStats) DuplicateCount() int64
- func (cs *ColumnStats) DuplicatePercent() float64
type ColumnType
- func (ct ColumnType) IsBool() bool
- func (ct ColumnType) IsDatetime() bool
- func (ct ColumnType) IsDecimal() bool
- func (ct ColumnType) IsInteger() bool
- func (ct ColumnType) IsJSON() bool
- func (ct ColumnType) IsNumber() bool
- func (ct ColumnType) IsString() bool
type Columns
- func NewColumnsFromFields(fields ...string) (cols Columns)
- func (cols Columns) Clone() (newCols Columns)
- func (cols Columns) Dataset() Dataset
- func (cols Columns) FieldMap(toLower bool) map[string]int
- func (cols Columns) GetColumn(name string) Column
- func (cols Columns) IsDummy() bool
- func (cols Columns) Names() []string
- func (cols Columns) Normalize(name string) string
type Compressor
- func NewCompressor(cpType CompressorType) Compressor
type CompressorType
- func CompressorTypePtr(v CompressorType) *CompressorType
type Dataflow
- func MakeDataFlow(dss ...*Datastream) (df *Dataflow, err error)
- func NewDataflow(limit ...int) (df *Dataflow)
- func (df *Dataflow) AddInBytes(bytes uint64)
- func (df *Dataflow) AddOutBytes(bytes uint64)
- func (df *Dataflow) Bytes() (inBytes, outBytes uint64)
- func (df *Dataflow) CleanUp()
- func (df *Dataflow) Close()
- func (df *Dataflow) Collect() (data Dataset, err error)
- func (df *Dataflow) Count() (cnt uint64)
- func (df *Dataflow) Defer(f func())
- func (df *Dataflow) DsTotalBytes() (bytes uint64)
- func (df *Dataflow) Err() (err error)
- func (df *Dataflow) GetFinal(dsID string) (ds *Datastream)
- func (df *Dataflow) IsClosed() bool
- func (df *Dataflow) IsEmpty() bool
- func (df *Dataflow) MakeStreamCh() (streamCh chan *Datastream)
- func (df *Dataflow) Pause()
- func (df *Dataflow) PushStreamChan(dsCh chan *Datastream)
- func (df *Dataflow) ReplaceStream(old, new *Datastream)
- func (df *Dataflow) ResetStats()
- func (df *Dataflow) SetColumns(columns []Column)
- func (df *Dataflow) SetEmpty()
- func (df *Dataflow) SetReady()
- func (df *Dataflow) Size() int
- func (df *Dataflow) SyncStats()
- func (df *Dataflow) Unpause()
- func (df *Dataflow) WaitReady() error
type Dataset
- func NewDataset(columns Columns) (data Dataset)
- func NewDatasetFromMap(m map[string]interface{}) (data Dataset)
- func ReadCsv(path string) (Dataset, error)
- func (data *Dataset) Append(row []interface{})
- func (data *Dataset) ColValues(col int) []interface{}
- func (data *Dataset) ColValuesStr(col int) []string
- func (data *Dataset) FirstRow() []interface{}
- func (data *Dataset) FirstVal() interface{}
- func (data *Dataset) GetFields(lower ...bool) []string
- func (data *Dataset) InferColumnTypes()
- func (data *Dataset) Records() []map[string]interface{}
- func (data *Dataset) SetFields(fields []string)
- func (data *Dataset) Stream() *Datastream
- func (data *Dataset) ToJSONMap() map[string]interface{}
- func (data *Dataset) WriteCsv(path string) error
type Datastream
- func MergeDataflow(df *Dataflow) (ds *Datastream)
- func NewDatastream(columns Columns) (ds *Datastream)
- func NewDatastreamContext(ctx context.Context, columns Columns) (ds *Datastream)
- func NewDatastreamIt(ctx context.Context, columns Columns, nextFunc func(it *Iterator) bool) (ds *Datastream)
- func ReadCsvStream(path string) (ds *Datastream, err error)
- func (ds *Datastream) AddBytes(b int64)
- func (ds *Datastream) Chunk(limit uint64) (chDs chan *Datastream)
- func (ds *Datastream) Clone() *Datastream
- func (ds *Datastream) Close()
- func (ds *Datastream) Collect(limit int) (Dataset, error)
- func (ds *Datastream) ConsumeCsvReader(reader io.Reader) (err error)
- func (ds *Datastream) ConsumeJsonReader(reader io.Reader) (err error)
- func (ds *Datastream) ConsumeXmlReader(reader io.Reader) (err error)
- func (ds *Datastream) Defer(f func())
- func (ds *Datastream) Df() *Dataflow
- func (ds *Datastream) Err() (err error)
- func (ds *Datastream) GetFields(args ...bool) []string
- func (ds *Datastream) IsClosed() bool
- func (ds *Datastream) IsEmpty() bool
- func (ds *Datastream) Map(newColumns Columns, transf func([]interface{}) []interface{}) (nDs *Datastream)
- func (ds *Datastream) MapParallel(transf func([]interface{}) []interface{}, numWorkers int) (nDs *Datastream)
- func (ds *Datastream) NewCsvBufferReader(limit int, bytesLimit int64) *bytes.Reader
- func (ds *Datastream) NewCsvBufferReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *bytes.Reader)
- func (ds *Datastream) NewCsvBytesChnl(chunkRowSize int) (dataChn chan *[]byte)
- func (ds *Datastream) NewCsvReader(rowLimit int, bytesLimit int64) *io.PipeReader
- func (ds *Datastream) NewCsvReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *io.PipeReader)
- func (ds *Datastream) NewJsonLinesReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *io.PipeReader)
- func (ds *Datastream) NewJsonReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *io.PipeReader)
- func (ds *Datastream) Push(row []interface{})
- func (ds *Datastream) Records() <-chan map[string]interface{}
- func (ds *Datastream) ResetStats()
- func (ds *Datastream) SetConfig(configMap map[string]string)
- func (ds *Datastream) SetEmpty()
- func (ds *Datastream) SetFields(fields []string)
- func (ds *Datastream) SetMetadata(jsonStr string)
- func (ds *Datastream) SetReady()
- func (ds *Datastream) Shape(columns Columns) (nDs *Datastream, err error)
- func (ds *Datastream) Split(numStreams ...int) (dss []*Datastream)
- func (ds *Datastream) Start() (err error)
- func (ds *Datastream) WaitReady() error
type GzipCompressor
- func (cp *GzipCompressor) Compress(reader io.Reader) io.Reader
- func (cp *GzipCompressor) Decompress(reader io.Reader) (gReader io.Reader, err error)
- func (cp *GzipCompressor) Suffix() string
type Iterator
- func (it *Iterator) Ds() *Datastream
type KeyValue
type Metadata
- func (m *Metadata) AsMap() map[string]interface{}
type NoneCompressor
- func (cp *NoneCompressor) Compress(reader io.Reader) io.Reader
- func (cp *NoneCompressor) Decompress(reader io.Reader) (gReader io.Reader, err error)
- func (cp *NoneCompressor) Suffix() string
type Parquet
- func (p *Parquet) ReadStream() (*Datastream, error)
- func (p *Parquet) WriteStream(ds *Datastream) error
type SSHClient
- func (s *SSHClient) Close()
- func (s *SSHClient) Connect() (err error)
- func (s *SSHClient) GetOutput() (stdout string, stderr string)
- func (s *SSHClient) OpenPortForward() (localPort int, err error)
- func (s *SSHClient) RunAsProcess() (localPort int, err error)
- func (s *SSHClient) SftpClient() (sftpClient *sftp.Client, err error)
type SnappyCompressor
- func (cp *SnappyCompressor) Compress(reader io.Reader) io.Reader
- func (cp *SnappyCompressor) Decompress(reader io.Reader) (sReader io.Reader, err error)
- func (cp *SnappyCompressor) Suffix() string
type StreamProcessor
- func NewStreamProcessor() *StreamProcessor
- func (sp *StreamProcessor) CastRow(row []interface{}, columns []Column) []interface{}
- func (sp *StreamProcessor) CastToString(i int, val interface{}, valType ...ColumnType) string
- func (sp *StreamProcessor) CastToTime(i interface{}) (t time.Time, err error)
- func (sp *StreamProcessor) CastType(val interface{}, typ ColumnType) interface{}
- func (sp *StreamProcessor) CastVal(i int, val interface{}, col *Column) interface{}
- func (sp *StreamProcessor) CastValWithoutStats(i int, val interface{}, typ ColumnType) interface{}
- func (sp *StreamProcessor) GetType(val interface{}) (typ ColumnType)
- func (sp *StreamProcessor) ParseString(s string, jj ...int) interface{}
- func (sp *StreamProcessor) ParseTime(i interface{}) (t time.Time, err error)
- func (sp *StreamProcessor) ParseVal(val interface{}) interface{}
- func (sp *StreamProcessor) ProcessRow(row []interface{}) []interface{}
- func (sp *StreamProcessor) ProcessVal(val interface{}) interface{}
- func (sp *StreamProcessor) SetConfig(configMap map[string]string)
type ZStandardCompressor
- func (cp *ZStandardCompressor) Compress(reader io.Reader) io.Reader
- func (cp *ZStandardCompressor) Decompress(reader io.Reader) (sReader io.Reader, err error)
- func (cp *ZStandardCompressor) Suffix() string

Constants ¶

This section is empty.

Variables ¶

View Source

var (
	// RemoveTrailingDecZeros removes the trailing zeros in CastToString
	RemoveTrailingDecZeros = false
	SampleSize             = 900
)

Functions ¶

func AutoDecompress ¶ added in v0.2.3

func AutoDecompress(reader io.Reader) (gReader io.Reader, err error)

AutoDecompress auto-detects the compression in order to decompress. Otherwise it returns the same reader.

func CleanHeaderRow ¶

func CleanHeaderRow(header []string) []string

CleanHeaderRow cleans the header row from incompatible characters

func CompareColumns ¶

func CompareColumns(columns1 Columns, columns2 Columns) (reshape bool, err error)

CompareColumns compares two sets of columns to see if they are similar

func CreateDummyFields ¶

func CreateDummyFields(numCols int) (cols []string)

CreateDummyFields creates dummy columns for csvs with no header row

func GetISO8601DateMap ¶

func GetISO8601DateMap(t time.Time) map[string]interface{}

GetISO8601DateMap returns a map of date parts for string formatting

func IsDummy ¶

func IsDummy(columns []Column) bool

IsDummy returns true if the columns are injected by CreateDummyFields

func Iso8601ToGoLayout ¶ added in v0.3.49

func Iso8601ToGoLayout(dateFormat string) (goDateFormat string)

https://www.w3.org/QA/Tips/iso-date https://www.w3.org/TR/NOTE-datetime https://www.iso.org/iso-8601-date-and-time-format.html

func MakeRowsChan ¶

func MakeRowsChan() chan []interface{}

MakeRowsChan returns a buffered channel with default size

func NewJSONStream ¶ added in v0.3.1

func NewJSONStream(ds *Datastream, decoder decoderLike, flatten bool) *jsonStream

func Row ¶

func Row(vals ...interface{}) []interface{}

Row is a row

func ScanCarrRet ¶

func ScanCarrRet(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanCarrRet removes the \r runes that are not immediately followed by \n

func Unzip ¶

func Unzip(src string, dest string) ([]string, error)

Unzip will decompress a zip archive, moving all files and folders within the zip file (parameter 1) to an output directory (parameter 2).

Types ¶

type CSV ¶

type CSV struct {
	Path      string
	NoHeader  bool
	Delimiter rune
	Columns   []Column
	File      *os.File
	Data      Dataset
	Reader    io.Reader
	NoTrace   bool
	// contains filtered or unexported fields
}

CSV is a csv object

func (*CSV) NewReader ¶

func (c *CSV) NewReader() (*io.PipeReader, error)

NewReader creates a Reader

func (*CSV) Read ¶

func (c *CSV) Read() (data Dataset, err error)

Read reads the CSV and returns the dataset, with Line 1 as header

func (*CSV) ReadStream ¶

func (c *CSV) ReadStream() (ds *Datastream, err error)

ReadStream returns the read CSV stream with Line 1 as header

func (*CSV) Sample ¶

func (c *CSV) Sample(n int) (Dataset, error)

Sample returns a sample of n rows

func (*CSV) SetFields ¶

func (c *CSV) SetFields(fields []string)

SetFields sets the fields

func InferFromStats ¶

func InferFromStats(columns []Column, safe bool, noTrace bool) []Column

InferFromStats uses the stats to infer data types

func (*Column) IsBool ¶

func (col *Column) IsBool() bool

IsBool returns whether the column is a boolean

func (*Column) IsDatetime ¶

func (col *Column) IsDatetime() bool

IsDatetime returns whether the column is a datetime object

func (*Column) IsDecimal ¶

func (col *Column) IsDecimal() bool

IsDecimal returns whether the column is a decimal

func (*Column) IsInteger ¶

func (col *Column) IsInteger() bool

IsInteger returns whether the column is an integer

func (*Column) IsNumber ¶

func (col *Column) IsNumber() bool

IsNumber returns whether the column is a decimal or an integer

func (*Column) IsString ¶

func (col *Column) IsString() bool

IsString returns whether the column is a string

func (*Column) IsUnique ¶ added in v0.3.66

func (col *Column) IsUnique() bool

func (*Column) Key ¶ added in v0.3.66

func (col *Column) Key() string

type ColumnStats ¶

type ColumnStats struct {
	MinLen    int    `json:"min_len,omitempty"`
	MaxLen    int    `json:"max_len,omitempty"`
	MaxDecLen int    `json:"max_dec_len,omitempty"`
	Min       int64  `json:"min"`
	Max       int64  `json:"max"`
	NullCnt   int64  `json:"null_cnt"`
	IntCnt    int64  `json:"int_cnt,omitempty"`
	DecCnt    int64  `json:"dec_cnt,omitempty"`
	BoolCnt   int64  `json:"bool_cnt,omitempty"`
	JsonCnt   int64  `json:"json_cnt,omitempty"`
	StringCnt int64  `json:"string_cnt,omitempty"`
	DateCnt   int64  `json:"date_cnt,omitempty"`
	TotalCnt  int64  `json:"total_cnt"`
	UniqCnt   int64  `json:"uniq_cnt"`
	Checksum  uint64 `json:"checksum"`
}

ColumnStats holds statistics for a column

func (*ColumnStats) DistinctPercent ¶ added in v0.3.66

func (cs *ColumnStats) DistinctPercent() float64

func (*ColumnStats) DuplicateCount ¶ added in v0.3.66

func (cs *ColumnStats) DuplicateCount() int64

func (*ColumnStats) DuplicatePercent ¶ added in v0.3.66

func (cs *ColumnStats) DuplicatePercent() float64

type ColumnType ¶ added in v0.3.66

type ColumnType string

const (
	BigIntType     ColumnType = "bigint"
	BinaryType     ColumnType = "binary"
	BoolType       ColumnType = "bool"
	DateType       ColumnType = "date"
	DatetimeType   ColumnType = "datetime"
	DecimalType    ColumnType = "decimal"
	IntegerType    ColumnType = "integer"
	JsonType       ColumnType = "json"
	SmallIntType   ColumnType = "smallint"
	StringType     ColumnType = "string"
	TextType       ColumnType = "text"
	TimestampType  ColumnType = "timestamp"
	TimestampzType ColumnType = "timestampz"
	FloatType      ColumnType = "float"
	TimeType       ColumnType = "time"
	TimezType      ColumnType = "timez"
)

func (ColumnType) IsBool ¶ added in v0.3.66

func (ct ColumnType) IsBool() bool

IsBool returns whether the column is a boolean

func (ColumnType) IsDatetime ¶ added in v0.3.66

func (ct ColumnType) IsDatetime() bool

IsDatetime returns whether the column is a datetime object

func (ColumnType) IsDecimal ¶ added in v0.3.66

func (ct ColumnType) IsDecimal() bool

IsDecimal returns whether the column is a decimal

func (ColumnType) IsInteger ¶ added in v0.3.66

func (ct ColumnType) IsInteger() bool

IsInteger returns whether the column is an integer

func (ColumnType) IsJSON ¶ added in v0.3.130

func (ct ColumnType) IsJSON() bool

IsJSON returns whether the column is a json

func (ColumnType) IsNumber ¶ added in v0.3.66

func (ct ColumnType) IsNumber() bool

IsNumber returns whether the column is a decimal or an integer

func (ColumnType) IsString ¶ added in v0.3.66

func (ct ColumnType) IsString() bool

IsString returns whether the column is a string

func NewColumnsFromFields ¶

func NewColumnsFromFields(fields ...string) (cols Columns)

NewColumnsFromFields creates Columns from fields

func (Columns) Clone ¶ added in v0.3.67

func (cols Columns) Clone() (newCols Columns)

Clone returns a copy of the columns

func (Columns) Dataset ¶ added in v0.0.5

func (cols Columns) Dataset() Dataset

Dataset returns an empty inferred dataset

func (Columns) FieldMap ¶ added in v0.0.5

func (cols Columns) FieldMap(toLower bool) map[string]int

FieldMap returns the map of field names to their indexes. When `toLower` is true, field keys are lower-cased.

func (Columns) GetColumn ¶

func (cols Columns) GetColumn(name string) Column

GetColumn returns the matched Col

func (Columns) IsDummy ¶

func (cols Columns) IsDummy() bool

IsDummy returns true if the columns are injected by CreateDummyFields

func (Columns) Names ¶ added in v0.0.5

func (cols Columns) Names() []string

Names returns the column names

func (Columns) Normalize ¶

func (cols Columns) Normalize(name string) string

Normalize returns the normalized field name

type Compressor ¶

type Compressor interface {
	Self() Compressor
	Compress(io.Reader) io.Reader
	Decompress(io.Reader) (io.Reader, error)
	Suffix() string
}

Compressor implements different kinds of compression

func NewCompressor ¶ added in v0.2.3

func NewCompressor(cpType CompressorType) Compressor

type CompressorType ¶

type CompressorType string

CompressorType is a string type used as an enum for the Compressor Type

const (
	// AutoCompressorType is for auto compression
	AutoCompressorType CompressorType = "AUTO"
	// NoneCompressorType is for no compression
	NoneCompressorType CompressorType = "NONE"
	// ZipCompressorType is for Zip compression
	ZipCompressorType CompressorType = "ZIP"
	// GzipCompressorType is for Gzip compression
	GzipCompressorType CompressorType = "GZIP"
	// SnappyCompressorType is for Snappy compression
	SnappyCompressorType CompressorType = "SNAPPY"
	// ZStandardCompressorType is for ZStandard
	ZStandardCompressorType CompressorType = "ZSTD"
)

func CompressorTypePtr ¶ added in v0.3.56

func CompressorTypePtr(v CompressorType) *CompressorType

CompressorTypePtr returns a pointer to the CompressorType value passed in.

type Dataflow ¶

type Dataflow struct {
	Columns  Columns
	Buffer   [][]interface{}
	StreamCh chan *Datastream
	Streams  []*Datastream
	Context  g.Context
	Limit    uint64
	InBytes  uint64
	OutBytes uint64

	Ready          bool
	Inferred       bool
	FsURL          string
	OnSchemaChange func(int, ColumnType) error

	StreamMap map[string]*Datastream
	// contains filtered or unexported fields
}

Dataflow is a collection of concurrent Datastreams

func MakeDataFlow ¶

func MakeDataFlow(dss ...*Datastream) (df *Dataflow, err error)

MakeDataFlow creates a dataflow from datastreams

func (*Dataflow) AddInBytes ¶ added in v0.3.0

func (df *Dataflow) AddInBytes(bytes uint64)

AddInBytes adds ingress bytes

func (*Dataflow) AddOutBytes ¶ added in v0.3.0

func (df *Dataflow) AddOutBytes(bytes uint64)

AddOutBytes adds egress bytes

func (*Dataflow) Bytes ¶

func (df *Dataflow) Bytes() (inBytes, outBytes uint64)

func (*Dataflow) CleanUp ¶ added in v0.3.0

func (df *Dataflow) CleanUp()

CleanUp runs the deferred functions

func (*Dataflow) Close ¶

func (df *Dataflow) Close()

Close closes the df

func (*Dataflow) Collect ¶ added in v0.3.104

func (df *Dataflow) Collect() (data Dataset, err error)

Collect reads from one or more streams and returns a dataset

func (*Dataflow) Count ¶

func (df *Dataflow) Count() (cnt uint64)

Count returns the aggregate count

func (*Dataflow) Defer ¶

func (df *Dataflow) Defer(f func())

Defer runs a given function at close of the Dataflow

func (*Dataflow) DsTotalBytes ¶ added in v0.3.0

func (df *Dataflow) DsTotalBytes() (bytes uint64)

func (*Dataflow) Err ¶

func (df *Dataflow) Err() (err error)

Err returns the error, if any

func (*Dataflow) GetFinal ¶ added in v0.3.67

func (df *Dataflow) GetFinal(dsID string) (ds *Datastream)

GetFinal returns the final downstream ds (mapped or shaped)

func (*Dataflow) IsClosed ¶

func (df *Dataflow) IsClosed() bool

IsClosed is true if the df is closed

func (*Dataflow) IsEmpty ¶

func (df *Dataflow) IsEmpty() bool

IsEmpty returns true if the ds.Rows channels of all streams are empty

func (*Dataflow) MakeStreamCh ¶ added in v0.3.136

func (df *Dataflow) MakeStreamCh() (streamCh chan *Datastream)

MakeStreamCh determines whether to merge all the streams into one or keep them separate. If data is small per stream, it's best to merge. For example, BigQuery has limits on the number of operations that can be called within a time limit.

func (*Dataflow) Pause ¶ added in v0.3.111

func (df *Dataflow) Pause()

Pause pauses all streams

func (*Dataflow) PushStreamChan ¶ added in v0.3.102

func (df *Dataflow) PushStreamChan(dsCh chan *Datastream)

func (*Dataflow) ReplaceStream ¶ added in v0.3.67

func (df *Dataflow) ReplaceStream(old, new *Datastream)

ReplaceStream adds to stream map for downstream ds (mapped or shaped)

func (*Dataflow) ResetStats ¶

func (df *Dataflow) ResetStats()

ResetStats resets the stats

func (*Dataflow) SetColumns ¶

func (df *Dataflow) SetColumns(columns []Column)

SetColumns sets the columns

func (*Dataflow) SetEmpty ¶

func (df *Dataflow) SetEmpty()

SetEmpty sets all underlying datastreams empty

func (*Dataflow) SetReady ¶ added in v0.3.102

func (df *Dataflow) SetReady()

SetReady sets the df.ready

func (*Dataflow) Size ¶

func (df *Dataflow) Size() int

Size is the number of streams

func (*Dataflow) SyncStats ¶

func (df *Dataflow) SyncStats()

SyncStats sync stream processor stats aggregated to the df.Columns

func (*Dataflow) Unpause ¶ added in v0.3.111

func (df *Dataflow) Unpause()

Unpause unpauses all streams

func (*Dataflow) WaitReady ¶

func (df *Dataflow) WaitReady() error

WaitReady waits until dataflow is ready

type Dataset ¶

type Dataset struct {
	Result        *sqlx.Rows
	Columns       Columns
	Rows          [][]interface{}
	SQL           string
	Duration      float64
	Sp            *StreamProcessor
	Inferred      bool
	SafeInference bool
	NoTrace       bool
}

Dataset is a query returned dataset

func NewDataset ¶

func NewDataset(columns Columns) (data Dataset)

NewDataset returns a new dataset

func NewDatasetFromMap ¶

func NewDatasetFromMap(m map[string]interface{}) (data Dataset)

NewDatasetFromMap returns a new dataset built from the provided map

func ReadCsv ¶

func ReadCsv(path string) (Dataset, error)

ReadCsv reads CSV and returns dataset

func (*Dataset) Append ¶

func (data *Dataset) Append(row []interface{})

Append appends a new row

func (*Dataset) ColValues ¶

func (data *Dataset) ColValues(col int) []interface{}

ColValues returns the values of one column as an array

func (*Dataset) ColValuesStr ¶

func (data *Dataset) ColValuesStr(col int) []string

ColValuesStr returns the values of one column as an array of strings

func (*Dataset) FirstRow ¶

func (data *Dataset) FirstRow() []interface{}

FirstRow returns the first row

func (*Dataset) FirstVal ¶

func (data *Dataset) FirstVal() interface{}

FirstVal returns the first value from the first row

func (*Dataset) GetFields ¶

func (data *Dataset) GetFields(lower ...bool) []string

GetFields returns the fields of the Data

func (*Dataset) InferColumnTypes ¶

func (data *Dataset) InferColumnTypes()

InferColumnTypes determines the columns types

func (*Dataset) Records ¶

func (data *Dataset) Records() []map[string]interface{}

Records returns rows of maps

func (*Dataset) SetFields ¶

func (data *Dataset) SetFields(fields []string)

SetFields sets the fields/columns of the Dataset

func (*Dataset) Stream ¶

func (data *Dataset) Stream() *Datastream

Stream returns a datastream of the dataset

func (*Dataset) ToJSONMap ¶

func (data *Dataset) ToJSONMap() map[string]interface{}

ToJSONMap converts to a JSON object

func (*Dataset) WriteCsv ¶

func (data *Dataset) WriteCsv(path string) error

WriteCsv writes to a csv file

type Datastream ¶

type Datastream struct {
	Columns       Columns
	Rows          chan []interface{}
	Buffer        [][]interface{}
	Count         uint64
	Context       g.Context
	Ready         bool
	Bytes         uint64
	Sp            *StreamProcessor
	SafeInference bool
	NoTrace       bool
	Inferred      bool

	Metadata Metadata // map of column name to metadata type
	// contains filtered or unexported fields
}

Datastream is a stream of rows

func MergeDataflow ¶

func MergeDataflow(df *Dataflow) (ds *Datastream)

MergeDataflow merges the dataflow streams into one

func NewDatastream ¶

func NewDatastream(columns Columns) (ds *Datastream)

NewDatastream returns a new datastream

func NewDatastreamContext ¶

func NewDatastreamContext(ctx context.Context, columns Columns) (ds *Datastream)

NewDatastreamContext returns a new datastream using the provided context

func NewDatastreamIt ¶

func NewDatastreamIt(ctx context.Context, columns Columns, nextFunc func(it *Iterator) bool) (ds *Datastream)

NewDatastreamIt with it

func ReadCsvStream ¶

func ReadCsvStream(path string) (ds *Datastream, err error)

ReadCsvStream reads CSV and returns a datastream

func (*Datastream) AddBytes ¶

func (ds *Datastream) AddBytes(b int64)

AddBytes adds bytes as processed

func (*Datastream) Chunk ¶

func (ds *Datastream) Chunk(limit uint64) (chDs chan *Datastream)

Chunk splits the datastream into chunk datastreams (in sequence)

func (*Datastream) Clone ¶ added in v0.0.5

func (ds *Datastream) Clone() *Datastream

Clone returns a new datastream of the same source

func (*Datastream) Close ¶

func (ds *Datastream) Close()

Close closes the datastream

func (*Datastream) Collect ¶

func (ds *Datastream) Collect(limit int) (Dataset, error)

Collect reads a stream and returns a dataset. A limit of 0 is unlimited.

func (*Datastream) ConsumeCsvReader ¶ added in v0.1.0

func (ds *Datastream) ConsumeCsvReader(reader io.Reader) (err error)

ConsumeCsvReader uses the provided reader to stream rows

func (*Datastream) ConsumeJsonReader ¶ added in v0.1.0

func (ds *Datastream) ConsumeJsonReader(reader io.Reader) (err error)

ConsumeJsonReader uses the provided reader to stream JSON. This will put each JSON rec as one string value so the payload can be processed downstream.

func (*Datastream) ConsumeXmlReader ¶ added in v0.3.1

func (ds *Datastream) ConsumeXmlReader(reader io.Reader) (err error)

ConsumeXmlReader uses the provided reader to stream XML. This will put each XML rec as one string value so the payload can be processed downstream.

func (*Datastream) Defer ¶

func (ds *Datastream) Defer(f func())

Defer runs a given function at close of the Datastream

func (*Datastream) Df ¶ added in v0.3.111

func (ds *Datastream) Df() *Dataflow

func (*Datastream) Err ¶

func (ds *Datastream) Err() (err error)

Err returns the error, if any

func (*Datastream) GetFields ¶

func (ds *Datastream) GetFields(args ...bool) []string

GetFields returns the fields of the Data

func (*Datastream) IsClosed ¶

func (ds *Datastream) IsClosed() bool

IsClosed is true if the ds is closed

func (*Datastream) IsEmpty ¶

func (ds *Datastream) IsEmpty() bool

IsEmpty returns true if the ds.Rows channel is empty

func (*Datastream) Map ¶

func (ds *Datastream) Map(newColumns Columns, transf func([]interface{}) []interface{}) (nDs *Datastream)

Map applies the provided function to every row and returns the result

func (*Datastream) MapParallel ¶

func (ds *Datastream) MapParallel(transf func([]interface{}) []interface{}, numWorkers int) (nDs *Datastream)

MapParallel applies the provided function to every row in parallel and returns the result. Order is not maintained.

func (*Datastream) NewCsvBufferReader ¶

func (ds *Datastream) NewCsvBufferReader(limit int, bytesLimit int64) *bytes.Reader

NewCsvBufferReader creates a Reader with limit. If limit == 0, then read all rows.

func (*Datastream) NewCsvBufferReaderChnl ¶

func (ds *Datastream) NewCsvBufferReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *bytes.Reader)

NewCsvBufferReaderChnl provides a channel of readers as the limit is reached. Data is read in memory, whereas NewCsvReaderChnl does not hold data in memory.

func (*Datastream) NewCsvBytesChnl ¶

func (ds *Datastream) NewCsvBytesChnl(chunkRowSize int) (dataChn chan *[]byte)

NewCsvBytesChnl returns a channel yielding chunks of CSV bytes

func (*Datastream) NewCsvReader ¶

func (ds *Datastream) NewCsvReader(rowLimit int, bytesLimit int64) *io.PipeReader

NewCsvReader creates a Reader with limit. If limit == 0, then read all rows.

func (*Datastream) NewCsvReaderChnl ¶

func (ds *Datastream) NewCsvReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *io.PipeReader)

NewCsvReaderChnl provides a channel of readers as the limit is reached. Each channel flows as fast as the consumer consumes.

func (*Datastream) NewJsonLinesReaderChnl ¶ added in v0.3.16

func (ds *Datastream) NewJsonLinesReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *io.PipeReader)

NewJsonLinesReaderChnl provides a channel of readers as the limit is reached. Each channel flows as fast as the consumer consumes.

func (*Datastream) NewJsonReaderChnl ¶ added in v0.3.16

func (ds *Datastream) NewJsonReaderChnl(rowLimit int, bytesLimit int64) (readerChn chan *io.PipeReader)

NewJsonReaderChnl provides a channel of readers as the limit is reached. Each channel flows as fast as the consumer consumes.

func (*Datastream) Push ¶

func (ds *Datastream) Push(row []interface{})

Push pushes a row to the datastream

func (*Datastream) Records ¶

func (ds *Datastream) Records() <-chan map[string]interface{}

Records return rows of maps

func (*Datastream) ResetStats ¶

func (ds *Datastream) ResetStats()

ResetStats resets the stats

func (*Datastream) SetConfig ¶

func (ds *Datastream) SetConfig(configMap map[string]string)

SetConfig sets the ds.config values

func (*Datastream) SetEmpty ¶

func (ds *Datastream) SetEmpty()

SetEmpty sets the ds.Rows channel as empty

func (*Datastream) SetFields ¶

func (ds *Datastream) SetFields(fields []string)

SetFields sets the fields/columns of the Datastream

func (*Datastream) SetMetadata ¶ added in v0.3.109

func (ds *Datastream) SetMetadata(jsonStr string)

func (*Datastream) SetReady ¶ added in v0.3.102

func (ds *Datastream) SetReady()

SetReady sets the ds.ready

func (*Datastream) Shape ¶

func (ds *Datastream) Shape(columns Columns) (nDs *Datastream, err error)

Shape changes the column types as needed, to the provided columns var. It will cast the already wrongly casted rows, and not recast the correctly casted rows.

func (*Datastream) Split ¶ added in v0.3.0

func (ds *Datastream) Split(numStreams ...int) (dss []*Datastream)

Split splits the datastream into parallel datastreams

func (*Datastream) Start ¶

func (ds *Datastream) Start() (err error)

Start generates the stream. Should cycle the Iter Func until done.

func (*Datastream) WaitReady ¶

func (ds *Datastream) WaitReady() error

WaitReady waits until datastream is ready

type GzipCompressor ¶ added in v0.2.3

type GzipCompressor struct {
	Compressor
	// contains filtered or unexported fields
}

func (*GzipCompressor) Compress ¶ added in v0.2.3

func (cp *GzipCompressor) Compress(reader io.Reader) io.Reader

Compress uses gzip to compress

func (*GzipCompressor) Decompress ¶ added in v0.2.3

func (cp *GzipCompressor) Decompress(reader io.Reader) (gReader io.Reader, err error)

Decompress uses gzip to decompress if the input is gzip. Otherwise it returns the same reader.

func (*GzipCompressor) Suffix ¶ added in v0.2.3

func (cp *GzipCompressor) Suffix() string

type Iterator ¶

type Iterator struct {
	Row     []interface{}
	Counter uint64
	Context g.Context
	Closed  bool
	// contains filtered or unexported fields
}

Iterator is the row provider for a datastream

func (*Iterator) Ds ¶ added in v0.3.126

func (it *Iterator) Ds() *Datastream

type KeyValue ¶ added in v0.3.109

type KeyValue struct {
	Key   string      `json:"key"`
	Value interface{} `json:"value"`
}

type Metadata ¶ added in v0.3.109

type Metadata struct {
	StreamURL KeyValue `json:"stream_url"`
	LoadedAt  KeyValue `json:"loaded_at"`
}

func (*Metadata) AsMap ¶ added in v0.3.109

func (m *Metadata) AsMap() map[string]interface{}

AsMap returns the metadata as a map.

type NoneCompressor ¶ added in v0.2.3

type NoneCompressor struct {
	Compressor
	// contains filtered or unexported fields
}

func (*NoneCompressor) Compress ¶ added in v0.2.3

func (cp *NoneCompressor) Compress(reader io.Reader) io.Reader

func (*NoneCompressor) Decompress ¶ added in v0.2.3

func (cp *NoneCompressor) Decompress(reader io.Reader) (gReader io.Reader, err error)

func (*NoneCompressor) Suffix ¶ added in v0.2.3

func (cp *NoneCompressor) Suffix() string

type Parquet ¶

type Parquet struct {
	Path    string
	Columns []Column
	File    *os.File
	PFile   source.ParquetFile
	Data    *Dataset
}

Parquet is a parquet object

func (*Parquet) ReadStream ¶

func (p *Parquet) ReadStream() (*Datastream, error)

ReadStream reads the Parquet stream into a Datastream. See https://github.com/xitongsys/parquet-go/blob/master/example/read_partial.go

func (*Parquet) WriteStream ¶

func (p *Parquet) WriteStream(ds *Datastream) error

WriteStream writes the datastream to a Parquet file.

type SSHClient ¶

type SSHClient struct {
	Host       string
	Port       int
	User       string
	Password   string
	TgtHost    string
	TgtPort    int
	PrivateKey string
	Err        error
	// contains filtered or unexported fields
}

SSHClient is a client for connecting to an SSH server, with the main goal of forwarding ports.

func (*SSHClient) Close ¶

func (s *SSHClient) Close()

Close stops the client connection

func (*SSHClient) Connect ¶

func (s *SSHClient) Connect() (err error)

Connect connects to the server

func (*SSHClient) GetOutput ¶

func (s *SSHClient) GetOutput() (stdout string, stderr string)

GetOutput returns the stdout and stderr outputs.

func (*SSHClient) OpenPortForward ¶

func (s *SSHClient) OpenPortForward() (localPort int, err error)

OpenPortForward forwards the port as specified

func (*SSHClient) RunAsProcess ¶

func (s *SSHClient) RunAsProcess() (localPort int, err error)

RunAsProcess uses a separate process, which makes it possible to use public key auth. See https://git-scm.com/book/pt-pt/v2/Git-no-Servidor-Generating-Your-SSH-Public-Key

func (*SSHClient) SftpClient ¶

func (s *SSHClient) SftpClient() (sftpClient *sftp.Client, err error)

SftpClient returns an SftpClient

type SnappyCompressor ¶ added in v0.2.3

type SnappyCompressor struct {
	Compressor
	// contains filtered or unexported fields
}

func (*SnappyCompressor) Compress ¶ added in v0.2.3

func (cp *SnappyCompressor) Compress(reader io.Reader) io.Reader

Compress uses snappy to compress

func (*SnappyCompressor) Decompress ¶ added in v0.2.3

func (cp *SnappyCompressor) Decompress(reader io.Reader) (sReader io.Reader, err error)

Decompress uses snappy to decompress if the input is snappy. Otherwise it returns the same reader.

func (*SnappyCompressor) Suffix ¶ added in v0.2.3

func (cp *SnappyCompressor) Suffix() string

type StreamProcessor ¶

type StreamProcessor struct {
	N uint64
	// contains filtered or unexported fields
}

StreamProcessor processes rows and values

func NewStreamProcessor ¶

func NewStreamProcessor() *StreamProcessor

NewStreamProcessor returns a new StreamProcessor

func (*StreamProcessor) CastRow ¶

func (sp *StreamProcessor) CastRow(row []interface{}, columns []Column) []interface{}

CastRow casts each value of a row. Slows down processing by about 40%?

func (*StreamProcessor) CastToString ¶

func (sp *StreamProcessor) CastToString(i int, val interface{}, valType ...ColumnType) string

CastToString casts the value to a string. It is used for CSV writing. Slows processing down by 5% with upstream CastRow, or 35% without upstream CastRow.

func (*StreamProcessor) CastToTime ¶

func (sp *StreamProcessor) CastToTime(i interface{}) (t time.Time, err error)

CastToTime converts interface to time

func (*StreamProcessor) CastType ¶

func (sp *StreamProcessor) CastType(val interface{}, typ ColumnType) interface{}

CastType casts the type of an interface. CastType is used to cast the interface placeholders?

func (*StreamProcessor) CastVal ¶

func (sp *StreamProcessor) CastVal(i int, val interface{}, col *Column) interface{}

CastVal casts values with stats collection, which degrades performance by ~10%. Benchmark with: go test -benchmem -run='^$' github.com/flarco/dbio/iop -bench '^BenchmarkProcessVal'

func (*StreamProcessor) CastValWithoutStats ¶

func (sp *StreamProcessor) CastValWithoutStats(i int, val interface{}, typ ColumnType) interface{}

CastValWithoutStats casts the value without counting stats

func (*StreamProcessor) GetType ¶

func (sp *StreamProcessor) GetType(val interface{}) (typ ColumnType)

GetType returns the type of an interface

func (*StreamProcessor) ParseString ¶

func (sp *StreamProcessor) ParseString(s string, jj ...int) interface{}

ParseString returns an interface. string: "varchar"; integer: "integer"; decimal: "decimal"; date: "date"; datetime: "timestamp"; timestamp: "timestamp"; text: "text".

func (*StreamProcessor) ParseTime ¶

func (sp *StreamProcessor) ParseTime(i interface{}) (t time.Time, err error)

ParseTime parses a date string and returns time.Time

func (*StreamProcessor) ParseVal ¶

func (sp *StreamProcessor) ParseVal(val interface{}) interface{}

ParseVal parses the value into its appropriate type

func (*StreamProcessor) ProcessRow ¶

func (sp *StreamProcessor) ProcessRow(row []interface{}) []interface{}

ProcessRow processes a row

func (*StreamProcessor) ProcessVal ¶

func (sp *StreamProcessor) ProcessVal(val interface{}) interface{}

ProcessVal processes a value

func (*StreamProcessor) SetConfig ¶

func (sp *StreamProcessor) SetConfig(configMap map[string]string)

SetConfig sets the data.Sp.config values

type ZStandardCompressor ¶ added in v0.2.3

type ZStandardCompressor struct {
	Compressor
	// contains filtered or unexported fields
}

func (*ZStandardCompressor) Compress ¶ added in v0.2.3

func (cp *ZStandardCompressor) Compress(reader io.Reader) io.Reader

Compress uses zstandard to compress

func (*ZStandardCompressor) Decompress ¶ added in v0.2.3

func (cp *ZStandardCompressor) Decompress(reader io.Reader) (sReader io.Reader, err error)

Decompress uses zstandard to decompress if the input is zstandard. Otherwise it returns the same reader.

func (*ZStandardCompressor) Suffix ¶ added in v0.2.3

func (cp *ZStandardCompressor) Suffix() string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

README ¶

Input-Process-Output (ipo)

Documentation ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func AutoDecompress ¶ added in v0.2.3

func CleanHeaderRow ¶

func CompareColumns ¶

func CreateDummyFields ¶

func GetISO8601DateMap ¶

func IsDummy ¶

func Iso8601ToGoLayout ¶ added in v0.3.49

func MakeRowsChan ¶

func NewJSONStream ¶ added in v0.3.1

func Row ¶

func ScanCarrRet ¶

func Unzip ¶

Types ¶

type CSV ¶

func (*CSV) InferSchema ¶

func (*CSV) NewReader ¶

func (*CSV) Read ¶

func (*CSV) ReadStream ¶

func (*CSV) Sample ¶

func (*CSV) SetFields ¶

func (*CSV) WriteStream ¶

type Column ¶

func InferFromStats ¶

func SyncColumns ¶

func (*Column) IsBool ¶

func (*Column) IsDatetime ¶

func (*Column) IsDecimal ¶

func (*Column) IsInteger ¶

func (*Column) IsNumber ¶

func (*Column) IsString ¶

func (*Column) IsUnique ¶ added in v0.3.66

func (*Column) Key ¶ added in v0.3.66

type ColumnStats ¶

func (*ColumnStats) DistinctPercent ¶ added in v0.3.66

func (*ColumnStats) DuplicateCount ¶ added in v0.3.66

func (*ColumnStats) DuplicatePercent ¶ added in v0.3.66

type ColumnType ¶ added in v0.3.66

func (ColumnType) IsBool ¶ added in v0.3.66

func (ColumnType) IsDatetime ¶ added in v0.3.66

func (ColumnType) IsDecimal ¶ added in v0.3.66

func (ColumnType) IsInteger ¶ added in v0.3.66

func (ColumnType) IsJSON ¶ added in v0.3.130

func (ColumnType) IsNumber ¶ added in v0.3.66

func (ColumnType) IsString ¶ added in v0.3.66

type Columns ¶

func NewColumnsFromFields ¶

func (Columns) Clone ¶ added in v0.3.67

func (Columns) Dataset ¶ added in v0.0.5

func (Columns) FieldMap ¶ added in v0.0.5

func (Columns) GetColumn ¶

func (Columns) IsDummy ¶

func (Columns) Names ¶ added in v0.0.5

func (Columns) Normalize ¶

type Compressor ¶

func NewCompressor ¶ added in v0.2.3

type CompressorType ¶

func CompressorTypePtr ¶ added in v0.3.56

type Dataflow ¶

func MakeDataFlow ¶

func NewDataflow ¶

func (*Dataflow) AddInBytes ¶ added in v0.3.0

func (*Dataflow) AddOutBytes ¶ added in v0.3.0

func (*Dataflow) Bytes ¶

func (*Dataflow) CleanUp ¶ added in v0.3.0

func (*Dataflow) Close ¶

func (*Dataflow) Collect ¶ added in v0.3.104

func (*Dataflow) Count ¶

func (*Dataflow) Defer ¶

func (*Dataflow) DsTotalBytes ¶ added in v0.3.0

func (*Dataflow) Err ¶

func (*Dataflow) GetFinal ¶ added in v0.3.67

func (*Dataflow) IsClosed ¶

func (*Dataflow) IsEmpty ¶