Documentation ¶
Index ¶
- type FileReader
- type FileShardInfo
- type FileSource
- func Csv(fileOrPattern string, partitionCount int) *FileSource
- func Orc(fileOrPattern string, partitionCount int) *FileSource
- func Parquet(fileOrPattern string, partitionCount int) *FileSource
- func Tsv(fileOrPattern string, partitionCount int) *FileSource
- func Txt(fileOrPattern string, partitionCount int) *FileSource
- func Zip(fileOrPattern string, partitionCount int) *FileSource
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type FileReader ¶
type FileShardInfo ¶
type FileShardInfo struct { Config map[string]string FileName string FileType string HasHeader bool Fields []string }
func (*FileShardInfo) NewReader ¶
func (ds *FileShardInfo) NewReader(vf filesystem.VirtualFile) (FileReader, error)
func (*FileShardInfo) ReadSplit ¶
func (ds *FileShardInfo) ReadSplit() error
type FileSource ¶
type FileSource struct { Path string HasHeader bool PartitionCount int FileType string Fields []string // contains filtered or unexported fields }
func Csv ¶
func Csv(fileOrPattern string, partitionCount int) *FileSource
func Orc ¶
func Orc(fileOrPattern string, partitionCount int) *FileSource
func Parquet ¶
func Parquet(fileOrPattern string, partitionCount int) *FileSource
func Tsv ¶
func Tsv(fileOrPattern string, partitionCount int) *FileSource
func Txt ¶
func Txt(fileOrPattern string, partitionCount int) *FileSource
func Zip ¶
func Zip(fileOrPattern string, partitionCount int) *FileSource
func (*FileSource) Generate ¶
func (s *FileSource) Generate(f *flow.Flow) *flow.Dataset
Generate generates data shard info, partitions the shards via round robin, and reads each shard on each executor.
func (*FileSource) Select ¶
func (q *FileSource) Select(fields ...string) *FileSource
TODO: adjust the FileSource API to denote which data sources can support columnar reads. Select selects fields that can be pushed down to data sources supporting columnar reads.
func (*FileSource) SetHasHeader ¶
func (q *FileSource) SetHasHeader(hasHeader bool) *FileSource
SetHasHeader sets whether the data contains a header.
Click to show internal directories.
Click to hide internal directories.