filesys

package
v0.4.72 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 12, 2024 License: GPL-3.0 Imports: 42 Imported by: 2

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Delete added in v0.3.86

func Delete(fs FileSysClient, path string) (err error)

Delete deletes the provided path, with some safeguards so as not to accidentally delete a root path

func GetDataflow

func GetDataflow(fs FileSysClient, paths []string, cfg FileStreamConfig) (df *iop.Dataflow, err error)

GetDataflow returns a dataflow from specified paths in specified FileSysClient

func MakeDatastream

func MakeDatastream(reader io.Reader, cfg map[string]string) (ds *iop.Datastream, err error)

MakeDatastream creates a datastream from a reader

func MergeReaders added in v0.3.202

func MergeReaders(fs FileSysClient, fileType FileType, paths ...string) (ds *iop.Datastream, err error)

func ParseURL

func ParseURL(urlStr string) (host string, path string, err error)

ParseURL parses a URL

func ProcessStreamViaTempFile added in v0.3.202

func ProcessStreamViaTempFile(ds *iop.Datastream) (nDs *iop.Datastream, err error)

func TestFsPermissions

func TestFsPermissions(fs FileSysClient, pathURL string) (err error)

TestFsPermissions tests read/write permissions

func Write

func Write(reader io.Reader, writer io.Writer) (bw int64, err error)

Write writes to a writer from a reader

func WriteDatastream

func WriteDatastream(writer io.Writer, ds *iop.Datastream) (bw int64, err error)

WriteDatastream writes a datastream to a writer. Alternatively, use fs.Write(path, ds.NewCsvReader(0))

Types

type AzureFileSysClient

type AzureFileSysClient struct {
	BaseFileSysClient
	// contains filtered or unexported fields
}

AzureFileSysClient is a file system client to write files to Microsoft's Azure file sys.

func (*AzureFileSysClient) Buckets added in v0.3.12

func (fs *AzureFileSysClient) Buckets() (paths []string, err error)

Buckets returns the containers found in the project

func (*AzureFileSysClient) Connect

func (fs *AzureFileSysClient) Connect() (err error)

Connect initiates the fs client connection

func (*AzureFileSysClient) GetReader

func (fs *AzureFileSysClient) GetReader(urlStr string) (reader io.Reader, err error)

GetReader returns an Azure FS reader

func (*AzureFileSysClient) Init

func (fs *AzureFileSysClient) Init(ctx context.Context) (err error)

Init initializes the fs client

func (*AzureFileSysClient) List

func (fs *AzureFileSysClient) List(url string) (paths []string, err error)

List lists objects in path

func (*AzureFileSysClient) ListRecursive

func (fs *AzureFileSysClient) ListRecursive(url string) (paths []string, err error)

ListRecursive lists objects in path recursively

func (*AzureFileSysClient) Write

func (fs *AzureFileSysClient) Write(urlStr string, reader io.Reader) (bw int64, err error)

type BaseFileSysClient

type BaseFileSysClient struct {
	FileSysClient
	// contains filtered or unexported fields
}

BaseFileSysClient is the base file system type.

func (*BaseFileSysClient) Buckets added in v0.3.12

func (fs *BaseFileSysClient) Buckets() (paths []string, err error)

Buckets returns the buckets found in the account

func (*BaseFileSysClient) Client

func (fs *BaseFileSysClient) Client() *BaseFileSysClient

Client provides a pointer to itself

func (*BaseFileSysClient) Context

func (fs *BaseFileSysClient) Context() (context *g.Context)

Context provides a pointer to context

func (*BaseFileSysClient) FsType

func (fs *BaseFileSysClient) FsType() dbio.Type

FsType returns the type of the client

func (*BaseFileSysClient) GetDatastream

func (fs *BaseFileSysClient) GetDatastream(urlStr string) (ds *iop.Datastream, err error)

GetDatastream returns a datastream for the given path

func (*BaseFileSysClient) GetProp

func (fs *BaseFileSysClient) GetProp(key string, keys ...string) string

GetProp returns the value of a property

func (*BaseFileSysClient) GetReaders

func (fs *BaseFileSysClient) GetReaders(paths ...string) (readers []io.Reader, err error)

GetReaders returns one or more readers from specified paths in specified FileSysClient

func (*BaseFileSysClient) GetRefTs added in v0.3.110

func (fs *BaseFileSysClient) GetRefTs() time.Time

func (*BaseFileSysClient) Props added in v0.3.101

func (fs *BaseFileSysClient) Props() map[string]string

Props returns a copy of the properties map

func (*BaseFileSysClient) ReadDataflow

func (fs *BaseFileSysClient) ReadDataflow(url string, cfg ...FileStreamConfig) (df *iop.Dataflow, err error)

ReadDataflow reads a dataflow from the given url

func (*BaseFileSysClient) Self

func (fs *BaseFileSysClient) Self() FileSysClient

Self returns the respective connection instance. This is useful to refer back to a subclass method from the superclass level (aka overloading).

func (*BaseFileSysClient) SetProp

func (fs *BaseFileSysClient) SetProp(key string, val string)

SetProp sets the value of a property

func (*BaseFileSysClient) WriteDataflow

func (fs *BaseFileSysClient) WriteDataflow(df *iop.Dataflow, url string) (bw int64, err error)

WriteDataflow writes a dataflow to a file sys.

func (*BaseFileSysClient) WriteDataflowReady

func (fs *BaseFileSysClient) WriteDataflowReady(df *iop.Dataflow, url string, fileReadyChn chan FileReady) (bw int64, err error)

WriteDataflowReady writes to a file sys and notifies the fileReady chan.

type Excel

type Excel struct {
	File   *excelize.File
	Sheets []string
	Path   string
	// contains filtered or unexported fields
}

Excel represents an Excel object pointing to its file

func NewExcel

func NewExcel() (xls *Excel)

NewExcel creates a new excel file

func NewExcelFromFile

func NewExcelFromFile(path string) (xls *Excel, err error)

NewExcelFromFile returns a new Excel instance from a local file

func NewExcelFromReader

func NewExcelFromReader(reader io.Reader) (xls *Excel, err error)

NewExcelFromReader returns a new Excel instance from a reader

func (*Excel) GetDataset

func (xls *Excel) GetDataset(sheet string) (data iop.Dataset)

GetDataset returns a dataset of the provided sheet

func (*Excel) GetDatasetFromRange

func (xls *Excel) GetDatasetFromRange(sheet, cellRange string) (data iop.Dataset, err error)

GetDatasetFromRange returns a dataset of the provided sheet / range. cellRange example: `$AH$13:$AI$20` or `AH13:AI20` or `A:E`

func (*Excel) RefreshSheets

func (xls *Excel) RefreshSheets() (err error)

RefreshSheets refreshes sheet index data

func (*Excel) WriteSheet

func (xls *Excel) WriteSheet(shtName string, ds *iop.Datastream, mode string) (err error)

WriteSheet writes a datastream into a sheet. mode can be: `new`, `append` or `overwrite`. Default is `new`

func (*Excel) WriteToFile

func (xls *Excel) WriteToFile(path string) (err error)

WriteToFile writes to a file

func (*Excel) WriteToWriter

func (xls *Excel) WriteToWriter(w io.Writer) (err error)

WriteToWriter writes to a provided writer

type FileReady added in v0.3.188

type FileReady struct {
	Columns iop.Columns
	URI     string
	BytesW  int64
	BatchID string
}

type FileStreamConfig added in v0.3.49

type FileStreamConfig struct {
	Limit   int
	Columns []string
}

type FileSysClient

type FileSysClient interface {
	Self() FileSysClient
	Init(ctx context.Context) (err error)
	Client() *BaseFileSysClient
	Context() (context *g.Context)
	FsType() dbio.Type
	GetReader(path string) (reader io.Reader, err error)
	GetReaders(paths ...string) (readers []io.Reader, err error)
	GetDatastream(path string) (ds *iop.Datastream, err error)
	GetWriter(path string) (writer io.Writer, err error)
	Buckets() (paths []string, err error)
	List(path string) (paths []string, err error)
	ListRecursive(path string) (paths []string, err error)
	Write(path string, reader io.Reader) (bw int64, err error)

	ReadDataflow(url string, cfg ...FileStreamConfig) (df *iop.Dataflow, err error)
	WriteDataflow(df *iop.Dataflow, url string) (bw int64, err error)
	WriteDataflowReady(df *iop.Dataflow, url string, fileReadyChn chan FileReady) (bw int64, err error)
	GetProp(key string, keys ...string) (val string)
	SetProp(key string, val string)
	MkdirAll(path string) (err error)
	// contains filtered or unexported methods
}

FileSysClient is a client for file systems such as local, s3, hdfs, azure storage, google cloud storage

func NewFileSysClient

func NewFileSysClient(fst dbio.Type, props ...string) (fsClient FileSysClient, err error)

NewFileSysClient creates a file system client such as local, s3, azure storage, google cloud storage. props are provided as `"Prop1=Value1", "Prop2=Value2", ...`

func NewFileSysClientContext

func NewFileSysClientContext(ctx context.Context, fst dbio.Type, props ...string) (fsClient FileSysClient, err error)

NewFileSysClientContext creates a file system client with context such as local, s3, azure storage, google cloud storage. props are provided as `"Prop1=Value1", "Prop2=Value2", ...`

func NewFileSysClientFromURL

func NewFileSysClientFromURL(url string, props ...string) (fsClient FileSysClient, err error)

NewFileSysClientFromURL returns the proper fs client for the given path. props are provided as `"Prop1=Value1", "Prop2=Value2", ...`

func NewFileSysClientFromURLContext

func NewFileSysClientFromURLContext(ctx context.Context, url string, props ...string) (fsClient FileSysClient, err error)

NewFileSysClientFromURLContext returns the proper fs client for the given path with context. props are provided as `"Prop1=Value1", "Prop2=Value2", ...`

type FileType added in v0.3.169

type FileType string
const FileTypeAvro FileType = "avro"
const FileTypeCsv FileType = "csv"
const FileTypeJson FileType = "json"
const FileTypeJsonLines FileType = "jsonlines"
const FileTypeNone FileType = ""
const FileTypeParquet FileType = "parquet"
const FileTypeSAS FileType = "sas7bdat"
const FileTypeXml FileType = "xml"

func InferFileFormat added in v0.4.23

func InferFileFormat(path string) FileType

func PeekFileType added in v0.3.169

func PeekFileType(reader io.Reader) (ft FileType, reader2 io.Reader, err error)

PeekFileType peeks into the file to try to determine the file type. CSV is the default

func (FileType) Ext added in v0.3.216

func (ft FileType) Ext() string

func (FileType) IsJson added in v0.3.216

func (ft FileType) IsJson() bool

type GoogleFileSysClient

type GoogleFileSysClient struct {
	BaseFileSysClient
	// contains filtered or unexported fields
}

GoogleFileSysClient is a file system client to write files to Google Cloud Storage.

func (*GoogleFileSysClient) Buckets added in v0.3.12

func (fs *GoogleFileSysClient) Buckets() (paths []string, err error)

Buckets returns the buckets found in the project

func (*GoogleFileSysClient) Connect

func (fs *GoogleFileSysClient) Connect() (err error)

Connect initiates the Google Cloud Storage client

func (*GoogleFileSysClient) GetReader

func (fs *GoogleFileSysClient) GetReader(path string) (reader io.Reader, err error)

GetReader returns the reader for the given path

func (*GoogleFileSysClient) Init

func (fs *GoogleFileSysClient) Init(ctx context.Context) (err error)

Init initializes the fs client

func (*GoogleFileSysClient) List

func (fs *GoogleFileSysClient) List(path string) (paths []string, err error)

List returns the list of objects

func (*GoogleFileSysClient) ListRecursive

func (fs *GoogleFileSysClient) ListRecursive(path string) (paths []string, err error)

ListRecursive returns the list of objects recursively

func (*GoogleFileSysClient) Write

func (fs *GoogleFileSysClient) Write(path string, reader io.Reader) (bw int64, err error)

type GoogleSheet

type GoogleSheet struct {
	Sheets        []string
	SpreadsheetID string
	// contains filtered or unexported fields
}

GoogleSheet represents a Google Sheet object

func NewGoogleSheet

func NewGoogleSheet(props ...string) (ggs *GoogleSheet, err error)

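NewGoogleSheet creates a new, blank spreadsheet. The title of the new spreadsheet is presumably supplied through props (the signature has no dedicated title parameter; confirm against the implementation)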

func NewGoogleSheetFromURL

func NewGoogleSheetFromURL(urlStr string, props ...string) (ggs *GoogleSheet, err error)

NewGoogleSheetFromURL returns a new GoogleSheet instance from a provided url

func (*GoogleSheet) GetDataset

func (ggs *GoogleSheet) GetDataset(shtName string) (data iop.Dataset, err error)

GetDataset returns a dataset of the sheet

func (*GoogleSheet) GetDatasetFromRange

func (ggs *GoogleSheet) GetDatasetFromRange(shtName, cellRange string) (data iop.Dataset, err error)

GetDatasetFromRange returns a dataset from the specified range

func (*GoogleSheet) RefreshSheets

func (ggs *GoogleSheet) RefreshSheets() (err error)

RefreshSheets refreshes sheets data

func (*GoogleSheet) URL

func (ggs *GoogleSheet) URL() string

func (*GoogleSheet) WriteSheet

func (ggs *GoogleSheet) WriteSheet(shtName string, ds *iop.Datastream, mode string) (err error)

WriteSheet writes a datastream into a sheet. mode can be: `new`, `append` or `overwrite`. Default is `new`

type HTTPFileSysClient

type HTTPFileSysClient struct {
	BaseFileSysClient
	// contains filtered or unexported fields
}

HTTPFileSysClient is for HTTP files

func (*HTTPFileSysClient) Connect

func (fs *HTTPFileSysClient) Connect() (err error)

Connect initiates the http client

func (*HTTPFileSysClient) GetReader

func (fs *HTTPFileSysClient) GetReader(url string) (reader io.Reader, err error)

GetReader gets a reader for an HTTP resource (download)

func (*HTTPFileSysClient) Init

func (fs *HTTPFileSysClient) Init(ctx context.Context) (err error)

Init initializes the fs client

func (*HTTPFileSysClient) List

func (fs *HTTPFileSysClient) List(url string) (paths []string, err error)

List lists all urls on the page

func (*HTTPFileSysClient) ListRecursive

func (fs *HTTPFileSysClient) ListRecursive(url string) (paths []string, err error)

ListRecursive lists all urls on the page

func (*HTTPFileSysClient) Write

func (fs *HTTPFileSysClient) Write(urlStr string, reader io.Reader) (bw int64, err error)

Write uploads an HTTP file

type LocalFileSysClient

type LocalFileSysClient struct {
	BaseFileSysClient
	// contains filtered or unexported fields
}

LocalFileSysClient is a file system client to write files to the local file sys.

func (*LocalFileSysClient) GetDatastream

func (fs *LocalFileSysClient) GetDatastream(path string) (ds *iop.Datastream, err error)

GetDatastream returns a datastream for the given path

func (*LocalFileSysClient) GetReader

func (fs *LocalFileSysClient) GetReader(path string) (reader io.Reader, err error)

GetReader returns a reader for the given path

func (*LocalFileSysClient) GetWriter

func (fs *LocalFileSysClient) GetWriter(path string) (writer io.Writer, err error)

GetWriter creates the file if non-existent and returns a writer

func (*LocalFileSysClient) Init

func (fs *LocalFileSysClient) Init(ctx context.Context) (err error)

Init initializes the fs client

func (*LocalFileSysClient) List

func (fs *LocalFileSysClient) List(path string) (paths []string, err error)

List lists the files in the given directory path

func (*LocalFileSysClient) ListRecursive

func (fs *LocalFileSysClient) ListRecursive(path string) (paths []string, err error)

ListRecursive lists the files in the given directory path recursively

func (*LocalFileSysClient) MkdirAll

func (fs *LocalFileSysClient) MkdirAll(path string) (err error)

MkdirAll creates child directories

func (*LocalFileSysClient) Write

func (fs *LocalFileSysClient) Write(filePath string, reader io.Reader) (bw int64, err error)

Write creates the file if non-existent and writes from the reader

type PathNode

type PathNode struct {
	Name         string    `json:"name"`
	IsDir        bool      `json:"is_dir"`
	Size         int64     `json:"size,omitempty"`
	LastModified time.Time `json:"last_modified,omitempty"`
	Children     PathNodes `json:"children,omitempty"`
}

PathNode represents a file node

type PathNodes

type PathNodes []PathNode

PathNodes represents file nodes

func (*PathNodes) Add

func (pn *PathNodes) Add(p PathNode)

Add adds a new node to the list

func (PathNodes) List

func (pn PathNodes) List() (paths []string)

List gives a list of recursive paths

type S3FileSysClient

type S3FileSysClient struct {
	BaseFileSysClient

	RegionMap map[string]string
	// contains filtered or unexported fields
}

S3FileSysClient is a file system client to write files to Amazon's S3 file sys.

func (*S3FileSysClient) Buckets added in v0.3.12

func (fs *S3FileSysClient) Buckets() (paths []string, err error)

Buckets returns the buckets found in the account

func (*S3FileSysClient) Connect

func (fs *S3FileSysClient) Connect() (err error)

Connect initiates the S3 client

func (*S3FileSysClient) GenerateS3PreSignedURL added in v0.3.159

func (fs *S3FileSysClient) GenerateS3PreSignedURL(s3URL string, dur time.Duration) (httpURL string, err error)

func (*S3FileSysClient) GetReader

func (fs *S3FileSysClient) GetReader(path string) (reader io.Reader, err error)

GetReader returns a reader for the given path. path should specify the full path with scheme: `s3://my_bucket/key/to/file.txt` or `s3://my_bucket/key/to/directory`

func (*S3FileSysClient) GetWriter

func (fs *S3FileSysClient) GetWriter(path string) (writer io.Writer, err error)

GetWriter creates the file if non-existent and returns a writer. path should specify the full path with scheme: `s3://my_bucket/key/to/file.txt`

func (*S3FileSysClient) Init

func (fs *S3FileSysClient) Init(ctx context.Context) (err error)

Init initializes the fs client

func (*S3FileSysClient) List

func (fs *S3FileSysClient) List(path string) (paths []string, err error)

List lists the files in the given directory path. path should specify the full path with scheme: `s3://my_bucket/key/to/directory`

func (*S3FileSysClient) ListRecursive

func (fs *S3FileSysClient) ListRecursive(path string) (paths []string, err error)

ListRecursive lists the files in the given directory path recursively. path should specify the full path with scheme: `s3://my_bucket/key/to/directory`

func (*S3FileSysClient) Write

func (fs *S3FileSysClient) Write(path string, reader io.Reader) (bw int64, err error)

type SftpFileSysClient

type SftpFileSysClient struct {
	BaseFileSysClient
	// contains filtered or unexported fields
}

SftpFileSysClient is for SFTP / SSH file ops

func (*SftpFileSysClient) Connect

func (fs *SftpFileSysClient) Connect() (err error)

Connect initiates the SFTP / SSH client

func (*SftpFileSysClient) GetReader

func (fs *SftpFileSysClient) GetReader(urlStr string) (reader io.Reader, err error)

GetReader returns a reader for the given path

func (*SftpFileSysClient) GetWriter

func (fs *SftpFileSysClient) GetWriter(urlStr string) (writer io.Writer, err error)

GetWriter creates the file if non-existent and returns a writer

func (*SftpFileSysClient) Init

func (fs *SftpFileSysClient) Init(ctx context.Context) (err error)

Init initializes the fs client

func (*SftpFileSysClient) List

func (fs *SftpFileSysClient) List(url string) (paths []string, err error)

List lists objects in path

func (*SftpFileSysClient) ListRecursive

func (fs *SftpFileSysClient) ListRecursive(url string) (paths []string, err error)

ListRecursive lists objects in path recursively

func (*SftpFileSysClient) MkdirAll

func (fs *SftpFileSysClient) MkdirAll(path string) (err error)

MkdirAll creates child directories

func (*SftpFileSysClient) Write

func (fs *SftpFileSysClient) Write(urlStr string, reader io.Reader) (bw int64, err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL