Documentation ¶
Overview ¶
Package file implements Input/Output for text files. Text files can
- Be delimited or fixed-width
- Have header rows or not
Ctrl-R values in the files are ignored.
Index ¶
- func Rdrs(rdr0 *Reader, nRdrs int) (r []chutils.Input, err error)
- func Wrtrs(tmpDir string, nWrtr int, con *chutils.Connect, separator, eol, quote rune, ...) (wrtrs []chutils.Output, err error)
- type Reader
- func (rdr *Reader) Close() error
- func (rdr *Reader) CountLines() (numLines int, err error)
- func (rdr *Reader) EOL() rune
- func (rdr *Reader) Init(key string, engine chutils.EngineType) error
- func (rdr *Reader) Name() string
- func (rdr *Reader) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)
- func (rdr *Reader) Reset() error
- func (rdr *Reader) Seek(lineNo int) error
- func (rdr *Reader) Separator() rune
- func (rdr *Reader) SetTableSpec(ts *chutils.TableDef)
- func (rdr *Reader) TableSpec() *chutils.TableDef
- type Writer
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Reader ¶
type Reader struct { Skip int // Skip is the # of rows to skip in the file RowsRead int // RowsRead is current count of rows read from the file (includes header) MaxRead int // MaxRead is the maximum number of rows to read Width int // Width is the line width for flat files Quote rune // Quote is the optional quote around strings that contain the Separator // contains filtered or unexported fields }
Reader implements chutils.Input interface.
func NewReader ¶
func NewReader(filename string, separator, eol, quote rune, width int, skip int, maxRead int, rws io.ReadSeekCloser, bufSize int) *Reader
NewReader initializes an instance of Reader
func (*Reader) CountLines ¶
CountLines returns the number of rows in the source data. This does not include any header rows.
func (*Reader) Init ¶
func (rdr *Reader) Init(key string, engine chutils.EngineType) error
Init initializes the FieldDefs slice of Reader.TableSpec() from the header row of the input. It does not set any of the field types. If key is empty, it defaults to the first field.
func (*Reader) Read ¶
func (rdr *Reader) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)
Read reads nTarget rows. If nTarget == 0, the entire file is read.
If validate == true:
- The data is validated according to the rules in rdr.TableSpec.
- The results are returned as the slice valid.
- data is returned with the fields appropriately typed.
If validate == false:
- The data is not validated.
- The returned slice valid is nil.
- The fields are returned as strings.
err is io.EOF at the end of the file.
Example (CSV) ¶
Loading a CSV, cleaning values and loading into ClickHouse using package file reader and writer
/* If you haven't created the table first, you'll get this error simply importing the file via clickhouse-client Code: 60. DB::Exception: Received from 127.0.0.1:9000. DB::Exception: Table testing.values doesn't exist. (UNKNOWN_TABLE) Once the table exists, the clickhouse-client approach produces this error: Row 3: Column 0, name: id, type: String, parsed text: "1B23" Column 1, name: zip, type: FixedString(5), parsed text: "77810" Column 2, name: value, type: Float64, parsed text: "NA"ERROR Code: 27. DB::Exception: Cannot parse NaN. (CANNOT_PARSE_INPUT_ASSERTION_FAILED) (version 22.4.5.9 (official build)) /home/test/data/zip_data.csv: id,zip,value 1A34,90210,20.8 1X88,43210,19.2 1B23,77810,NA 1r99,94043,100.4 1x09,hello,9.9 */ const inFile = "/home/will/tmp/zip_data.csv" // source data const table = "testing.values" // ClickHouse destination table tmpFile := os.TempDir() + "/tmp.csv" // temp file to write data to for import var con *chutils.Connect con, err := chutils.NewConnect("127.0.0.1", "tester", "testGoNow", clickhouse.Settings{}) if err != nil { panic(err) } defer func() { _ = con.Close() }() f, err := os.Open(inFile) if err != nil { panic(err) } rdr := NewReader(inFile, ',', '\n', '"', 0, 1, 0, f, 50000) defer func() { _ = rdr.Close() }() if e := rdr.Init("id", chutils.MergeTree); e != nil { panic(err) } if e := rdr.TableSpec().Impute(rdr, 0, .95); e != nil { panic(e) } // Check the internal consistency of TableSpec if e := rdr.TableSpec().Check(); e != nil { panic(e) } // Specify zip as FixedString(5) with a missing value of 00000 _, fd, err := rdr.TableSpec().Get("zip") if err != nil { panic(err) } // zip will impute to int if we don't make this change fd.ChSpec.Base = chutils.ChFixedString fd.ChSpec.Length = 5 fd.Missing = "00000" legal := []string{"90210", "43210", "77810", "94043"} fd.Legal.Levels = legal // Specify value as having a range of [0,30] with a missing value of -1.0 _, fd, err = rdr.TableSpec().Get("value") if err != nil { panic(err) } 
fd.Legal.HighLimit = 30.0 fd.Legal.LowLimit = 0.0 fd.Missing = -1.0 rdr.TableSpec().Engine = chutils.MergeTree rdr.TableSpec().Key = "id" if e := rdr.TableSpec().Create(con, table); e != nil { panic(e) } fx, err := os.Create(tmpFile) if err != nil { panic(err) } defer func() { _ = fx.Close() }() defer func() { _ = os.Remove(tmpFile) }() wrtr := NewWriter(fx, tmpFile, con, '|', '\n', 0, table) if e := chutils.Export(rdr, wrtr, 0, false); e != nil { panic(e) } qry := fmt.Sprintf("SELECT * FROM %s", table) res, err := con.Query(qry) if err != nil { panic(err) } defer func() { _ = res.Close() }() for res.Next() { var ( id string zip string value float64 ) if res.Scan(&id, &zip, &value) != nil { panic(err) } fmt.Println(id, zip, value) }
Output: 1A34 90210 20.8 1B23 77810 -1 1X88 43210 19.2 1r99 94043 -1 1x09 00000 9.9
func (*Reader) SetTableSpec ¶
SetTableSpec sets Reader.tablespec. Needed if tablespec is not created by Reader.TableSpec().Impute().
type Writer ¶
type Writer struct { io.WriteCloser Table string // Table is the ClickHouse table to Insert to // contains filtered or unexported fields }
Writer implements chutils.Output. Writer will accept any type that satisfies io.WriteCloser. Typically, this would be a file.
func NewWriter ¶
func NewWriter(f io.WriteCloser, name string, con *chutils.Connect, separator, eol, quote rune, table string) *Writer
NewWriter creates a new Writer instance
func (*Writer) Insert ¶
Insert inserts the file Writer.Name into ClickHouse table Writer.Table via the clickhouse-client program.