Documentation ¶
Index ¶
- Constants
- Variables
- func ConfigureDB(ctx context.Context, db *sql.DB, o options.Options)
- func MaxBatchRows(drvr SQLDriver, numCols int) int
- func NewRecordFromScanRow(meta record.Meta, row []any, skip []int) (rec record.Record, skipped []int)
- func OpeningPing(ctx context.Context, src *source.Source, db *sql.DB) error
- func PrepareInsertStmt(ctx context.Context, drvr SQLDriver, db sqlz.Preparer, destTbl string, ...) (stmt *sql.Stmt, err error)
- type BatchInsert
- type Database
- type DatabaseOpener
- type Databases
- func (d *Databases) Close() error
- func (d *Databases) Open(ctx context.Context, src *source.Source) (Database, error)
- func (d *Databases) OpenJoin(ctx context.Context, src1, src2 *source.Source, srcN ...*source.Source) (Database, error)
- func (d *Databases) OpenScratch(ctx context.Context, name string) (Database, error)
- type Driver
- type InsertMungeFunc
- type JoinDatabaseOpener
- type Metadata
- type NewRecordFunc
- type Provider
- type Registry
- type SQLDriver
- type ScratchDatabaseOpener
- type ScratchSrcFunc
- type StmtExecFunc
- type StmtExecer
Constants ¶
const Comma = ", "
Comma is the comma string to use in SQL queries.
Variables ¶
var ( // OptConnMaxOpen controls sql.DB.SetMaxOpenConns. OptConnMaxOpen = options.NewInt( "conn.max-open", "", 0, 0, "Max open connections to DB", `Maximum number of open connections to the database. A value of zero indicates no limit.`, "source", "sql", ) // OptConnMaxIdle controls sql.DB.SetMaxIdleConns. OptConnMaxIdle = options.NewInt( "conn.max-idle", "", 0, 2, "Max connections in idle connection pool", `Set the maximum number of connections in the idle connection pool. If conn.max-open is greater than 0 but less than the new conn.max-idle, then the new conn.max-idle will be reduced to match the conn.max-open limit. If n <= 0, no idle connections are retained.`, "source", ) // OptConnMaxIdleTime controls sql.DB.SetConnMaxIdleTime. OptConnMaxIdleTime = options.NewDuration( "conn.max-idle-time", "", 0, time.Second*2, "Max connection idle time", `Sets the maximum amount of time a connection may be idle. Expired connections may be closed lazily before reuse. If n <= 0, connections are not closed due to a connection's idle time.`, "source", ) // OptConnMaxLifetime controls sql.DB.SetConnMaxLifetime. OptConnMaxLifetime = options.NewDuration( "conn.max-lifetime", "", 0, time.Minute*10, "Max connection lifetime", `Set the maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse. If n <= 0, connections are not closed due to a connection's age.`, "source", ) // OptConnOpenTimeout controls connection open timeout. OptConnOpenTimeout = options.NewDuration( "conn.open-timeout", "", 0, time.Second, "Connection open timeout", "Max time to wait before a connection open timeout occurs.", "source", ) // OptMaxRetryInterval is the maximum interval to wait // between retries. OptMaxRetryInterval = options.NewDuration( "retry.max-interval", "", 0, time.Second*3, "Max interval between retries", `The maximum interval to wait between retries. 
If an operation is retryable (for example, if the DB has too many clients), repeated retry operations back off, typically using a Fibonacci backoff.`, "source", ) // OptTuningErrgroupLimit controls the maximum number of goroutines that can be spawned // by an errgroup. OptTuningErrgroupLimit = options.NewInt( "tuning.errgroup-limit", "", 0, 16, "Max goroutines in any one errgroup", `Controls the maximum number of goroutines that can be spawned by an errgroup. Note that this is the limit for any one errgroup, but not a ceiling on the total number of goroutines spawned, as some errgroups may themselves start an errgroup. This knob is primarily for internal use. Ultimately it should go away in favor of dynamic errgroup limit setting based on availability of additional DB conns, etc.`, "tuning", ) // OptTuningRecChanSize is the size of the buffer chan for record // insertion/writing. OptTuningRecChanSize = options.NewInt( "tuning.record-buffer", "", 0, 1024, "Size of record buffer", `Controls the size of the buffer channel for record insertion/writing.`, "tuning", ) )
Functions ¶
func ConfigureDB ¶ added in v0.34.0
ConfigureDB configures db using o. It is a no-op if o is nil.
func MaxBatchRows ¶
MaxBatchRows returns the maximum number of rows allowed for a batch insert for drvr. Note that the returned value may differ for each database driver.
func NewRecordFromScanRow ¶
func NewRecordFromScanRow(meta record.Meta, row []any, skip []int) (rec record.Record, skipped []int)
NewRecordFromScanRow iterates over the elements of the row slice from rows.Scan, and returns a new (record) slice, replacing any wrapper types such as sql.NullString with the unboxed value, and performing other similar sanitization. For example, it will make a copy of any sql.RawBytes. The row slice can be reused by rows.Scan after this function returns.
Any row elements specified in skip will not be processed; the value will be copied directly from row[i] into rec[i]. If any element of row otherwise cannot be processed, its value is copied directly into rec, and its index is returned in skipped. The caller must take appropriate action to deal with all elements of rec listed in skipped.
REVISIT: Do we need the skip mechanism at all?
func OpeningPing ¶ added in v0.36.0
OpeningPing is a standardized mechanism to ping db using driver.OptConnOpenTimeout. This should be invoked by each SQL driver impl in its Open method. If the ping fails, db is closed.
func PrepareInsertStmt ¶
func PrepareInsertStmt(ctx context.Context, drvr SQLDriver, db sqlz.Preparer, destTbl string, destCols []string, numRows int, ) (stmt *sql.Stmt, err error)
PrepareInsertStmt prepares an insert statement using driver-specific syntax from drvr. numRows specifies how many rows of values are inserted by each execution of the insert statement (1 row being the prototypical usage).
Types ¶
type BatchInsert ¶
type BatchInsert struct { // RecordCh is the channel that the caller sends records on. The // caller must close RecordCh when done. RecordCh chan<- []any // ErrCh returns any errors that occur during insert. ErrCh is // closed by BatchInsert when processing is complete. ErrCh <-chan error // contains filtered or unexported fields }
BatchInsert encapsulates inserting records to a db. The caller sends (munged) records on RecordCh; the record values should be munged via the Munge method prior to sending. Records are written to db in batches of batchSize as passed to NewBatchInsert (the final batch may be less than batchSize). The caller must close RecordCh to indicate that all records have been sent, or cancel the ctx passed to NewBatchInsert to stop the insertion goroutine. Any error is returned on ErrCh. Processing is complete when ErrCh is closed: the caller must select on ErrCh.
func NewBatchInsert ¶
func NewBatchInsert(ctx context.Context, drvr SQLDriver, db sqlz.DB, destTbl string, destColNames []string, batchSize int, ) (*BatchInsert, error)
NewBatchInsert returns a new BatchInsert instance. The internal goroutine is started.
Note that the db arg must guarantee a single connection: that is, it must be a sql.Conn or sql.Tx.
func (BatchInsert) Munge ¶
func (bi BatchInsert) Munge(rec []any) error
Munge should be invoked on every record before sending on RecordCh.
func (*BatchInsert) Written ¶
func (bi *BatchInsert) Written() int64
Written returns the number of records inserted (at the time of invocation). For the final value, Written should be invoked after ErrCh is closed.
type Database ¶
type Database interface { // DB returns the sql.DB object for this Database. DB() *sql.DB // SQLDriver returns the underlying database driver. The type of the SQLDriver // may be different from the driver type reported by the Source. SQLDriver() SQLDriver // Source returns the data source for which this connection was opened. Source() *source.Source // SourceMetadata returns metadata about the data source. // // TODO: SourceMetadata doesn't really belong on driver.Database. It // should be moved to driver.Driver. SourceMetadata(ctx context.Context) (*source.Metadata, error) // TableMetadata returns metadata for the specified table in the data source. // // TODO: TableMetadata doesn't really belong on driver.Database. It // should be moved to driver.Driver. TableMetadata(ctx context.Context, tblName string) (*source.TableMetadata, error) // Close is invoked to close and release any underlying resources. Close() error }
Database models a database handle. It is conceptually equivalent to stdlib sql.DB, and in fact encapsulates a sql.DB instance. The realized sql.DB instance can be accessed via the DB method.
type DatabaseOpener ¶
type DatabaseOpener interface { // Open returns a Database instance for src. This operation can // take a long time if opening the DB requires an import of data. // For example, with file-based sources such as CSV, invoking Open // will ultimately read and import all CSV rows from the file. // Thus, set a timeout on ctx as appropriate for the source. Open(ctx context.Context, src *source.Source) (Database, error) }
DatabaseOpener opens a Database.
type Databases ¶
type Databases struct {
// contains filtered or unexported fields
}
Databases provides a mechanism for getting Database instances. Note that at this time instances returned by Open are cached and then closed by Close. This may be a bad approach.
func NewDatabases ¶
func NewDatabases(log *slog.Logger, drvrs Provider, scratchSrcFn ScratchSrcFunc) *Databases
NewDatabases returns a new Databases instance.
func (*Databases) Open ¶
Open returns an opened Database for src. The returned Database may be cached and returned on future invocations for the same handle. Thus, the caller should typically not close the Database: it will be closed via d.Close.
NOTE: This entire logic re caching/not-closing is a bit sketchy, and needs to be revisited.
Open implements DatabaseOpener.
func (*Databases) OpenJoin ¶
func (d *Databases) OpenJoin(ctx context.Context, src1, src2 *source.Source, srcN ...*source.Source) (Database, error)
OpenJoin opens an appropriate database for use as a work DB for joining across sources.
Note: There is much work to be done on this method. At this time, only two sources are supported. Ultimately OpenJoin should be able to inspect the join srcs and use heuristics to determine the best location for the join to occur (to minimize copying of data for the join etc.). Currently the implementation simply delegates to OpenScratch.
OpenJoin implements JoinDatabaseOpener.
func (*Databases) OpenScratch ¶
OpenScratch returns a scratch database instance. It is not necessary for the caller to close the returned Database as its Close method will be invoked by d.Close.
OpenScratch implements ScratchDatabaseOpener.
type Driver ¶
type Driver interface { DatabaseOpener // DriverMetadata returns driver metadata. DriverMetadata() Metadata // ValidateSource verifies that the source is valid for this driver. It // may transform the source into a canonical form, which is returned in // the "src" return value (the original source is not changed). An error // is returned if the source is invalid. ValidateSource(source *source.Source) (src *source.Source, err error) // Ping verifies that the source is reachable, or returns an error if not. // The exact behavior of Ping() is driver-dependent. Ping(ctx context.Context, src *source.Source) error // Truncate truncates tbl in src. If arg reset is true, the // identity counter for tbl should be reset, if supported // by the driver. Some DB impls may reset the identity // counter regardless of the val of reset. Truncate(ctx context.Context, src *source.Source, tbl string, reset bool) (affected int64, err error) }
Driver is the core interface that must be implemented for each type of data source.
type InsertMungeFunc ¶
InsertMungeFunc is invoked on vals before insertion (or update, despite the name). Note that InsertMungeFunc operates on the vals slice, while NewRecordFunc returns a new slice.
func DefaultInsertMungeFunc ¶
func DefaultInsertMungeFunc(destTbl string, destMeta record.Meta) InsertMungeFunc
DefaultInsertMungeFunc returns an InsertMungeFunc that checks the values of rec against destMeta and performs necessary munging. For example, if any element is a ptr to an empty string and the dest type is not of kind Text, the empty string was probably intended to mean nil. This happens when the original source doesn't handle nil, e.g. with CSV, where nil is effectively represented by "".
The returned InsertMungeFunc accounts for common cases, but it's possible that certain databases will require a custom InsertMungeFunc.
type JoinDatabaseOpener ¶
type JoinDatabaseOpener interface { // OpenJoin opens an appropriate Database for use as // a work DB for joining across sources. OpenJoin(ctx context.Context, src1, src2 *source.Source, srcN ...*source.Source) (Database, error) }
JoinDatabaseOpener can open a join database.
type Metadata ¶
type Metadata struct { // Type is the driver type, e.g. "mysql" or "csv", etc. Type source.DriverType `json:"type"` // Description is typically the long name of the driver, e.g. // "MySQL" or "Microsoft Excel XLSX". Description string `json:"description"` // Doc is optional documentation, typically a URL. Doc string `json:"doc,omitempty"` // UserDefined is true if this driver is the product of a // user driver definition, and false if built-in. UserDefined bool `json:"user_defined"` // IsSQL is true if this driver is a SQL driver. IsSQL bool `json:"is_sql"` // Monotable is true if this is a non-SQL document type that // effectively has a single table, such as CSV. Monotable bool `json:"monotable"` }
Metadata holds driver metadata.
type NewRecordFunc ¶
NewRecordFunc is invoked on a query result row (scanRow) to normalize and standardize the data, returning a new record. The provided scanRow arg is available for reuse after this func returns.
Ultimately rec should only contain:
nil, *int64, *bool, *float64, *string, *[]byte, *time.Time
Thus a func instance might unbox sql.NullString et al, or deal with any driver specific quirks.
type Provider ¶
type Provider interface { // DriverFor returns a driver instance for the given type. DriverFor(typ source.DriverType) (Driver, error) }
Provider is a factory that returns Driver instances.
type Registry ¶
type Registry struct {
// contains filtered or unexported fields
}
Registry provides access to driver implementations.
func NewRegistry ¶
NewRegistry returns a new Registry instance that provides access to driver implementations. Note that Registry implements Provider.
func (*Registry) AddProvider ¶
func (r *Registry) AddProvider(typ source.DriverType, p Provider)
AddProvider registers the provider for the specified driver type. This method has no effect if there's already a provider for typ.
func (*Registry) DriverFor ¶
func (r *Registry) DriverFor(typ source.DriverType) (Driver, error)
DriverFor implements Provider.
func (*Registry) DriversMetadata ¶
DriversMetadata returns metadata for each registered driver type.
func (*Registry) ProviderFor ¶
func (r *Registry) ProviderFor(typ source.DriverType) Provider
ProviderFor returns the provider for typ, or nil if there is no registered provider.
type SQLDriver ¶
type SQLDriver interface { Driver // Dialect returns the SQL dialect. Dialect() dialect.Dialect // ErrWrapFunc returns a func that wraps the driver's errors. ErrWrapFunc() func(error) error // Renderer returns the SQL renderer for this driver. Renderer() *render.Renderer // CurrentSchema returns the current schema name. CurrentSchema(ctx context.Context, db sqlz.DB) (string, error) // TableColumnTypes returns the column type info from // the SQL driver. If len(colNames) is 0, info is returned // for all columns in the table. TableColumnTypes(ctx context.Context, db sqlz.DB, tblName string, colNames []string) ([]*sql.ColumnType, error) // RecordMeta returns the result metadata (the metadata for // each col) from colTypes. RecordMeta is preferred over // sql.Rows.ColumnTypes because of the inconsistent behavior // of various SQL driver implementations wrt reporting // "nullable" information and other quirks. The returned // metadata may differ from the original metadata returned // by rows.ColumnTypes. // // The caller should typically invoke rows.Next before // this method is invoked, as some implementations do not return // complete column type info until after the first call to rows.Next. // // RecordMeta also returns a NewRecordFunc which can be // applied to the scan row from sql.Rows. RecordMeta(colTypes []*sql.ColumnType) (record.Meta, NewRecordFunc, error) // PrepareInsertStmt prepares a statement for inserting // values to destColNames in destTbl. numRows specifies // how many rows of values are inserted by each execution of // the insert statement (1 row being the prototypical usage). // It is the caller's responsibility to close the execer. // // Note that db must guarantee a single connection: that is, db // must be a sql.Conn or sql.Tx. 
PrepareInsertStmt(ctx context.Context, db sqlz.DB, destTbl string, destColNames []string, numRows int) (*StmtExecer, error) // PrepareUpdateStmt prepares a statement for updating destColNames in // destTbl, using the supplied where clause (which may be empty). // The where arg should use question mark "?" as the placeholder: it will // be translated to the appropriate driver-specific placeholder. For example, // the where arg could be: // // "actor_id = ? AND first_name = ?". // // Use the returned StmtExecer per its documentation. It is the caller's // responsibility to close the execer. // // Note that db must guarantee a single connection: that is, db // must be a sql.Conn or sql.Tx. PrepareUpdateStmt(ctx context.Context, db sqlz.DB, destTbl string, destColNames []string, where string) (*StmtExecer, error) // CreateTable creates the table defined by tblDef. Some implementations // may not honor every field of tblDef, e.g. an impl might not // build the foreign key constraints. At a minimum the implementation // must honor the table name and column names and kinds from tblDef. CreateTable(ctx context.Context, db sqlz.DB, tblDef *sqlmodel.TableDef) error // TableExists returns true if there's an existing table tbl in db. TableExists(ctx context.Context, db sqlz.DB, tbl string) (bool, error) // CopyTable copies fromTable into a new table toTable. // If copyData is true, fromTable's data is also copied. // Constraints (keys, defaults etc.) may not be copied. The // number of copied rows is returned in copied. CopyTable(ctx context.Context, db sqlz.DB, fromTable, toTable string, copyData bool) (copied int64, err error) // DropTable drops tbl from db. If ifExists is true, an "IF EXISTS" // or equivalent clause is added, if supported. DropTable(ctx context.Context, db sqlz.DB, tbl string, ifExists bool) error // AlterTableRename renames a table. AlterTableRename(ctx context.Context, db sqlz.DB, tbl, newName string) error // AlterTableAddColumn adds column col to tbl. 
The column is appended // to the list of columns (that is, the column position cannot be // specified). AlterTableAddColumn(ctx context.Context, db sqlz.DB, tbl, col string, knd kind.Kind) error // AlterTableRenameColumn renames a column. AlterTableRenameColumn(ctx context.Context, db sqlz.DB, tbl, col, newName string) error // DBProperties returns a map of key-value database properties. The value // is often a scalar such as an int, string, or bool, but can be a nested // map or array. DBProperties(ctx context.Context, db sqlz.DB) (map[string]any, error) }
SQLDriver is implemented by Driver instances for SQL databases.
type ScratchDatabaseOpener ¶
type ScratchDatabaseOpener interface { // OpenScratch returns a database for scratch use. OpenScratch(ctx context.Context, name string) (Database, error) }
ScratchDatabaseOpener opens a scratch database. A scratch database is typically a short-lived database used as a target for loading non-SQL data (such as CSV).
type ScratchSrcFunc ¶
type ScratchSrcFunc func(ctx context.Context, name string) (src *source.Source, cleanFn func() error, err error)
ScratchSrcFunc is a function that returns a scratch source. The caller is responsible for invoking cleanFn.
type StmtExecFunc ¶
StmtExecFunc is provided by driver implementations to wrap execution of a prepared statement. Typically the func will perform some driver-specific action, such as managing retryable errors.
type StmtExecer ¶
type StmtExecer struct {
// contains filtered or unexported fields
}
StmtExecer encapsulates the elements required to execute a SQL statement. Typically the statement is an INSERT. The Munge method should be applied to each row of values prior to invoking Exec. The caller is responsible for invoking Close.
func NewStmtExecer ¶
func NewStmtExecer(stmt *sql.Stmt, mungeFn InsertMungeFunc, execFn StmtExecFunc, destMeta record.Meta, ) *StmtExecer
NewStmtExecer returns a new StmtExecer instance. The caller is responsible for invoking Close on the returned StmtExecer.
func (*StmtExecer) DestMeta ¶
func (x *StmtExecer) DestMeta() record.Meta
DestMeta returns the record.Meta for the destination table columns.
func (*StmtExecer) Exec ¶
Exec executes the statement. The caller should invoke Munge on each row of values prior to passing those values to Exec.
func (*StmtExecer) Munge ¶
func (x *StmtExecer) Munge(rec []any) error
Munge should be applied to each row of values prior to invoking Exec.