Documentation ¶
Overview ¶
Package internal implements database-agnostic functionality for the Spanner migration tool.
Index ¶
- Constants
- func AssertSpSchema(conv *Conv, t *testing.T, ...)
- func AssertSrcSchema(t *testing.T, conv *Conv, expectedSchema, actualSchema map[string]schema.Table)
- func AssertTableIssues(conv *Conv, t *testing.T, tableId string, ...)
- func ComputeUsedNames(conv *Conv) map[string]bool
- func Contains[T comparable](slice []T, target T) bool
- func FixName(name string) (string, bool)
- func GenerateColumnId() string
- func GenerateForeignkeyId() string
- func GenerateId(idPrefix string) string
- func GenerateIndexesId() string
- func GenerateRuleId() string
- func GenerateSequenceId() string
- func GenerateTableId() string
- func GetColIdFromSpName(colDefs map[string]ddl.ColumnDef, colName string) (string, error)
- func GetColIdFromSrcName(srcColDef map[string]schema.Column, columnName string) (string, error)
- func GetSpannerCol(conv *Conv, tableId, colId string, spColDef map[string]ddl.ColumnDef) (string, error)
- func GetSpannerCols(conv *Conv, tableId string, srcCols []string) ([]string, error)
- func GetSpannerTable(conv *Conv, tableId string) (string, error)
- func GetSrcColNameIdMap(srcs schema.Table) map[string]string
- func GetSrcFkFromId(fks []schema.ForeignKey, fkId string) (schema.ForeignKey, error)
- func GetSrcIndexFromId(indexes []schema.Index, indexId string) (schema.Index, error)
- func GetSrcTableByName(srcSchema map[string]schema.Table, name string) (*schema.Table, bool)
- func GetTableIdFromSpName(spSchema ddl.Schema, tableName string) (string, error)
- func GetTableIdFromSrcName(srcSchema map[string]schema.Table, tableName string) (string, error)
- func ResolveForeignKeyIds(schema map[string]schema.Table)
- func ResolveRefs(conv *Conv)
- func ToSpannerForeignKey(conv *Conv, srcFkName string) string
- func ToSpannerIndexName(conv *Conv, srcIndexName string) string
- func ToSpannerOnDelete(conv *Conv, srcTableId string, srcDeleteRule string) string
- func ToSpannerOnUpdate(conv *Conv, srcTableId string, srcUpdateRule string) string
- func Verbose() bool
- func VerboseInit(b bool)
- func VerbosePrintf(format string, a ...interface{})
- func VerbosePrintln(a ...interface{})
- type AdditionalDataAttributes
- type AdditionalSchemaAttributes
- type Audit
- type Conv
- func (conv *Conv) AddPrimaryKeys()
- func (conv *Conv) AddShardIdColumn()
- func (conv *Conv) BadRows() int64
- func (conv *Conv) CollectBadRow(srcTable string, srcCols, vals []string)
- func (conv *Conv) DataMode() bool
- func (conv *Conv) DataStatement(stmtType string)
- func (conv *Conv) ErrorInStatement(stmtType string)
- func (conv *Conv) ResetStats()
- func (conv *Conv) Rows() int64
- func (conv *Conv) SampleBadRows(n int) []string
- func (conv *Conv) SchemaMode() bool
- func (conv *Conv) SchemaStatement(stmtType string)
- func (conv *Conv) SetDataMode()
- func (conv *Conv) SetDataSink(ds func(table string, cols []string, values []interface{}))
- func (conv *Conv) SetLocation(loc *time.Location)
- func (conv *Conv) SetSchemaMode()
- func (conv *Conv) SkipStatement(stmtType string)
- func (conv *Conv) StatementErrors() int64
- func (conv *Conv) Statements() int64
- func (conv *Conv) StatsAddBadRow(srcTable string, b bool)
- func (conv *Conv) StatsAddRow(srcTable string, b bool)
- func (conv *Conv) Unexpected(u string)
- func (conv *Conv) Unexpecteds() int64
- func (conv *Conv) WriteRow(srcTable, spTable string, spCols []string, spVals []interface{})
- type Counter
- type DataflowOutput
- type DataflowResources
- type DatastreamResources
- type ExpressionDetail
- type ExpressionVerificationOutput
- type FkeyAndIdxs
- type GcsResources
- type MonitoringResources
- type NameAndCols
- type Progress
- type ProgressStatus
- type PubsubCfg
- type PubsubResources
- type Reader
- type ReferenceElement
- type Rule
- type SchemaDetails
- type SchemaIssue
- type ShardResources
- type SyntheticPKey
- type TableDetails
- type TableIssues
- type Tables
- type VerifyExpressionsInput
- type VerifyExpressionsOutput
Constants ¶
const (
	ShardIdColumn       = "migration_shard_id"
	SyntheticPrimaryKey = "synth_id"
)
Variables ¶
This section is empty.
Functions ¶
func AssertSpSchema ¶
func AssertSrcSchema ¶
func AssertTableIssues ¶
func ComputeUsedNames ¶
func Contains ¶
func Contains[T comparable](slice []T, target T) bool
Contains reports whether target is present in slice.
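A minimal usage sketch, written as a hypothetical example test in the same package (assumes "fmt" is imported; the slice contents are made up):

func ExampleContains() {
	ids := []string{"t1", "t2", "t3"}
	fmt.Println(Contains(ids, "t2"))
	fmt.Println(Contains(ids, "t9"))
	// Output:
	// true
	// false
}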
func FixName ¶
FixName maps a table_name, column_name or index_name into something spanner will accept. table_name, column_name or index_name must all adhere to the following regexp:
{a-z|A-Z}[{a-z|A-Z|0-9|_}+]
If the first character of the name is not allowed, we replace it by "A". We replace all other problem characters by "_". Returns a Spanner-acceptable name, and whether we had to change the name.
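For illustration, a hedged sketch of how FixName behaves on a name Spanner would reject (the input is made up; assumes "fmt" is imported):

// "1user-login" starts with a digit and contains '-', both disallowed.
name, changed := FixName("1user-login")
// Per the rules above we'd expect something like "Auser_login" and changed == true.
fmt.Println(name, changed)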
func GenerateColumnId ¶
func GenerateColumnId() string
func GenerateForeignkeyId ¶
func GenerateForeignkeyId() string
func GenerateId ¶
func GenerateIndexesId ¶
func GenerateIndexesId() string
func GenerateRuleId ¶
func GenerateRuleId() string
func GenerateSequenceId ¶
func GenerateSequenceId() string
func GenerateTableId ¶
func GenerateTableId() string
func GetColIdFromSpName ¶
func GetColIdFromSrcName ¶
func GetSpannerCol ¶
func GetSpannerCol(conv *Conv, tableId, colId string, spColDef map[string]ddl.ColumnDef) (string, error)
GetSpannerCol maps a source DB table/column into a legal Spanner column name. If mustExist is true, we return an error if the column is new. Note that source DB column names can be essentially any string, but Spanner column names must use a limited character set. This means that GetSpannerCol may have to change a name to make it legal; we must ensure that: a) the new col name is legal, b) the new col name doesn't clash with other col names in the same table, and c) we consistently return the same name for the same col.
func GetSpannerCols ¶
GetSpannerCols maps a slice of source columns into their corresponding Spanner columns using GetSpannerCol.
func GetSpannerTable ¶
GetSpannerTable maps a source DB table name into a legal Spanner table name. Note that source DB table names can be essentially any string, but Spanner table names must use a limited character set. This means that GetSpannerTable may have to change a name to make it legal; we must ensure that: a) the new table name is legal, b) the new table name doesn't clash with other Spanner table names, and c) we consistently return the same name for this table.
conv.UsedNames tracks Spanner names that have been used for table names, foreign key constraints and indexes. We use it to ensure we generate unique names when we map from source DBs to Spanner, since Spanner requires all these names to be distinct and not to differ only in case.
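The collision handling described above can be pictured with a small standalone sketch. This is not the tool's implementation, just an illustration of how a used-names set keyed on lower-cased names keeps generated names distinct even when candidates differ only in case (uniqueName is a hypothetical helper; assumes "fmt" and "strings" are imported):

// uniqueName returns base, or base with a numeric suffix, such that the
// result is not already present (case-insensitively) in used.
func uniqueName(base string, used map[string]bool) string {
	name := base
	for i := 2; used[strings.ToLower(name)]; i++ {
		name = fmt.Sprintf("%s_%d", base, i)
	}
	used[strings.ToLower(name)] = true
	return name
}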
func GetSrcFkFromId ¶
func GetSrcFkFromId(fks []schema.ForeignKey, fkId string) (schema.ForeignKey, error)
func GetSrcIndexFromId ¶
func GetSrcTableByName ¶
func GetTableIdFromSpName ¶
func GetTableIdFromSrcName ¶
func ResolveForeignKeyIds ¶
func ResolveRefs ¶
func ResolveRefs(conv *Conv)
ResolveRefs resolves all table and column references in foreign key constraints in the Spanner Schema. Note: Spanner requires that DDL references match the case of the referenced object, but this is not so for many source databases.
TODO: Expand ResolveRefs to primary keys and indexes.
func ToSpannerForeignKey ¶
ToSpannerForeignKey maps a source foreign key name to a legal Spanner foreign key name. If srcFkName is the empty string we just return the empty string without error. If srcFkName is not empty we need to make sure of the following: a) the new foreign key name is legal, and b) the new foreign key name doesn't clash with other Spanner foreign key names.
Note that foreign key constraint names in Spanner have to be globally unique (across the database). But in some source databases, such as PostgreSQL, they only have to be unique for a table. Hence we must map each source constraint name to a unique Spanner constraint name.
func ToSpannerIndexName ¶
ToSpannerIndexName maps a source index name to a legal Spanner index name. We need to make sure of the following: a) the new index name is legal, and b) the new index name doesn't clash with other Spanner index names.
Note that index names in Spanner have to be globally unique (across the database). But in some source databases, such as MySQL, they only have to be unique for a table. Hence we must map each source index name to a unique Spanner index name.
func ToSpannerOnDelete ¶
ToSpannerOnDelete maps the source ON DELETE action to the corresponding Spanner-compatible action. The following mapping is used: a) CASCADE/NO ACTION -> mapped to the same as the source action, b) all others -> NO ACTION (default).
For each source action converted to a different action, an issue is appended to the table's TableLevelIssues to generate a warning message for the user.
Since only MySQL and PostgreSQL support this functionality so far, for other sources the OnDelete fields are kept empty, i.e. "" is mapped to "", and an issue is appended to generate a warning message (printed only once for each table with FK actions).
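The action mapping itself can be pictured with a standalone sketch; this is not the tool's implementation (conv state, issue reporting and dialect handling are omitted), just the mapping described above (mapOnDelete is a hypothetical helper; assumes "strings" is imported):

// mapOnDelete keeps CASCADE and NO ACTION, leaves the empty string alone
// (sources without FK action support), and maps everything else to NO ACTION.
func mapOnDelete(srcAction string) string {
	switch strings.ToUpper(srcAction) {
	case "CASCADE", "NO ACTION":
		return strings.ToUpper(srcAction)
	case "":
		return ""
	default:
		return "NO ACTION"
	}
}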
func ToSpannerOnUpdate ¶
ToSpannerOnUpdate maps the source ON UPDATE action to the corresponding Spanner-compatible action. The mapping is: all actions -> NO ACTION (default), since Spanner only supports ON UPDATE NO ACTION.
For each source action converted to a different action, an issue is appended to the table's TableLevelIssues to generate a warning message for the user.
Since only MySQL and PostgreSQL support this functionality so far, for other sources the OnUpdate fields are kept empty, i.e. "" is mapped to "", and an issue is appended to generate a warning message (printed only once for each table with FK actions).
func VerboseInit ¶
func VerboseInit(b bool)
VerboseInit sets whether verbose mode is enabled. Generally there should be one call to VerboseInit at startup.
func VerbosePrintf ¶
func VerbosePrintf(format string, a ...interface{})
VerbosePrintf prints to stdout if verbose is enabled.
func VerbosePrintln ¶
func VerbosePrintln(a ...interface{})
VerbosePrintln prints to stdout if verbose is enabled.
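A typical usage sketch from a command-line entry point (the flag wiring and messages are hypothetical):

VerboseInit(true) // usually driven by a --verbose flag; call once at startup
VerbosePrintf("processing table %s\n", "Albums")
VerbosePrintln("schema conversion finished")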
Types ¶
type AdditionalDataAttributes ¶
type AdditionalDataAttributes struct {
ShardId string
}
type AdditionalSchemaAttributes ¶
type AdditionalSchemaAttributes struct {
IsSharded bool
}
type Audit ¶
type Audit struct {
	SchemaConversionDuration time.Duration `json:"-"` // Duration of schema conversion.
	DataConversionDuration time.Duration `json:"-"` // Duration of data conversion.
	MigrationRequestId string `json:"-"` // Unique request id generated per migration.
	MigrationType *migration.MigrationData_MigrationType `json:"-"` // Type of migration: schema migration, data migration, or schema and data migration.
	DryRun bool `json:"-"` // Flag to identify if the migration is a dry run.
	StreamingStats streamingStats `json:"-"` // Stores information related to the streaming migration process.
	Progress Progress `json:"-"` // Stores information related to the progress of the migration.
	SkipMetricsPopulation bool `json:"-"` // Flag to identify if outgoing metrics metadata needs to be skipped.
}
Audit stores the audit information of the conversion: elements that do not affect the migration functionality but are relevant for migration metadata.
type Conv ¶
type Conv struct {
	SpSchema ddl.Schema // Maps Spanner table name to Spanner schema.
	SyntheticPKeys map[string]SyntheticPKey // Maps Spanner table name to synthetic primary key (if needed).
	SrcSchema map[string]schema.Table // Maps source-DB table name to schema information.
	SchemaIssues map[string]TableIssues // Maps source-DB table/col to list of schema conversion issues.
	ToSpanner map[string]NameAndCols `json:"-"` // Maps from source-DB table name to Spanner name and column mapping.
	ToSource map[string]NameAndCols `json:"-"` // Maps from Spanner table name to source-DB table name and column mapping.
	UsedNames map[string]bool `json:"-"` // Map storing the names that are already assigned to tables, indices or foreign key constraints.
	DataFlush func() `json:"-"` // Data flush is used to flush out remaining writes and wait for them to complete.
	Location *time.Location // Timezone (for timestamp conversion).
	Stats stats `json:"-"`
	TimezoneOffset string // Timezone offset for timestamp conversion.
	SpDialect string // The dialect of the Spanner database to which the Spanner migration tool is writing.
	UniquePKey map[string][]string // Maps Spanner table name to unique column name being used as primary key (if needed).
	Audit Audit `json:"-"` // Stores the audit information for the database conversion.
	Rules []Rule // Stores applied rules during schema conversion.
	IsSharded bool // Flag denoting if the migration is sharded or not.
	ConvLock sync.RWMutex `json:"-"` // ConvLock prevents concurrent map read/write operations. This lock is used in all the APIs that either read or write elements of the conv object.
	SpRegion string // Leader region for the Spanner instance.
	ResourceValidation bool // Flag denoting if validation for resources to be generated is complete.
	UI bool // Flag if the UI was used for migration. TODO: Remove flag after resource generation is introduced to the UI.
	SpSequences map[string]ddl.Sequence // Maps Spanner sequences to sequence schema.
	SrcSequences map[string]ddl.Sequence // Maps source-DB sequences to sequence schema information.
	// contains filtered or unexported fields
}
Conv contains all schema and data conversion state.
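A minimal sketch of wiring up a Conv for schema conversion. Real callers typically obtain a Conv from the tool's own setup code; the literal below only initializes the maps this sketch touches:

conv := &Conv{
	SchemaIssues: map[string]TableIssues{},
	UsedNames:    map[string]bool{},
}
conv.SetSchemaMode()
if conv.SchemaMode() {
	// process source statements and build the Spanner schema...
}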
func (*Conv) AddPrimaryKeys ¶
func (conv *Conv) AddPrimaryKeys()
AddPrimaryKeys analyzes all tables in conv.schema and adds synthetic primary keys for any tables that don't have a primary key.
func (*Conv) AddShardIdColumn ¶
func (conv *Conv) AddShardIdColumn()
func (*Conv) BadRows ¶
BadRows returns the total count of bad rows encountered during data conversion.
func (*Conv) CollectBadRow ¶
CollectBadRow updates the list of bad rows, while respecting the byte limit for bad rows.
func (*Conv) DataStatement ¶
DataStatement increments the data statement stats for 'stmtType'.
func (*Conv) ErrorInStatement ¶
ErrorInStatement increments the error statement stats for 'stmtType'.
func (*Conv) ResetStats ¶
func (conv *Conv) ResetStats()
func (*Conv) SampleBadRows ¶
SampleBadRows returns a string-formatted list of rows that generated errors. Returns at most n rows.
func (*Conv) SchemaMode ¶
SchemaMode returns true if conv is configured to schemaOnly.
func (*Conv) SchemaStatement ¶
SchemaStatement increments the schema statement stats for 'stmtType'.
func (*Conv) SetDataMode ¶
func (conv *Conv) SetDataMode()
SetDataMode configures conv to convert data and write it to Spanner. In this mode, we also do a complete re-processing of all statements for stats purposes (it's hard to keep track of which stats are collected in each phase, so we simply reset and recollect), but we don't modify the schema.
func (*Conv) SetDataSink ¶
SetDataSink configures conv to use the specified data sink.
func (*Conv) SetLocation ¶
SetLocation configures the timezone for data conversion.
func (*Conv) SetSchemaMode ¶
func (conv *Conv) SetSchemaMode()
SetSchemaMode configures conv to process schema-related statements and build the Spanner schema. In schema mode we also process just enough of other statements to get an accurate count of the number of data rows (used for tracking progress when writing data to Spanner).
func (*Conv) SkipStatement ¶
SkipStatement increments the skip statement stats for 'stmtType'.
func (*Conv) StatementErrors ¶
StatementErrors returns the number of statement errors encountered.
func (*Conv) Statements ¶
Statements returns the total number of statements processed.
func (*Conv) StatsAddBadRow ¶
StatsAddBadRow increments the bad-row stats for 'srcTable' if b is true. See StatsAddRow comments for context.
func (*Conv) StatsAddRow ¶
StatsAddRow increments the count of rows for 'srcTable' if b is true. The boolean arg 'b' is used to avoid double counting of stats. Specifically, some code paths that report row stats run in both schema-mode and data-mode e.g. statement.go. To avoid double counting, we explicitly choose a mode-for-stats-collection for each place where row stats are collected. When specifying this mode take care to ensure that the code actually runs in the mode you specify, otherwise stats will be dropped.
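In practice this often means tying the boolean to the current mode, so a code path shared by both phases only counts rows in the phase chosen for stats collection (a hypothetical call site; conv and srcTable are assumed to be in scope):

// Count this row only when running in data mode, so that the schema-mode
// pass over the same statements doesn't double count it.
conv.StatsAddRow(srcTable, conv.DataMode())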
func (*Conv) Unexpected ¶
Unexpected records stats about corner-cases and conditions that were not expected. Note that the counts may not be completely reliable due to potential double-counting, because we process dump data twice.
func (*Conv) Unexpecteds ¶
Unexpecteds returns the total number of distinct unexpected conditions encountered during processing.
type Counter ¶
type Counter struct {
	ObjectId string
	// contains filtered or unexported fields
}
var Cntr Counter
func (*Counter) GenerateIdSuffix ¶
Thread-safe counter used to generate ids in the session file.
type DataflowOutput ¶
type DataflowResources ¶
type DataflowResources struct {
	JobId string `json:"JobId"`
	GcloudCmd string `json:"GcloudCmd"`
	Region string `json:"Region"`
}
Stores information related to generated Dataflow Resources.
type DatastreamResources ¶
type DatastreamResources struct {
	DatastreamName string `json:"DatastreamName"`
	Region string `json:"Region"`
}
Stores information related to generated Datastream Resources.
type ExpressionDetail ¶
type ExpressionVerificationOutput ¶
type ExpressionVerificationOutput struct {
	ExpressionDetail ExpressionDetail
	Result bool
	Err error
}
type FkeyAndIdxs ¶
FkeyAndIdxs contains the name of a table, its foreign keys and its indexes. Used to map between source DB and Spanner table names, foreign key names and index names.
type GcsResources ¶
type GcsResources struct {
BucketName string `json:"BucketName"`
}
type MonitoringResources ¶
type MonitoringResources struct {
DashboardName string `json:"DashboardName"`
}
Stores information related to Monitoring resources
type NameAndCols ¶
NameAndCols contains the name of a table and its columns. Used to map between source DB and Spanner table and column names.
type Progress ¶
type Progress struct {
	ProgressStatus
	// contains filtered or unexported fields
}
Progress provides console progress functionality, i.e. it reports what percentage of a task is complete to the console, overwriting the previous progress percentage with new progress.
func NewProgress ¶
func NewProgress(total int64, message string, verbose, fractional bool, progressStatus int) *Progress
NewProgress creates and returns a Progress instance.
func (*Progress) Done ¶
func (p *Progress) Done()
Done signals completion, and will report 100% if it hasn't already been reported.
func (*Progress) MaybeReport ¶
MaybeReport updates the state of p with the new 'progress' measure. If this update changes pct (integer part of percentage-done), MaybeReport will print out the new percentage, overwriting the previous percentage.
func (*Progress) ReportProgress ¶
func (*Progress) UpdateProgress ¶
func (p *Progress) UpdateProgress(message string, pct int, progressStatus ProgressStatus)
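A usage sketch combining NewProgress, UpdateProgress and Done (the message, total and percentages are hypothetical):

p := NewProgress(100, "Writing data to Spanner", false, false, int(DataWriteInProgress))
for pct := 0; pct <= 100; pct += 25 {
	p.UpdateProgress("Writing data to Spanner", pct, DataWriteInProgress)
}
p.Done() // reports 100% if it hasn't already been reported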
type ProgressStatus ¶
type ProgressStatus int
ProgressStatus specifies a stage of migration.
const (
	DefaultStatus ProgressStatus = iota
	SchemaMigrationComplete
	SchemaCreationInProgress
	DataMigrationComplete
	DataWriteInProgress
	ForeignKeyUpdateInProgress
	ForeignKeyUpdateComplete
)
Defines the progress statuses that we track
type PubsubResources ¶
type PubsubResources struct {
	TopicId string
	SubscriptionId string
	NotificationId string
	BucketName string
	Region string
}
Stores information related to generated Pubsub Resources.
type Reader ¶
type Reader struct {
	LineNumber int // Starting at line 1.
	Offset int // Character offset from start of input. Starts with character 1.
	EOF bool
	// contains filtered or unexported fields
}
Reader is a simple line-reader wrapper around bufio.Reader that provides line number, file offset, and cached eof state. Errors are printed (via fmt.Print) and then treated as eof.
type ReferenceElement ¶
type ReferenceElement struct {
Name string
}
type Rule ¶
type Rule struct {
	Id string
	Name string
	Type string
	ObjectType string
	AssociatedObjects string
	Enabled bool
	Data interface{}
	AddedOn datetime.DateTime
}
Stores information related to rules during schema conversion
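For illustration, a Rule value might be populated like this; the rule name, type and payload shown are hypothetical, since the concrete rule types and Data shapes are defined elsewhere in the tool:

rule := Rule{
	Id:                GenerateRuleId(),
	Name:              "widen-int-columns",      // hypothetical rule name
	Type:              "global_datatype_change", // hypothetical rule type
	ObjectType:        "Column",
	AssociatedObjects: "All Columns",
	Enabled:           true,
	Data:              map[string]string{"int": "INT64"}, // hypothetical payload
}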
type SchemaDetails ¶
type SchemaDetails struct {
TableDetails []TableDetails `json:"TableDetails"`
}
type SchemaIssue ¶
type SchemaIssue int
SchemaIssue specifies a schema conversion issue.
const (
	DefaultValue SchemaIssue = iota
	ForeignKey
	MissingPrimaryKey
	UniqueIndexPrimaryKey
	MultiDimensionalArray
	NoGoodType
	Numeric
	NumericThatFits
	Decimal
	DecimalThatFits
	Serial
	AutoIncrement
	Timestamp
	Datetime
	Widened
	Time
	StringOverflow
	HotspotTimestamp
	HotspotAutoIncrement
	RedundantIndex
	AutoIncrementIndex
	InterleaveIndex
	InterleavedNotInOrder
	InterleavedOrder
	InterleavedAddColumn
	IllegalName
	InterleavedRenameColumn
	InterleavedChangeColumnSize
	RowLimitExceeded
	ShardIdColumnAdded
	ShardIdColumnPrimaryKey
	ArrayTypeNotSupported
	ForeignKeyOnDelete
	ForeignKeyOnUpdate
	SequenceCreated
	ForeignKeyActionNotSupported
	NumericPKNotSupported
)
Defines all of the schema issues we track. Includes issues with type mappings, as well as features (such as source DB constraints) that aren't supported in Spanner.
type ShardResources ¶
type ShardResources struct {
	DatastreamResources DatastreamResources
	PubsubResources PubsubResources
	DlqPubsubResources PubsubResources
	DataflowResources DataflowResources
	GcsResources GcsResources
	MonitoringResources MonitoringResources
}
type SyntheticPKey ¶
SyntheticPKey specifies a synthetic primary key and current sequence count for a table, if needed. We use a synthetic primary key when the source DB table has no primary key.
type TableDetails ¶
type TableDetails struct {
TableName string `json:"TableName"`
}
type TableIssues ¶
type TableIssues struct {
	ColumnLevelIssues map[string][]SchemaIssue
	TableLevelIssues []SchemaIssue
}
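A sketch of how issues are recorded per table and per column (the table and column ids are hypothetical):

issues := map[string]TableIssues{
	"t1": {
		TableLevelIssues: []SchemaIssue{ForeignKeyOnDelete},
		ColumnLevelIssues: map[string][]SchemaIssue{
			"c2": {Widened},
		},
	},
}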
type VerifyExpressionsInput ¶
type VerifyExpressionsInput struct {
	Conv *Conv
	Source string
	ExpressionDetailList []ExpressionDetail
}
type VerifyExpressionsOutput ¶
type VerifyExpressionsOutput struct {
	ExpressionVerificationOutputList []ExpressionVerificationOutput
	Err error
}