schema

package
v4.5.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 18, 2023 License: MPL-2.0 Imports: 22 Imported by: 355

Documentation

Overview

Package schema defines types supported by tables in source plugins

Index

Constants

View Source
const (
	// Keys in the "cq:extension:" namespace, one per column-level flag
	// (unique, primary key, constraint name, incremental).
	// NOTE(review): presumably written as Arrow field/schema metadata by
	// ToArrowField / ToArrowSchema — confirm against the implementation.
	MetadataUnique         = "cq:extension:unique"
	MetadataPrimaryKey     = "cq:extension:primary_key"
	MetadataConstraintName = "cq:extension:constraint_name"
	MetadataIncremental    = "cq:extension:incremental"

	// MetadataTrue and MetadataFalse are the string forms of boolean values;
	// the remaining constants are table-level keys in the "cq:" namespace
	// (table name, description, title, and depends-on).
	MetadataTrue             = "true"
	MetadataFalse            = "false"
	MetadataTableName        = "cq:table_name"
	MetadataTableDescription = "cq:table_description"
	MetadataTableTitle       = "cq:table_title"
	MetadataTableDependsOn   = "cq:table_depends_on"
)

Variables

View Source
// CqIDColumn defines the "_cq_id" column: a UUID column that is marked both
// NotNull and Unique.
var CqIDColumn = Column{
	Name:        "_cq_id",
	Type:        types.ExtensionTypes.UUID,
	Description: "Internal CQ ID of the row",
	NotNull:     true,
	Unique:      true,
}

These columns are managed and populated by the source plugins

View Source
// CqParentIDColumn defines the "_cq_parent_id" column: a UUID column whose
// value is produced by the resolver returned from parentCqUUIDResolver().
// IgnoreInTests is set, so the column is not checked for non-nil values in
// integration tests.
var CqParentIDColumn = Column{
	Name:          "_cq_parent_id",
	Type:          types.ExtensionTypes.UUID,
	Description:   "Internal CQ ID of the parent row",
	Resolver:      parentCqUUIDResolver(),
	IgnoreInTests: true,
}
View Source
// CqSourceNameColumn defines the "_cq_source_name" column, a string column.
// No Resolver is set on this definition.
var CqSourceNameColumn = Column{
	Name:        "_cq_source_name",
	Type:        arrow.BinaryTypes.String,
	Description: "Internal CQ row that references the source plugin name data was retrieved",
}
View Source
// CqSyncTimeColumn defines the "_cq_sync_time" column, typed as
// arrow.FixedWidthTypes.Timestamp_us. No Resolver is set on this definition.
var CqSyncTimeColumn = Column{
	Name:        "_cq_sync_time",
	Type:        arrow.FixedWidthTypes.Timestamp_us,
	Description: "Internal CQ row of when sync was started (this will be the same for all rows in a single fetch)",
}

These columns are managed and populated by the destination plugin.

Functions

func AddCqIDs

func AddCqIDs(table *Table)

AddCqIDs adds the cq_id and cq_parent_id columns to the table and all its relations, and sets cq_id as the primary key if there are no other primary keys.

func FindEmptyColumns

func FindEmptyColumns(table *Table, records []arrow.Record) []string

func ValidColumnName

func ValidColumnName(name string) bool

func ValidateTable

func ValidateTable(t *Table) error

Types

type ClientMeta

// ClientMeta is the interface of the client value that is passed as the
// meta argument to TableResolver, RowResolver, ColumnResolver and Multiplexer.
type ClientMeta interface {
	// ID returns a string for this client.
	// NOTE(review): presumably a unique identifier per multiplexed client — confirm.
	ID() string
}

type Column

// Column is the definition of a single column of a Table.
type Column struct {
	// Name of column
	Name string `json:"name"`
	// Type is the Arrow data type of the column's values, e.g. String, UUID, etc.
	Type arrow.DataType `json:"type"`
	// Description about column, this description is added as a comment in the database
	Description string `json:"description"`
	// Column Resolver allows to set your own data for a column; this can be an API call, setting multiple embedded values, etc
	Resolver ColumnResolver `json:"-"`

	// IgnoreInTests is used to skip verifying the column is non-nil in integration tests.
	// By default, integration tests perform a fetch for all resources in cloudquery's test account, and
	// verify all columns are non-nil.
	// If IgnoreInTests is true, verification is skipped for this column.
	// Used when it is hard to create a reproducible environment with this column being non-nil (e.g. various error columns).
	IgnoreInTests bool `json:"-"`

	// PrimaryKey requires the destinations supporting this to include this column into the primary key
	PrimaryKey bool `json:"primary_key"`
	// NotNull requires the destinations supporting this to mark this column as non-nullable
	NotNull bool `json:"not_null"`
	// IncrementalKey is a flag that indicates if the column is used as part of an incremental key.
	// It is mainly used for documentation purposes, but may also be used as part of ensuring that
	// migrations are done correctly.
	IncrementalKey bool `json:"incremental_key"`
	// Unique requires the destinations supporting this to mark this column as unique
	Unique bool `json:"unique"`
}

Column definition for Table

func NewColumnFromArrowField

func NewColumnFromArrowField(f arrow.Field) Column

NewColumnFromArrowField creates a new Column from an arrow.Field. arrow.Field is a low-level representation of a CloudQuery column that can be sent over the wire in a cross-language way.

func (Column) MarshalJSON added in v4.5.0

func (c Column) MarshalJSON() ([]byte, error)

func (Column) String

func (c Column) String() string

func (Column) ToArrowField

func (c Column) ToArrowField() arrow.Field

type ColumnList

// ColumnList is a slice of Column values.
type ColumnList []Column

func (ColumnList) Get

func (c ColumnList) Get(name string) *Column

func (ColumnList) Index

func (c ColumnList) Index(col string) int

func (ColumnList) Names

func (c ColumnList) Names() []string

func (ColumnList) String

func (c ColumnList) String() string

type ColumnResolver

// ColumnResolver is the function type used to resolve a single column.
// It receives the context, the client meta, the resource (row) being
// resolved and the Column being resolved, and returns an error.
type ColumnResolver func(ctx context.Context, meta ClientMeta, resource *Resource, c Column) error

ColumnResolver is called for each row received in TableResolver's data fetch. execution holds all relevant information regarding execution as well as the Column called. resource holds the current row we are resolving the column for.

func ParentColumnResolver

func ParentColumnResolver(name string) ColumnResolver

ParentColumnResolver resolves a column from the parent's table data. If name isn't set, the column will be set to null.

func PathResolver

func PathResolver(path string) ColumnResolver

PathResolver resolves a field in the Resource.Item

Examples:

	PathResolver("Field")
	PathResolver("InnerStruct.Field")
	PathResolver("InnerStruct.InnerInnerStruct.Field")

type GenTestDataOptions

// GenTestDataOptions are the options for generating test data; they are
// passed to (*TestDataGenerator).Generate.
type GenTestDataOptions struct {
	// SourceName is the name of the source to set in the source_name column.
	SourceName string
	// SyncTime is the time to set in the sync_time column.
	SyncTime time.Time
	// MaxRows is the number of rows to generate.
	MaxRows int
	// StableUUID is the UUID to use for all rows. If set to uuid.Nil, a new UUID will be generated
	StableUUID uuid.UUID
	// StableTime is the time to use for all rows other than sync time. If set to time.Time{}, a new time will be generated
	StableTime time.Time
	// TimePrecision is the precision to use for time columns.
	TimePrecision time.Duration
	// Seed is the seed to use for random data generation.
	Seed int64
	// NullRows indicates whether to generate rows with all null values.
	NullRows bool
}

GenTestDataOptions are options for generating test data

type LengthTableValidator

// LengthTableValidator is a field-less type whose Validate method takes a
// *Table and returns an error.
// NOTE(review): presumably it checks name lengths — confirm against Validate.
type LengthTableValidator struct{}

func (LengthTableValidator) Validate

func (LengthTableValidator) Validate(t *Table) error

type Multiplexer

// Multiplexer is a function type that takes one ClientMeta and returns a
// slice of ClientMeta values. It is the type of Table.Multiplex.
type Multiplexer func(meta ClientMeta) []ClientMeta

type Resource

// Resource represents a row in its associated table and carries a reference
// to the original item the row was built from.
type Resource struct {
	// Item is the original resource item from the prior resolve
	Item any
	// Parent is set if this is an embedded table
	Parent *Resource
	// Table is the table this resource belongs to (marked as an internal field)
	Table *Table
	// contains filtered or unexported fields
}

Resource represents a row in its associated table. It carries a reference to the original item and automatically generates an ID based on the Table's Columns. Resource data can be accessed by the Get and Set methods.

func NewResourceData

func NewResourceData(t *Table, parent *Resource, item any) *Resource

func (*Resource) CalculateCQID

func (r *Resource) CalculateCQID(deterministicCQID bool) error

func (*Resource) Get

func (r *Resource) Get(columnName string) scalar.Scalar

func (*Resource) GetItem

func (r *Resource) GetItem() any

func (*Resource) GetValues

func (r *Resource) GetValues() scalar.Vector

func (*Resource) Set

func (r *Resource) Set(columnName string, value any) error

Set sets a column with value. This does validation and conversion to one of the concrete types. It returns an error just for backward compatibility and panics in case it fails.

func (*Resource) SetItem

func (r *Resource) SetItem(item any)

SetItem overrides the original item (this is useful for APIs that follow the list/details pattern).

func (*Resource) Validate

func (r *Resource) Validate() error

Validate checks that all primary keys have values.

type Resources

// Resources is a slice of *Resource.
type Resources []*Resource

func (Resources) ColumnNames

func (rr Resources) ColumnNames() []string

func (Resources) TableName

func (rr Resources) TableName() string

type RowResolver

// RowResolver is the function type of Table.PreResourceResolver and
// Table.PostResourceResolver. It receives the context, the client meta and
// the resource (row) being resolved, and returns an error.
type RowResolver func(ctx context.Context, meta ClientMeta, resource *Resource) error

type Schemas

// Schemas is a slice of *arrow.Schema.
type Schemas []*arrow.Schema

func (Schemas) Len

func (s Schemas) Len() int

func (Schemas) SchemaByName

func (s Schemas) SchemaByName(name string) *arrow.Schema

type SyncSummary deprecated

// SyncSummary holds three unsigned counters.
// NOTE(review): presumably the totals of resources, errors and panics for a
// sync — confirm against callers.
//
// Deprecated: SyncSummary is deprecated.
type SyncSummary struct {
	Resources uint64
	Errors    uint64
	Panics    uint64
}

Deprecated: SyncSummary is deprecated.

type Table

// Table is the definition of a single table: its name, columns, relations
// and the resolver hooks used to fetch and populate its rows.
type Table struct {
	// Name of table
	Name string `json:"name"`
	// Title to be used in documentation (optional: will be generated from name if not set)
	Title string `json:"title"`
	// Description of the table
	Description string `json:"description"`
	// Columns are the set of fields that are part of this table
	Columns ColumnList `json:"columns"`
	// Relations are the set of tables related to this table
	Relations Tables `json:"relations"`
	// Transform is a hook of type Transform (func(table *Table) error).
	// NOTE(review): when it is invoked is not visible here — confirm.
	Transform Transform `json:"-"`
	// Resolver is the main entry point for fetching table data
	Resolver TableResolver `json:"-"`
	// Multiplex returns re-purposed meta clients. The sdk will execute the table with each of them
	Multiplex Multiplexer `json:"-"`
	// PostResourceResolver is called after all columns have been resolved, but before the Resource is sent to be inserted. The ordering of resolvers is:
	//  (Table) Resolver → PreResourceResolver → ColumnResolvers → PostResourceResolver
	PostResourceResolver RowResolver `json:"-"`
	// PreResourceResolver is called before all columns are resolved but after Resource is created. The ordering of resolvers is:
	//  (Table) Resolver → PreResourceResolver → ColumnResolvers → PostResourceResolver
	PreResourceResolver RowResolver `json:"-"`
	// IsIncremental is a flag that indicates if the table is incremental or not. This flag mainly affects how the table is
	// documented.
	IsIncremental bool `json:"is_incremental"`

	// IgnoreInTests is used to exclude a table from integration tests.
	// By default, integration tests fetch all resources from cloudquery's test account, and verify all tables
	// have at least one row.
	// When IgnoreInTests is true, integration tests won't fetch from this table.
	// Used when it is hard to create a reproducible environment with a row in this table.
	IgnoreInTests bool `json:"-"`

	// Parent is the parent table in case this table is called via parent table (i.e. relation)
	Parent *Table `json:"-"`

	// PkConstraintName is serialized as "pk_constraint_name".
	// NOTE(review): presumably the name of the primary-key constraint used by destinations — confirm.
	PkConstraintName string `json:"pk_constraint_name"`
}

func NewTableFromArrowSchema

func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error)

NewTableFromArrowSchema creates a CloudQuery Table abstraction from an Arrow schema. The Arrow schema is a low level representation of a table that can be sent over the wire in a cross-language way.

func TestTable

func TestTable(name string, opts TestSourceOptions) *Table

func (*Table) Column

func (t *Table) Column(name string) *Column

func (*Table) Copy

func (t *Table) Copy(parent *Table) *Table

func (*Table) GetChanges

func (t *Table) GetChanges(old *Table) []TableColumnChange

GetChanges returns changes between two tables when t is the new one and old is the old one.

func (*Table) IncrementalKeys

func (t *Table) IncrementalKeys() []string

func (*Table) OverwriteOrAddColumn

func (t *Table) OverwriteOrAddColumn(column *Column)

OverwriteOrAddColumn overwrites or adds columns. If the column with the same name exists, overwrites it. Otherwise, adds the column to the beginning of the table.

func (*Table) PrimaryKeys

func (t *Table) PrimaryKeys() []string

func (*Table) PrimaryKeysIndexes

func (t *Table) PrimaryKeysIndexes() []int

func (*Table) TableNames

func (t *Table) TableNames() []string

func (*Table) ToArrowSchema

func (t *Table) ToArrowSchema() *arrow.Schema

func (*Table) ValidateColumnNames

func (t *Table) ValidateColumnNames() error

func (*Table) ValidateDuplicateColumns

func (t *Table) ValidateDuplicateColumns() error

func (*Table) ValidateName

func (t *Table) ValidateName() error

type TableColumnChange

// TableColumnChange describes one column-level change; a slice of these is
// returned by (*Table).GetChanges.
type TableColumnChange struct {
	// Type is the kind of change (one of the TableColumnChangeType values).
	Type       TableColumnChangeType
	// ColumnName is the name of the column the change refers to.
	ColumnName string
	// Current and Previous hold column definitions.
	// NOTE(review): presumably the new-table and old-table versions of the
	// column respectively — confirm against GetChanges.
	Current    Column
	Previous   Column
}

func (TableColumnChange) String

func (t TableColumnChange) String() string

type TableColumnChangeType

// TableColumnChangeType is an int-backed enumeration of column change kinds.
type TableColumnChangeType int
const (
	// The values are assigned with iota starting at zero, so
	// TableColumnChangeTypeUnknown is the zero value of the type.
	TableColumnChangeTypeUnknown TableColumnChangeType = iota
	TableColumnChangeTypeAdd
	TableColumnChangeTypeUpdate
	TableColumnChangeTypeRemove
)

func (TableColumnChangeType) String

func (t TableColumnChangeType) String() string

type TableResolver

// TableResolver is the function type of Table.Resolver. It receives the
// context, the client meta, the parent resource and a send-only channel of
// any values (res), and returns an error.
type TableResolver func(ctx context.Context, meta ClientMeta, parent *Resource, res chan<- any) error

TableResolver is the main entry point when a table sync is called.

Table resolver has 3 main arguments:

  - meta (ClientMeta): the client returned by the plugin.Provider Configure call
  - parent (*Resource): the parent resource in case this table is called via parent table (i.e. relation)
  - res (chan<- any): a channel to pass results fetched by the TableResolver

type TableValidator

// TableValidator is the interface for types that can validate a Table.
type TableValidator interface {
	// Validate takes a table and returns an error.
	// NOTE(review): presumably nil means the table is valid — confirm.
	Validate(t *Table) error
}

type Tables

// Tables is a slice of *Table. It is also the type of Table.Relations.
type Tables []*Table

func NewTablesFromArrowSchemas

func NewTablesFromArrowSchemas(schemas []*arrow.Schema) (Tables, error)

func (Tables) FilterDfs

func (tt Tables) FilterDfs(tables, skipTables []string, skipDependentTables bool) (Tables, error)

func (Tables) FilterDfsFunc

func (tt Tables) FilterDfsFunc(include, exclude func(*Table) bool, skipDependentTables bool) Tables

func (Tables) FlattenTables

func (tt Tables) FlattenTables() Tables

func (Tables) Get

func (tt Tables) Get(name string) *Table

Get returns a table by name. Returns top-level tables and relations.

func (Tables) GetTopLevel

func (tt Tables) GetTopLevel(name string) *Table

GetTopLevel returns a table by name. Only returns the table if it is in top-level list.

func (Tables) TableNames

func (tt Tables) TableNames() []string

func (Tables) ToArrowSchemas

func (tt Tables) ToArrowSchemas() Schemas

func (Tables) UnflattenTables added in v4.4.0

func (tt Tables) UnflattenTables() (Tables, error)

UnflattenTables returns a new Tables copy with the relations unflattened. This is the opposite operation of FlattenTables.

func (Tables) ValidateColumnNames

func (tt Tables) ValidateColumnNames() error

func (Tables) ValidateDuplicateColumns

func (tt Tables) ValidateDuplicateColumns() error

func (Tables) ValidateDuplicateTables

func (tt Tables) ValidateDuplicateTables() error

func (Tables) ValidateTableNames

func (tt Tables) ValidateTableNames() error

type TestDataGenerator

// TestDataGenerator is the receiver type of the Generate method; its fields
// are not shown here.
type TestDataGenerator struct {
	// contains filtered or unexported fields
}

func NewTestDataGenerator

func NewTestDataGenerator() *TestDataGenerator

func (*TestDataGenerator) Generate

func (tg *TestDataGenerator) Generate(table *Table, opts GenTestDataOptions) []arrow.Record

Generate generates a slice of arrow.Records for the given table and options.

type TestSourceOptions

// TestSourceOptions controls which types are included by TestSourceColumns.
// It is also the options type accepted by TestTable.
type TestSourceOptions struct {
	SkipDates      bool
	SkipDurations  bool
	SkipIntervals  bool
	SkipLargeTypes bool // e.g. large binary, large string
	SkipLists      bool // lists of all primitive types. Lists that were supported by CQTypes are always included.
	SkipMaps       bool
	SkipStructs    bool
	SkipTimes      bool // time of day types
	SkipTimestamps bool // timestamp types. Microsecond timestamp is always included, regardless of this setting.
	TimePrecision  time.Duration
	SkipDecimals   bool
}

TestSourceOptions controls which types are included by TestSourceColumns.

type Transform

// Transform is a function type that takes a *Table and returns an error.
// It is the type of Table.Transform.
type Transform func(table *Table) error

type ValidationError

// ValidationError is an error type that carries an underlying error, a
// message, an Arrow data type and a value. Its methods use a pointer receiver.
type ValidationError struct {
	// Err is the wrapped error.
	// NOTE(review): presumably what Unwrap returns — confirm.
	Err   error
	// Msg is a message string attached to the error.
	Msg   string
	// Type is an Arrow data type.
	// NOTE(review): presumably the type the value was validated against — confirm.
	Type  arrow.DataType
	// Value is the value associated with the error; MaskedError exists to
	// report the error without it.
	Value any
}

func (*ValidationError) Error

func (e *ValidationError) Error() string

func (*ValidationError) MaskedError

func (e *ValidationError) MaskedError() string

MaskedError returns the error message without the value.

func (*ValidationError) Unwrap

func (e *ValidationError) Unwrap() error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL