tableschema

package
v0.4.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 6, 2024 License: Apache-2.0 Imports: 14 Imported by: 3

Documentation

Index

Constants

This section is empty.

Variables

View Source
// ArrowOptionConfig is the public entry point for obtaining ArrowOptions
// values. Each field is a constructor function bound to the package's
// unexported implementation of the same name, and every constructor returns
// an ArrowOptions.
var ArrowOptionConfig = struct {
	// WithExtendedMode takes no arguments and returns an ArrowOptions.
	WithExtendedMode  func() ArrowOptions
	// WithTimestampUnit returns an ArrowOptions built from the given TimeUnit.
	// NOTE(review): presumably selects the Arrow unit for timestamp columns — confirm.
	WithTimestampUnit func(unit TimeUnit) ArrowOptions
	// WithDatetimeUnit returns an ArrowOptions built from the given TimeUnit.
	// NOTE(review): presumably selects the Arrow unit for datetime columns — confirm.
	WithDatetimeUnit  func(unit TimeUnit) ArrowOptions
}{
	WithExtendedMode:  withExtendedMode,
	WithTimestampUnit: withTimestampUnit,
	WithDatetimeUnit:  withDatetimeUnit,
}
View Source
// CLUSTER_TYPE lists the ClusterType values defined by this package, exposed
// as fields of an anonymous struct so callers write CLUSTER_TYPE.Hash or
// CLUSTER_TYPE.Range.
var CLUSTER_TYPE = struct {
	Hash  ClusterType
	Range ClusterType
}{
	// Hash carries the string value "hash".
	Hash:  "hash",
	// Range carries the string value "range".
	Range: "range",
}
View Source
// SORT_ORDER lists the SortOrder values defined by this package, exposed as
// fields of an anonymous struct so callers write SORT_ORDER.ASC or
// SORT_ORDER.DESC.
var SORT_ORDER = struct {
	ASC  SortOrder
	DESC SortOrder
}{
	// ASC carries the string value "asc".
	ASC:  "asc",
	// DESC carries the string value "desc".
	DESC: "desc",
}

Functions

func ToMaxComputeRecords added in v0.3.14

func ToMaxComputeRecords(arrowBatch array.Record, columns []Column, opt ...ArrowOptions) ([]data.Record, error)

ToMaxComputeRecords converts an Arrow Record Batch into a list of ODPS Records.

func TypeToArrowType

func TypeToArrowType(odpsType datatype.DataType) (arrow.DataType, error)

TypeToArrowType converts an ODPS field type to an Arrow field type.

	Storage Type          | Arrow Type
	----------------------+---------------------
	boolean               | boolean
	tinyint               | int8
	smallint              | int16
	int                   | int32
	bigint                | int64
	float                 | float32
	double                | float64
	char                  | utf8
	varchar               | utf8
	string                | utf8
	binary                | binary
	date                  | date32
	datetime              | timestamp(nano)
	timestamp             | timestamp(nano) [note: precision selection is under development]
	interval_day_time     | day_time_interval
	interval_year_month   | month_interval
	decimal               | decimal
	struct                | struct
	array                 | list
	map                   | map

Types

type ArrowOptions added in v0.3.14

type ArrowOptions func(cfg *arrowOptions)

ArrowOptions cannot be used directly; create one through ArrowOptionConfig.XXX.

type ClusterInfo

// ClusterInfo groups a table's clustering settings. It is stored in
// TableSchema.ClusterInfo.
type ClusterInfo struct {
	// ClusterType is the kind of clustering; the package's values are
	// CLUSTER_TYPE.Hash and CLUSTER_TYPE.Range.
	ClusterType ClusterType
	// ClusterCols holds the names of the cluster columns.
	ClusterCols []string
	// SortCols holds the sort columns, each a name paired with a SortOrder.
	SortCols    []SortColumn
	// BucketNum is the bucket number.
	// NOTE(review): valid range and zero-value meaning are not shown here — confirm.
	BucketNum   int
}

ClusterInfo holds the clustering information of a table.

type ClusterType added in v0.3.7

// ClusterType is an alias for string that names a clustering kind. The
// values defined by this package are CLUSTER_TYPE.Hash ("hash") and
// CLUSTER_TYPE.Range ("range").
type ClusterType = string

type Column

// Column describes a single column of a table schema. It is the element type
// of TableSchema.Columns and TableSchema.PartitionColumns, and it has a
// custom UnmarshalJSON method.
type Column struct {
	// Name is the column name.
	Name               string
	// Type is the column's data type.
	Type               datatype2.DataType
	// Comment is the column comment.
	Comment            string
	// Label is the column label.
	Label              string
	// NotNull reports whether the column is declared not-null.
	NotNull            bool
	// HasDefaultValue reports whether the column has a default value.
	HasDefaultValue    bool
	// DefaultValue is the default value in string form.
	// NOTE(review): presumably only meaningful when HasDefaultValue is true — confirm.
	DefaultValue       string
	// ExtendedLabels holds the column's extended labels.
	ExtendedLabels     []string
	// GenerateExpression is the expression attached to the column; per the
	// GenerateExpression type it is used to generate partition values for
	// Auto-Partition tables.
	GenerateExpression GenerateExpression
}

func (*Column) UnmarshalJSON

func (c *Column) UnmarshalJSON(data []byte) error

type DatePart added in v0.3.17

type DatePart int

DatePart is an enumeration that represents the unit to which a time value is truncated.

// The DatePart values. Each one names the granularity a time value is
// truncated to and the layout of the resulting string.
// NOTE(review): this rendering shows no initializer on YEAR; the real
// declaration presumably assigns one (for example via iota) — confirm
// against the package source.
const (
	YEAR  DatePart // YEAR truncates a time to a string like YYYY
	MONTH          // MONTH truncates a time to a string like YYYY-MM
	DAY            // DAY truncates a time to a string like YYYY-MM-DD
	HOUR           // HOUR truncates a time to a string like YYYY-MM-DD HH:00:00
)

func (DatePart) String added in v0.3.17

func (p DatePart) String() string

type GenerateExpression added in v0.3.17

// GenerateExpression is a column expression used to generate partition
// values for Auto-Partition tables. TruncTime is the implementation shown in
// this package's documentation.
type GenerateExpression interface {
	// String returns the expression in string form.
	String() string
	// contains filtered or unexported methods
	// NOTE(review): if these are unexported, types outside this package
	// cannot satisfy the interface — confirm.
}

GenerateExpression generates a column expression, used to generate partition values for the Auto-Partition table

type SchemaBuilder

// SchemaBuilder assembles a TableSchema step by step. Obtain one from
// NewSchemaBuilder, chain its setter methods (each returns *SchemaBuilder),
// and call Build to get the resulting TableSchema.
type SchemaBuilder struct {
	// contains filtered or unexported fields
}

func NewSchemaBuilder

func NewSchemaBuilder() *SchemaBuilder

func (*SchemaBuilder) Build

func (builder *SchemaBuilder) Build() TableSchema

func (*SchemaBuilder) ClusterBucketNum added in v0.3.7

func (builder *SchemaBuilder) ClusterBucketNum(bucketNum int) *SchemaBuilder

func (*SchemaBuilder) ClusterColumns added in v0.3.7

func (builder *SchemaBuilder) ClusterColumns(clusterCols []string) *SchemaBuilder

func (*SchemaBuilder) ClusterSortColumns added in v0.3.7

func (builder *SchemaBuilder) ClusterSortColumns(clusterSortCols []SortColumn) *SchemaBuilder

func (*SchemaBuilder) ClusterType added in v0.3.7

func (builder *SchemaBuilder) ClusterType(clusterType ClusterType) *SchemaBuilder

func (*SchemaBuilder) Column

func (builder *SchemaBuilder) Column(column Column) *SchemaBuilder

func (*SchemaBuilder) Columns

func (builder *SchemaBuilder) Columns(columns ...Column) *SchemaBuilder

func (*SchemaBuilder) Comment

func (builder *SchemaBuilder) Comment(comment string) *SchemaBuilder

func (*SchemaBuilder) IsMaterializedView added in v0.3.14

func (builder *SchemaBuilder) IsMaterializedView(isMaterializedView bool) *SchemaBuilder

func (*SchemaBuilder) IsMaterializedViewRewriteEnabled added in v0.3.14

func (builder *SchemaBuilder) IsMaterializedViewRewriteEnabled(isMaterializedViewRewriteEnabled bool) *SchemaBuilder

func (*SchemaBuilder) IsVirtualView added in v0.3.14

func (builder *SchemaBuilder) IsVirtualView(isVirtualView bool) *SchemaBuilder

func (*SchemaBuilder) Lifecycle

func (builder *SchemaBuilder) Lifecycle(lifecycle int) *SchemaBuilder

Lifecycle sets the lifecycle of the table; only positive integers are supported. Unit: days.

func (*SchemaBuilder) Location

func (builder *SchemaBuilder) Location(location string) *SchemaBuilder

func (*SchemaBuilder) MvProperties added in v0.3.14

func (builder *SchemaBuilder) MvProperties(properties map[string]string) *SchemaBuilder

func (*SchemaBuilder) MvProperty added in v0.3.14

func (builder *SchemaBuilder) MvProperty(key, value string) *SchemaBuilder

func (*SchemaBuilder) Name

func (builder *SchemaBuilder) Name(name string) *SchemaBuilder

func (*SchemaBuilder) PartitionColumn

func (builder *SchemaBuilder) PartitionColumn(column Column) *SchemaBuilder

func (*SchemaBuilder) PartitionColumns

func (builder *SchemaBuilder) PartitionColumns(columns ...Column) *SchemaBuilder

func (*SchemaBuilder) PrimaryKeys added in v0.4.0

func (builder *SchemaBuilder) PrimaryKeys(primaryKeys []string) *SchemaBuilder

PrimaryKeys specifies the primary keys of the table.

func (*SchemaBuilder) StorageHandler

func (builder *SchemaBuilder) StorageHandler(storageHandler string) *SchemaBuilder

func (*SchemaBuilder) TblProperties added in v0.3.16

func (builder *SchemaBuilder) TblProperties(properties map[string]string) *SchemaBuilder

TblProperties sets the properties of the table, in key-value form.

func (*SchemaBuilder) ViewText added in v0.3.14

func (builder *SchemaBuilder) ViewText(viewText string) *SchemaBuilder

type SortColumn

// SortColumn pairs a column name with the order it is sorted in. It is the
// element type of ClusterInfo.SortCols and of the argument to
// SchemaBuilder.ClusterSortColumns.
type SortColumn struct {
	// Name is the name of the column to sort by.
	Name  string
	// Order is the sort direction; the package's values are SORT_ORDER.ASC
	// and SORT_ORDER.DESC.
	Order SortOrder
}

type SortOrder added in v0.3.7

// SortOrder is a string-based sort direction. The values defined by this
// package are SORT_ORDER.ASC ("asc") and SORT_ORDER.DESC ("desc").
type SortOrder string

type TableSchema

// TableSchema is the full description of a table: its columns and partition
// columns, timestamps, view and materialized-view flags, extended storage
// statistics, external-table settings and clustering information. It is
// produced by SchemaBuilder.Build and has a custom UnmarshalJSON method.
type TableSchema struct {
	TableName                        string
	Columns                          []Column
	Comment                          string
	CreateTime                       common.GMTTime
	ExtendedLabel                    []string
	HubLifecycle                     int
	IsExternal                       bool
	IsMaterializedView               bool
	IsMaterializedViewRewriteEnabled bool
	IsMaterializedViewOutdated       bool

	IsVirtualView    bool
	LastDDLTime      common.GMTTime
	LastModifiedTime common.GMTTime
	LastAccessTime   common.GMTTime
	Lifecycle        int
	Owner            string
	// PartitionColumns is mapped to the JSON key "PartitionKeys" by its struct tag.
	PartitionColumns []Column `json:"PartitionKeys"`
	RecordNum        int
	ShardExist       bool
	ShardInfo        string
	Size             int64
	TableLabel       string
	ViewText         string
	ViewExpandedText string

	// extended schema, got by adding "?extended" to table api
	FileNum       int
	IsArchived    bool
	PhysicalSize  int
	Reserved      string // reserved JSON string; its fields are not fixed
	PrimaryKeys   []string
	Transactional bool

	// for external table extended info
	StorageHandler string
	Location       string

	// The map fields below are tagged json:"-", so tag-driven encoding/json
	// processing skips them.
	// NOTE(review): presumably they are populated by UnmarshalJSON from other
	// fields — confirm.
	SerDeProperties map[string]string `json:"-"`
	Props           string
	MvProperties    map[string]string `json:"-"` // materialized view properties
	RefreshHistory  string

	// for clustered info
	TblProperties map[string]string `json:"-"`
	ClusterInfo   ClusterInfo
	// contains filtered or unexported fields
}

func (*TableSchema) FieldByName

func (schema *TableSchema) FieldByName(name string) (Column, bool)

func (*TableSchema) GeneratePartitionSpec added in v0.3.17

func (schema *TableSchema) GeneratePartitionSpec(record *data.Record) (string, error)

GeneratePartitionSpec is used for Auto-Partition tables to automatically generate partition columns based on a Record.

func (*TableSchema) ToArrowSchema

func (schema *TableSchema) ToArrowSchema() *arrow.Schema

func (*TableSchema) ToBaseSQLString

func (schema *TableSchema) ToBaseSQLString(projectName string, schemaName string, createIfNotExists, isExternal bool) (string, error)

func (*TableSchema) ToExternalSQLString

func (schema *TableSchema) ToExternalSQLString(
	projectName string,
	schemaName string,
	createIfNotExists bool,
	serdeProperties map[string]string,
	jars []string,
) (string, error)

func (*TableSchema) ToSQLString

func (schema *TableSchema) ToSQLString(projectName string, schemaName string, createIfNotExists bool) (string, error)

func (*TableSchema) ToViewSQLString added in v0.3.14

func (schema *TableSchema) ToViewSQLString(projectName string, schemaName string, orReplace, createIfNotExists, buildDeferred bool) (string, error)

func (*TableSchema) UnmarshalJSON added in v0.3.15

func (schema *TableSchema) UnmarshalJSON(data []byte) error

type TimeUnit added in v0.3.14

// TimeUnit names a time resolution as a string. It is the argument type of
// ArrowOptionConfig.WithTimestampUnit and ArrowOptionConfig.WithDatetimeUnit.
type TimeUnit string
// The TimeUnit values defined by this package.
const (
	Second TimeUnit = "second"
	Milli  TimeUnit = "milli"
	Micro  TimeUnit = "micro"
	Nano   TimeUnit = "nano"
)

type TruncTime added in v0.3.17

type TruncTime struct {
	// contains filtered or unexported fields
}

TruncTime is an implementation of GenerateExpression, corresponding to the SQL trunc_time('dateColumnName', 'datePart') syntax.

func NewTruncTime added in v0.3.17

func NewTruncTime(dateColumnName string, datePart DatePart) *TruncTime

NewTruncTime creates a TruncTime instance from a column name and a date part.

func (*TruncTime) String added in v0.3.17

func (t *TruncTime) String() string

String returns a string representation like "trunc_time(dateColumnName, 'datePart')".

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL