Documentation
¶
Overview ¶
Package ir is the Intermediate Representation of parsed SQL statements
Index ¶
- Constants
- func AttrToPythonValue(attr interface{}) string
- func DTypeToString(dt int) string
- func InferFeatureColumns(trainStmt *TrainStmt, db *database.DB) error
- func LogDerivationResult(wr *pipe.Writer, trainStmt *TrainStmt)
- func MarshalToJSONString(in interface{}) (string, error)
- type BucketColumn
- type CategoryColumn
- type CategoryHashColumn
- type CategoryIDColumn
- type ColumnMap
- type CrossColumn
- type EmbeddingColumn
- type EvaluateStmt
- type ExplainStmt
- type FeatureColumn
- type FieldDesc
- type FieldDescMap
- type IndicatorColumn
- type NormalStmt
- type NumericColumn
- type OptimizeExpr
- type OptimizeStmt
- type PredictStmt
- type RunStmt
- type SQLFlowStmt
- type SeqCategoryIDColumn
- type ShowTrainStmt
- type TrainStmt
- func GenerateTrainStmt(slct *parser.SQLFlowSelectStmt) (*TrainStmt, error)
- func GenerateTrainStmtByModel(slct *parser.SQLFlowSelectStmt, connStr, cwd, modelDir, model string) (*TrainStmt, error)
- func GenerateTrainStmtWithInferredColumns(slct *parser.SQLFlowSelectStmt, connStr string, modelDir string, cwd string, ...) (*TrainStmt, error)
- func MockTrainStmt(isxgboost bool) *TrainStmt
Constants ¶
const ( Int int = iota Float String )
Possible DType values in FieldDesc
const ( // TensorFlow is a kind of `TrainStmt` TensorFlow = iota // XGBoost is a kind of `TrainStmt` XGBoost // KMeans is a kind of `TrainStmt` KMeans )
Variables ¶
This section is empty.
Functions ¶
func AttrToPythonValue ¶ added in v0.4.1
func AttrToPythonValue(attr interface{}) string
AttrToPythonValue formats the WITH attributes into the corresponding Python code.
func DTypeToString ¶ added in v0.4.1
DTypeToString returns string value of dtype
func InferFeatureColumns ¶
InferFeatureColumns fills up the featureColumn and columnSpec structs for all fields. Writing the derivation results to a writer is handled separately by LogDerivationResult.
func LogDerivationResult ¶
LogDerivationResult writes messages to wr to log the feature derivation results.
func MarshalToJSONString ¶ added in v0.4.1
MarshalToJSONString converts any data to JSON string.
Types ¶
type BucketColumn ¶
type BucketColumn struct { SourceColumn *NumericColumn Boundaries []int }
BucketColumn represents `tf.feature_column.bucketized_column` ref: https://www.tensorflow.org/api_docs/python/tf/feature_column/bucketized_column
func (*BucketColumn) ApplyTo ¶
func (c *BucketColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*BucketColumn) GenPythonCode ¶ added in v0.4.1
func (c *BucketColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*BucketColumn) GetFieldDesc ¶
func (c *BucketColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
func (*BucketColumn) NumClass ¶
func (c *BucketColumn) NumClass() int64
NumClass returns class number of BucketColumn
type CategoryColumn ¶
type CategoryColumn interface { FeatureColumn NumClass() int64 }
CategoryColumn corresponds to categorical column
type CategoryHashColumn ¶
CategoryHashColumn represents `tf.feature_column.categorical_column_with_hash_bucket` ref: https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_hash_bucket
func (*CategoryHashColumn) ApplyTo ¶
func (c *CategoryHashColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*CategoryHashColumn) GenPythonCode ¶ added in v0.4.1
func (c *CategoryHashColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*CategoryHashColumn) GetFieldDesc ¶
func (c *CategoryHashColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
func (*CategoryHashColumn) NumClass ¶
func (c *CategoryHashColumn) NumClass() int64
NumClass returns class number of CategoryHashColumn
type CategoryIDColumn ¶
CategoryIDColumn represents `tf.feature_column.categorical_column_with_identity` ref: https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_identity
func (*CategoryIDColumn) ApplyTo ¶
func (c *CategoryIDColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*CategoryIDColumn) GenPythonCode ¶ added in v0.4.1
func (c *CategoryIDColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*CategoryIDColumn) GetFieldDesc ¶
func (c *CategoryIDColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
func (*CategoryIDColumn) NumClass ¶
func (c *CategoryIDColumn) NumClass() int64
NumClass returns class number of CategoryIDColumn
type ColumnMap ¶
type ColumnMap map[string]map[string][]FeatureColumn
ColumnMap is like: target -> key -> []FeatureColumn. One column's data can be used by multiple feature columns, e.g. EMBEDDING(c1), CROSS(c1, c2).
type CrossColumn ¶
type CrossColumn struct { Keys []interface{} HashBucketSize int64 }
CrossColumn represents `tf.feature_column.crossed_column` ref: https://www.tensorflow.org/api_docs/python/tf/feature_column/crossed_column
func (*CrossColumn) ApplyTo ¶
func (c *CrossColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*CrossColumn) GenPythonCode ¶ added in v0.4.1
func (c *CrossColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*CrossColumn) GetFieldDesc ¶
func (c *CrossColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
func (*CrossColumn) NumClass ¶
func (c *CrossColumn) NumClass() int64
NumClass returns class number of CrossColumn
type EmbeddingColumn ¶
type EmbeddingColumn struct { CategoryColumn Dimension int Combiner string Initializer string // only used when EMBEDDING(col_name, ...) this will set CategoryColumn = nil // will fill the feature column details using feature_derivation Name string }
EmbeddingColumn represents `tf.feature_column.embedding_column` ref: https://www.tensorflow.org/api_docs/python/tf/feature_column/embedding_column
func (*EmbeddingColumn) ApplyTo ¶
func (c *EmbeddingColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*EmbeddingColumn) GenPythonCode ¶ added in v0.4.1
func (c *EmbeddingColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*EmbeddingColumn) GetFieldDesc ¶
func (c *EmbeddingColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
type EvaluateStmt ¶
type EvaluateStmt struct { OriginalSQL string Select string Attributes map[string]interface{} ModelName string Label FeatureColumn Into string TmpEvaluateTable string TrainStmt *TrainStmt }
EvaluateStmt is the intermediate representation for code generation of an evaluation job
func GenerateEvaluateStmt ¶
func GenerateEvaluateStmt(slct *parser.SQLFlowSelectStmt, connStr string, modelDir string, cwd string, getTrainStmtFromModel bool) (*EvaluateStmt, error)
GenerateEvaluateStmt generates an `EvaluateStmt` from the parsed result `slct`
func (*EvaluateStmt) GetOriginalSQL ¶
func (stmt *EvaluateStmt) GetOriginalSQL() string
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*EvaluateStmt) IsExtended ¶
func (stmt *EvaluateStmt) IsExtended() bool
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*EvaluateStmt) SetOriginalSQL ¶
func (stmt *EvaluateStmt) SetOriginalSQL(sql string)
SetOriginalSQL sets the original sql string
type ExplainStmt ¶
type ExplainStmt struct { // OriginalSQL record the original SQL statement used to get current IR result // FIXME(typhoonzero): OriginalSQL is a temporary field. Can remove this when all moved to IR OriginalSQL string // Select specifies the query for fetching the analysis data. For example, "select * from iris.test;". Select string // Attributes is a map of parsed attribute in the WITH clause. For example, after parsing // "select ... explain ... with explain.plot_type = "bar"", // the Attributes will be {"explain.plot_type": "bar"} Attributes map[string]interface{} // Explainer types. For example TreeExplainer. Explainer string // ModelName is the model to be explained, e.g. TO EXPLAIN model_name ModelName string // Into stores the model explain result. Note that this field is optional. Into string // When SQLFLOW_submitter == "pai", tmp tables will be created for predicting task // see: pai_submitter.go TmpExplainTable string // TrainStmt is the TrainStmt used for generating the training job of the corresponding model TrainStmt *TrainStmt }
ExplainStmt is the intermediate representation for code generation of an analysis job
func GenerateExplainStmt ¶
func GenerateExplainStmt(slct *parser.SQLFlowSelectStmt, connStr, modelDir string, cwd string, getTrainStmtFromModel bool) (*ExplainStmt, error)
GenerateExplainStmt generates an `ExplainStmt` from the parsed result `slct`
func (*ExplainStmt) GetOriginalSQL ¶
func (stmt *ExplainStmt) GetOriginalSQL() string
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*ExplainStmt) IsExtended ¶
func (stmt *ExplainStmt) IsExtended() bool
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*ExplainStmt) SetOriginalSQL ¶
func (stmt *ExplainStmt) SetOriginalSQL(sql string)
SetOriginalSQL sets the original sql string
type FeatureColumn ¶
type FeatureColumn interface { GetFieldDesc() []*FieldDesc ApplyTo(*FieldDesc) (FeatureColumn, error) GenPythonCode() string }
FeatureColumn corresponds to the COLUMN clause in TO TRAIN.
type FieldDesc ¶
type FieldDesc struct { Name string `json:"name"` // the name for a field, e.g. "petal_length" DType int `json:"dtype"` // e.g. "float", "int32" Delimiter string `json:"delimiter"` // Needs to be "," if the field saves strings like "1,23,42". Format string `json:"format"` // The data format, "", "csv" or "kv" Shape []int `json:"shape"` // [3] if the field saves strings of three numbers like "1,23,42". IsSparse bool `json:"is_sparse"` // If the field saves a sparse tensor. // Vocabulary stores all possible enumerate values if the column type is string, // e.g. the column values are: "MALE", "FEMALE", "NULL" Vocabulary map[string]string `json:"vocabulary"` // use a map to generate a list without duplication // if the column data is used as embedding(category_column()), the `num_buckets` should use the maxID // appeared in the sample data. if error still occurs, users should set `num_buckets` manually. MaxID int64 }
FieldDesc describes a field used as the input to a feature column.
func (*FieldDesc) GenPythonCode ¶ added in v0.4.1
GenPythonCode generates Python code to construct a runtime.feature.field_desc
type FieldDescMap ¶
FieldDescMap is a mapping from column name to ColumnSpec struct
type IndicatorColumn ¶
type IndicatorColumn struct { CategoryColumn // only used when INDICATOR(col_name, ...) this will set CategoryColumn = nil // will fill the feature column details using feature_derivation Name string }
IndicatorColumn represents `tf.feature_column.indicator_column` ref: https://www.tensorflow.org/api_docs/python/tf/feature_column/indicator_column
func (*IndicatorColumn) ApplyTo ¶
func (c *IndicatorColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*IndicatorColumn) GenPythonCode ¶ added in v0.4.1
func (c *IndicatorColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*IndicatorColumn) GetFieldDesc ¶
func (c *IndicatorColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
type NormalStmt ¶
type NormalStmt string
NormalStmt is a SQL statement without using SQLFlow syntax extension.
func (*NormalStmt) GetOriginalSQL ¶
func (stmt *NormalStmt) GetOriginalSQL() string
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*NormalStmt) IsExtended ¶
func (stmt *NormalStmt) IsExtended() bool
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*NormalStmt) SetOriginalSQL ¶
func (stmt *NormalStmt) SetOriginalSQL(sql string)
SetOriginalSQL sets the original sql string
type NumericColumn ¶
type NumericColumn struct {
FieldDesc *FieldDesc
}
NumericColumn represents a dense tensor for the model input
FieldDesc indicates the meta information for decoding the field. Please be aware that FieldDesc also contains information for dimension and data type
func (*NumericColumn) ApplyTo ¶
func (c *NumericColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*NumericColumn) GenPythonCode ¶ added in v0.4.1
func (c *NumericColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*NumericColumn) GetFieldDesc ¶
func (c *NumericColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
type OptimizeExpr ¶
type OptimizeExpr struct { // Objective expression or constraint expression string tokens prepared for generate target code. ExpressionTokens []string // constraint group by like: SUM(markets) <= capacity GROUP BY plants, will expand to // for p in plants: // sum(m for m in markets) <= capacity GroupBy string }
OptimizeExpr is the intermediate code for generating target solver expressions.
type OptimizeStmt ¶
type OptimizeStmt struct { // OriginalSQL records the original SQL statement used to get current IR result OriginalSQL string // Select is the select statement before TO MAXIMIZE|MINIMIZE clause. Select string // Variables is the variable name list to be optimized Variables []string // ResultValueName is the column name of the result variable ResultValueName string // VariableType VariableType string // Attributes is a map of parsed attribute in the WITH clause. Attributes map[string]interface{} // Objective Objective OptimizeExpr // Direction, "maximize" or "minimize" Direction string // Constraints Constraints []*OptimizeExpr // Solver Solver string // ResultTable is the table name to store results. ResultTable string }
OptimizeStmt is the intermediate representation of "SELECT TO MAXIMIZE|MINIMIZE" statement.
func GenerateOptimizeStmt ¶
func GenerateOptimizeStmt(optimizeStmt *parser.SQLFlowSelectStmt) (*OptimizeStmt, error)
GenerateOptimizeStmt generates an `OptimizeStmt` from the parsed result `optimizeStmt`
func (*OptimizeStmt) GetOriginalSQL ¶
func (stmt *OptimizeStmt) GetOriginalSQL() string
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*OptimizeStmt) IsExtended ¶
func (stmt *OptimizeStmt) IsExtended() bool
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*OptimizeStmt) SetOriginalSQL ¶
func (stmt *OptimizeStmt) SetOriginalSQL(sql string)
SetOriginalSQL sets the original sql string
type PredictStmt ¶
type PredictStmt struct { // OriginalSQL record the original SQL statement used to get current IR result // FIXME(typhoonzero): OriginalSQL is a temporary field. Can remove this when all moved to IR OriginalSQL string // Select specifies the query for fetching the prediction data. For example, "select * from iris.test;". Select string // ResultTable specifies the table to store the prediction result. ResultTable string // ResultColumn is the column to store predict result in ResultTable ResultColumn string // Attributes is a map of parsed attribute in the WITH clause. For example, after parsing // "select ... predict ... with predict.batch_size = 32 into ...", // the Attributes will be {"predict.batch_size": 32} Attributes map[string]interface{} // Using is the model specified by USING clause. Using string // TrainStmt is the TrainStmt used for generating the training job of the corresponding model TrainStmt *TrainStmt // When SQLFLOW_submitter == "pai", tmp tables will be created for predicting task // see: pai_submitter.go TmpPredictTable string }
PredictStmt is the intermediate representation for code generation of a prediction job
Please be aware that the PredictStmt IR contains the result table name, so the generated Python program is responsible for creating and writing the result table.
func GeneratePredictStmt ¶
func GeneratePredictStmt(slct *parser.SQLFlowSelectStmt, connStr string, modelDir string, cwd string, getTrainStmtFromModel bool) (*PredictStmt, error)
GeneratePredictStmt generates a `PredictStmt` from the parsed result `slct`
func MockPredStmt ¶
func MockPredStmt(trainStmt *TrainStmt) *PredictStmt
MockPredStmt generates a sample PredictStmt for test.
func (*PredictStmt) GetOriginalSQL ¶
func (stmt *PredictStmt) GetOriginalSQL() string
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*PredictStmt) IsExtended ¶
func (stmt *PredictStmt) IsExtended() bool
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*PredictStmt) SetOriginalSQL ¶
func (stmt *PredictStmt) SetOriginalSQL(sql string)
SetOriginalSQL sets the original sql string
type RunStmt ¶
type RunStmt struct { // OriginalSQL is the `SELECT TO RUN` statement. OriginalSQL string // Select is the select statement before TO RUN clause. Select string // ImageName is the name of the docker image after TO RUN keyword. ImageName string // Parameters is the command line parameters for the docker image. Parameters []string // Into is the output table names (0~N, comma separated) after INTO keyword. Into string }
RunStmt is the intermediate representation of `SELECT TO RUN` statement
func GenerateRunStmt ¶
func GenerateRunStmt(slct *parser.SQLFlowSelectStmt) (*RunStmt, error)
GenerateRunStmt generates the RunStmt result from the parsed result of a `TO RUN` statement.
func (*RunStmt) GetOriginalSQL ¶
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*RunStmt) IsExtended ¶
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*RunStmt) SetOriginalSQL ¶
SetOriginalSQL sets the original sql string
type SQLFlowStmt ¶
SQLFlowStmt has multiple implementations: TrainStmt, PredictStmt, ExplainStmt and standard SQL.
type SeqCategoryIDColumn ¶
SeqCategoryIDColumn represents `tf.feature_column.sequence_categorical_column_with_identity` ref: https://www.tensorflow.org/api_docs/python/tf/feature_column/sequence_categorical_column_with_identity
func (*SeqCategoryIDColumn) ApplyTo ¶
func (c *SeqCategoryIDColumn) ApplyTo(other *FieldDesc) (FeatureColumn, error)
ApplyTo applies the FeatureColumn to a new field
func (*SeqCategoryIDColumn) GenPythonCode ¶ added in v0.4.1
func (c *SeqCategoryIDColumn) GenPythonCode() string
GenPythonCode generates Python code to construct a runtime.feature.column.*
func (*SeqCategoryIDColumn) GetFieldDesc ¶
func (c *SeqCategoryIDColumn) GetFieldDesc() []*FieldDesc
GetFieldDesc returns FieldDesc member
func (*SeqCategoryIDColumn) NumClass ¶
func (c *SeqCategoryIDColumn) NumClass() int64
NumClass returns class number of SeqCategoryIDColumn
type ShowTrainStmt ¶
type ShowTrainStmt struct { // OriginalSQL is the SHOW TRAIN stmt itself OriginalSQL string // The model to show the train sql ModelName string }
ShowTrainStmt gets and outputs the original train SQL for ModelName
func GenerateShowTrainStmt ¶
func GenerateShowTrainStmt(showTrain *parser.SQLFlowSelectStmt) (*ShowTrainStmt, error)
GenerateShowTrainStmt generates a `ShowTrainStmt` from the parsed result `showTrain`
func (*ShowTrainStmt) GetOriginalSQL ¶
func (stmt *ShowTrainStmt) GetOriginalSQL() string
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*ShowTrainStmt) IsExtended ¶
func (stmt *ShowTrainStmt) IsExtended() bool
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*ShowTrainStmt) SetOriginalSQL ¶
func (stmt *ShowTrainStmt) SetOriginalSQL(sql string)
SetOriginalSQL sets the original sql string
type TrainStmt ¶
type TrainStmt struct { // OriginalSQL record the original SQL statement used to get current IR result // FIXME(typhoonzero): OriginalSQL is a temporary field. Can remove this when all moved to IR OriginalSQL string // Select specifies the query for fetching the training data. For example, "select * from iris.train;". Select string // ValidationSelect specifies the query for fetching the validation data. For example, "select * from iris.val;". ValidationSelect string // ModelImage is the name of the model's Docker image, for example `TO TRAIN a_data_scientist/regressors:v0.2/MyDNNRegressor` // the name "a_data_scientist/regressors:v0.2" is a Docker image. ModelImage string // Estimator specifies the estimator type. For example, after parsing "select ... train DNNClassifier WITH ...", // the Estimator will be "DNNClassifier". Estimator string // Attributes is a map of parsed attribute in the WITH Clause. For example, after parsing // "select ... train ... with train.epoch = 1000, model.hidden_units = [10, 10]", // the Attributes will be {"train.epoch": 1000, "model.hidden_units": [10 10]}. Attributes map[string]interface{} // Features contain a map of a list of feature columns in the COLUMN clause. // For multiple COLUMN clauses like // ``` // column ... for deep_feature // column ... for wide_feature // ``` // They will be parsed as {"deep_feature": {...}, "wide_feature": {...}} // For single column clause like "column ...", "feature_columns" will be used as the default map key. Features map[string][]FeatureColumn // Label specifies the feature column in the LABEL clause. Label FeatureColumn // PreTrainedModel specifies the model name to be loaded for incremental training. PreTrainedModel string // Into specifies the table name in the INTO clause. Into string // When SQLFLOW_submitter == "pai", tmp tables will be created for training task // see: pai_submitter.go TmpTrainTable string TmpValidateTable string }
TrainStmt is the intermediate representation for code generation of a training job.
func GenerateTrainStmt ¶
func GenerateTrainStmt(slct *parser.SQLFlowSelectStmt) (*TrainStmt, error)
GenerateTrainStmt generates a `TrainStmt` without inferring feature columns
func GenerateTrainStmtByModel ¶
func GenerateTrainStmtByModel(slct *parser.SQLFlowSelectStmt, connStr, cwd, modelDir, model string) (*TrainStmt, error)
GenerateTrainStmtByModel generates a `TrainStmt` from a trained model
func GenerateTrainStmtWithInferredColumns ¶
func GenerateTrainStmtWithInferredColumns(slct *parser.SQLFlowSelectStmt, connStr string, modelDir string, cwd string, loadPreTrainedModel bool, verifyLabel bool) (*TrainStmt, error)
GenerateTrainStmtWithInferredColumns generates a `TrainStmt` with inferred feature columns
func MockTrainStmt ¶
MockTrainStmt generates a sample TrainStmt for test.
func (*TrainStmt) GetModelKind ¶
GetModelKind returns the kind of model in the TrainStmt
func (*TrainStmt) GetOriginalSQL ¶
GetOriginalSQL returns the original SQL statement used to get current IR result
func (*TrainStmt) IsExtended ¶
IsExtended returns whether a SQLFlowStmt is an extended SQL statement
func (*TrainStmt) SetOriginalSQL ¶
SetOriginalSQL sets the original sql string