Documentation ¶
Overview ¶
Package lex is a lexer for QLBridge. It is more of a lex toolkit, and implements 4 dialects: {SQL, FilterQL, JSON, Expressions}.
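For example, a minimal sketch of lexing a SQL statement and printing its tokens (assuming the import path github.com/araddon/qlbridge/lex):

	package main

	import (
		"fmt"

		"github.com/araddon/qlbridge/lex"
	)

	func main() {
		l := lex.NewSqlLexer(`SELECT name, age FROM users WHERE age > 21;`)
		for {
			tok := l.NextToken()
			if tok.T == lex.TokenEOF || tok.T == lex.TokenError {
				break
			}
			fmt.Printf("%v %q\n", tok.T, tok.V)
		}
	}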
Index ¶
- Variables
- func IdentityRunesOnly(identity string) bool
- func IsBreak(r rune) bool
- func IsIdentifierRune(r rune) bool
- func IsValidIdentity(identity string) bool
- func LoadTokenInfo()
- type Clause
- type Dialect
- type KeywordMatcher
- type Lexer
- func (l *Lexer) ConsumeWord(word string)
- func (l *Lexer) Emit(t TokenType)
- func (l *Lexer) ErrMsg(t Token, msg string) error
- func (l *Lexer) IsComment() bool
- func (l *Lexer) IsEnd() bool
- func (l *Lexer) Next() (r rune)
- func (l *Lexer) NextToken() Token
- func (l *Lexer) Peek() rune
- func (l *Lexer) PeekWord() string
- func (l *Lexer) PeekX(x int) string
- func (l *Lexer) Push(name string, state StateFn)
- func (l *Lexer) RawInput() string
- func (l *Lexer) Remainder() (string, bool)
- func (l *Lexer) ReverseTrim()
- func (l *Lexer) SkipWhiteSpaces()
- func (l *Lexer) SkipWhiteSpacesNewLine() bool
- type NamedStateFn
- type StateFn
- func LexColumnNames(l *Lexer) StateFn
- func LexColumns(l *Lexer) StateFn
- func LexComment(l *Lexer) StateFn
- func LexConditionalClause(l *Lexer) StateFn
- func LexCreate(l *Lexer) StateFn
- func LexDataType(forToken TokenType) StateFn
- func LexDdlAlterColumn(l *Lexer) StateFn
- func LexDdlTable(l *Lexer) StateFn
- func LexDdlTableColumn(l *Lexer) StateFn
- func LexDdlTableStorage(l *Lexer) StateFn
- func LexDialectForStatement(l *Lexer) StateFn
- func LexDrop(l *Lexer) StateFn
- func LexDuration(l *Lexer) StateFn
- func LexEmpty(l *Lexer) StateFn
- func LexEndOfStatement(l *Lexer) StateFn
- func LexEndOfSubStatement(l *Lexer) StateFn
- func LexEngineKeyValue(l *Lexer) StateFn
- func LexExpression(l *Lexer) StateFn
- func LexExpressionOrIdentity(l *Lexer) StateFn
- func LexExpressionParens(l *Lexer) StateFn
- func LexFilterClause(l *Lexer) StateFn
- func LexIdentifierOfType(forToken TokenType) StateFn
- func LexIdentityOrValue(l *Lexer) StateFn
- func LexInlineComment(l *Lexer) StateFn
- func LexInlineCommentNoTag(l *Lexer) StateFn
- func LexInto(l *Lexer) StateFn
- func LexJoinEntry(l *Lexer) StateFn
- func LexJson(l *Lexer) StateFn
- func LexJsonArray(l *Lexer) StateFn
- func LexJsonIdentity(l *Lexer) StateFn
- func LexJsonObject(l *Lexer) StateFn
- func LexJsonOrKeyValue(l *Lexer) StateFn
- func LexJsonValue(l *Lexer) StateFn
- func LexLimit(l *Lexer) StateFn
- func LexListOfArgs(l *Lexer) StateFn
- func LexLogical(l *Lexer) StateFn
- func LexMatchClosure(tok TokenType, nextFn StateFn) StateFn
- func LexMultilineComment(l *Lexer) StateFn
- func LexNumber(l *Lexer) StateFn
- func LexNumberOrDuration(l *Lexer) StateFn
- func LexOrderByColumn(l *Lexer) StateFn
- func LexParenLeft(l *Lexer) StateFn
- func LexParenRight(l *Lexer) StateFn
- func LexPreparedStatement(l *Lexer) StateFn
- func LexRegex(l *Lexer) StateFn
- func LexSelectClause(l *Lexer) StateFn
- func LexSelectList(l *Lexer) StateFn
- func LexShowClause(l *Lexer) StateFn
- func LexStatement(l *Lexer) StateFn
- func LexSubQuery(l *Lexer) StateFn
- func LexTableColumns(l *Lexer) StateFn
- func LexTableReferenceFirst(l *Lexer) StateFn
- func LexTableReferences(l *Lexer) StateFn
- func LexUpsertClause(l *Lexer) StateFn
- func LexUrnaryNot(l *Lexer) StateFn
- func LexValue(l *Lexer) StateFn
- func LexValueColumns(l *Lexer) StateFn
- type Token
- type TokenInfo
- type TokenType
Constants ¶
This section is empty.
Variables ¶
var (
	// FilterStatement a FilterQL statement.
	FilterStatement = []*Clause{
		{Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
		{Token: TokenFrom, Lexer: LexIdentifier, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
		{Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
		{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
	}
	// FilterSelectStatement Filter statement that also supports column projection.
	FilterSelectStatement = []*Clause{
		{Token: TokenSelect, Lexer: LexSelectClause, Optional: false},
		{Token: TokenFrom, Lexer: LexIdentifier, Optional: false},
		{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
		{Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
		{Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
		{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
	}
	// FilterQLDialect is a Where-Clause filtering language, slightly
	// more DSL'ish than a SQL Where Clause.
	FilterQLDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenFilter, Clauses: FilterStatement},
			{Token: TokenSelect, Clauses: FilterSelectStatement},
		},
		IdentityQuoting: IdentityQuotingWSingleQuote,
	}
)
var (
	// SqlDialect is a SQL dialect
	//
	//	SELECT
	//	UPDATE
	//	INSERT
	//	UPSERT
	//	DELETE
	//
	//	SHOW identity;
	//	DESCRIBE identity;
	//	PREPARE
	//
	// ddl
	//
	//	ALTER
	//	CREATE (TABLE|VIEW|CONTINUOUSVIEW|SOURCE)
	//
	// TODO:
	//
	//	CREATE
	//	VIEW
	SqlDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenPrepare, Clauses: SqlPrepare},
			{Token: TokenSelect, Clauses: SqlSelect},
			{Token: TokenUpdate, Clauses: SqlUpdate},
			{Token: TokenUpsert, Clauses: SqlUpsert},
			{Token: TokenInsert, Clauses: SqlInsert},
			{Token: TokenDelete, Clauses: SqlDelete},
			{Token: TokenCreate, Clauses: SqlCreate},
			{Token: TokenDrop, Clauses: SqlDrop},
			{Token: TokenAlter, Clauses: SqlAlter},
			{Token: TokenDescribe, Clauses: SqlDescribe},
			{Token: TokenExplain, Clauses: SqlExplain},
			{Token: TokenDesc, Clauses: SqlDescribeAlt},
			{Token: TokenShow, Clauses: SqlShow},
			{Token: TokenSet, Clauses: SqlSet},
			{Token: TokenUse, Clauses: SqlUse},
			{Token: TokenRollback, Clauses: SqlRollback},
			{Token: TokenCommit, Clauses: SqlCommit},
		},
	}

	// SqlSelect Select statement.
	SqlSelect = []*Clause{
		{Token: TokenSelect, Lexer: LexSelectClause, Name: "sqlSelect.Select"},
		{Token: TokenInto, Lexer: LexInto, Optional: true, Name: "sqlSelect.INTO"},
		{Token: TokenFrom, Lexer: LexTableReferenceFirst, Optional: true, Repeat: false, Clauses: fromSource, Name: "sqlSelect.From"},
		{KeywordMatcher: sourceMatch, Optional: true, Repeat: true, Clauses: moreSources, Name: "sqlSelect.sources"},
		{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true, Clauses: whereQuery, Name: "sqlSelect.where"},
		{Token: TokenGroupBy, Lexer: LexColumns, Optional: true, Name: "sqlSelect.groupby"},
		{Token: TokenHaving, Lexer: LexConditionalClause, Optional: true, Name: "sqlSelect.having"},
		{Token: TokenOrderBy, Lexer: LexOrderByColumn, Optional: true, Name: "sqlSelect.orderby"},
		{Token: TokenLimit, Lexer: LexLimit, Optional: true, Name: "sqlSelect.limit"},
		{Token: TokenOffset, Lexer: LexNumber, Optional: true, Name: "sqlSelect.offset"},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true, Name: "sqlSelect.with"},
		{Token: TokenAlias, Lexer: LexIdentifier, Optional: true, Name: "sqlSelect.alias"},
		{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false, Name: "sqlSelect.eos"},
	}
	// SqlUpdate update statement
	SqlUpdate = []*Clause{
		{Token: TokenUpdate, Lexer: LexIdentifierOfType(TokenTable)},
		{Token: TokenSet, Lexer: LexColumns},
		{Token: TokenWhere, Lexer: LexColumns, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlUpsert sql upsert
	SqlUpsert = []*Clause{
		{Token: TokenUpsert, Lexer: LexUpsertClause, Name: "upsert.entry"},
		{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
		{Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlInsert insert statement
	SqlInsert = []*Clause{
		{Token: TokenInsert, Lexer: LexUpsertClause, Name: "insert.entry"},
		{Token: TokenLeftParenthesis, Lexer: LexColumnNames, Optional: true},
		{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
		{Token: TokenSelect, Optional: true, Clauses: insertSubQuery},
		{Token: TokenValues, Lexer: LexTableColumns, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlReplace replace statement
	SqlReplace = []*Clause{
		{Token: TokenReplace, Lexer: LexEmpty},
		{Token: TokenInto, Lexer: LexIdentifierOfType(TokenTable)},
		{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
		{Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlDelete delete statement
	SqlDelete = []*Clause{
		{Token: TokenDelete, Lexer: LexEmpty},
		{Token: TokenFrom, Lexer: LexIdentifierOfType(TokenTable)},
		{Token: TokenSet, Lexer: LexColumns, Optional: true},
		{Token: TokenWhere, Lexer: LexColumns, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlAlter alter statement
	SqlAlter = []*Clause{
		{Token: TokenAlter, Lexer: LexEmpty},
		{Token: TokenTable, Lexer: LexIdentifier},
		{Token: TokenChange, Lexer: LexDdlAlterColumn},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlCreate CREATE {SCHEMA | DATABASE | SOURCE | TABLE | VIEW | CONTINUOUSVIEW}
	SqlCreate = []*Clause{
		{Token: TokenCreate, Lexer: LexCreate},
		{Token: TokenEngine, Lexer: LexDdlTableStorage, Optional: true},
		{Token: TokenSelect, Clauses: SqlSelect, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlDrop DROP {SCHEMA | DATABASE | SOURCE | TABLE}
	SqlDrop = []*Clause{
		{Token: TokenDrop, Lexer: LexDrop},
	}
	// SqlDescribe Describe {table,database}
	SqlDescribe = []*Clause{
		{Token: TokenDescribe, Lexer: LexColumns},
	}
	// SqlDescribeAlt alternate spelling of Describe
	SqlDescribeAlt = []*Clause{
		{Token: TokenDesc, Lexer: LexColumns},
	}
	// SqlExplain is an alias of Describe
	SqlExplain = []*Clause{
		{Token: TokenExplain, Lexer: LexColumns},
	}
	// SqlShow
	SqlShow = []*Clause{
		{Token: TokenShow, Lexer: LexShowClause},
		{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
	}
	// SqlPrepare
	SqlPrepare = []*Clause{
		{Token: TokenPrepare, Lexer: LexPreparedStatement},
		{Token: TokenFrom, Lexer: LexTableReferences},
	}
	// SqlSet
	SqlSet = []*Clause{
		{Token: TokenSet, Lexer: LexColumns},
	}
	// SqlUse
	SqlUse = []*Clause{
		{Token: TokenUse, Lexer: LexIdentifier},
	}
	// SqlRollback
	SqlRollback = []*Clause{
		{Token: TokenRollback, Lexer: LexEmpty},
	}
	// SqlCommit
	SqlCommit = []*Clause{
		{Token: TokenCommit, Lexer: LexEmpty},
	}
)
var (
	// SUPPORT_DURATION FEATURE FLAGS
	SUPPORT_DURATION = true

	// Identity Quoting
	//  http://stackoverflow.com/questions/1992314/what-is-the-difference-between-single-and-double-quotes-in-sql
	// you might want to set this to not include single ticks
	//  http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
	//IdentityQuoting = []byte{'[', '`', '"'} // mysql ansi-ish, no single-quote identities, and allowing double-quote
	IdentityQuotingWSingleQuote = []byte{'[', '`', '\''} // more ansi-ish, allow single quotes around identities
	IdentityQuoting             = []byte{'[', '`'}       // no single quote around identities, because MySQL uses single quotes for string literals
)
var (
	// IDENTITY_CHARS which identity characters are allowed for UNESCAPED identities
	IDENTITY_CHARS = "_.-/"
	// IDENTITY_LAX_CHARS a much more lax identity-character rule that allows spaces
	IDENTITY_LAX_CHARS = "_./- "
	// IDENTITY_SQL_CHARS sql variables start with @@
	IDENTITY_SQL_CHARS = "@_.-"

	// TokenNameMap list of token-name
	TokenNameMap = map[TokenType]*TokenInfo{ /* 146 elements not displayed */ }

	TokenToOp = make(map[string]TokenType)
)
var LexDataTypeDefinition = LexDataType(TokenTypeDef)
var LexIdentifier = LexIdentifierOfType(TokenIdentity)
LexIdentifier scans and finds named things (tables, columns) and emits them as TokenIdentity; uses LexIdentifierOfType.

TODO: dialect controls escaping/quoting techniques

	[name]        select [first name] from usertable;
	'name'        select 'user' from usertable;
	first_name    select first_name from usertable;
	usertable     select first_name AS fname from usertable;
	_name         select _name AS name from stuff;
var LexTableIdentifier = LexIdentifierOfType(TokenTable)
var (
	// Trace is a global var to turn on tracing. It can be turned on with the
	// environment variable "lextrace=true":
	//
	//	export lextrace=true
	Trace bool
)
Functions ¶
func IdentityRunesOnly ¶
func IsIdentifierRune ¶
IsIdentifierRune reports whether the rune is a valid identity rune.
func IsValidIdentity ¶
IsValidIdentity tests the given string to determine whether any characters are not valid, in which case the identity must be quoted.
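A usage sketch; the expected results are assumptions based on the default identity rules (IDENTITY_CHARS) shown under Variables:

	fmt.Println(lex.IsValidIdentity("first_name")) // true: only legal identity characters
	fmt.Println(lex.IsValidIdentity("first name")) // false (assumed): the space requires quoting, e.g. `first name`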
Types ¶
type Clause ¶
type Clause struct {
	Optional       bool      // Is this Clause/Keyword optional?
	Repeat         bool      // Repeatable clause?
	Token          TokenType // Token identifying start of clause, optional
	KeywordMatcher KeywordMatcher
	Lexer          StateFn   // Lex function to lex this clause, optional
	Clauses        []*Clause // Children Clauses
	Name           string
	// contains filtered or unexported fields
}
Clause is a unique "Section" of a statement
func (*Clause) MatchesKeyword ¶
MatchesKeyword
type Dialect ¶
type Dialect struct {
	Name            string
	Statements      []*Clause
	IdentityQuoting []byte
	// contains filtered or unexported fields
}
Dialect is a Language made up of multiple Statements. Examples are {SQL, CQL, GRAPHQL}
var (
	// ExpressionDialect is a single-expression dialect, useful for parsing a single
	// function:
	//
	//	eq(tolower(item_name),"buy")
	ExpressionDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenNil, Clauses: expressionStatement},
		},
	}

	// LogicalExpressionDialect is a logical expression statement of the following
	// functional format:
	//
	//	5 > 4           => true
	//	4 + 5           => 9
	//	tolower(item) + 12 > 4
	//	4 IN (4,5,6)
	LogicalExpressionDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenNil, Clauses: logicalEpressions},
		},
	}
)
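Because Dialect, Clause, and the statement clause slices above are exported, a caller can assemble a custom dialect. A minimal sketch (this page shows no constructor for lexing with a custom dialect, so this only builds the value):

	// a hypothetical dialect that accepts only SELECT statements
	myDialect := &lex.Dialect{
		Name: "MyQL",
		Statements: []*lex.Clause{
			{Token: lex.TokenSelect, Clauses: lex.SqlSelect},
		},
	}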
type KeywordMatcher ¶
KeywordMatcher: a Clause may supply a keyword-matcher function instead of a keyword token.
type Lexer ¶
type Lexer struct {
// contains filtered or unexported fields
}
Lexer holds the state of the lexical scanning.
It holds a *Dialect, which provides much of the rules specific to the language being lexed.

Based on (though many generations removed from) the lexer in the "text/template" package. See http://www.youtube.com/watch?v=HxaD_trXwRE
func NewExpressionLexer ¶
NewExpressionLexer creates a new lexer for the input string using Expression Dialect.
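For example, a sketch using the single-function expression from the ExpressionDialect doc above:

	l := lex.NewExpressionLexer(`eq(tolower(item_name),"buy")`)
	tok := l.NextToken()
	fmt.Println(tok.T, tok.V) // first token: likely the function name "eq"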
func NewFilterQLLexer ¶
NewFilterQLLexer creates a new lexer for the input string using FilterQLDialect, which is a DSL for where/filtering.
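A usage sketch, borrowing a FILTER statement from the LexFilterClause examples below:

	l := lex.NewFilterQLLexer(`FILTER AND ( x > 7, domain(url) == "google.com" ) ALIAS myfilter`)
	for tok := l.NextToken(); tok.T != lex.TokenEOF && tok.T != lex.TokenError; tok = l.NextToken() {
		fmt.Println(tok.T, tok.V)
	}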
func NewJsonLexer ¶
NewJsonLexer creates a new JSON-dialect lexer for the input string.
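A usage sketch with a hypothetical JSON document:

	l := lex.NewJsonLexer(`{"name":"world", "ids":[1,2,3]}`)
	for tok := l.NextToken(); tok.T != lex.TokenEOF && tok.T != lex.TokenError; tok = l.NextToken() {
		fmt.Println(tok.T, tok.V)
	}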
func NewSqlLexer ¶
NewSqlLexer creates a new lexer for the input string using SqlDialect; this is a SQL(ish)-compatible lexer.
func (*Lexer) ConsumeWord ¶
ConsumeWord moves the position forward to consume the given word.
func (*Lexer) ErrMsg ¶
ErrMsg is an error-message helper that provides context about where in the input string the error is occurring: line, column, and current token info.
func (*Lexer) Remainder ¶
Remainder: SQL and other string expressions may contain more than one statement, such as:
	use schema_x; show tables;
	set @my_var = "value"; select a,b from `users` where name = @my_var;
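A hedged sketch of walking multiple statements; it assumes Remainder returns the unlexed input following the current statement, and false when none remains:

	l := lex.NewSqlLexer(`use schema_x; show tables;`)
	for {
		// drain tokens for the current statement
		for tok := l.NextToken(); tok.T != lex.TokenEOF && tok.T != lex.TokenError; tok = l.NextToken() {
			fmt.Println(tok.T, tok.V)
		}
		rest, ok := l.Remainder()
		if !ok {
			break // no further statements
		}
		l = lex.NewSqlLexer(rest) // lex the next statement
	}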
func (*Lexer) ReverseTrim ¶
func (l *Lexer) ReverseTrim()
ReverseTrim skips (trims) whitespace characters at the end of the input so the end can be recognized more easily.
func (*Lexer) SkipWhiteSpaces ¶
func (l *Lexer) SkipWhiteSpaces()
SkipWhiteSpaces Skips white space characters in the input.
func (*Lexer) SkipWhiteSpacesNewLine ¶
SkipWhiteSpacesNewLine skips whitespace characters in the input, returning true if a newline was among them.
type NamedStateFn ¶
NamedStateFn is a StateFn that carries a name, for tracing/debugging.
type StateFn ¶
StateFn represents the state of the lexer as a function that returns the next state.
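A minimal sketch of this state-function pattern; lexSemicolonThenDone is a hypothetical StateFn, not part of the package:

	// lexSemicolonThenDone skips whitespace, and if the next rune is a
	// semicolon it consumes it and emits TokenEOS.
	func lexSemicolonThenDone(l *lex.Lexer) lex.StateFn {
		l.SkipWhiteSpaces()
		if l.Peek() == ';' {
			l.Next()
			l.Emit(lex.TokenEOS)
		}
		return nil // returning nil conventionally ends the state machine
	}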
func LexColumnNames ¶
LexColumnNames handles the list of column names on insert/update statements:

	<insert_into> <col_names> VALUES <col_value_list>

	<col_names> := '(' <identity> [, <identity>]* ')'
func LexComment ¶
LexComment looks for valid comments, which are any of the following, including in-line comment blocks:

	/* hello */
	// hello
	-- hello
	# hello
	SELECT name --name is the combined first-last name
	     , age FROM `USER` ...
func LexConditionalClause ¶
LexConditionalClause handles the logical conditional clauses used for [WHERE, WITH, JOIN ON], logically grouped with parens and/or separated by commas or logic operators (AND/OR/NOT):
	SELECT ... WHERE <conditional_clause>

	<conditional_clause> ::= <expr> [( AND <expr> | OR <expr> | '(' <expr> ')' )]
	<expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>
SEE: <expr> = LexExpression
func LexCreate ¶
LexCreate allows us to lex the words after CREATE
	CREATE {SCHEMA|DATABASE|SOURCE} [IF NOT EXISTS] <identity> <WITH>
	CREATE {TABLE} <identity> [IF NOT EXISTS] <table_spec> [WITH]
	CREATE [OR REPLACE] {VIEW|CONTINUOUSVIEW} <identity> AS <select_statement> [WITH]
func LexDataType ¶
LexDataType scans and finds datatypes. `[]` are valid inside of data types, no escaping such as ',"
	[]string         CREATE table( field []string )
	map[string]int
	int, string, etc
func LexDdlAlterColumn ¶
LexDdlAlterColumn data definition language column alter
	CHANGE col1_old col1_new varchar(10),
	CHANGE col2_old col2_new TEXT
	ADD col3 BIGINT AFTER col1_new
	ADD col2 TEXT FIRST,
func LexDdlTableColumn ¶
LexDdlTableColumn data definition language column (repeated)
col1_new varchar(10), col2_new TEXT
func LexDdlTableStorage ¶
LexDdlTableStorage data definition language table storage options:
ENGINE=InnoDB AUTO_INCREMENT=4080 DEFAULT CHARSET=utf8
func LexDialectForStatement ¶
LexDialectForStatement finds the first keyword in the current query text, then finds the appropriate statement in the dialect, i.e. [SELECT, ALTER, CREATE, INSERT] in SQL.
func LexDrop ¶
LexDrop allows us to lex the words after DROP
	DROP {DATABASE | SCHEMA} [IF EXISTS] db_name
	DROP [TEMPORARY] TABLE [IF EXISTS] tbl_name [, tbl_name] [RESTRICT | CASCADE]
	DROP INDEX index_name ON tbl_name [algorithm_option | lock_option] ...
func LexDuration ¶
LexDuration lexes floats, integers, and time durations:
durations: 45m, 2w, 20y, 22d, 40ms, 100ms, -100ms
func LexEndOfStatement ¶
LexEndOfStatement looks for the end of a statement, defined by either a semicolon or end of file.
func LexEndOfSubStatement ¶
LexEndOfSubStatement looks for the end of a sub-statement, defined by either a semicolon or end of file.
func LexEngineKeyValue ¶
LexEngineKeyValue lexes key/value pairs:

	Start with identity for key/value pairs
	supports keyword DEFAULT
	supports non-quoted values
func LexExpression ¶
LexExpression handles a single logical <expr>, which may be nested and may contain user-defined function names that are NOT validated by the lexer.
	<expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>
	<func> ::= <identity>'(' <expr> ')'
	<predicatekw> ::= [NOT] (IN | INTERSECTS | CONTAINS | RANGE | LIKE | EQUALS )
Examples:
	(colx = y OR colb = b)
	cola = 'a5'
	cola != "a5", colb = "a6"
	REPLACE(cola,"stuff") != "hello"
	FirstName = REPLACE(LOWER(name," "))
	cola IN (1,2,3)
	cola LIKE "abc"
	eq(name,"bob") AND age > 5
	time > now() -1h
	(4 + 5) > 10
	reg_date BETWEEN x AND y
func LexExpressionOrIdentity ¶
LexExpressionOrIdentity looks for either an expression or an identity:

	expression: legal identity characters, terminated by (
	identity:   legal identity characters

	REPLACE(name,"stuff")
	name
func LexExpressionParens ¶
LexExpressionParens looks for an expression, identified by parentheses; it may be nested:

	|--expr----|
	dostuff(name,"arg")    // the left parenthesis identifies it as an expression
	eq(trim(name," "),"gmail.com")
func LexFilterClause ¶
LexFilterClause handles the FilterQL main statement:

	FILTER := ( <filter_bool_expr> | <filter_expr> )

	<filter_bool_expr> := ( AND | OR ) '(' ( <filter_bool_expr> | <filter_expr> ) [, ( <filter_bool_expr> | <filter_expr> ) ] ')'
	<filter_expr> := <expr>
Examples:
	FILTER AND (
	    daysago(datefield) < 100
	    , domain(url) == "google.com"
	    , INCLUDE name_of_filter
	    , OR (
	        momentum > 20
	        , propensity > 50
	    )
	) ALIAS myfilter

	FILTER x > 7
func LexIdentifierOfType ¶
LexIdentifierOfType scans and finds named things (tables, columns), supporting quoted, bracketed, or raw identifiers.

TODO: dialect controls escaping/quoting techniques

	[name]        select [first name] from usertable;
	'name'        select 'user' from usertable;
	`user`        select first_name from `user`;
	first_name    select first_name from usertable;
	usertable     select first_name AS fname from usertable;
	_name         select _name AS name from stuff;
	@@varname     select @@varname;
func LexIdentityOrValue ¶
LexIdentityOrValue looks for either an identity or a value.
func LexInlineCommentNoTag ¶
LexInlineCommentNoTag lexes a comment beginning with //, # or --, but does not emit the tag, only the comment text.
func LexJoinEntry ¶
LexJoinEntry handles source references, i.e. [FROM table], [subselects], joins:

	SELECT ... FROM <sources>

	<sources>      := <source> [, <join_clause> <source>]*
	<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
	<table_source> := <identifier>
	<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
	<subselect>    := '(' <select_stmt> ')'
func LexJsonIdentity ¶
LexJsonIdentity lexes a string value; strings must be quoted:

	"stuff" -> stuff
	"items's with quote"
func LexJsonOrKeyValue ¶
LexJsonOrKeyValue lexes either JSON or key/value pairs:

	Must start with { or [ for json
	Start with identity for key/value pairs
func LexJsonValue ¶
LexJsonValue consumes values, first consuming the colon:
<jsonvalue> ::= ':' ( <value>, <array>, <jsonobject> ) [, ...]
func LexListOfArgs ¶
LexListOfArgs lexes a list of arguments: a comma-separated list of args which may be a mixture of expressions, identities, and values:

	REPLACE(LOWER(x),"xyz")
	REPLACE(x,"xyz")
	COUNT(*)
	sum( 4 * toint(age))
	IN (a,b,c)
	varchar(10)
	CAST(field AS int)
	(a,b,c,d)   -- For Insert statement, list of columns
func LexLogical ¶
LexLogical is a lex entry function for a logical expression language (+ - / > etc.), i.e. the full logical boolean logic.
func LexMatchClosure ¶
LexMatchClosure matches the expected TokenType, emitting the token on success and returning the passed state function.
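For example, a sketch composing a matcher for a comma followed by an expression:

	commaThenExpr := lex.LexMatchClosure(lex.TokenComma, lex.LexExpression)
	_ = commaThenExpr // e.g. use as the Lexer field of a Clause, or push it with l.Push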
func LexMultilineComment ¶
LexMultilineComment lexes a multi-line comment of the format /* comment */; it does not have to actually span multiple lines, just be delimited by those markers.
func LexNumber ¶
LexNumber floats, integers, hex, exponential, signed
1.23 100 -827 6.02e23 0X1A2B, 0x1a2b, 0x1A2B.2B
Floats must be in decimal and must either:
- Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
- Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.
Integers can be:
- decimal (e.g. -827)
- hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)
func LexNumberOrDuration ¶
LexNumberOrDuration floats, integers, hex, exponential, signed
1.23 100 -827 6.02e23 0X1A2B, 0x1a2b, 0x1A2B.2B
durations: 45m, 2w, 20y, 22d, 40ms, 100ms, -100ms
Floats must be in decimal and must either:
- Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
- Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.
Integers can be:
- decimal (e.g. -827)
- hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)
func LexOrderByColumn ¶
LexOrderByColumn handles columnar identities with an optional trailing keyword (ASC, DESC):
[ORDER BY] ( <identity> | <expr> ) [(ASC | DESC)]
func LexParenLeft ¶
LexParenLeft: look for a left parenthesis, consuming it as the start of an expression we descend into.
func LexParenRight ¶
LexParenRight: look for end of paren, of which we have descended and consumed start
func LexPreparedStatement ¶
LexPreparedStatement handles prepared statements:
<PREPARE_STMT> := PREPARE <identity> FROM <string_value>
func LexSelectClause ¶
LexSelectClause Handle start of select statements, specifically looking for @@variables, *, or else we drop into <select_list>
	<SELECT> ::= (DISTINCT|ALL)? ( <sql_variable> | * | <select_list> ) [FROM <source_clause>]

	<sql_variable> = @@stuff
func LexSelectList ¶
LexSelectList handles the repeating select list of columns:

	SELECT <select_list>

	<select_list> := <select_col> [, <select_col>]*
	<select_col>  := ( <identifier> | <expression> | '*' ) [AS <identifier>] [IF <expression>] [<comment>]

Note: our columns support a non-standard IF guard on a per-column basis.
func LexShowClause ¶
LexShowClause handles SHOW statements:
SHOW [FULL] <multi_word_identifier> <identity> <like_or_where>
func LexStatement ¶
LexStatement is the main entry point to lex grammars, primarily for QL-type languages in which keywords separate clauses and have a fixed order [SELECT .. FROM name WHERE ..]; the reserved keywords serve as markers to stop lexing one clause and move to the next clause lexer.
func LexTableColumns ¶
LexTableColumns handles repeating insert/upsert/update statements:

	<insert_into> <col_names> VALUES <col_value_list>
	<set> <upsert_cols> VALUES <col_value_list>

	<upsert_cols>    := <upsert_col> [, <upsert_col>]*
	<upsert_col>     := <identity> = <expr>
	<col_names>      := <identity> [, <identity>]*
	<col_value_list> := <col_value_row> [, <col_value_row>]*
	<col_value_row>  := '(' <expr> [, <expr>]* ')'
func LexTableReferenceFirst ¶
LexTableReferenceFirst handles source references, i.e. [FROM table], [subselects], joins:

	SELECT ... FROM <sources>

	<sources>      := <source> [, <join_clause> <source>]*
	<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
	<table_source> := <identifier>
	<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
	<subselect>    := '(' <select_stmt> ')'
func LexTableReferences ¶
LexTableReferences handles source references, i.e. [FROM table], [subselects], joins:

	SELECT ... FROM <sources>

	<sources>      := <source> [, <join_clause> <source>]*
	<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
	<table_source> := <identifier>
	<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
	<subselect>    := '(' <select_stmt> ')'
func LexUpsertClause ¶
LexUpsertClause handles the start of INSERT/UPSERT statements.
func LexValue ¶
LexValue lexes a value: string, integer, float.

	- literal strings must be quoted
	- numerics with no period are integers
	- numerics with a period are floats

	"stuff"              -> [string]  stuff
	'stuff'              -> [string]  stuff
	"items's with quote" -> [string]  items's with quote
	1.23                 -> [float]   1.23
	100                  -> [integer] 100
	["hello","world"]    -> [array]   {"hello","world"}
func LexValueColumns ¶
LexValueColumns VALUES (a,b,c),(d,e,f);
type Token ¶
type Token struct {
	T      TokenType // type
	V      string    // value
	Quote  byte      // quote mark: " ` [ '
	Line   int       // line number
	Column int       // position in line
	Pos    int       // absolute position
}
Token represents a text string returned from the lexer.
type TokenInfo ¶
type TokenInfo struct {
	T           TokenType
	Kw          string
	HasSpaces   bool
	Description string
	// contains filtered or unexported fields
}
TokenInfo provides metadata about tokens
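A small sketch reading token metadata via the TokenNameMap variable (see Variables):

	if info, ok := lex.TokenNameMap[lex.TokenSelect]; ok {
		fmt.Println(info.Kw, info.Description) // keyword text and description for SELECT
	}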
type TokenType ¶
type TokenType uint16
TokenType identifies the type of lexical tokens.
const (
	// Basic grammar items
	TokenNil      TokenType = 0 // not used
	TokenEOF      TokenType = 1 // EOF
	TokenEOS      TokenType = 2 // ;
	TokenEofOrEos TokenType = 3 // End of file, OR ;
	TokenError    TokenType = 4 // error occurred; value is text of error
	TokenRaw      TokenType = 5 // raw unlexed text string
	TokenNewLine  TokenType = 6 // NewLine = \n

	// Comments
	TokenComment           TokenType = 10 // Comment value string
	TokenCommentML         TokenType = 11 // Comment MultiValue
	TokenCommentStart      TokenType = 12 // /*
	TokenCommentEnd        TokenType = 13 // */
	TokenCommentSlashes    TokenType = 14 // Single-line comment: // hello
	TokenCommentSingleLine TokenType = 15 // Single-line comment: -- hello
	TokenCommentHash       TokenType = 16 // Single-line comment: # hello

	// Misc
	TokenComma        TokenType = 20 // ,
	TokenStar         TokenType = 21 // *
	TokenColon        TokenType = 22 // :
	TokenLeftBracket  TokenType = 23 // [
	TokenRightBracket TokenType = 24 // ]
	TokenLeftBrace    TokenType = 25 // {
	TokenRightBrace   TokenType = 26 // }

	// operand-related tokens
	TokenMinus            TokenType = 60 // -
	TokenPlus             TokenType = 61 // +
	TokenPlusPlus         TokenType = 62 // ++
	TokenPlusEquals       TokenType = 63 // +=
	TokenDivide           TokenType = 64 // /
	TokenMultiply         TokenType = 65 // *
	TokenModulus          TokenType = 66 // %
	TokenEqual            TokenType = 67 // =
	TokenEqualEqual       TokenType = 68 // ==
	TokenNE               TokenType = 69 // !=
	TokenGE               TokenType = 70 // >=
	TokenLE               TokenType = 71 // <=
	TokenGT               TokenType = 72 // >
	TokenLT               TokenType = 73 // <
	TokenIf               TokenType = 74 // IF
	TokenOr               TokenType = 75 // ||
	TokenAnd              TokenType = 76 // &&
	TokenBetween          TokenType = 77 // between
	TokenLogicOr          TokenType = 78 // OR
	TokenLogicAnd         TokenType = 79 // AND
	TokenIN               TokenType = 80 // IN
	TokenLike             TokenType = 81 // LIKE
	TokenNegate           TokenType = 82 // NOT
	TokenLeftParenthesis  TokenType = 83 // (
	TokenRightParenthesis TokenType = 84 // )
	TokenTrue             TokenType = 85 // True
	TokenFalse            TokenType = 86 // False
	TokenIs               TokenType = 87 // IS
	TokenNull             TokenType = 88 // NULL
	TokenContains         TokenType = 89 // CONTAINS
	TokenIntersects       TokenType = 90 // INTERSECTS

	// ql top-level keywords; these first keywords determine the parser
	TokenPrepare   TokenType = 200
	TokenInsert    TokenType = 201
	TokenUpdate    TokenType = 202
	TokenDelete    TokenType = 203
	TokenSelect    TokenType = 204
	TokenUpsert    TokenType = 205
	TokenAlter     TokenType = 206
	TokenCreate    TokenType = 207
	TokenDrop      TokenType = 208
	TokenSubscribe TokenType = 209
	TokenFilter    TokenType = 210
	TokenShow      TokenType = 211
	TokenDescribe  TokenType = 212 // We can also use TokenDesc
	TokenExplain   TokenType = 213 // another alias for describe
	TokenReplace   TokenType = 214 // Insert/Replace are interchangeable on insert statements
	TokenRollback  TokenType = 215
	TokenCommit    TokenType = 216

	// Other QL keywords; these are clause-level keywords that mark separation between clauses
	TokenFrom     TokenType = 300 // from
	TokenWhere    TokenType = 301 // where
	TokenHaving   TokenType = 302 // having
	TokenGroupBy  TokenType = 303 // group by
	TokenBy       TokenType = 304 // by
	TokenAlias    TokenType = 305 // alias
	TokenWith     TokenType = 306 // with
	TokenValues   TokenType = 307 // values
	TokenInto     TokenType = 308 // into
	TokenLimit    TokenType = 309 // limit
	TokenOrderBy  TokenType = 310 // order by
	TokenInner    TokenType = 311 // inner, ie of join
	TokenCross    TokenType = 312 // cross
	TokenOuter    TokenType = 313 // outer
	TokenLeft     TokenType = 314 // left
	TokenRight    TokenType = 315 // right
	TokenJoin     TokenType = 316 // Join
	TokenOn       TokenType = 317 // on
	TokenDistinct TokenType = 318 // DISTINCT
	TokenAll      TokenType = 319 // all
	TokenInclude  TokenType = 320 // INCLUDE
	TokenExists   TokenType = 321 // EXISTS
	TokenOffset   TokenType = 322 // OFFSET
	TokenFull     TokenType = 323 // FULL
	TokenGlobal   TokenType = 324 // GLOBAL
	TokenSession  TokenType = 325 // SESSION
	TokenTables   TokenType = 326 // TABLES

	// ddl major words
	TokenSchema         TokenType = 400 // SCHEMA
	TokenDatabase       TokenType = 401 // DATABASE
	TokenTable          TokenType = 402 // TABLE
	TokenSource         TokenType = 403 // SOURCE
	TokenView           TokenType = 404 // VIEW
	TokenContinuousView TokenType = 405 // CONTINUOUSVIEW
	TokenTemp           TokenType = 406 // TEMP or TEMPORARY

	// ddl other
	TokenChange       TokenType = 410 // change
	TokenAdd          TokenType = 411 // add
	TokenFirst        TokenType = 412 // first
	TokenAfter        TokenType = 413 // after
	TokenCharacterSet TokenType = 414 // character set
	TokenDefault      TokenType = 415 // default
	TokenUnique       TokenType = 416 // unique
	TokenKey          TokenType = 417 // key
	TokenPrimary      TokenType = 418 // primary
	TokenConstraint   TokenType = 419 // constraint
	TokenForeign      TokenType = 420 // foreign
	TokenReferences   TokenType = 421 // references
	TokenEngine       TokenType = 422 // engine

	// Other QL keywords
	TokenSet  TokenType = 500 // set
	TokenAs   TokenType = 501 // as
	TokenAsc  TokenType = 502 // ascending
	TokenDesc TokenType = 503 // descending
	TokenUse  TokenType = 504 // use

	// User-defined function/expression
	TokenUdfExpr TokenType = 550

	// Value Types
	TokenIdentity     TokenType = 600 // identity: either column, table name, etc
	TokenValue        TokenType = 601 // 'some string' string or continuous sequence of chars delimited by WHITE SPACE | ' | , | ( | )
	TokenValueEscaped TokenType = 602 // '' becomes ' inside the string; parser will need to replace the string
	TokenRegex        TokenType = 603 // regex
	TokenDuration     TokenType = 604 // 14d, 22w, 3y, 45ms, 45us, 24hr, 2h, 45m, 30s

	// Data Type Definitions
	TokenTypeDef     TokenType = 999
	TokenTypeBool    TokenType = 998
	TokenTypeFloat   TokenType = 997
	TokenTypeInteger TokenType = 996
	TokenTypeString  TokenType = 995
	TokenTypeVarChar TokenType = 994
	TokenTypeChar    TokenType = 993
	TokenTypeBigInt  TokenType = 992
	TokenTypeTime    TokenType = 991
	TokenTypeText    TokenType = 990
	TokenTypeJson    TokenType = 989

	// Value types
	TokenValueType TokenType = 1000 // a generic identifier of value type
	TokenBool      TokenType = 1001
	TokenFloat     TokenType = 1002
	TokenInteger   TokenType = 1003
	TokenString    TokenType = 1004
	TokenTime      TokenType = 1005

	// Composite Data Types
	TokenJson TokenType = 1010
	TokenList TokenType = 1011
	TokenMap  TokenType = 1012
)
// List of datatypes from MySQL; implement them as tokens, or leave as Identity
// during DDL create/alter statements?
//
//	BOOL                  TINYINT
//	BOOLEAN               TINYINT
//	CHARACTER VARYING(M)  VARCHAR(M)
//	FIXED                 DECIMAL
//	FLOAT4                FLOAT
//	FLOAT8                DOUBLE
//	INT1                  TINYINT
//	INT2                  SMALLINT
//	INT3                  MEDIUMINT
//	INT4                  INT
//	INT8                  BIGINT
//	LONG VARBINARY        MEDIUMBLOB
//	LONG VARCHAR          MEDIUMTEXT
//	LONG                  MEDIUMTEXT
//	MIDDLEINT             MEDIUMINT
//	NUMERIC               DECIMAL
func (TokenType) MatchString ¶
MatchString returns the keyword to look for: either the full keyword or, in the case of keywords containing spaces such as "group by", just the first word ("group").