lex

package
v0.0.0-...-fc2d520 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 26, 2018 License: MIT Imports: 7 Imported by: 0

README

Lexing/Dialects

QLBridge implements a few different Dialects: Sql, FilterQL, Expressions, json

  • SQL: a partial (non-complete) implementation of SQL
  • FilterQL: a filtering language; think of just the WHERE part of SQL, but more DSL-like, with the syntax AND ( <expr>, <expr>, <expr> ) instead of <expr> AND <expr> AND <expr>
  • Expression: simple boolean logic expressions; see https://github.com/araddon/qlbridge/blob/master/vm/vm_test.go#L57 for examples
  • Json: lexes json (instead of marshal)

Creating a custom Lexer/Parser ie Dialect

See the example in the dialects/example folder for a custom ql dialect; it creates a mythical SUBSCRIBETO query language...

// TokenSubscribeTo is the token type for the SUBSCRIBETO keyword that is
// specific to our PUBSUB dialect.
// NOTE(review): 1000 is presumably chosen to stay clear of the token types
// the lex package already defines -- confirm against the full TokenType list.
var TokenSubscribeTo lex.TokenType = 1000

// LexMaybe is the custom lexer used for the FROM clause of our dialect. When
// the next word is "maybe" (compared case-insensitively) it consumes that word
// and emits it as an identity token; in every case it then hands off to the
// stock expression-or-identity lexer.
func LexMaybe(l *ql.Lexer) ql.StateFn {
	l.SkipWhiteSpaces()

	// Peek first so that input which does not match is left unconsumed.
	if next := strings.ToLower(l.PeekWord()); next == "maybe" {
		l.ConsumeWord("maybe")
		l.Emit(lex.TokenIdentity)
	}

	// Matched or not, the rest is lexed as an expression or identity.
	return ql.LexExpressionOrIdentity
}

// main builds the custom SUBSCRIBETO dialect, initializes it, and lexes a
// sample statement with it.
//
// pubsub and ourDialect are assigned here, not declared; they are assumed to
// be package-level variables declared outside this excerpt.
func main() {

	// We are going to inject new tokens into qlbridge
	lex.TokenNameMap[TokenSubscribeTo] = &lex.TokenInfo{Description: "subscribeto"}

	// OverRide the Identity Characters in qlbridge to allow a dash in identity
	ql.IDENTITY_CHARS = "_./-"

	ql.LoadTokenInfo()

	// We are going to create our own Dialect that uses a "SUBSCRIBETO" keyword
	pubsub = &ql.Statement{TokenSubscribeTo, []*ql.Clause{
		{Token: TokenSubscribeTo, Lexer: ql.LexColumns},
		{Token: lex.TokenFrom, Lexer: LexMaybe},
		{Token: lex.TokenWhere, Lexer: ql.LexColumns, Optional: true},
	}}
	ourDialect = &ql.Dialect{
		"Subscribe To", []*ql.Statement{pubsub},
	}

	// Init is the dialect's one-time setup, so it must run only after
	// ourDialect has actually been assigned; calling it before the
	// assignment above would operate on a dialect that does not exist yet.
	ourDialect.Init()

	l := ql.NewLexer(`
			SUBSCRIBETO
				count(x), Name
			FROM ourstream
			WHERE 
				k = REPLACE(LOWER(Name),'cde','xxx');
		`, ourDialect)

	// Pull tokens until the lexer reports end of input or an error. Without
	// this the lexer would be declared but never used.
	for {
		tok := l.NextToken()
		if tok.T == lex.TokenEOF || tok.T == lex.TokenError {
			break
		}
		// tok.T is the token type and tok.V its text; consume them here.
	}
}

Documentation

Overview

Package Lex is a Lexer for QLBridge which is more of a lex-toolkit and implements 4 Dialects {SQL, FilterQL, Json, Expressions}.

Index

Constants

This section is empty.

Variables

View Source
var (
	// FilterStatement lists the clauses of a FilterQL FILTER statement.
	// Every clause is optional except the terminating end-of-statement clause.
	FilterStatement = []*Clause{
		{Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
		{Token: TokenFrom, Lexer: LexIdentifier, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
		{Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
		{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
	}
	// FilterSelectStatement is a Filter statement that also supports column
	// projection. Unlike FilterStatement it requires the SELECT and FROM
	// clauses and additionally accepts an optional WHERE clause.
	FilterSelectStatement = []*Clause{
		{Token: TokenSelect, Lexer: LexSelectClause, Optional: false},
		{Token: TokenFrom, Lexer: LexIdentifier, Optional: false},
		{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
		{Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
		{Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
		{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
	}
	// FilterQLDialect is a Where Clause filtering language slightly
	// more DSL'ish than SQL Where Clause. It has two statements, one starting
	// with the FILTER token and one starting with the SELECT token, and it
	// uses the identity quoting set that includes the single quote.
	FilterQLDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenFilter, Clauses: FilterStatement},
			{Token: TokenSelect, Clauses: FilterSelectStatement},
		},
		IdentityQuoting: IdentityQuotingWSingleQuote,
	}
)
View Source
var (

	// SqlDialect is a SQL dialect
	//
	//    SELECT
	//    UPDATE
	//    INSERT
	//    UPSERT
	//    DELETE
	//
	//    SHOW identity;
	//    DESCRIBE identity;
	//    PREPARE
	//
	// ddl
	//    ALTER
	//    CREATE (TABLE|VIEW|CONTINUOUSVIEW|SOURCE)
	//
	//  TODO:
	//      CREATE
	//      VIEW
	//
	// Each entry pairs the token that starts a statement with the clause
	// list used to lex that statement.
	//
	// NOTE(review): SqlReplace is defined below but has no entry in this
	// list -- confirm whether that omission is intentional.
	SqlDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenPrepare, Clauses: SqlPrepare},
			{Token: TokenSelect, Clauses: SqlSelect},
			{Token: TokenUpdate, Clauses: SqlUpdate},
			{Token: TokenUpsert, Clauses: SqlUpsert},
			{Token: TokenInsert, Clauses: SqlInsert},
			{Token: TokenDelete, Clauses: SqlDelete},
			{Token: TokenCreate, Clauses: SqlCreate},
			{Token: TokenDrop, Clauses: SqlDrop},
			{Token: TokenAlter, Clauses: SqlAlter},
			{Token: TokenDescribe, Clauses: SqlDescribe},
			{Token: TokenExplain, Clauses: SqlExplain},
			{Token: TokenDesc, Clauses: SqlDescribeAlt},
			{Token: TokenShow, Clauses: SqlShow},
			{Token: TokenSet, Clauses: SqlSet},
			{Token: TokenUse, Clauses: SqlUse},
			{Token: TokenRollback, Clauses: SqlRollback},
			{Token: TokenCommit, Clauses: SqlCommit},
		},
	}
	// SqlSelect is the clause list for a SELECT statement. Only the SELECT
	// clause and the end-of-statement clause are required; every clause
	// carries a Name of the form "sqlSelect.<clause>".
	SqlSelect = []*Clause{
		{Token: TokenSelect, Lexer: LexSelectClause, Name: "sqlSelect.Select"},
		{Token: TokenInto, Lexer: LexInto, Optional: true, Name: "sqlSelect.INTO"},
		{Token: TokenFrom, Lexer: LexTableReferenceFirst, Optional: true, Repeat: false, Clauses: fromSource, Name: "sqlSelect.From"},
		{KeywordMatcher: sourceMatch, Optional: true, Repeat: true, Clauses: moreSources, Name: "sqlSelect.sources"},
		{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true, Clauses: whereQuery, Name: "sqlSelect.where"},
		{Token: TokenGroupBy, Lexer: LexColumns, Optional: true, Name: "sqlSelect.groupby"},
		{Token: TokenHaving, Lexer: LexConditionalClause, Optional: true, Name: "sqlSelect.having"},
		{Token: TokenOrderBy, Lexer: LexOrderByColumn, Optional: true, Name: "sqlSelect.orderby"},
		{Token: TokenLimit, Lexer: LexLimit, Optional: true, Name: "sqlSelect.limit"},
		{Token: TokenOffset, Lexer: LexNumber, Optional: true, Name: "sqlSelect.offset"},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true, Name: "sqlSelect.with"},
		{Token: TokenAlias, Lexer: LexIdentifier, Optional: true, Name: "sqlSelect.alias"},
		{Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false, Name: "sqlSelect.eos"},
	}

	// SqlUpdate is the clause list for an UPDATE statement.
	SqlUpdate = []*Clause{
		{Token: TokenUpdate, Lexer: LexIdentifierOfType(TokenTable)},
		{Token: TokenSet, Lexer: LexColumns},
		{Token: TokenWhere, Lexer: LexColumns, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlUpsert is the clause list for an UPSERT statement.
	SqlUpsert = []*Clause{
		{Token: TokenUpsert, Lexer: LexUpsertClause, Name: "upsert.entry"},
		{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
		{Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlInsert is the clause list for an INSERT statement. The optional
	// SELECT clause has no Lexer of its own and descends into insertSubQuery.
	SqlInsert = []*Clause{
		{Token: TokenInsert, Lexer: LexUpsertClause, Name: "insert.entry"},
		{Token: TokenLeftParenthesis, Lexer: LexColumnNames, Optional: true},
		{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
		{Token: TokenSelect, Optional: true, Clauses: insertSubQuery},
		{Token: TokenValues, Lexer: LexTableColumns, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}

	// SqlReplace is the clause list for a REPLACE INTO statement.
	SqlReplace = []*Clause{
		{Token: TokenReplace, Lexer: LexEmpty},
		{Token: TokenInto, Lexer: LexIdentifierOfType(TokenTable)},
		{Token: TokenSet, Lexer: LexTableColumns, Optional: true},
		{Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlDelete is the clause list for a DELETE FROM statement.
	SqlDelete = []*Clause{
		{Token: TokenDelete, Lexer: LexEmpty},
		{Token: TokenFrom, Lexer: LexIdentifierOfType(TokenTable)},
		{Token: TokenSet, Lexer: LexColumns, Optional: true},
		{Token: TokenWhere, Lexer: LexColumns, Optional: true},
		{Token: TokenLimit, Lexer: LexNumber, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlAlter is the clause list for an ALTER TABLE ... CHANGE statement.
	SqlAlter = []*Clause{
		{Token: TokenAlter, Lexer: LexEmpty},
		{Token: TokenTable, Lexer: LexIdentifier},
		{Token: TokenChange, Lexer: LexDdlAlterColumn},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlCreate CREATE {SCHEMA | DATABASE | SOURCE | TABLE | VIEW | CONTINUOUSVIEW}
	// The optional SELECT clause reuses the full SqlSelect clause list.
	SqlCreate = []*Clause{
		{Token: TokenCreate, Lexer: LexCreate},
		{Token: TokenEngine, Lexer: LexDdlTableStorage, Optional: true},
		{Token: TokenSelect, Clauses: SqlSelect, Optional: true},
		{Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
	}
	// SqlDrop DROP {SCHEMA | DATABASE | SOURCE | TABLE}
	SqlDrop = []*Clause{
		{Token: TokenDrop, Lexer: LexDrop},
	}
	// SqlDescribe Describe {table,database}
	SqlDescribe = []*Clause{
		{Token: TokenDescribe, Lexer: LexColumns},
	}
	// SqlDescribeAlt is the alternate spelling (DESC) of Describe.
	SqlDescribeAlt = []*Clause{
		{Token: TokenDesc, Lexer: LexColumns},
	}
	// SqlExplain is an alias of describe.
	SqlExplain = []*Clause{
		{Token: TokenExplain, Lexer: LexColumns},
	}
	// SqlShow is the clause list for a SHOW statement with an optional WHERE.
	SqlShow = []*Clause{
		{Token: TokenShow, Lexer: LexShowClause},
		{Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
	}
	// SqlPrepare is the clause list for a PREPARE ... FROM statement.
	SqlPrepare = []*Clause{
		{Token: TokenPrepare, Lexer: LexPreparedStatement},
		{Token: TokenFrom, Lexer: LexTableReferences},
	}
	// SqlSet is the clause list for a SET statement.
	SqlSet = []*Clause{
		{Token: TokenSet, Lexer: LexColumns},
	}
	// SqlUse is the clause list for a USE statement.
	SqlUse = []*Clause{
		{Token: TokenUse, Lexer: LexIdentifier},
	}
	// SqlRollback is the single-token clause list for ROLLBACK.
	SqlRollback = []*Clause{
		{Token: TokenRollback, Lexer: LexEmpty},
	}
	// SqlCommit is the single-token clause list for COMMIT.
	SqlCommit = []*Clause{
		{Token: TokenCommit, Lexer: LexEmpty},
	}
)
View Source
var (
	// SUPPORT_DURATION is a feature flag.
	// NOTE(review): presumably enables lexing of duration literals such as
	// 45m or 2w (see LexDuration) -- confirm against the lexer source.
	SUPPORT_DURATION = true
	// Identity Quoting: the byte sets below list the characters that may open
	// a quoted identity.
	//  http://stackoverflow.com/questions/1992314/what-is-the-difference-between-single-and-double-quotes-in-sql
	// you might want to set this to not include single ticks
	//  http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
	//IdentityQuoting = []byte{'[', '`', '"'} // mysql ansi-ish, no single quote identities, and allowing double-quote
	IdentityQuotingWSingleQuote = []byte{'[', '`', '\''} // more ansi-ish, allow single quotes around identities
	IdentityQuoting             = []byte{'[', '`'}       // no single quote around identities, because mysql uses single quotes for string literals
)
View Source
var (
	// IDENTITY_CHARS lists the extra (non-alphanumeric) characters that are
	// allowed in UNESCAPED identities.
	IDENTITY_CHARS = "_.-/"
	// IDENTITY_LAX_CHARS is a much more lax identity char set that also
	// allows spaces.
	IDENTITY_LAX_CHARS = "_./- "
	// IDENTITY_SQL_CHARS additionally allows '@' (and drops '/').
	// NOTE(review): the original note reads "sql variables start with @@ ??";
	// presumably this set is used for identities such as @@varname -- confirm.
	IDENTITY_SQL_CHARS = "@_.-"

	// TokenNameMap maps each token type to its TokenInfo metadata.
	TokenNameMap = map[TokenType]*TokenInfo{}/* 146 elements not displayed */

	// TokenToOp maps a string to its TokenType.
	// NOTE(review): presumably keyed by operator/keyword text and filled by
	// LoadTokenInfo (see TokenFromOp) -- confirm against the source.
	TokenToOp = make(map[string]TokenType)
)
View Source
// LexDataTypeDefinition is the data-type lexer (the StateFn returned by
// LexDataType) created for TokenTypeDef.
var LexDataTypeDefinition = LexDataType(TokenTypeDef)

LexIdentifier scans and finds named things (tables, columns)

and specifies them as TokenIdentity, uses LexIdentifierType

TODO: dialect controls escaping/quoting techniques

[name]         select [first name] from usertable;
'name'         select 'user' from usertable;
first_name     select first_name from usertable;
usertable      select first_name AS fname from usertable;
_name          select _name AS name from stuff;
View Source
// LexTableIdentifier is the identifier lexer (the StateFn returned by
// LexIdentifierOfType) created for TokenTable.
var LexTableIdentifier = LexIdentifierOfType(TokenTable)
View Source
var (
	// Trace is a global flag that turns on tracing. It can also be turned on
	// with the environment variable "lextrace=true":
	//
	//     export lextrace=true
	Trace bool
)

Functions

func IdentityRunesOnly

func IdentityRunesOnly(identity string) bool

func IsBreak

func IsBreak(r rune) bool

IsBreak reports whether r is a break character such as a comma, semicolon, etc.

func IsIdentifierRune

func IsIdentifierRune(r rune) bool

IsIdentifierRune Is this a valid identity rune?

func IsValidIdentity

func IsValidIdentity(identity string) bool

IsValidIdentity tests the given string to determine whether any of its characters are not valid, in which case the identity must be quoted

func LoadTokenInfo

func LoadTokenInfo()

LoadTokenInfo loads the token info into the global map

Types

type Clause

// Clause describes one section of a statement: the token (or keyword matcher)
// that starts it, the lex function that scans it, and any child clauses.
type Clause struct {
	Optional       bool      // Is this Clause/Keyword optional?
	Repeat         bool      // Repeatable clause?
	Token          TokenType // Token identifying start of clause, optional
	KeywordMatcher KeywordMatcher // Keyword matcher that may be supplied instead of Token
	Lexer          StateFn   // Lex Function to lex clause, optional
	Clauses        []*Clause // Children Clauses
	Name           string // Label for the clause, e.g. "sqlSelect.where"
	// contains filtered or unexported fields
}

Clause is a unique "Section" of a statement

func (*Clause) MatchesKeyword

func (c *Clause) MatchesKeyword(peekWord string, l *Lexer) bool

MatchesKeyword

func (*Clause) String

func (c *Clause) String() string

type Dialect

// Dialect is a language made up of multiple statements.
type Dialect struct {
	// Name is the name of the dialect.
	Name            string
	// Statements holds one Clause per statement, each identified by the
	// token that starts it and carrying that statement's clauses.
	Statements      []*Clause
	// IdentityQuoting lists the bytes accepted as identity quote characters,
	// e.g. IdentityQuotingWSingleQuote.
	IdentityQuoting []byte
	// contains filtered or unexported fields
}

Dialect is a Language made up of multiple Statements. Examples are {SQL, CQL, GRAPHQL}

var (

	// ExpressionDialect is a single-expression dialect, useful for lexing a
	// single function call such as
	//
	//    eq(tolower(item_name),"buy")
	//
	// Its only statement starts with TokenNil, i.e. no leading keyword.
	ExpressionDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenNil, Clauses: expressionStatement},
		},
	}

	// LogicalExpressionDialect is a dialect for logical expression statements
	// of the following format
	//
	//   5 > 4   => true
	//   4 + 5   => 9
	//   tolower(item) + 12 > 4
	//   4 IN (4,5,6)
	//
	LogicalExpressionDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenNil, Clauses: logicalEpressions},
		},
	}
)
var (

	// JsonDialect is a dialect that lexes json, for example
	//
	//    ["hello","world"]
	//    {"name":"bob","apples":["honeycrisp","fuji"]}
	//
	// Its only statement starts with TokenNil, i.e. no leading keyword.
	JsonDialect *Dialect = &Dialect{
		Statements: []*Clause{
			{Token: TokenNil, Clauses: jsonDialectStatement},
		},
	}
)

func (*Dialect) Init

func (m *Dialect) Init()

Init Dialects have one time load-setup.

type KeywordMatcher

// KeywordMatcher is a function a Clause may supply instead of a keyword token;
// it receives the clause, the peeked word and the lexer, and returns a bool.
type KeywordMatcher func(c *Clause, peekWord string, l *Lexer) bool

KeywordMatcher A Clause may supply a keyword matcher instead of keyword-token

type Lexer

// Lexer holds the state of the lexical scanning. It exposes no public fields;
// create one with NewLexer (or a dialect-specific constructor such as
// NewSqlLexer) and read tokens from it with NextToken.
type Lexer struct {
	// contains filtered or unexported fields
}

Lexer holds the state of the lexical scanning.

Holds a *Dialect* which gives much of the rules specific to this language.

Based on (though by now many generations removed from) the lexer from the "text/template" package. See http://www.youtube.com/watch?v=HxaD_trXwRE

func NewExpressionLexer

func NewExpressionLexer(input string) *Lexer

NewExpressionLexer creates a new lexer for the input string using Expression Dialect.

func NewFilterQLLexer

func NewFilterQLLexer(input string) *Lexer

NewFilterQLLexer creates a new lexer for the input string using FilterQLDialect which is dsl for where/filtering.

func NewJsonLexer

func NewJsonLexer(input string) *Lexer

NewJsonLexer Creates a new json dialect lexer for the input string.

func NewLexer

func NewLexer(input string, dialect *Dialect) *Lexer

NewLexer Creates a new lexer for the input string

func NewSqlLexer

func NewSqlLexer(input string) *Lexer

NewSqlLexer creates a new lexer for the input string using SqlDialect this is sql(ish) compatible parser.

func (*Lexer) ConsumeWord

func (l *Lexer) ConsumeWord(word string)

ConsumeWord moves the position forward to consume the given word

func (*Lexer) Emit

func (l *Lexer) Emit(t TokenType)

Emit passes a token back to the client.

func (*Lexer) ErrMsg

func (l *Lexer) ErrMsg(t Token, msg string) error

ErrMsg is an error message helper which provides context about where in the input string the error is occurring: line, column, and current token info.

func (*Lexer) IsComment

func (l *Lexer) IsComment() bool

IsComment Is this a comment?

func (*Lexer) IsEnd

func (l *Lexer) IsEnd() bool

IsEnd have we consumed all input?

func (*Lexer) Next

func (l *Lexer) Next() (r rune)

Next returns the next rune in the input

func (*Lexer) NextToken

func (l *Lexer) NextToken() Token

NextToken returns the next token from the input.

func (*Lexer) Peek

func (l *Lexer) Peek() rune

Peek returns but does not consume the next rune in the input.

func (*Lexer) PeekWord

func (l *Lexer) PeekWord() string

PeekWord grab the next word (till whitespace, without consuming)

func (*Lexer) PeekX

func (l *Lexer) PeekX(x int) string

PeekX grab the next x characters without consuming

func (*Lexer) Push

func (l *Lexer) Push(name string, state StateFn)

Push a named StateFn onto stack.

func (*Lexer) RawInput

func (l *Lexer) RawInput() string

RawInput returns the original string we are lexing.

func (*Lexer) Remainder

func (l *Lexer) Remainder() (string, bool)

Remainder SQL and other string expressions may contain more than one statement such as:

use schema_x;  show tables;

set @my_var = "value"; select a,b from `users` where name = @my_var;

func (*Lexer) ReverseTrim

func (l *Lexer) ReverseTrim()

ReverseTrim skips white space characters at the end by trimming, so that we can recognize the end more easily

func (*Lexer) SkipWhiteSpaces

func (l *Lexer) SkipWhiteSpaces()

SkipWhiteSpaces Skips white space characters in the input.

func (*Lexer) SkipWhiteSpacesNewLine

func (l *Lexer) SkipWhiteSpacesNewLine() bool

SkipWhiteSpacesNewLine Skips white space characters in the input, returns bool for if it contained new line

type NamedStateFn

// NamedStateFn pairs a StateFn with a name, for use in tracing and debugging.
type NamedStateFn struct {
	Name    string  // name of the state function
	StateFn StateFn // the state function itself
}

NamedStateFn is a StateFn which has a name for tracing debugging.

type StateFn

// StateFn is one state of the lexer: a function that takes the Lexer and
// returns the next state.
type StateFn func(*Lexer) StateFn

StateFn represents the state of the lexer as a function that returns the next state.

func LexColumnNames

func LexColumnNames(l *Lexer) StateFn

LexColumnNames Handle list of column names on insert/update statements

<insert_into> <col_names> VALUES <col_value_list>

<col_names> := '(' <identity> [, <identity>]* ')'

func LexColumns

func LexColumns(l *Lexer) StateFn

Alias for Expression

func LexComment

func LexComment(l *Lexer) StateFn

LexComment looks for valid comments which are any of the following

 including the in-line comment blocks

/* hello */
//  hello
-- hello
# hello
SELECT name --name is the combined first-last name
       , age FROM `USER` ...

func LexConditionalClause

func LexConditionalClause(l *Lexer) StateFn

LexConditionalClause handles a logical conditional clause used for [WHERE, WITH, JOIN ON], logically grouped with parens and/or separated by commas or logic (AND/OR/NOT)

SELECT ... WHERE <conditional_clause>

<conditional_clause> ::= <expr> [( AND <expr> | OR <expr> | '(' <expr> ')' )]

<expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>

SEE: <expr> = LexExpression

func LexCreate

func LexCreate(l *Lexer) StateFn

LexCreate allows us to lex the words after CREATE

CREATE {SCHEMA|DATABASE|SOURCE} [IF NOT EXISTS] <identity>  <WITH>
CREATE {TABLE} <identity> [IF NOT EXISTS] <table_spec> [WITH]
CREATE [OR REPLACE] {VIEW|CONTINUOUSVIEW} <identity> AS <select_statement> [WITH]

func LexDataType

func LexDataType(forToken TokenType) StateFn

LexDataType scans and finds datatypes. `[]` are valid inside of data types, no escaping such as ',"

[]string       CREATE table( field []string )
map[string]int
int, string, etc

func LexDdlAlterColumn

func LexDdlAlterColumn(l *Lexer) StateFn

LexDdlAlterColumn data definition language column alter

CHANGE col1_old col1_new varchar(10),
CHANGE col2_old col2_new TEXT
ADD col3 BIGINT AFTER col1_new
ADD col2 TEXT FIRST,

func LexDdlTable

func LexDdlTable(l *Lexer) StateFn

LexDdlTable data definition language table

func LexDdlTableColumn

func LexDdlTableColumn(l *Lexer) StateFn

LexDdlTableColumn data definition language column (repeated)

col1_new varchar(10),
col2_new TEXT

func LexDdlTableStorage

func LexDdlTableStorage(l *Lexer) StateFn

LexDdlTableStorage data definition language table storage options (repeated)

ENGINE=InnoDB AUTO_INCREMENT=4080 DEFAULT CHARSET=utf8

func LexDialectForStatement

func LexDialectForStatement(l *Lexer) StateFn

Find first keyword in the current queryText, then find appropriate statement in dialect. ie [SELECT, ALTER, CREATE, INSERT] in sql

func LexDrop

func LexDrop(l *Lexer) StateFn

LexDrop allows us to lex the words after DROP

DROP {DATABASE | SCHEMA} [IF EXISTS] db_name

DROP [TEMPORARY] TABLE [IF EXISTS] tbl_name [, tbl_name] [RESTRICT | CASCADE]

DROP INDEX index_name ON tbl_name
    [algorithm_option | lock_option] ...

func LexDuration

func LexDuration(l *Lexer) StateFn

LexDuration floats, integers time-durations

durations: 45m, 2w, 20y, 22d, 40ms, 100ms, -100ms

func LexEmpty

func LexEmpty(l *Lexer) StateFn

Doesn't actually lex anything, used for single token clauses

func LexEndOfStatement

func LexEndOfStatement(l *Lexer) StateFn

LexEndOfStatement Look for end of statement defined by either a semicolon or end of file

func LexEndOfSubStatement

func LexEndOfSubStatement(l *Lexer) StateFn

LexEndOfSubStatement Look for end of statement defined by either a semicolon or end of file.

func LexEngineKeyValue

func LexEngineKeyValue(l *Lexer) StateFn

LexEngineKeyValue key value pairs

Start with identity for key/value pairs
supports keyword DEFAULT
supports non-quoted values

func LexExpression

func LexExpression(l *Lexer) StateFn

<expr> Handle single logical expression which may be nested and has

user defined function names that are NOT validated by lexer

<expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>

<func> ::= <identity>'(' <expr> ')'
<predicatekw> ::= [NOT] (IN | INTERSECTS | CONTAINS | RANGE | LIKE | EQUALS )

Examples:

(colx = y OR colb = b)
cola = 'a5'
cola != "a5", colb = "a6"
REPLACE(cola,"stuff") != "hello"
FirstName = REPLACE(LOWER(name," "))
cola IN (1,2,3)
cola LIKE "abc"
eq(name,"bob") AND age > 5
time > now() -1h
(4 + 5) > 10
reg_date BETWEEN x AND y

func LexExpressionOrIdentity

func LexExpressionOrIdentity(l *Lexer) StateFn

look for either an Expression or Identity

expressions:    Legal identity characters, terminated by (
identity:    legal identity characters

REPLACE(name,"stuff")
name

func LexExpressionParens

func LexExpressionParens(l *Lexer) StateFn

lex Expression looks for an expression, identified by parenthesis, may be nested

       |--expr----|
dostuff(name,"arg")    // the left parenthesis identifies it as Expression
eq(trim(name," "),"gmail.com")

func LexFilterClause

func LexFilterClause(l *Lexer) StateFn

LexFilterClause Handle Filter QL Main Statement

FILTER := ( <filter_bool_expr> | <filter_expr> )

<filter_bool_expr> :=  ( AND | OR ) '(' ( <filter_bool_expr> | <filter_expr> ) [, ( <filter_bool_expr> | <filter_expr> ) ] ')'

<filter_expr> :=  <expr>

Examples:

FILTER

   AND (

      daysago(datefield) < 100
      , domain(url) == "google.com"
      , INCLUDE name_of_filter
      ,
      , OR (
          momentum > 20
         , propensity > 50
      )
   )
ALIAS myfilter

FILTER x > 7

func LexIdentifierOfType

func LexIdentifierOfType(forToken TokenType) StateFn

LexIdentifierOfType scans and finds named things (tables, columns)

supports quoted, bracket, or raw identifiers

TODO: dialect controls escaping/quoting techniques

[name]         select [first name] from usertable;
'name'         select 'user' from usertable;
`user`         select first_name from `user`;
first_name     select first_name from usertable;
usertable      select first_name AS fname from usertable;
_name          select _name AS name from stuff;
@@varname      select @@varname;

func LexIdentityOrValue

func LexIdentityOrValue(l *Lexer) StateFn

look for either an Identity or Value

func LexInlineComment

func LexInlineComment(l *Lexer) StateFn

Comment beginning with //, # or --

func LexInlineCommentNoTag

func LexInlineCommentNoTag(l *Lexer) StateFn

Comment beginning with //, # or -- but do not emit the tag just text comment

func LexInto

func LexInto(l *Lexer) StateFn

LexInto clause

func LexJoinEntry

func LexJoinEntry(l *Lexer) StateFn

Handle Source References ie [From table], [SubSelects], Joins

SELECT ...  FROM <sources>

<sources>      := <source> [, <join_clause> <source>]*
<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
<table_source> := <identifier>
<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
<subselect>    := '(' <select_stmt> ')'

func LexJson

func LexJson(l *Lexer) StateFn

Lex Valid Json

Must start with { or [

func LexJsonArray

func LexJsonArray(l *Lexer) StateFn

Lex Valid Json Array

Must End with ]

func LexJsonIdentity

func LexJsonIdentity(l *Lexer) StateFn

lex a string value:

strings must be quoted

"stuff"    -> stuff
"items's with quote"

func LexJsonObject

func LexJsonObject(l *Lexer) StateFn

Lex Valid Json Object

Must End with }

func LexJsonOrKeyValue

func LexJsonOrKeyValue(l *Lexer) StateFn

Lex either Json or Key/Value pairs

Must start with { or [ for json
Start with identity for key/value pairs

func LexJsonValue

func LexJsonValue(l *Lexer) StateFn

LexJsonValue: Consume values, first consuming Colon

<jsonvalue> ::= ':' ( <value>, <array>, <jsonobject> ) [, ...]

func LexLimit

func LexLimit(l *Lexer) StateFn

LexLimit clause

LIMIT 1000 OFFSET 100
LIMIT 0, 1000
LIMIT 1000

func LexListOfArgs

func LexListOfArgs(l *Lexer) StateFn

LexListOfArgs list of arguments, comma separated list of args which may be a mixture of expressions, identities, values

REPLACE(LOWER(x),"xyz")
REPLACE(x,"xyz")
COUNT(*)
sum( 4 * toint(age))
IN (a,b,c)
varchar(10)
CAST(field AS int)

(a,b,c,d)   -- For Insert statement, list of columns

func LexLogical

func LexLogical(l *Lexer) StateFn

LexLogical is a lex entry function for logical expression language (+-/> etc)

ie, the full logical boolean logic

func LexMatchClosure

func LexMatchClosure(tok TokenType, nextFn StateFn) StateFn

LexMatchClosure matches expected tokentype emitting the token on success and returning passed state function.

func LexMultilineComment

func LexMultilineComment(l *Lexer) StateFn

A multi-line comment of format /* comment */ it does not have to actually be multi-line, just surrounded by those comments

func LexNumber

func LexNumber(l *Lexer) StateFn

LexNumber floats, integers, hex, exponential, signed

1.23
100
-827
6.02e23
0X1A2B,  0x1a2b, 0x1A2B.2B

Floats must be in decimal and must either:

  • Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
  • Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.

Integers can be:

  • decimal (e.g. -827)
  • hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)

func LexNumberOrDuration

func LexNumberOrDuration(l *Lexer) StateFn

LexNumberOrDuration floats, integers, hex, exponential, signed

1.23
100
-827
6.02e23
0X1A2B,  0x1a2b, 0x1A2B.2B

durations: 45m, 2w, 20y, 22d, 40ms, 100ms, -100ms

Floats must be in decimal and must either:

  • Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
  • Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.

Integers can be:

  • decimal (e.g. -827)
  • hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)

func LexOrderByColumn

func LexOrderByColumn(l *Lexer) StateFn

Handle columnar identities with an optional trailing keyword (ASC, DESC)

[ORDER BY] ( <identity> | <expr> ) [(ASC | DESC)]

func LexParenLeft

func LexParenLeft(l *Lexer) StateFn

LexParenLeft: look for end of paren, of which we have descended and consumed start

func LexParenRight

func LexParenRight(l *Lexer) StateFn

LexParenRight: look for end of paren, of which we have descended and consumed start

func LexPreparedStatement

func LexPreparedStatement(l *Lexer) StateFn

Handle prepared statements

<PREPARE_STMT> := PREPARE <identity> FROM <string_value>

func LexRegex

func LexRegex(l *Lexer) StateFn

lex a regex: first character must be a /

/^stats\./i
/.*/
/^stats.*/

func LexSelectClause

func LexSelectClause(l *Lexer) StateFn

LexSelectClause Handle start of select statements, specifically looking for @@variables, *, or else we drop into <select_list>

<SELECT> :==
    (DISTINCT|ALL)? ( <sql_variable> | * | <select_list> ) [FROM <source_clause>]

<sql_variable> = @@stuff

func LexSelectList

func LexSelectList(l *Lexer) StateFn

Handle repeating Select List for columns

   SELECT <select_list>

   <select_list> := <select_col> [, <select_col>]*

   <select_col> :== ( <identifier> | <expression> | '*' ) [AS <identifier>] [IF <expression>] [<comment>]

Note, our Columns support a non-standard IF guard at a per column basis

func LexShowClause

func LexShowClause(l *Lexer) StateFn

LexShowClause Handle show statement

SHOW [FULL] <multi_word_identifier> <identity> <like_or_where>

func LexStatement

func LexStatement(l *Lexer) StateFn

LexStatement is the main entry point for lexing grammars primarily associated with QL-type languages, in which keywords separate clauses and the clauses have an order [select .. FROM name WHERE ..]. The reserved keywords serve as markers at which to stop lexing the current clause and move on to the next clause lexer

func LexSubQuery

func LexSubQuery(l *Lexer) StateFn

Handle recursive subqueries

func LexTableColumns

func LexTableColumns(l *Lexer) StateFn

Handle repeating Insert/Upsert/Update statements

<insert_into> <col_names> VALUES <col_value_list>
<set> <upsert_cols> VALUES <col_value_list>

<upsert_cols> := <upsert_col> [, <upsert_col>]*
<upsert_col> := <identity> = <expr>

<col_names> := <identity> [, <identity>]*
<col_value_list> := <col_value_row> [, <col_value_row>] *

<col_value_row> := '(' <expr> [, <expr>]* ')'

func LexTableReferenceFirst

func LexTableReferenceFirst(l *Lexer) StateFn

Handle Source References ie [From table], [SubSelects], Joins

SELECT ...  FROM <sources>

<sources>      := <source> [, <join_clause> <source>]*
<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
<table_source> := <identifier>
<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
<subselect>    := '(' <select_stmt> ')'

func LexTableReferences

func LexTableReferences(l *Lexer) StateFn

Handle Source References ie [From table], [SubSelects], Joins

SELECT ...  FROM <sources>

<sources>      := <source> [, <join_clause> <source>]*
<source>       := ( <table_source> | <subselect> ) [AS <identifier>]
<table_source> := <identifier>
<join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
<subselect>    := '(' <select_stmt> ')'

func LexUpsertClause

func LexUpsertClause(l *Lexer) StateFn

Handle start of insert, Upsert statements

func LexUrnaryNot

func LexUrnaryNot(l *Lexer) StateFn

LexUrnaryNot NOT

func LexValue

func LexValue(l *Lexer) StateFn

lex a value: string, integer, float

  • literal strings must be quoted
  • numerics with no period are integers
  • numerics with period are floats

"stuff"    -> [string] = stuff
'stuff'    -> [string] = stuff
"items's with quote" -> [string] = items's with quote
1.23  -> [float] = 1.23
100   -> [integer] = 100
["hello","world"]  -> [array] {"hello","world"}

func LexValueColumns

func LexValueColumns(l *Lexer) StateFn

LexValueColumns VALUES (a,b,c),(d,e,f);

type Token

// Token is a text string returned from the lexer, together with its type,
// quoting and position in the input.
type Token struct {
	T      TokenType // type
	V      string    // value
	Quote  byte      // quote mark, one of:  " ` [ '
	Line   int       // Line #
	Column int       // Position in line
	Pos    int       // Absolute position
}

Token represents a text string returned from the lexer.

func TokenFromOp

func TokenFromOp(op string) Token

TokenFromOp get token from operation string

func (Token) Err

func (t Token) Err(l *Lexer) error

func (Token) ErrMsg

func (t Token) ErrMsg(l *Lexer, msg string) error

func (Token) String

func (t Token) String() string

convert to human readable string

type TokenInfo

// TokenInfo provides metadata about a token type.
type TokenInfo struct {
	T  TokenType // token type this info describes
	Kw string    // NOTE(review): presumably the keyword text for the token -- confirm

	HasSpaces   bool   // NOTE(review): presumably whether the keyword contains spaces -- confirm
	Description string // human readable description, e.g. "subscribeto"
	// contains filtered or unexported fields
}

TokenInfo provides metadata about tokens

type TokenType

// TokenType identifies the type of a lexical token; it is an unsigned 16-bit
// value.
type TokenType uint16

TokenType identifies the type of lexical tokens.

// Predefined TokenType values, grouped into numeric ranges by category.
// The values are explicit (not iota), so they stay fixed if constants are
// added or reordered.
const (

	// Basic grammar items
	TokenNil      TokenType = 0 // not used
	TokenEOF      TokenType = 1 // EOF
	TokenEOS      TokenType = 2 // ;
	TokenEofOrEos TokenType = 3 // End of file, OR ;
	TokenError    TokenType = 4 // error occurred; value is text of error
	TokenRaw      TokenType = 5 // raw unlexed text string
	TokenNewLine  TokenType = 6 // NewLine  = \n

	// Comments
	TokenComment           TokenType = 10 // Comment value string
	TokenCommentML         TokenType = 11 // Comment MultiValue
	TokenCommentStart      TokenType = 12 // /*
	TokenCommentEnd        TokenType = 13 // */
	TokenCommentSlashes    TokenType = 14 // Single Line comment:   // hello
	TokenCommentSingleLine TokenType = 15 // Single Line comment:   -- hello
	TokenCommentHash       TokenType = 16 // Single Line comment:  # hello

	// Misc
	TokenComma        TokenType = 20 // ,
	TokenStar         TokenType = 21 // *
	TokenColon        TokenType = 22 // :
	TokenLeftBracket  TokenType = 23 // [
	TokenRightBracket TokenType = 24 // ]
	TokenLeftBrace    TokenType = 25 // {
	TokenRightBrace   TokenType = 26 // }

	// Operand related tokens
	TokenMinus            TokenType = 60 // -
	TokenPlus             TokenType = 61 // +
	TokenPlusPlus         TokenType = 62 // ++
	TokenPlusEquals       TokenType = 63 // +=
	TokenDivide           TokenType = 64 // /
	TokenMultiply         TokenType = 65 // *
	TokenModulus          TokenType = 66 // %
	TokenEqual            TokenType = 67 // =
	TokenEqualEqual       TokenType = 68 // ==
	TokenNE               TokenType = 69 // !=
	TokenGE               TokenType = 70 // >=
	TokenLE               TokenType = 71 // <=
	TokenGT               TokenType = 72 // >
	TokenLT               TokenType = 73 // <
	TokenIf               TokenType = 74 // IF
	TokenOr               TokenType = 75 // ||
	TokenAnd              TokenType = 76 // &&
	TokenBetween          TokenType = 77 // between
	TokenLogicOr          TokenType = 78 // OR
	TokenLogicAnd         TokenType = 79 // AND
	TokenIN               TokenType = 80 // IN
	TokenLike             TokenType = 81 // LIKE
	TokenNegate           TokenType = 82 // NOT
	TokenLeftParenthesis  TokenType = 83 // (
	TokenRightParenthesis TokenType = 84 // )
	TokenTrue             TokenType = 85 // True
	TokenFalse            TokenType = 86 // False
	TokenIs               TokenType = 87 // IS
	TokenNull             TokenType = 88 // NULL
	TokenContains         TokenType = 89 // CONTAINS
	TokenIntersects       TokenType = 90 // INTERSECTS

	// ql top-level keywords, these first keywords determine parser
	TokenPrepare   TokenType = 200
	TokenInsert    TokenType = 201
	TokenUpdate    TokenType = 202
	TokenDelete    TokenType = 203
	TokenSelect    TokenType = 204
	TokenUpsert    TokenType = 205
	TokenAlter     TokenType = 206
	TokenCreate    TokenType = 207
	TokenDrop      TokenType = 208
	TokenSubscribe TokenType = 209
	TokenFilter    TokenType = 210
	TokenShow      TokenType = 211
	TokenDescribe  TokenType = 212 // We can also use TokenDesc
	TokenExplain   TokenType = 213 // another alias for describe
	TokenReplace   TokenType = 214 // Insert/Replace are interchangeable on insert statements
	TokenRollback  TokenType = 215
	TokenCommit    TokenType = 216

	// Other QL Keywords, These are clause-level keywords that mark separation between clauses
	TokenFrom     TokenType = 300 // from
	TokenWhere    TokenType = 301 // where
	TokenHaving   TokenType = 302 // having
	TokenGroupBy  TokenType = 303 // group by
	TokenBy       TokenType = 304 // by
	TokenAlias    TokenType = 305 // alias
	TokenWith     TokenType = 306 // with
	TokenValues   TokenType = 307 // values
	TokenInto     TokenType = 308 // into
	TokenLimit    TokenType = 309 // limit
	TokenOrderBy  TokenType = 310 // order by
	TokenInner    TokenType = 311 // inner , ie of join
	TokenCross    TokenType = 312 // cross
	TokenOuter    TokenType = 313 // outer
	TokenLeft     TokenType = 314 // left
	TokenRight    TokenType = 315 // right
	TokenJoin     TokenType = 316 // Join
	TokenOn       TokenType = 317 // on
	TokenDistinct TokenType = 318 // DISTINCT
	TokenAll      TokenType = 319 // all
	TokenInclude  TokenType = 320 // INCLUDE
	TokenExists   TokenType = 321 // EXISTS
	TokenOffset   TokenType = 322 // OFFSET
	TokenFull     TokenType = 323 // FULL
	TokenGlobal   TokenType = 324 // GLOBAL
	TokenSession  TokenType = 325 // SESSION
	TokenTables   TokenType = 326 // TABLES

	// ddl major words
	TokenSchema         TokenType = 400 // SCHEMA
	TokenDatabase       TokenType = 401 // DATABASE
	TokenTable          TokenType = 402 // TABLE
	TokenSource         TokenType = 403 // SOURCE
	TokenView           TokenType = 404 // VIEW
	TokenContinuousView TokenType = 405 // CONTINUOUSVIEW
	TokenTemp           TokenType = 406 // TEMP or TEMPORARY

	// ddl other
	TokenChange       TokenType = 410 // change
	TokenAdd          TokenType = 411 // add
	TokenFirst        TokenType = 412 // first
	TokenAfter        TokenType = 413 // after
	TokenCharacterSet TokenType = 414 // character set
	TokenDefault      TokenType = 415 // default
	TokenUnique       TokenType = 416 // unique
	TokenKey          TokenType = 417 // key
	TokenPrimary      TokenType = 418 // primary
	TokenConstraint   TokenType = 419 // constraint
	TokenForeign      TokenType = 420 // foreign
	TokenReferences   TokenType = 421 // references
	TokenEngine       TokenType = 422 // engine

	// Other QL keywords
	TokenSet  TokenType = 500 // set
	TokenAs   TokenType = 501 // as
	TokenAsc  TokenType = 502 // ascending
	TokenDesc TokenType = 503 // descending
	TokenUse  TokenType = 504 // use

	// User defined function/expression
	TokenUdfExpr TokenType = 550

	// Value Types
	TokenIdentity     TokenType = 600 // identity, either column, table name etc
	TokenValue        TokenType = 601 // 'some string' string or continuous sequence of chars delimited by WHITE SPACE | ' | , | ( | )
	TokenValueEscaped TokenType = 602 // '' becomes ' inside the string, parser will need to replace the string
	TokenRegex        TokenType = 603 // regex
	TokenDuration     TokenType = 604 // 14d , 22w, 3y, 45ms, 45us, 24hr, 2h, 45m, 30s

	// Data Type Definitions (values count down from 999)
	TokenTypeDef     TokenType = 999
	TokenTypeBool    TokenType = 998
	TokenTypeFloat   TokenType = 997
	TokenTypeInteger TokenType = 996
	TokenTypeString  TokenType = 995
	TokenTypeVarChar TokenType = 994
	TokenTypeChar    TokenType = 993
	TokenTypeBigInt  TokenType = 992
	TokenTypeTime    TokenType = 991
	TokenTypeText    TokenType = 990
	TokenTypeJson    TokenType = 989

	// Value types
	// NOTE(review): the README's custom-dialect example assigns 1000 to its own
	// TokenSubscribeTo, the same value as TokenValueType — confirm whether custom
	// tokens should use a range that does not overlap these.
	TokenValueType TokenType = 1000 // A generic Identifier of value type
	TokenBool      TokenType = 1001
	TokenFloat     TokenType = 1002
	TokenInteger   TokenType = 1003
	TokenString    TokenType = 1004
	TokenTime      TokenType = 1005

	// Composite Data Types
	TokenJson TokenType = 1010
	TokenList TokenType = 1011
	TokenMap  TokenType = 1012
)

// List of datatypes from MySQL: implement them as tokens, or leave them as Identity during
// DDL create/alter statements? Each entry maps a synonym to the MySQL type it stands for:
//
//	BOOL                 -> TINYINT
//	BOOLEAN              -> TINYINT
//	CHARACTER VARYING(M) -> VARCHAR(M)
//	FIXED                -> DECIMAL
//	FLOAT4               -> FLOAT
//	FLOAT8               -> DOUBLE
//	INT1                 -> TINYINT
//	INT2                 -> SMALLINT
//	INT3                 -> MEDIUMINT
//	INT4                 -> INT
//	INT8                 -> BIGINT
//	LONG VARBINARY       -> MEDIUMBLOB
//	LONG VARCHAR         -> MEDIUMTEXT
//	LONG                 -> MEDIUMTEXT
//	MIDDLEINT            -> MEDIUMINT
//	NUMERIC              -> DECIMAL

func (TokenType) MatchString

func (typ TokenType) MatchString() string

MatchString returns the keyword to look for: either the full keyword or, for keywords containing spaces such as "group by", just the first word ("group").

func (TokenType) MultiWord

func (typ TokenType) MultiWord() bool

MultiWord reports whether this token is a keyword made up of multiple words, such as "group by".

func (TokenType) String

func (typ TokenType) String() string

String converts the token type to a human-readable string.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL