Documentation ¶
Overview ¶
Lexing for QLBridge implements 4 dialects:
- SQL
- FilterQL - a WHERE-clause filtering language
- Json
- Expression - a simple native logical/functional expression evaluator
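A typical use is to construct a lexer for one of these dialects and pull tokens until EOF. A minimal sketch (the import path github.com/araddon/qlbridge/lex is an assumption; see NewSqlLexer, NextToken, and Token below):

    package main

    import (
        "fmt"

        "github.com/araddon/qlbridge/lex"
    )

    func main() {
        l := lex.NewSqlLexer("SELECT name, age FROM users WHERE age > 21;")
        for {
            tok := l.NextToken()
            if tok.T == lex.TokenEOF || tok.T == lex.TokenError {
                break
            }
            fmt.Printf("%-22v %q\n", tok.T, tok.V)
        }
    }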
Index ¶
- Variables
- func IdentityRunesOnly(identity string) bool
- func IsBreak(r rune) bool
- func IsIdentifierRune(r rune) bool
- func LoadTokenInfo()
- type Clause
- type Dialect
- type KeywordMatcher
- type Lexer
- func (l *Lexer) ConsumeWord(word string)
- func (l *Lexer) Emit(t TokenType)
- func (l *Lexer) IsComment() bool
- func (l *Lexer) IsEnd() bool
- func (l *Lexer) LexMatchSkip(tok TokenType, skip int, fn StateFn) StateFn
- func (l *Lexer) Next() (r rune)
- func (l *Lexer) NextToken() Token
- func (l *Lexer) Peek() rune
- func (l *Lexer) PeekWord() string
- func (l *Lexer) PeekWord2() string
- func (l *Lexer) PeekX(x int) string
- func (l *Lexer) Push(name string, state StateFn)
- func (l *Lexer) RawInput() string
- func (l *Lexer) Remainder() (string, bool)
- func (l *Lexer) ReverseTrim()
- func (l *Lexer) SkipWhiteSpaces()
- func (l *Lexer) SkipWhiteSpacesNewLine() bool
- type NamedStateFn
- type StateFn
- func LexColumnNames(l *Lexer) StateFn
- func LexColumns(l *Lexer) StateFn
- func LexComment(l *Lexer) StateFn
- func LexConditionalClause(l *Lexer) StateFn
- func LexDataType(forToken TokenType) StateFn
- func LexDdlColumn(l *Lexer) StateFn
- func LexDialectForStatement(l *Lexer) StateFn
- func LexDuration(l *Lexer) StateFn
- func LexEmpty(l *Lexer) StateFn
- func LexEndOfStatement(l *Lexer) StateFn
- func LexExpression(l *Lexer) StateFn
- func LexExpressionOrIdentity(l *Lexer) StateFn
- func LexExpressionParens(l *Lexer) StateFn
- func LexFilterClause(l *Lexer) StateFn
- func LexIdentifierOfType(forToken TokenType) StateFn
- func LexIdentityOrValue(l *Lexer) StateFn
- func LexInlineComment(l *Lexer) StateFn
- func LexInlineCommentNoTag(l *Lexer) StateFn
- func LexJoinEntry(l *Lexer) StateFn
- func LexJson(l *Lexer) StateFn
- func LexJsonArray(l *Lexer) StateFn
- func LexJsonIdentity(l *Lexer) StateFn
- func LexJsonObject(l *Lexer) StateFn
- func LexJsonOrKeyValue(l *Lexer) StateFn
- func LexJsonValue(l *Lexer) StateFn
- func LexLimit(l *Lexer) StateFn
- func LexListOfArgs(l *Lexer) StateFn
- func LexLogical(l *Lexer) StateFn
- func LexMatchClosure(tok TokenType, nextFn StateFn) StateFn
- func LexMultilineComment(l *Lexer) StateFn
- func LexNameValueArgs(l *Lexer) StateFn
- func LexNumber(l *Lexer) StateFn
- func LexNumberOrDuration(l *Lexer) StateFn
- func LexOrderByColumn(l *Lexer) StateFn
- func LexPreparedStatement(l *Lexer) StateFn
- func LexRegex(l *Lexer) StateFn
- func LexRightParen(l *Lexer) StateFn
- func LexSelectClause(l *Lexer) StateFn
- func LexSelectList(l *Lexer) StateFn
- func LexShowClause(l *Lexer) StateFn
- func LexStatement(l *Lexer) StateFn
- func LexSubQuery(l *Lexer) StateFn
- func LexTableColumns(l *Lexer) StateFn
- func LexTableReferenceFirst(l *Lexer) StateFn
- func LexTableReferences(l *Lexer) StateFn
- func LexUpsertClause(l *Lexer) StateFn
- func LexValue(l *Lexer) StateFn
- type Token
- type TokenInfo
- type TokenType
Constants ¶
This section is empty.
Variables ¶
var (
    // FEATURE FLAGS
    SUPPORT_DURATION = true

    // Identity Quoting
    // http://stackoverflow.com/questions/1992314/what-is-the-difference-between-single-and-double-quotes-in-sql
    // You might want to set this to not include single ticks:
    // http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
    //IdentityQuoting = []byte{'[', '`', '"'} // mysql ansi-ish, no single-quote identities, allowing double-quote

    // More ansi-ish: allow single quotes around identities.
    IdentityQuotingWSingleQuote = []byte{'[', '`', '\''}
    // No single quotes around identities, because MySQL uses single quotes for string literals.
    IdentityQuoting = []byte{'[', '`'}
)
var (
    // Which identity characters are allowed for UNESCAPED identities.
    IDENTITY_CHARS = "_.-/"
    // A much more lax identity char-set rule that allows spaces.
    IDENTITY_LAX_CHARS = "_./- "
    // SQL variables start with @@.
    IDENTITY_SQL_CHARS = "@_.-"

    // List of token-name info.
    TokenNameMap = map[TokenType]*TokenInfo{ /* 122 elements not displayed */ }
)
var FilterSelectStatement = []*Clause{
    {Token: TokenSelect, Lexer: LexSelectClause, Optional: false},
    {Token: TokenFrom, Lexer: LexTableReferences, Optional: false},
    {Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
    {Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
    {Token: TokenLimit, Lexer: LexNumber, Optional: true},
    {Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
    {Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
    {Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
}
var FilterStatement = []*Clause{
    {Token: TokenFilter, Lexer: LexFilterClause, Optional: true},
    {Token: TokenFrom, Lexer: LexTableReferences, Optional: true},
    {Token: TokenLimit, Lexer: LexNumber, Optional: true},
    {Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
    {Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
    {Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
}
var LexDataTypeIdentity = LexDataType(TokenDataType)
var LexIdentifier = LexIdentifierOfType(TokenIdentity)
LexIdentifier scans and finds named things (tables, columns) and specifies them as TokenIdentity; uses LexIdentifierOfType.
TODO: dialect controls escaping/quoting techniques

    [name]        select [first name] from usertable;
    'name'        select 'user' from usertable;
    first_name    select first_name from usertable;
    usertable     select first_name AS fname from usertable;
    _name         select _name AS name from stuff;
var LexTableIdentifier = LexIdentifierOfType(TokenTable)
var SqlAlter = []*Clause{
    {Token: TokenAlter, Lexer: LexEmpty},
    {Token: TokenTable, Lexer: LexIdentifier},
    {Token: TokenChange, Lexer: LexDdlColumn},
    {Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlCommit = []*Clause{
    {Token: TokenCommit, Lexer: LexEmpty},
}
var SqlDelete = []*Clause{
    {Token: TokenDelete, Lexer: LexEmpty},
    {Token: TokenFrom, Lexer: LexIdentifierOfType(TokenTable)},
    {Token: TokenSet, Lexer: LexColumns, Optional: true},
    {Token: TokenWhere, Lexer: LexColumns, Optional: true},
    {Token: TokenLimit, Lexer: LexNumber, Optional: true},
    {Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlDescribe = []*Clause{
    {Token: TokenDescribe, Lexer: LexColumns},
}
var SqlDescribeAlt = []*Clause{
    {Token: TokenDesc, Lexer: LexColumns},
}
Alternate spelling of Describe.
var SqlExplain = []*Clause{
    {Token: TokenExplain, Lexer: LexColumns},
}
Explain is an alias of Describe.
var SqlInsert = []*Clause{
    {Token: TokenInsert, Lexer: LexUpsertClause, Name: "insert.entry"},
    {Token: TokenLeftParenthesis, Lexer: LexColumnNames, Optional: true},
    {Token: TokenSet, Lexer: LexTableColumns, Optional: true},
    {Token: TokenSelect, Optional: true, Clauses: insertSubQuery},
    {Token: TokenValues, Lexer: LexTableColumns, Optional: true},
    {Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlPrepare = []*Clause{
    {Token: TokenPrepare, Lexer: LexPreparedStatement},
    {Token: TokenFrom, Lexer: LexTableReferences},
}
var SqlReplace = []*Clause{
    {Token: TokenReplace, Lexer: LexEmpty},
    {Token: TokenInto, Lexer: LexIdentifierOfType(TokenTable)},
    {Token: TokenSet, Lexer: LexTableColumns, Optional: true},
    {Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
    {Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlRollback = []*Clause{
    {Token: TokenRollback, Lexer: LexEmpty},
}
var SqlSelect = []*Clause{
    {Token: TokenSelect, Lexer: LexSelectClause},
    {Token: TokenInto, Lexer: LexIdentifierOfType(TokenTable), Optional: true},
    {Token: TokenFrom, Lexer: LexTableReferenceFirst, Optional: true, Repeat: false, Clauses: fromSource, Name: "sqlSelect.From"},
    {KeywordMatcher: sourceMatch, Optional: true, Repeat: true, Clauses: moreSources, Name: "sqlSelect.sources"},
    {Token: TokenWhere, Lexer: LexConditionalClause, Optional: true, Clauses: whereQuery, Name: "sqlSelect.where"},
    {Token: TokenGroupBy, Lexer: LexColumns, Optional: true, Name: "sqlSelect.groupby"},
    {Token: TokenHaving, Lexer: LexConditionalClause, Optional: true, Name: "sqlSelect.having"},
    {Token: TokenOrderBy, Lexer: LexOrderByColumn, Optional: true, Name: "sqlSelect.orderby"},
    {Token: TokenLimit, Lexer: LexLimit, Optional: true},
    {Token: TokenOffset, Lexer: LexNumber, Optional: true},
    {Token: TokenWith, Lexer: LexJsonOrKeyValue, Optional: true},
    {Token: TokenAlias, Lexer: LexIdentifier, Optional: true},
    {Token: TokenEOF, Lexer: LexEndOfStatement, Optional: false},
}
var SqlSet = []*Clause{
    {Token: TokenSet, Lexer: LexColumns},
}
var SqlShow = []*Clause{
    {Token: TokenShow, Lexer: LexShowClause},
    {Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
}
var SqlUpdate = []*Clause{
    {Token: TokenUpdate, Lexer: LexIdentifierOfType(TokenTable)},
    {Token: TokenSet, Lexer: LexColumns},
    {Token: TokenWhere, Lexer: LexColumns, Optional: true},
    {Token: TokenLimit, Lexer: LexNumber, Optional: true},
    {Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlUpsert = []*Clause{
    {Token: TokenUpsert, Lexer: LexUpsertClause, Name: "upsert.entry"},
    {Token: TokenSet, Lexer: LexTableColumns, Optional: true},
    {Token: TokenLeftParenthesis, Lexer: LexTableColumns, Optional: true},
    {Token: TokenWith, Lexer: LexJson, Optional: true},
}
var SqlUse = []*Clause{
    {Token: TokenUse, Lexer: LexIdentifier},
}
Functions ¶
func IdentityRunesOnly ¶
func IdentityRunesOnly(identity string) bool
func IsIdentifierRune ¶
func IsIdentifierRune(r rune) bool
func LoadTokenInfo ¶
func LoadTokenInfo()
Types ¶
type Clause ¶
type Clause struct {
    Optional       bool      // Is this Clause/Keyword optional?
    Repeat         bool      // Repeatable clause?
    Token          TokenType // Token identifying start of clause, optional
    KeywordMatcher KeywordMatcher
    Lexer          StateFn   // Lex function to lex clause, optional
    Clauses        []*Clause // Children Clauses
    Name           string
    // contains filtered or unexported fields
}
type Dialect ¶
type Dialect struct {
    Name            string
    Statements      []*Clause
    IdentityQuoting []byte
    // contains filtered or unexported fields
}
Dialect is a language made up of multiple Statements:

    SQL, CQL, INFLUXQL, etc.
var ExpressionDialect *Dialect = &Dialect{
    Statements: []*Clause{
        &Clause{Token: TokenNil, Clauses: expressionStatement},
    },
}
ExpressionDialect is a single-expression dialect, useful for parsing a single function such as:

    eq(tolower(item_name),"buy")
var FilterQLDialect *Dialect = &Dialect{
    Statements: []*Clause{
        &Clause{Token: TokenFilter, Clauses: FilterStatement},
        &Clause{Token: TokenSelect, Clauses: FilterSelectStatement},
    },
    IdentityQuoting: IdentityQuotingWSingleQuote,
}
FilterQL is a WHERE-clause filtering language, slightly more DSL-ish than a SQL WHERE clause.
var JsonDialect *Dialect = &Dialect{
    Statements: []*Clause{
        &Clause{Token: TokenNil, Clauses: jsonDialectStatement},
    },
}
JsonDialect is a JSON lexer:

    ["hello","world"]
    {"name":"bob","apples":["honeycrisp","fuji"]}
var LogicalExpressionDialect *Dialect = &Dialect{
    Statements: []*Clause{
        &Clause{Token: TokenNil, Clauses: logicalEpressions},
    },
}
LogicalExpressionDialect lexes logical expression statements of the following functional format:

    5 > 4                     => true
    4 + 5                     => 9
    tolower(item) + 12 > 4
    4 IN (4,5,6)
var SqlDialect *Dialect = &Dialect{
    Statements: []*Clause{
        &Clause{Token: TokenPrepare, Clauses: SqlPrepare},
        &Clause{Token: TokenSelect, Clauses: SqlSelect},
        &Clause{Token: TokenUpdate, Clauses: SqlUpdate},
        &Clause{Token: TokenUpsert, Clauses: SqlUpsert},
        &Clause{Token: TokenInsert, Clauses: SqlInsert},
        &Clause{Token: TokenDelete, Clauses: SqlDelete},
        &Clause{Token: TokenAlter, Clauses: SqlAlter},
        &Clause{Token: TokenDescribe, Clauses: SqlDescribe},
        &Clause{Token: TokenExplain, Clauses: SqlExplain},
        &Clause{Token: TokenDesc, Clauses: SqlDescribeAlt},
        &Clause{Token: TokenShow, Clauses: SqlShow},
        &Clause{Token: TokenSet, Clauses: SqlSet},
        &Clause{Token: TokenUse, Clauses: SqlUse},
        &Clause{Token: TokenRollback, Clauses: SqlRollback},
        &Clause{Token: TokenCommit, Clauses: SqlCommit},
    },
}
SqlDialect is a SQL-like dialect:

    SELECT, UPDATE, INSERT, UPSERT, DELETE
    SHOW identity;  DESCRIBE identity;  PREPARE

ddl:

    ALTER

TODO: CREATE, VIEW
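Because a Dialect is just named lists of Clauses, a new statement type can be sketched by composing the exported clause lists and state functions. A hypothetical example (MyDialect and myShow are illustrative names only; a real dialect may need initialization beyond the exported fields shown here):

    // A hypothetical dialect with a single SHOW-like statement,
    // re-using this package's clause definitions and state functions.
    var myShow = []*Clause{
        {Token: TokenShow, Lexer: LexShowClause},
        {Token: TokenWhere, Lexer: LexConditionalClause, Optional: true},
        {Token: TokenEOF, Lexer: LexEndOfStatement},
    }

    var MyDialect = &Dialect{
        Name: "my_dialect",
        Statements: []*Clause{
            {Token: TokenShow, Clauses: myShow},
        },
    }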
type KeywordMatcher ¶
A Clause may supply a KeywordMatcher instead of a keyword token.
type Lexer ¶
type Lexer struct {
// contains filtered or unexported fields
}
Lexer holds the state of the lexical scanning.

It holds a *Dialect*, which supplies much of the rules specific to this language. Based on the lexer from the "text/template" package, though many generations removed from it. See http://www.youtube.com/watch?v=HxaD_trXwRE
func NewFilterQLLexer ¶
NewFilterQLLexer creates a new lexer for the input string using FilterQLDialect, which is a DSL for WHERE/filtering.
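A short sketch, continuing the import assumptions from the Overview example (the exact token stream in the trailing comment is an assumption):

    l := lex.NewFilterQLLexer(`FILTER x > 7 ALIAS myfilter`)
    for tok := l.NextToken(); tok.T != lex.TokenEOF && tok.T != lex.TokenError; tok = l.NextToken() {
        fmt.Println(tok.T, tok.V)
    }
    // Yields roughly: TokenFilter, TokenIdentity(x), TokenGT, TokenInteger(7),
    // TokenAlias, TokenIdentity(myfilter)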
func NewJsonLexer ¶
NewJsonLexer creates a new JSON-dialect lexer for the input string.
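A sketch of collecting JSON tokens (the exact token types emitted are an assumption based on the token list below):

    l := lex.NewJsonLexer(`{"name":"bob","apples":["honeycrisp","fuji"]}`)
    toks := make([]lex.Token, 0, 16)
    for t := l.NextToken(); t.T != lex.TokenEOF && t.T != lex.TokenError; t = l.NextToken() {
        toks = append(toks, t)
    }
    // toks begins roughly: TokenLeftBrace, TokenIdentity "name", TokenColon,
    // TokenValue "bob", TokenComma, ...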
func NewSqlLexer ¶
NewSqlLexer creates a new lexer for the input string using SqlDialect; this is a SQL(ish)-compatible lexer.
func (*Lexer) ConsumeWord ¶
Moves the position forward to consume the given word.
func (*Lexer) LexMatchSkip ¶
Matches the expected TokenType, emitting the token on success and returning the passed state function.
func (*Lexer) Remainder ¶
SQL and other string expressions may contain more than one statement, such as:

    use schema_x;
    show tables;
    set @my_var = "value";
    select a,b from `users` where name = @my_var;
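One possible consumption pattern, assuming Remainder returns the unlexed text after the current statement and false when nothing remains (a sketch, not the package's documented contract):

    l := lex.NewSqlLexer("use schema_x; show tables;")
    // ... drain tokens for the first statement via l.NextToken() ...
    if rest, ok := l.Remainder(); ok {
        l2 := lex.NewSqlLexer(rest) // lex the next statement with a fresh lexer
        _ = l2
    }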
func (*Lexer) ReverseTrim ¶
func (l *Lexer) ReverseTrim()
Skips whitespace characters at the end by trimming, so we can recognize the end more easily.
func (*Lexer) SkipWhiteSpaces ¶
func (l *Lexer) SkipWhiteSpaces()
Skips white space characters in the input.
func (*Lexer) SkipWhiteSpacesNewLine ¶
Skips whitespace characters in the input; returns whether a newline was encountered.
type NamedStateFn ¶
type StateFn ¶
StateFn represents the state of the lexer as a function that returns the next state.
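Every Lex* function below has this shape: func(*Lexer) StateFn. A minimal hypothetical state function (lexCommaList is illustrative only, not part of this package) might look like:

    import "github.com/araddon/qlbridge/lex"

    // lexCommaList is a hypothetical StateFn: it skips whitespace, emits a
    // comma token when one is present and repeats itself, otherwise hands
    // control to the end-of-statement state.
    func lexCommaList(l *lex.Lexer) lex.StateFn {
        l.SkipWhiteSpaces()
        if l.Peek() == ',' {
            l.Next()
            l.Emit(lex.TokenComma)
            return lexCommaList
        }
        return lex.LexEndOfStatement
    }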
func LexColumnNames ¶
Handle list of column names on insert/update statements

    <insert_into> <col_names> VALUES <col_value_list>

    <col_names> := '(' <identity> [, <identity>]* ')'
func LexComment ¶
LexComment looks for valid comments, which are any of the following, including the in-line comment blocks

    /* hello */
    // hello
    -- hello
    # hello
    SELECT name --name is the combined first-last name
         , age FROM `USER` ...
func LexConditionalClause ¶
Handle logical Conditional Clause used for [WHERE, WITH, JOIN ON], logically grouped with parens and/or separated by commas or logic (AND/OR/NOT)

    SELECT ... WHERE <conditional_clause>

    <conditional_clause> ::= <expr> [( AND <expr> | OR <expr> | '(' <expr> ')' )]
    <expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>

SEE: <expr> = LexExpression
func LexDataType ¶
LexDataType scans and finds datatypes. [] are valid inside of data types, with no escaping such as ' or "

    []string          CREATE table( field []string )
    map[string]int
    int, string, etc
func LexDdlColumn ¶
Data definition language column

    CHANGE col1_old col1_new varchar(10),
    CHANGE col2_old col2_new TEXT
    ADD col3 BIGINT AFTER col1_new
    ADD col2 TEXT FIRST,
func LexDialectForStatement ¶
Find the first keyword in the current queryText, then find the appropriate statement in the dialect, ie [SELECT, ALTER, CREATE, INSERT] in SQL.
func LexDuration ¶
LexDuration lexes time durations

    durations:   45m, 2w, 20y, 22d, 40ms, 100ms, -100ms
func LexEndOfStatement ¶
Look for end of statement defined by either a semicolon or end of file
func LexExpression ¶
<expr> Handle a single logical expression, which may be nested and may have user-defined function names that are NOT validated by the lexer

    <expr> ::= <predicatekw> '('? <expr> [, <expr>] ')'? | <func> | <subselect>
    <func> ::= <identity>'(' <expr> ')'
    <predicatekw> ::= [NOT] (IN | INTERSECTS | CONTAINS | RANGE | LIKE | EQUALS)
Examples:
    (colx = y OR colb = b)
    cola = 'a5'
    cola != "a5", colb = "a6"
    REPLACE(cola,"stuff") != "hello"
    FirstName = REPLACE(LOWER(name," "))
    cola IN (1,2,3)
    cola LIKE "abc"
    eq(name,"bob") AND age > 5
    time > now() -1h
    (4 + 5) > 10
    reg_date BETWEEN x AND y
func LexExpressionOrIdentity ¶
Look for either an Expression or Identity

    expressions:  legal identity characters, terminated by (
    identity:     legal identity characters

    REPLACE(name,"stuff")
    name
func LexExpressionParens ¶
Lex an Expression, identified by parenthesis; may be nested

            |--expr----|
    dostuff(name,"arg")               // the left parenthesis identifies it as Expression
    eq(trim(name," "),"gmail.com")
func LexFilterClause ¶
Handle FilterQL main statement

    FILTER := ( <filter_bool_expr> | <filter_expr> )

    <filter_bool_expr> := ( AND | OR ) '(' ( <filter_bool_expr> | <filter_expr> ) [, ( <filter_bool_expr> | <filter_expr> ) ] ')'

    <filter_expr> := <expr>

Examples:

    FILTER
      AND (
          daysago(datefield) < 100
          , domain(url) == "google.com"
          , INCLUDE name_of_filter
          , OR (
              momentum > 20
              , propensity > 50
          )
      ) ALIAS myfilter

    FILTER x > 7
func LexIdentifierOfType ¶
LexIdentifierOfType scans and finds named things (tables, columns); supports quoted, bracket, or raw identifiers.
TODO: dialect controls escaping/quoting techniques

    [name]        select [first name] from usertable;
    'name'        select 'user' from usertable;
    `user`        select first_name from `user`;
    first_name    select first_name from usertable;
    usertable     select first_name AS fname from usertable;
    _name         select _name AS name from stuff;
    @@varname     select @@varname;
func LexIdentityOrValue ¶
look for either an Identity or Value
func LexInlineCommentNoTag ¶
Comment beginning with //, # or --; does not emit the tag, just the text of the comment.
func LexJoinEntry ¶
Handle Source References ie [From table], [SubSelects], Joins

    SELECT ... FROM <sources>

    <sources>      := <source> [, <join_clause> <source>]*
    <source>       := ( <table_source> | <subselect> ) [AS <identifier>]
    <table_source> := <identifier>
    <join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
    <subselect>    := '(' <select_stmt> ')'
func LexJsonIdentity ¶
Lex a string value:

    strings must be quoted

    "stuff" -> stuff
    "items's with quote"
func LexJsonOrKeyValue ¶
Lex either Json or Key/Value pairs

    Must start with { or [ for json
    Start with identity for key/value pairs
func LexJsonValue ¶
LexJsonValue: Consume values, first consuming Colon

    <jsonvalue> ::= ':' ( <value>, <array>, <jsonobject> ) [, ...]
func LexListOfArgs ¶
List of arguments: a comma-separated list of args which may be a mixture of expressions, identities, and values

    REPLACE(LOWER(x),"xyz")
    REPLACE(x,"xyz")
    COUNT(*)
    sum( 4 * toint(age))
    IN (a,b,c)
    varchar(10)
    CAST(field AS int)

    (a,b,c,d)   -- For Insert statement, list of columns
func LexLogical ¶
LexLogical is a lex entry function for the logical expression language (+ - / > etc), ie the full logical boolean logic.
func LexMatchClosure ¶
Matches the expected TokenType, emitting the token on success and returning the passed state function.
func LexMultilineComment ¶
A multi-line comment of the format /* comment */; it does not have to actually be multi-line, just delimited by those comment markers.
func LexNameValueArgs ¶
<name_value_args> Handle a comma-delimited list of name = value args
Examples:
    colx = y OR colb = b
    cola = 'a5'
    cola != "a5", colb = "a6"
func LexNumber ¶
LexNumber lexes floats, integers, hex, exponential, and signed numbers

    1.23
    100
    -827
    6.02e23
    0X1A2B, 0x1a2b, 0x1A2B.2B
Floats must be in decimal and must either:
- Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
- Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.
Integers can be:
- decimal (e.g. -827)
- hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)
func LexNumberOrDuration ¶
LexNumberOrDuration lexes floats, integers, hex, exponential, signed numbers, and durations

    1.23
    100
    -827
    6.02e23
    0X1A2B, 0x1a2b, 0x1A2B.2B

    durations:   45m, 2w, 20y, 22d, 40ms, 100ms, -100ms
Floats must be in decimal and must either:
- Have digits both before and after the decimal point (both can be a single 0), e.g. 0.5, -100.0, or
- Have a lower-case e that represents scientific notation, e.g. -3e-3, 6.02e23.
Integers can be:
- decimal (e.g. -827)
- hexadecimal (must begin with 0x and must use capital A-F, e.g. 0x1A2B)
func LexOrderByColumn ¶
Handle columnar identities with keyword suffixes (ASC, DESC)

    [ORDER BY] ( <identity> | <expr> ) [(ASC | DESC)]
func LexPreparedStatement ¶
Handle prepared statements

    <PREPARE_STMT> := PREPARE <identity> FROM <string_value>
func LexSelectClause ¶
Handle start of select statements, specifically looking for @@variables, *, or else we drop into <select_list>

    <SELECT> :== (DISTINCT|ALL)? ( <sql_variable> | * | <select_list> ) [FROM <source_clause>]

    <sql_variable> = @@stuff
func LexSelectList ¶
Handle repeating Select List for columns

    SELECT <select_list>

    <select_list> := <select_col> [, <select_col>]*
    <select_col>  :== ( <identifier> | <expression> | '*' ) [AS <identifier>] [IF <expression>] [<comment>]

Note: our Columns support a non-standard IF guard on a per-column basis.
func LexShowClause ¶
Handle show statement
SHOW [FULL] <multi_word_identifier> <identity> <like_or_where>
func LexStatement ¶
LexStatement is the main entry point to lex grammars primarily associated with QL-type languages, in which keywords separate clauses and have order [SELECT ... FROM name WHERE ...]; the reserved keywords serve as identifiers to stop lexing and move to the next clause lexer.
func LexTableColumns ¶
Handle repeating Insert/Upsert/Update statements

    <insert_into> <col_names> VALUES <col_value_list>
    <set> <upsert_cols> VALUES <col_value_list>

    <upsert_cols>    := <upsert_col> [, <upsert_col>]*
    <upsert_col>     := <identity> = <expr>

    <col_names>      := <identity> [, <identity>]*

    <col_value_list> := <col_value_row> [, <col_value_row>]*
    <col_value_row>  := '(' <expr> [, <expr>]* ')'
func LexTableReferenceFirst ¶
Handle Source References ie [From table], [SubSelects], Joins

    SELECT ... FROM <sources>

    <sources>      := <source> [, <join_clause> <source>]*
    <source>       := ( <table_source> | <subselect> ) [AS <identifier>]
    <table_source> := <identifier>
    <join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
    <subselect>    := '(' <select_stmt> ')'
func LexTableReferences ¶
Handle Source References ie [From table], [SubSelects], Joins

    SELECT ... FROM <sources>

    <sources>      := <source> [, <join_clause> <source>]*
    <source>       := ( <table_source> | <subselect> ) [AS <identifier>]
    <table_source> := <identifier>
    <join_clause>  := (INNER | LEFT | OUTER)? JOIN [ON <conditional_clause>]
    <subselect>    := '(' <select_stmt> ')'
func LexUpsertClause ¶
Handle the start of Insert/Upsert statements.
func LexValue ¶
Lex a value: string, integer, float

    - literal strings must be quoted
    - numerics with no period are integers
    - numerics with a period are floats

    "stuff"              -> [string]  = stuff
    'stuff'              -> [string]  = stuff
    "items's with quote" -> [string]  = items's with quote
    1.23                 -> [float]   = 1.23
    100                  -> [integer] = 100
    ["hello","world"]    -> [array]   {"hello","world"}
type Token ¶
type Token struct {
    T      TokenType // type
    V      string    // value
    Quote  byte      // quote mark:  " ` [ '
    Line   int       // Line #
    Column int       // Position in line
}
Token represents a text string returned from the lexer.
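The Line and Column fields make tokens convenient for error reporting, for example (a small sketch, given a *Lexer named l and the import assumptions above):

    tok := l.NextToken()
    if tok.T == lex.TokenError {
        fmt.Printf("lex error at line %d, column %d: %s\n", tok.Line, tok.Column, tok.V)
    }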
type TokenType ¶
type TokenType uint16
TokenType identifies the type of lexical tokens.
const (
    // Basic grammar items
    TokenNil      TokenType = 0 // not used
    TokenEOF      TokenType = 1 // EOF
    TokenEOS      TokenType = 2 // ;
    TokenEofOrEos TokenType = 3 // End of file, OR ;
    TokenError    TokenType = 4 // error occurred; value is text of error
    TokenRaw      TokenType = 5 // raw unlexed text string
    TokenNewLine  TokenType = 6 // NewLine = \n

    // Comments
    TokenComment           TokenType = 10 // Comment value string
    TokenCommentML         TokenType = 11 // Comment MultiValue
    TokenCommentStart      TokenType = 12 // /*
    TokenCommentEnd        TokenType = 13 // */
    TokenCommentSlashes    TokenType = 14 // Single Line comment:  // hello
    TokenCommentSingleLine TokenType = 15 // Single Line comment:  -- hello
    TokenCommentHash       TokenType = 16 // Single Line comment:  # hello

    // Misc
    TokenComma        TokenType = 20 // ,
    TokenStar         TokenType = 21 // *
    TokenColon        TokenType = 22 // :
    TokenLeftBracket  TokenType = 23 // [
    TokenRightBracket TokenType = 24 // ]
    TokenLeftBrace    TokenType = 25 // {
    TokenRightBrace   TokenType = 26 // }

    // operand related tokens
    TokenMinus            TokenType = 60 // -
    TokenPlus             TokenType = 61 // +
    TokenPlusPlus         TokenType = 62 // ++
    TokenPlusEquals       TokenType = 63 // +=
    TokenDivide           TokenType = 64 // /
    TokenMultiply         TokenType = 65 // *
    TokenModulus          TokenType = 66 // %
    TokenEqual            TokenType = 67 // =
    TokenEqualEqual       TokenType = 68 // ==
    TokenNE               TokenType = 69 // !=
    TokenGE               TokenType = 70 // >=
    TokenLE               TokenType = 71 // <=
    TokenGT               TokenType = 72 // >
    TokenLT               TokenType = 73 // <
    TokenIf               TokenType = 74 // IF
    TokenOr               TokenType = 75 // ||
    TokenAnd              TokenType = 76 // &&
    TokenBetween          TokenType = 77 // between
    TokenLogicOr          TokenType = 78 // OR
    TokenLogicAnd         TokenType = 79 // AND
    TokenIN               TokenType = 80 // IN
    TokenLike             TokenType = 81 // LIKE
    TokenNegate           TokenType = 82 // NOT
    TokenLeftParenthesis  TokenType = 83 // (
    TokenRightParenthesis TokenType = 84 // )
    TokenTrue             TokenType = 85 // True
    TokenFalse            TokenType = 86 // False
    TokenIs               TokenType = 87 // IS
    TokenNull             TokenType = 88 // NULL
    TokenContains         TokenType = 89 // CONTAINS
    TokenIntersects       TokenType = 90 // INTERSECTS

    // ql top-level keywords, these first keywords determine parser
    TokenPrepare   TokenType = 200
    TokenInsert    TokenType = 201
    TokenUpdate    TokenType = 202
    TokenDelete    TokenType = 203
    TokenSelect    TokenType = 204
    TokenUpsert    TokenType = 205
    TokenAlter     TokenType = 206
    TokenCreate    TokenType = 207
    TokenSubscribe TokenType = 208
    TokenFilter    TokenType = 209
    TokenShow      TokenType = 210
    TokenDescribe  TokenType = 211 // We can also use TokenDesc
    TokenExplain   TokenType = 212 // another alias for describe
    TokenReplace   TokenType = 213 // Insert/Replace are interchangeable on insert statements
    TokenRollback  TokenType = 214
    TokenCommit    TokenType = 215

    // Other QL keywords; these are clause-level keywords that mark separation between clauses
    TokenTable    TokenType = 301 // table
    TokenFrom     TokenType = 302 // from
    TokenWhere    TokenType = 303 // where
    TokenHaving   TokenType = 304 // having
    TokenGroupBy  TokenType = 305 // group by
    TokenBy       TokenType = 306 // by
    TokenAlias    TokenType = 307 // alias
    TokenWith     TokenType = 308 // with
    TokenValues   TokenType = 309 // values
    TokenInto     TokenType = 310 // into
    TokenLimit    TokenType = 311 // limit
    TokenOrderBy  TokenType = 312 // order by
    TokenInner    TokenType = 313 // inner, ie of join
    TokenCross    TokenType = 314 // cross
    TokenOuter    TokenType = 315 // outer
    TokenLeft     TokenType = 316 // left
    TokenRight    TokenType = 317 // right
    TokenJoin     TokenType = 318 // Join
    TokenOn       TokenType = 319 // on
    TokenDistinct TokenType = 320 // DISTINCT
    TokenAll      TokenType = 321 // all
    TokenInclude  TokenType = 322 // INCLUDE
    TokenExists   TokenType = 323 // EXISTS
    TokenOffset   TokenType = 324 // OFFSET
    TokenFull     TokenType = 325 // FULL
    TokenGlobal   TokenType = 326 // GLOBAL
    TokenSession  TokenType = 327 // SESSION
    TokenTables   TokenType = 328 // TABLES

    // ddl
    TokenChange       TokenType = 400 // change
    TokenAdd          TokenType = 401 // add
    TokenFirst        TokenType = 402 // first
    TokenAfter        TokenType = 403 // after
    TokenCharacterSet TokenType = 404 // character set

    // Other QL keywords
    TokenSet  TokenType = 500 // set
    TokenAs   TokenType = 501 // as
    TokenAsc  TokenType = 502 // ascending
    TokenDesc TokenType = 503 // descending
    TokenUse  TokenType = 504 // use

    // User defined function/expression
    TokenUdfExpr TokenType = 550

    // Value Types
    TokenIdentity             TokenType = 600 // identity, either column, table name etc
    TokenValue                TokenType = 601 // 'some string' string or continuous sequence of chars delimited by WHITE SPACE | ' | , | ( | )
    TokenValueWithSingleQuote TokenType = 602 // '' becomes ' inside the string, parser will need to replace the string
    TokenRegex                TokenType = 603 // regex
    TokenDuration             TokenType = 604 // 14d, 22w, 3y, 45ms, 45us, 24hr, 2h, 45m, 30s

    // Scalar literal data-types
    TokenDataType TokenType = 1000 // A generic Identifier of DataTypes
    TokenBool     TokenType = 1001
    TokenFloat    TokenType = 1002
    TokenInteger  TokenType = 1003
    TokenString   TokenType = 1004
    TokenVarChar  TokenType = 1005
    TokenBigInt   TokenType = 1006
    TokenText     TokenType = 1007
    TokenJson     TokenType = 1008

    // Composite Data Types
    TokenList TokenType = 1050
    TokenMap  TokenType = 1051
)
// List of datatypes from MySQL. Implement them as tokens? Or leave as Identity during
// DDL create/alter statements?
//
//    BOOL                  TINYINT
//    BOOLEAN               TINYINT
//    CHARACTER VARYING(M)  VARCHAR(M)
//    FIXED                 DECIMAL
//    FLOAT4                FLOAT
//    FLOAT8                DOUBLE
//    INT1                  TINYINT
//    INT2                  SMALLINT
//    INT3                  MEDIUMINT
//    INT4                  INT
//    INT8                  BIGINT
//    LONG VARBINARY        MEDIUMBLOB
//    LONG VARCHAR          MEDIUMTEXT
//    LONG                  MEDIUMTEXT
//    MIDDLEINT             MEDIUMINT
//    NUMERIC               DECIMAL
func (TokenType) MatchString ¶
Which keyword should we look for: either the full keyword OR, in the case of multi-word keywords with spaces such as "group by", the first word (group).