Documentation ¶
Overview ¶
Package parse does the parsing stage after lexing
Package parse does the parsing stage after lexing, using a top-down recursive-descent (TDRD) strategy, with a special reverse mode to deal with left-associative binary expressions which otherwise end up being right-associative for TDRD parsing. Higher-level rules provide scope to lower-level ones, with a special EOS end-of-statement scope recognized for
Index ¶
- Variables
- type Act
- type Actions
- func (i Actions) Desc() string
- func (i Actions) Int64() int64
- func (i Actions) MarshalText() ([]byte, error)
- func (i *Actions) SetInt64(in int64)
- func (i *Actions) SetString(s string) error
- func (i Actions) String() string
- func (i *Actions) UnmarshalText(text []byte) error
- func (i Actions) Values() []enums.Enum
- type Acts
- type Ast
- func (ast *Ast) ChildAst(idx int) *Ast
- func (ast *Ast) Destroy()
- func (t *Ast) KiType() *gti.Type
- func (t *Ast) New() ki.Ki
- func (ast *Ast) NextAst() *Ast
- func (ast *Ast) NextSiblingAst() *Ast
- func (ast *Ast) ParAst() *Ast
- func (ast *Ast) PrevAst() *Ast
- func (ast *Ast) SetTokReg(reg lex.Reg, src *lex.File)
- func (ast *Ast) SetTokRegEnd(pos lex.Pos, src *lex.File)
- func (ast *Ast) WriteTree(out io.Writer, depth int)
- type AstActs
- func (i AstActs) Desc() string
- func (i AstActs) Int64() int64
- func (i AstActs) MarshalText() ([]byte, error)
- func (i *AstActs) SetInt64(in int64)
- func (i *AstActs) SetString(s string) error
- func (i AstActs) String() string
- func (i *AstActs) UnmarshalText(text []byte) error
- func (i AstActs) Values() []enums.Enum
- type MatchStack
- type MatchState
- type Matches
- type Parser
- type Rule
- func (pr *Rule) AsParseRule() *Rule
- func (pr *Rule) BaseIface() reflect.Type
- func (pr *Rule) Compile(ps *State) bool
- func (pr *Rule) CompileAll(ps *State) bool
- func (pr *Rule) CompileExcl(ps *State, rs []string, rist int) bool
- func (pr *Rule) CompileTokMap(ps *State) bool
- func (pr *Rule) DoAct(ps *State, act *Act, par *Rule, ourAst, parAst *Ast) bool
- func (pr *Rule) DoActs(ps *State, ri int, par *Rule, ourAst, parAst *Ast) bool
- func (pr *Rule) DoRules(ps *State, par *Rule, parAst *Ast, scope lex.Reg, mpos Matches, ...) bool
- func (pr *Rule) DoRulesRevBinExp(ps *State, par *Rule, parAst *Ast, scope lex.Reg, mpos Matches, ourAst *Ast, ...) bool
- func (pr *Rule) Find(find string) []*Rule
- func (pr *Rule) IsGroup() bool
- func (t *Rule) KiType() *gti.Type
- func (pr *Rule) Match(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, lex.Reg, Matches)
- func (pr *Rule) MatchExclude(ps *State, scope lex.Reg, ktpos lex.Reg, depth int, optMap lex.TokenMap) bool
- func (pr *Rule) MatchGroup(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, lex.Reg, Matches)
- func (pr *Rule) MatchMixed(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, Matches)
- func (pr *Rule) MatchNoToks(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, Matches)
- func (pr *Rule) MatchOnlyToks(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, Matches)
- func (pr *Rule) MatchToken(ps *State, rr *RuleEl, ri int, kt token.KeyToken, creg *lex.Reg, mpos Matches, ...) (bool, lex.Pos)
- func (t *Rule) New() ki.Ki
- func (pr *Rule) OptimizeOrder(ps *State)
- func (pr *Rule) Parse(ps *State, par *Rule, parAst *Ast, scope lex.Reg, optMap lex.TokenMap, ...) *Rule
- func (pr *Rule) ParseRules(ps *State, par *Rule, parAst *Ast, scope lex.Reg, optMap lex.TokenMap, ...) *Rule
- func (pr *Rule) Scope(ps *State, parAst *Ast, scope lex.Reg) (lex.Reg, bool)
- func (t *Rule) SetActs(v Acts) *Rule
- func (t *Rule) SetAst(v AstActs) *Rule
- func (t *Rule) SetDesc(v string) *Rule
- func (t *Rule) SetExclFwd(v RuleList) *Rule
- func (t *Rule) SetExclKeyIdx(v int) *Rule
- func (t *Rule) SetExclRev(v RuleList) *Rule
- func (t *Rule) SetFiTokElseIdx(v int) *Rule
- func (t *Rule) SetFiTokMap(v map[string]*Rule) *Rule
- func (t *Rule) SetFirstTokMap(v bool) *Rule
- func (t *Rule) SetOff(v bool) *Rule
- func (t *Rule) SetOptTokMap(v bool) *Rule
- func (t *Rule) SetOrder(v ...int) *Rule
- func (t *Rule) SetRule(v string) *Rule
- func (pr *Rule) SetRuleMap(ps *State)
- func (t *Rule) SetRules(v RuleList) *Rule
- func (t *Rule) SetStackMatch(v string) *Rule
- func (pr *Rule) StartParse(ps *State) *Rule
- func (pr *Rule) Validate(ps *State) bool
- func (pr *Rule) WriteGrammar(writer io.Writer, depth int)
- type RuleEl
- type RuleFlags
- func (i RuleFlags) BitIndexString() string
- func (i RuleFlags) Desc() string
- func (i RuleFlags) HasFlag(f enums.BitFlag) bool
- func (i RuleFlags) Int64() int64
- func (i RuleFlags) MarshalText() ([]byte, error)
- func (i *RuleFlags) SetFlag(on bool, f ...enums.BitFlag)
- func (i *RuleFlags) SetInt64(in int64)
- func (i *RuleFlags) SetString(s string) error
- func (i *RuleFlags) SetStringOr(s string) error
- func (i RuleFlags) String() string
- func (i *RuleFlags) UnmarshalText(text []byte) error
- func (i RuleFlags) Values() []enums.Enum
- type RuleList
- type ScopeRule
- type ScopeRuleSet
- type State
- func (ps *State) AddAst(parAst *Ast, rule string, reg lex.Reg) *Ast
- func (ps *State) AddMatch(pr *Rule, scope lex.Reg, regs Matches)
- func (ps *State) AddNonMatch(scope lex.Reg, pr *Rule)
- func (ps *State) AllocRules()
- func (ps *State) AtEof() bool
- func (ps *State) AtEofNext() bool
- func (ps *State) ClearAst()
- func (ps *State) Destroy()
- func (ps *State) Error(pos lex.Pos, msg string, rule *Rule)
- func (ps *State) FindNameScoped(nm string) (*syms.Symbol, bool)
- func (ps *State) FindNameTopScope(nm string) (*syms.Symbol, bool)
- func (ps *State) FindToken(tkey token.KeyToken, reg lex.Reg) (lex.Pos, bool)
- func (ps *State) FindTokenReverse(tkey token.KeyToken, reg lex.Reg) (lex.Pos, bool)
- func (ps *State) GotoEof()
- func (ps *State) Init(src *lex.File, ast *Ast)
- func (ps *State) IsMatch(pr *Rule, scope lex.Reg) (*MatchState, bool)
- func (ps *State) IsNonMatch(scope lex.Reg, pr *Rule) bool
- func (ps *State) MatchLex(lx *lex.Lex, tkey token.KeyToken, isCat, isSubCat bool, cp lex.Pos) bool
- func (ps *State) MatchToken(tkey token.KeyToken, pos lex.Pos) bool
- func (ps *State) NextSrcLine() string
- func (ps *State) ResetNonMatches()
- func (ps *State) RuleString(full bool) string
- type Steps
- type TraceOpts
- func (pt *TraceOpts) CheckRule(rule string) bool
- func (pt *TraceOpts) CopyOpts(ot *TraceOpts)
- func (pt *TraceOpts) FullOn()
- func (pt *TraceOpts) Init()
- func (pt *TraceOpts) Out(ps *State, pr *Rule, step Steps, pos lex.Pos, scope lex.Reg, ast *Ast, ...) bool
- func (pt *TraceOpts) PipeOut()
- func (pt *TraceOpts) StdOut()
Constants ¶
This section is empty.
Variables ¶
var AstProps = ki.Props{ "StructViewFields": ki.Props{ "Flag": `view:"-"`, "Props": `view:"-"`, }, }
var AstType = gti.AddType(&gti.Type{Name: "cogentcore.org/core/pi/parse.Ast", IDName: "ast", Doc: "Ast is a node in the abstract syntax tree generated by the parsing step\nthe name of the node (from ki.Node) is the type of the element\n(e.g., expr, stmt, etc)\nThese nodes are generated by the parse.Rule's by matching tokens", Embeds: []gti.Field{{Name: "Node"}}, Fields: []gti.Field{{Name: "TokReg", Doc: "region in source lexical tokens corresponding to this Ast node -- Ch = index in lex lines"}, {Name: "SrcReg", Doc: "region in source file corresponding to this Ast node"}, {Name: "Src", Doc: "source code corresponding to this Ast node"}, {Name: "Syms", Doc: "stack of symbols created for this node"}}, Instance: &Ast{}})
AstType is the gti.Type for Ast
var DepthLimit = 10000
DepthLimit is the infinite recursion prevention cutoff
var GuiActive = false
Set GuiActive to true if the gui (piview) is active -- ensures that the Ast tree is updated when nodes are swapped in reverse mode, and maybe other things
var RuleMap map[string]*Rule
RuleMap is a map of all the rule names, for quick lookup
var RuleType = gti.AddType(&gti.Type{Name: "cogentcore.org/core/pi/parse.Rule", IDName: "rule", Doc: "The first step is matching which searches in order for matches within the\nchildren of parent nodes, and for explicit rule nodes, it looks first\nthrough all the explicit tokens in the rule. If there are no explicit tokens\nthen matching defers to ONLY the first node listed by default -- you can\nadd a @ prefix to indicate a rule that is also essential to match.\n\nAfter a rule matches, it then proceeds through the rules narrowing the scope\nand calling the sub-nodes..", Embeds: []gti.Field{{Name: "Node"}}, Fields: []gti.Field{{Name: "Off", Doc: "disable this rule -- useful for testing and exploration"}, {Name: "Desc", Doc: "description / comments about this rule"}, {Name: "Rule", Doc: "the rule as a space-separated list of rule names and token(s) -- use single quotes around 'tokens' (using token.Tokens names or symbols). For keywords use 'key:keyword'. All tokens are matched at the same nesting depth as the start of the scope of this rule, unless they have a +D relative depth value differential before the token. Use @ prefix for a sub-rule to require that rule to match -- by default explicit tokens are used if available, and then only the first sub-rule failing that. Use ! by itself to define start of an exclusionary rule -- doesn't match when those rule elements DO match. Use : prefix for a special group node that matches a single token at start of scope, and then defers to the child rules to perform full match -- this is used for FirstTokMap when there are multiple versions of a given keyword rule. 
Use - prefix for tokens anchored by the end (next token) instead of the previous one -- typically just for token prior to 'EOS' but also a block of tokens that need to go backward in the middle of a sequence to avoid ambiguity can be marked with -"}, {Name: "StackMatch", Doc: "if present, this rule only fires if stack has this on it"}, {Name: "Ast", Doc: "what action should be take for this node when it matches"}, {Name: "Acts", Doc: "actions to perform based on parsed Ast tree data, when this rule is done executing"}, {Name: "OptTokMap", Doc: "for group-level rules having lots of children and lots of recursiveness, and also of high-frequency, when we first encounter such a rule, make a map of all the tokens in the entire scope, and use that for a first-pass rejection on matching tokens"}, {Name: "FirstTokMap", Doc: "for group-level rules with a number of rules that match based on first tokens / keywords, build map to directly go to that rule -- must also organize all of these rules sequentially from the start -- if no match, goes directly to first non-lookup case"}, {Name: "Rules", Doc: "rule elements compiled from Rule string"}, {Name: "Order", Doc: "strategic matching order for matching the rules"}, {Name: "FiTokMap", Doc: "map from first tokens / keywords to rules for FirstTokMap case"}, {Name: "FiTokElseIdx", Doc: "for FirstTokMap, the start of the else cases not covered by the map"}, {Name: "ExclKeyIdx", Doc: "exclusionary key index -- this is the token in Rules that we need to exclude matches for using ExclFwd and ExclRev rules"}, {Name: "ExclFwd", Doc: "exclusionary forward-search rule elements compiled from Rule string"}, {Name: "ExclRev", Doc: "exclusionary reverse-search rule elements compiled from Rule string"}}, Instance: &Rule{}})
Functions ¶
This section is empty.
Types ¶
type Act ¶
type Act struct { // at what point during sequence of sub-rules / tokens should this action be run? -1 = at end, 0 = before first rule, 1 = before second rule, etc -- must be at point when relevant Ast nodes have been added, but for scope setting, must be early enough so that scope is present RunIdx int // what action to perform Act Actions // Ast path, relative to current node: empty = current node; specifies a child node by index, and a name specifies it by name -- include name/name for sub-nodes etc -- multiple path options can be specified by | or & and will be tried in order until one succeeds (for |) or all that succeed will be used for &. ... means use all nodes with given name (only for change token) -- for PushStack, this is what to push on the stack Path string `width:"50"` // for ChgToken, the new token type to assign to token at given path Tok token.Tokens // for ChgToken, only change if token is this to start with (only if != None)) FmTok token.Tokens }
Act is one action to perform, operating on the Ast output
type Actions ¶
type Actions int32 //enums:enum
Actions are parsing actions to perform
const ( // ChgToken changes the token to the Tok specified in the Act action ChgToken Actions = iota // AddSymbol means add name as a symbol, using current scoping and token type // or the token specified in the Act action if != None AddSymbol // PushScope means look for an existing symbol of given name // to push onto current scope -- adding a new one if not found -- // does not add new item to overall symbol list. This is useful // for e.g., definitions of methods on a type, where this is not // the definition of the type itself. PushScope // PushNewScope means add a new symbol to the list and also push // onto scope stack, using given token type or the token specified // in the Act action if != None PushNewScope // PopScope means remove the most recently-added scope item PopScope // PopScopeReg means remove the most recently-added scope item, and also // updates the source region for that item based on final SrcReg from // corresponding Ast node -- for "definitional" scope PopScopeReg // AddDetail adds src at given path as detail info for the last-added symbol // if there is already something there, a space is added for this new addition AddDetail // AddType Adds a type with the given name -- sets the Ast node for this rule // and actual type is resolved later in a second language-specific pass AddType // PushStack adds name to stack -- provides context-sensitivity option for // optimizing and ambiguity resolution PushStack // PopStack pops the stack PopStack )
The parsing acts
const ActionsN Actions = 10
ActionsN is the highest valid value for type Actions, plus one.
func ActionsValues ¶
func ActionsValues() []Actions
ActionsValues returns all possible values for the type Actions.
func (Actions) MarshalText ¶
MarshalText implements the encoding.TextMarshaler interface.
func (*Actions) SetString ¶
SetString sets the Actions value from its string representation, and returns an error if the string is invalid.
func (*Actions) UnmarshalText ¶
UnmarshalText implements the encoding.TextUnmarshaler interface.
type Ast ¶
type Ast struct { ki.Node // region in source lexical tokens corresponding to this Ast node -- Ch = index in lex lines TokReg lex.Reg `set:"-"` // region in source file corresponding to this Ast node SrcReg lex.Reg `set:"-"` // source code corresponding to this Ast node Src string `set:"-"` // stack of symbols created for this node Syms syms.SymStack `set:"-"` }
Ast is a node in the abstract syntax tree generated by the parsing step. The name of the node (from ki.Node) is the type of the element (e.g., expr, stmt, etc). These nodes are generated by the parse.Rule's by matching tokens.
func NewAst ¶
NewAst adds a new Ast with the given name to the given parent: Ast is a node in the abstract syntax tree generated by the parsing step. The name of the node (from ki.Node) is the type of the element (e.g., expr, stmt, etc). These nodes are generated by the parse.Rule's by matching tokens.
func (*Ast) ChildAst ¶
ChildAst returns the Child at given index as an Ast. Will panic if index is invalid -- use Try if unsure.
func (*Ast) NextSiblingAst ¶
NextSiblingAst returns the next sibling node in the Ast tree, or nil if none
func (*Ast) SetTokRegEnd ¶
SetTokRegEnd updates the ending token region to given position -- token regions are typically over-extended and get narrowed as tokens actually match
type AstActs ¶
type AstActs int32 //enums:enum
AstActs are actions to perform on the Ast nodes
const ( // NoAst means don't create an Ast node for this rule NoAst AstActs = iota // AddAst means create an Ast node for this rule, adding it to the current anchor Ast. // Any sub-rules within this rule are *not* added as children of this node -- see // SubAst and AnchorAst. This is good for token-only terminal nodes and list elements // that should be added to a list. AddAst // SubAst means create an Ast node and add all the elements of *this rule* as // children of this new node (including sub-rules), *except* for the very last rule // which is assumed to be a recursive rule -- that one goes back up to the parent node. // This is good for adding more complex elements with sub-rules to a recursive list, // without creating a new hierarchical depth level for every such element. SubAst // AnchorAst means create an Ast node and set it as the anchor that subsequent // sub-nodes are added into. This is for a new hierarchical depth level // where everything under this rule gets organized. AnchorAst // AnchorFirstAst means create an Ast node and set it as the anchor that subsequent // sub-nodes are added into, *only* if this is the first time that this rule has // matched within the current sequence (i.e., if the parent of this rule is the same // rule then don't add a new Ast node). This is good for starting a new list // of recursively-defined elements, without creating increasing depth levels. AnchorFirstAst )
The Ast actions
const AstActsN AstActs = 5
AstActsN is the highest valid value for type AstActs, plus one.
func AstActsValues ¶
func AstActsValues() []AstActs
AstActsValues returns all possible values for the type AstActs.
func (AstActs) MarshalText ¶
MarshalText implements the encoding.TextMarshaler interface.
func (*AstActs) SetString ¶
SetString sets the AstActs value from its string representation, and returns an error if the string is invalid.
func (*AstActs) UnmarshalText ¶
UnmarshalText implements the encoding.TextUnmarshaler interface.
type MatchStack ¶
type MatchStack []MatchState
MatchStack is the stack of rules that matched or ran for each token point
func (*MatchStack) Add ¶
func (rs *MatchStack) Add(pr *Rule, scope lex.Reg, regs Matches)
Add given rule to stack
func (*MatchStack) Find ¶
func (rs *MatchStack) Find(pr *Rule, scope lex.Reg) (*MatchState, bool)
Find looks for given rule and scope on the stack
type MatchState ¶
type MatchState struct { // rule that either matched or ran here Rule *Rule // scope for match Scope lex.Reg // regions of match for each sub-region Regs Matches }
MatchState holds state info for rules that matched, recorded at starting position of match
type Matches ¶
Matches encodes the regions of each match, Err for no match
type Parser ¶
type Parser interface { ki.Ki // Compile compiles string rules into their runnable elements Compile(ps *State) bool // Validate checks for any errors in the rules and issues warnings, // returns true if valid (no err) and false if invalid (errs) Validate(ps *State) bool // Parse tries to apply rule to given input state, returns rule that matched or nil // par is the parent rule that we're being called from // ast is the current ast node that we add to Parse(ps *State, par *Rule, ast *Ast, scope lex.Reg, optMap lex.TokenMap, depth int) *Rule // AsParseRule returns object as a parse.Rule AsParseRule() *Rule }
Parser is the interface type for parsers -- likely not strictly necessary, except that it is essential for defining the BaseIface that the gui uses when making new nodes
type Rule ¶
type Rule struct { ki.Node // disable this rule -- useful for testing and exploration Off bool // description / comments about this rule Desc string // the rule as a space-separated list of rule names and token(s) -- use single quotes around 'tokens' (using token.Tokens names or symbols). For keywords use 'key:keyword'. All tokens are matched at the same nesting depth as the start of the scope of this rule, unless they have a +D relative depth value differential before the token. Use @ prefix for a sub-rule to require that rule to match -- by default explicit tokens are used if available, and then only the first sub-rule failing that. Use ! by itself to define start of an exclusionary rule -- doesn't match when those rule elements DO match. Use : prefix for a special group node that matches a single token at start of scope, and then defers to the child rules to perform full match -- this is used for FirstTokMap when there are multiple versions of a given keyword rule. Use - prefix for tokens anchored by the end (next token) instead of the previous one -- typically just for token prior to 'EOS' but also a block of tokens that need to go backward in the middle of a sequence to avoid ambiguity can be marked with - Rule string // if present, this rule only fires if stack has this on it StackMatch string // what action should be take for this node when it matches Ast AstActs // actions to perform based on parsed Ast tree data, when this rule is done executing Acts Acts // for group-level rules having lots of children and lots of recursiveness, and also of high-frequency, when we first encounter such a rule, make a map of all the tokens in the entire scope, and use that for a first-pass rejection on matching tokens OptTokMap bool // for group-level rules with a number of rules that match based on first tokens / keywords, build map to directly go to that rule -- must also organize all of these rules sequentially from the start -- if no match, goes directly to first 
non-lookup case FirstTokMap bool // rule elements compiled from Rule string Rules RuleList `json:"-" xml:"-"` // strategic matching order for matching the rules Order []int `edit:"-" json:"-" xml:"-"` // map from first tokens / keywords to rules for FirstTokMap case FiTokMap map[string]*Rule `edit:"-" json:"-" xml:"-"` // for FirstTokMap, the start of the else cases not covered by the map FiTokElseIdx int `edit:"-" json:"-" xml:"-"` // exclusionary key index -- this is the token in Rules that we need to exclude matches for using ExclFwd and ExclRev rules ExclKeyIdx int `edit:"-" json:"-" xml:"-"` // exclusionary forward-search rule elements compiled from Rule string ExclFwd RuleList `edit:"-" json:"-" xml:"-"` // exclusionary reverse-search rule elements compiled from Rule string ExclRev RuleList `edit:"-" json:"-" xml:"-"` }
The first step is matching which searches in order for matches within the children of parent nodes, and for explicit rule nodes, it looks first through all the explicit tokens in the rule. If there are no explicit tokens then matching defers to ONLY the first node listed by default -- you can add a @ prefix to indicate a rule that is also essential to match.
After a rule matches, it then proceeds through the rules narrowing the scope and calling the sub-nodes.
func NewRule ¶
NewRule adds a new Rule with the given name to the given parent: The first step is matching which searches in order for matches within the children of parent nodes, and for explicit rule nodes, it looks first through all the explicit tokens in the rule. If there are no explicit tokens then matching defers to ONLY the first node listed by default -- you can add a @ prefix to indicate a rule that is also essential to match.
After a rule matches, it then proceeds through the rules narrowing the scope and calling the sub-nodes.
func (*Rule) AsParseRule ¶
func (*Rule) Compile ¶
Compile compiles string rules into their runnable elements. Returns true if everything is ok, false if there were compile errors.
func (*Rule) CompileAll ¶
CompileAll is called on the top-level Rule to compile all nodes -- it calls SetRuleMap first. Returns true if everything is ok, false if there were compile errors.
func (*Rule) CompileExcl ¶
CompileExcl compiles exclusionary rules starting at given point -- currently only working for single-token matching rule
func (*Rule) CompileTokMap ¶
CompileTokMap compiles first token map
func (*Rule) DoActs ¶
DoActs performs actions at given point in rule execution (ri = rule index, is -1 at end)
func (*Rule) DoRules ¶
func (pr *Rule) DoRules(ps *State, par *Rule, parAst *Ast, scope lex.Reg, mpos Matches, optMap lex.TokenMap, depth int) bool
DoRules after we have matched, goes through rest of the rules -- returns false if there were any issues encountered
func (*Rule) DoRulesRevBinExp ¶
func (pr *Rule) DoRulesRevBinExp(ps *State, par *Rule, parAst *Ast, scope lex.Reg, mpos Matches, ourAst *Ast, optMap lex.TokenMap, depth int) bool
DoRulesRevBinExp is the reverse version of DoRules for a binary expression rule with one key token in the middle -- we just pay attention to scoping the rest of the sub-rules relative to that, and don't otherwise adjust scope or position. In particular, all the position updating taking place in sub-rules is then just ignored and we set the position to the end position matched by the "last" rule (which was the first processed)
func (*Rule) Find ¶
Find looks for rules in the tree that contain given string in Rule or Name fields
func (*Rule) Match ¶
func (pr *Rule) Match(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, lex.Reg, Matches)
Match attempts to match the rule, returns true if it matches, and the match positions, along with any update to the scope
func (*Rule) MatchExclude ¶
func (pr *Rule) MatchExclude(ps *State, scope lex.Reg, ktpos lex.Reg, depth int, optMap lex.TokenMap) bool
MatchExclude looks for matches of exclusion tokens -- if found, they exclude this rule. Return is true if exclude matches and rule should be excluded.
func (*Rule) MatchGroup ¶
func (pr *Rule) MatchGroup(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, lex.Reg, Matches)
MatchGroup does matching for Group rules
func (*Rule) MatchMixed ¶
func (pr *Rule) MatchMixed(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, Matches)
MatchMixed matches mixed tokens and non-tokens
func (*Rule) MatchNoToks ¶
func (pr *Rule) MatchNoToks(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, Matches)
MatchNoToks matches NoToks case -- just does single sub-rule match
func (*Rule) MatchOnlyToks ¶
func (pr *Rule) MatchOnlyToks(ps *State, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, Matches)
MatchOnlyToks matches rules having only tokens
func (*Rule) MatchToken ¶
func (pr *Rule) MatchToken(ps *State, rr *RuleEl, ri int, kt token.KeyToken, creg *lex.Reg, mpos Matches, parAst *Ast, scope lex.Reg, depth int, optMap lex.TokenMap) (bool, lex.Pos)
MatchToken matches one token sub-rule -- returns true for match and false if no match -- and the position where it was / should have been
func (*Rule) OptimizeOrder ¶
OptimizeOrder optimizes the order of processing rule elements, including: * A block of reversed elements that match from next
func (*Rule) Parse ¶
func (pr *Rule) Parse(ps *State, par *Rule, parAst *Ast, scope lex.Reg, optMap lex.TokenMap, depth int) *Rule
Parse tries to apply rule to given input state, returns rule that matched or nil. par is the parent rule that we're being called from. parAst is the current ast node that we add to. scope is the region to search within, defined by parent or EOS if we have a terminal one
func (*Rule) ParseRules ¶
func (pr *Rule) ParseRules(ps *State, par *Rule, parAst *Ast, scope lex.Reg, optMap lex.TokenMap, depth int) *Rule
ParseRules parses rules and returns this rule if it matches, nil if not
func (*Rule) Scope ¶
Scope finds the potential scope region for looking for tokens -- either from EOS position or State ScopeStack pushed from parents. Returns new scope and false if no valid scope found.
func (*Rule) SetActs ¶
SetActs sets the [Rule.Acts]: actions to perform based on parsed Ast tree data, when this rule is done executing
func (*Rule) SetAst ¶
SetAst sets the [Rule.Ast]: what action should be taken for this node when it matches
func (*Rule) SetExclFwd ¶
SetExclFwd sets the [Rule.ExclFwd]: exclusionary forward-search rule elements compiled from Rule string
func (*Rule) SetExclKeyIdx ¶
SetExclKeyIdx sets the [Rule.ExclKeyIdx]: exclusionary key index -- this is the token in Rules that we need to exclude matches for using ExclFwd and ExclRev rules
func (*Rule) SetExclRev ¶
SetExclRev sets the [Rule.ExclRev]: exclusionary reverse-search rule elements compiled from Rule string
func (*Rule) SetFiTokElseIdx ¶
SetFiTokElseIdx sets the [Rule.FiTokElseIdx]: for FirstTokMap, the start of the else cases not covered by the map
func (*Rule) SetFiTokMap ¶
SetFiTokMap sets the [Rule.FiTokMap]: map from first tokens / keywords to rules for FirstTokMap case
func (*Rule) SetFirstTokMap ¶
SetFirstTokMap sets the [Rule.FirstTokMap]: for group-level rules with a number of rules that match based on first tokens / keywords, build map to directly go to that rule -- must also organize all of these rules sequentially from the start -- if no match, goes directly to first non-lookup case
func (*Rule) SetOff ¶
SetOff sets the [Rule.Off]: disable this rule -- useful for testing and exploration
func (*Rule) SetOptTokMap ¶
SetOptTokMap sets the [Rule.OptTokMap]: for group-level rules having lots of children and lots of recursiveness, and also of high-frequency, when we first encounter such a rule, make a map of all the tokens in the entire scope, and use that for a first-pass rejection on matching tokens
func (*Rule) SetOrder ¶
SetOrder sets the [Rule.Order]: strategic matching order for matching the rules
func (*Rule) SetRule ¶
SetRule sets the [Rule.Rule]: the rule as a space-separated list of rule names and token(s) -- use single quotes around 'tokens' (using token.Tokens names or symbols). For keywords use 'key:keyword'. All tokens are matched at the same nesting depth as the start of the scope of this rule, unless they have a +D relative depth value differential before the token. Use @ prefix for a sub-rule to require that rule to match -- by default explicit tokens are used if available, and then only the first sub-rule failing that. Use ! by itself to define start of an exclusionary rule -- doesn't match when those rule elements DO match. Use : prefix for a special group node that matches a single token at start of scope, and then defers to the child rules to perform full match -- this is used for FirstTokMap when there are multiple versions of a given keyword rule. Use - prefix for tokens anchored by the end (next token) instead of the previous one -- typically just for token prior to 'EOS' but also a block of tokens that need to go backward in the middle of a sequence to avoid ambiguity can be marked with -
func (*Rule) SetRuleMap ¶
SetRuleMap is called on the top-level Rule and initializes the RuleMap
func (*Rule) SetStackMatch ¶
SetStackMatch sets the [Rule.StackMatch]: if present, this rule only fires if stack has this on it
func (*Rule) StartParse ¶
StartParse is called on the root of the parse rule tree to start the parsing process
type RuleEl ¶
type RuleEl struct { // sub-rule for this position -- nil if token Rule *Rule // token, None if rule Tok token.KeyToken // start increment for matching -- this is the number of non-optional, non-match items between (start | last match) and this item -- increments start region for matching StInc int // if true, this rule must match for rule to fire -- by default only tokens and, failing that, the first sub-rule is used for matching -- use @ to require a match Match bool // this rule is optional -- will absorb tokens if they exist -- indicated with ? prefix Opt bool // match this rule working backward from the next token -- triggered by - (minus) prefix and optimizes cases where there can be a lot of tokens going forward but few going from end -- must be anchored by a terminal EOS or other FmNext elements and is ignored if at the very end FmNext bool }
RuleEl is an element of a parsing rule -- either a pointer to another rule or a token
type RuleFlags ¶
RuleFlags define bitflags for rule options compiled from rule syntax
const ( // SetsScope means that this rule sets its own scope, because it ends with EOS SetsScope RuleFlags = RuleFlags(ki.FlagsN) + iota // Reverse means that this rule runs in reverse (starts with - sign) -- for arithmetic // binary expressions only: this is needed to produce proper associativity result for // mathematical expressions in the recursive descent parser. // Only for rules of form: Expr '+' Expr -- two sub-rules with a token operator // in the middle. Reverse // NoToks means that this rule doesn't have any explicit tokens -- only refers to // other rules NoToks // OnlyToks means that this rule only has explicit tokens for matching -- can be // optimized OnlyToks // MatchEOS means that the rule ends with a *matched* EOS with StInc = 1. // SetsScope applies for optional and matching EOS rules alike. MatchEOS // MultiEOS means that the rule has multiple EOS tokens within it -- // changes some of the logic MultiEOS // TokMatchGroup is a group node that also has a single token match, so it can // be used in a FirstTokMap to optimize lookup of rules TokMatchGroup )
const RuleFlagsN RuleFlags = 8
RuleFlagsN is the highest valid value for type RuleFlags, plus one.
func RuleFlagsValues ¶
func RuleFlagsValues() []RuleFlags
RuleFlagsValues returns all possible values for the type RuleFlags.
func (RuleFlags) BitIndexString ¶
BitIndexString returns the string representation of this RuleFlags value if it is a bit index value (typically an enum constant), and not an actual bit flag value.
func (RuleFlags) MarshalText ¶
MarshalText implements the encoding.TextMarshaler interface.
func (*RuleFlags) SetFlag ¶
SetFlag sets the value of the given flags in these flags to the given value.
func (*RuleFlags) SetString ¶
SetString sets the RuleFlags value from its string representation, and returns an error if the string is invalid.
func (*RuleFlags) SetStringOr ¶
SetStringOr sets the RuleFlags value from its string representation while preserving any bit flags already set, and returns an error if the string is invalid.
func (*RuleFlags) UnmarshalText ¶
UnmarshalText implements the encoding.TextUnmarshaler interface.
type ScopeRuleSet ¶
type ScopeRuleSet map[ScopeRule]struct{}
ScopeRuleSet is a map by scope of RuleSets, for non-matching rules
type State ¶
type State struct { // source and lexed version of source we're parsing Src *lex.File `view:"no-inline"` // tracing for this parser Trace TraceOpts // root of the Ast abstract syntax tree we're updating Ast *Ast // symbol map that everything gets added to from current file of parsing -- typically best for subsequent management to just have a single outer-most scoping symbol here (e.g., in Go it is the package), and then everything is a child under that Syms syms.SymMap // stack of scope(s) added to FileSyms e.g., package, library, module-level elements of which this file is a part -- these are reset at the start and must be added by parsing actions within the file itself Scopes syms.SymStack // the current lex token position Pos lex.Pos // any error messages accumulated during parsing specifically Errs lex.ErrorList `view:"no-inline"` // rules that matched and ran at each point, in 1-to-1 correspondence with the Src.Lex tokens for the lines and char pos dims Matches [][]MatchStack `view:"no-inline"` // rules that did NOT match -- represented as a map by scope of a RuleSet NonMatches ScopeRuleSet `view:"no-inline"` // stack for context-sensitive rules Stack lex.Stack `view:"no-inline"` }
parse.State is the state maintained for parsing
func (*State) AddNonMatch ¶
AddNonMatch adds given rule to non-matching rule set for this scope
func (*State) AllocRules ¶
func (ps *State) AllocRules()
AllocRules allocates the match and non-match rule state in correspondence with the src state
func (*State) AtEof ¶
AtEof returns true if current position is at end of file -- this includes common situation where it is just at the very last token
func (*State) AtEofNext ¶
AtEofNext returns true if current OR NEXT position is at end of file -- this includes common situation where it is just at the very last token
func (*State) FindNameScoped ¶
FindNameScoped searches top-down in the stack for something with the given name in symbols that are of subcategory token.NameScope (i.e., namespace, module, package, library). It also looks in ps.Syms if not found in the Scope stack.
func (*State) FindNameTopScope ¶
FindNameTopScope searches only in the top of the current scope for something
with the given name in symbols.
It also looks in ps.Syms if not found in the Scope stack.
func (*State) FindToken ¶
FindToken looks for token in given region, returns position where found, false if not. Only matches when the depth is the same as at reg.St, where the search starts. All positions are in token indexes.
func (*State) FindTokenReverse ¶
FindTokenReverse looks *backwards* for token in given region, with same depth as reg.Ed-1 end where the search starts. Returns position where found, false if not. Automatically deals with possible confusion with unary operators -- if there are two ambiguous operators in a row, automatically gets the first one. This is mainly / only used for binary operator expressions (mathematical binary operators). All positions are in token indexes.
func (*State) IsMatch ¶
IsMatch looks for rule at given scope in list of matches, if found returns match state info
func (*State) IsNonMatch ¶
IsNonMatch looks for rule in nonmatch list at given scope
func (*State) MatchToken ¶
MatchToken returns true if token matches at given position -- must be a valid position!
func (*State) NextSrcLine ¶
NextSrcLine returns the next line of text
func (*State) ResetNonMatches ¶
func (ps *State) ResetNonMatches()
ResetNonMatches resets the non-match map -- do after every EOS
func (*State) RuleString ¶
RuleString returns the rule info for entire source -- if full then it includes the full stack at each point -- otherwise just the top of stack
type Steps ¶
type Steps int32 //enums:enum
Steps are the different steps of the parsing processing
const ( // Match happens when a rule matches Match Steps = iota // SubMatch is when a sub-rule within a rule matches SubMatch // NoMatch is when the rule fails to match (recorded at first non-match, which terminates // matching process) NoMatch // Run is when the rule is running and iterating through its sub-rules Run // RunAct is when the rule is running and performing actions RunAct )
The parsing steps
const StepsN Steps = 5
StepsN is the highest valid value for type Steps, plus one.
func StepsValues ¶
func StepsValues() []Steps
StepsValues returns all possible values for the type Steps.
func (Steps) MarshalText ¶
MarshalText implements the encoding.TextMarshaler interface.
func (*Steps) SetString ¶
SetString sets the Steps value from its string representation, and returns an error if the string is invalid.
func (*Steps) UnmarshalText ¶
UnmarshalText implements the encoding.TextUnmarshaler interface.
type TraceOpts ¶
type TraceOpts struct { // perform tracing On bool // trace specific named rules here (space separated) -- if blank, then all rules are traced Rules string `width:"50"` // trace full rule matches -- when a rule fully matches Match bool // trace sub-rule matches -- when the parts of each rule match SubMatch bool // trace sub-rule non-matches -- why a rule doesn't match -- which terminates the matching process at first non-match (can be a lot of info) NoMatch bool // trace progress running through each of the sub-rules when a rule has matched and is 'running' Run bool // trace actions performed by running rules RunAct bool // if true, shows the full scope source for every trace statement ScopeSrc bool // for the ParseOut display, whether to display the full stack of rules at each position, or just the deepest one FullStackOut bool // list of rules RulesList []string `view:"-" json:"-" xml:"-"` // trace output is written here, connected via os.Pipe to OutRead OutWrite *os.File `view:"-" json:"-" xml:"-"` // trace output is read here -- can connect this to a TextBuf via giv.OutBuf to monitor tracing output OutRead *os.File `view:"-" json:"-" xml:"-"` }
TraceOpts provides options for debugging / monitoring the rule matching and execution process
func (*TraceOpts) Init ¶
func (pt *TraceOpts) Init()
Init initializes the tracer after any changes -- opens pipe if not already open
func (*TraceOpts) Out ¶
func (pt *TraceOpts) Out(ps *State, pr *Rule, step Steps, pos lex.Pos, scope lex.Reg, ast *Ast, msg string) bool
Out outputs a trace message -- returns true if actually output