Documentation ¶
Overview ¶
Package lexer provides all the lexing functions that transform text into lexical tokens, using token types defined in the token package. It also provides the basic file source and position / region management functionality.
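For orientation, here is a minimal sketch of loading a source string into a File using only functions documented on this page. The import paths are assumed from the Cogent Core source tree and may differ in your setup; fileinfo.Go is assumed to be a valid Known value.

package main

import (
	"fmt"

	"cogentcore.org/core/base/fileinfo" // assumed import path
	"cogentcore.org/core/parse/lexer"   // assumed import path
)

func main() {
	var fl lexer.File
	// InitFromString returns false for an empty source string.
	if fl.InitFromString("x := 1\ny := 2\n", "example.go", fileinfo.Go) {
		fmt.Println(fl.NLines())   // number of source lines
		fmt.Println(fl.SrcLine(0)) // first line back as a string
	}
}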
Index ¶
- Variables
- func BracePair(r rune) (match rune, right bool)
- func BracketIndentLine(src [][]rune, tags []Line, ln int, tabSz int) (pInd, delInd, pLn int, ichr indent.Character)
- func DigitValue(ch rune) int
- func FirstNonSpaceRune(src []rune) int
- func FirstWord(str string) string
- func FirstWordApostrophe(str string) string
- func FirstWordDigits(str string) string
- func HasUpperCase(str string) bool
- func InnerBracketScope(str string, brl, brr string) string
- func IsDigit(ch rune) bool
- func IsLetter(ch rune) bool
- func IsLetterOrDigit(ch rune) bool
- func IsWhiteSpace(ch rune) bool
- func LastField(str string) string
- func LastNonSpaceRune(src []rune) int
- func LastScopedString(str string) string
- func LineIndent(src []rune, tabSz int) (ind int, ichr indent.Character)
- func LineStartEndBracket(src []rune, tags Line) (start, end bool)
- func MarkupPathsAsLinks(flds []string, maxFlds int) (orig, link []byte)
- func MatchCase(src, trg string) string
- func OpenFileBytes(fname string) ([]byte, error)
- func PrevLineIndent(src [][]rune, tags []Line, ln int, tabSz int) (ind, pln int, ichr indent.Character)
- func PrintError(w io.Writer, err error)
- func RunesFromBytes(b []byte) [][]rune
- func RunesFromString(str string) [][]rune
- func TrimLeftToAlpha(nm string) string
- type Actions
- func (i Actions) Desc() string
- func (i Actions) Int64() int64
- func (i Actions) MarshalText() ([]byte, error)
- func (i *Actions) SetInt64(in int64)
- func (i *Actions) SetString(s string) error
- func (i Actions) String() string
- func (i *Actions) UnmarshalText(text []byte) error
- func (i Actions) Values() []enums.Enum
- type EosPos
- type Error
- type ErrorList
- func (p *ErrorList) Add(pos Pos, fname, msg string, srcln string, rule tree.Node) *Error
- func (p ErrorList) Err() error
- func (p ErrorList) Error() string
- func (p ErrorList) Len() int
- func (p ErrorList) Less(i, j int) bool
- func (p *ErrorList) RemoveMultiples()
- func (p ErrorList) Report(maxN int, basepath string, showSrc, showRule bool) string
- func (p *ErrorList) Reset()
- func (p ErrorList) Sort()
- func (p ErrorList) Swap(i, j int)
- type File
- func (fl *File) AllocLines()
- func (fl *File) EnsureFinalEos(ln int)
- func (fl *File) InitFromLine(sfl *File, ln int) bool
- func (fl *File) InitFromString(str string, fname string, known fileinfo.Known) bool
- func (fl *File) InsertEos(cp Pos) Pos
- func (fl *File) IsLexPosValid(pos Pos) bool
- func (fl *File) LexAt(cp Pos) *Lex
- func (fl *File) LexAtSafe(cp Pos) Lex
- func (fl *File) LexLine(ln int) Line
- func (fl *File) LexTagSrc() string
- func (fl *File) LexTagSrcLn(ln int) string
- func (fl *File) LinesDeleted(stln, edln int)
- func (fl *File) LinesInserted(stln, nlns int)
- func (fl *File) NLines() int
- func (fl *File) NTokens(ln int) int
- func (fl *File) NextEos(stpos Pos, depth int) (Pos, bool)
- func (fl *File) NextEosAnyDepth(stpos Pos) (Pos, bool)
- func (fl *File) NextTokenPos(pos Pos) (Pos, bool)
- func (fl *File) OpenFile(fname string) error
- func (fl *File) PrevDepth(ln int) int
- func (fl *File) PrevStack(ln int) Stack
- func (fl *File) PrevTokenPos(pos Pos) (Pos, bool)
- func (fl *File) RegSrc(reg Reg) string
- func (fl *File) ReplaceEos(cp Pos)
- func (fl *File) SetBytes(txt []byte)
- func (fl *File) SetLine(ln int, lexs, comments Line, stack Stack)
- func (fl *File) SetLineSrc(ln int, txt []rune) bool
- func (fl *File) SetSrc(src [][]rune, fname, basepath string, known fileinfo.Known)
- func (fl *File) SrcLine(ln int) string
- func (fl *File) Token(pos Pos) token.KeyToken
- func (fl *File) TokenMapReg(reg Reg) TokenMap
- func (fl *File) TokenRegSrc(reg Reg) string
- func (fl *File) TokenSrc(pos Pos) []rune
- func (fl *File) TokenSrcPos(pos Pos) Reg
- func (fl *File) TokenSrcReg(reg Reg) Reg
- func (fl *File) ValidTokenPos(pos Pos) (Pos, bool)
- type LanguageLexer
- type Lex
- type Line
- func (ll *Line) Add(lx Lex)
- func (ll *Line) AddLex(tok token.KeyToken, st, ed int) *Lex
- func (ll *Line) AddSort(lx Lex)
- func (ll *Line) AtPos(pos int) (*Lex, int)
- func (ll *Line) Clone() Line
- func (ll *Line) DeleteIndex(idx int)
- func (ll *Line) DeleteToken(tok token.Tokens)
- func (ll *Line) Insert(idx int, lx Lex)
- func (ll *Line) NonCodeWords(src []rune) Line
- func (ll *Line) RuneStrings(rstr []rune) []string
- func (ll *Line) Sort()
- func (ll *Line) String() string
- func (ll *Line) Strings(src []rune) []string
- func (ll *Line) TagSrc(src []rune) string
- type MatchPos
- func (i MatchPos) Desc() string
- func (i MatchPos) Int64() int64
- func (i MatchPos) MarshalText() ([]byte, error)
- func (i *MatchPos) SetInt64(in int64)
- func (i *MatchPos) SetString(s string) error
- func (i MatchPos) String() string
- func (i *MatchPos) UnmarshalText(text []byte) error
- func (i MatchPos) Values() []enums.Enum
- type Matches
- func (i Matches) Desc() string
- func (i Matches) Int64() int64
- func (i Matches) MarshalText() ([]byte, error)
- func (i *Matches) SetInt64(in int64)
- func (i *Matches) SetString(s string) error
- func (i Matches) String() string
- func (i *Matches) UnmarshalText(text []byte) error
- func (i Matches) Values() []enums.Enum
- type PassTwo
- func (pt *PassTwo) EosDetect(ts *TwoState)
- func (pt *PassTwo) EosDetectPos(ts *TwoState, pos Pos, nln int)
- func (pt *PassTwo) Error(ts *TwoState, msg string)
- func (pt *PassTwo) HasErrs(ts *TwoState) bool
- func (pt *PassTwo) MismatchError(ts *TwoState, tok token.Tokens)
- func (pt *PassTwo) NestDepth(ts *TwoState)
- func (pt *PassTwo) NestDepthLine(line Line, initDepth int)
- func (pt *PassTwo) PopNest(ts *TwoState, tok token.Tokens)
- func (pt *PassTwo) PushNest(ts *TwoState, tok token.Tokens)
- type Pos
- type Reg
- type Rule
- func (lr *Rule) Compile(ls *State) bool
- func (lr *Rule) CompileAll(ls *State) bool
- func (lr *Rule) CompileNameMap(ls *State) bool
- func (lr *Rule) ComputeMatchLen(ls *State)
- func (lr *Rule) DoAct(ls *State, act Actions, tok *token.KeyToken)
- func (lr *Rule) Find(find string) []*Rule
- func (lr *Rule) IsMatch(ls *State) bool
- func (lr *Rule) IsMatchPos(ls *State) bool
- func (lr *Rule) Lex(ls *State) *Rule
- func (lr *Rule) LexStart(ls *State) *Rule
- func (t *Rule) SetActs(v ...Actions) *Rule
- func (t *Rule) SetDesc(v string) *Rule
- func (t *Rule) SetMatch(v Matches) *Rule
- func (t *Rule) SetMatchLen(v int) *Rule
- func (t *Rule) SetNameMap(v bool) *Rule
- func (t *Rule) SetNmMap(v map[string]*Rule) *Rule
- func (t *Rule) SetOff(v bool) *Rule
- func (t *Rule) SetOffset(v int) *Rule
- func (t *Rule) SetPos(v MatchPos) *Rule
- func (t *Rule) SetPushState(v string) *Rule
- func (t *Rule) SetSizeAdj(v int) *Rule
- func (t *Rule) SetString(v string) *Rule
- func (t *Rule) SetToken(v token.Tokens) *Rule
- func (t *Rule) SetUntil(v string) *Rule
- func (lr *Rule) TargetLen(ls *State) int
- func (lr *Rule) Validate(ls *State) bool
- func (lr *Rule) WriteGrammar(writer io.Writer, depth int)
- type Stack
- type State
- func (ls *State) Add(tok token.KeyToken, st, ed int)
- func (ls *State) AtEol() bool
- func (ls *State) CurRune() bool
- func (ls *State) CurState() string
- func (ls *State) Error(pos int, msg string, rule *Rule)
- func (ls *State) Init()
- func (ls *State) LineString() string
- func (ls *State) MatchState(st string) bool
- func (ls *State) Next(inc int) bool
- func (ls *State) NextRune() bool
- func (ls *State) NextSrcLine() string
- func (ls *State) PopState() string
- func (ls *State) PushState(st string)
- func (ls *State) ReadEscape(quote rune) bool
- func (ls *State) ReadName()
- func (ls *State) ReadNameTmp(off int) string
- func (ls *State) ReadNumber() token.Tokens
- func (ls *State) ReadQuoted()
- func (ls *State) ReadUntil(until string)
- func (ls *State) Rune(off int) (rune, bool)
- func (ls *State) ScanMantissa(base int)
- func (ls *State) SetLine(src []rune)
- func (ls *State) String(off, sz int) (string, bool)
- type TokenMap
- type TwoState
Constants ¶
This section is empty.
Variables ¶
var PosErr = Pos{-1, -1}
PosErr represents an error text position (-1 for both line and char), used as a return value for cases where an error position needs to be signaled
var PosZero = Pos{}
PosZero is the uninitialized zero text position (which is still a valid position)
var RegZero = Reg{}
RegZero is the zero region
var Trace = false
Trace is whether to print debug trace info.
Functions ¶
func BracePair ¶
BracePair returns the matching brace-like punctuation for the given rune, which must be a left or right brace {}, bracket [] or paren (). It also returns true if the rune is a *right* brace.
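A small usage sketch (import path assumed; the expected results in the comments follow from the description above):

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	match, right := lexer.BracePair('}')
	fmt.Printf("%c %v\n", match, right) // expect: { true (a right brace)
	match, right = lexer.BracePair('(')
	fmt.Printf("%c %v\n", match, right) // expect: ) false (a left paren)
}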
func BracketIndentLine ¶
func BracketIndentLine(src [][]rune, tags []Line, ln int, tabSz int) (pInd, delInd, pLn int, ichr indent.Character)
BracketIndentLine returns the indentation level for the given line based on the previous line's indentation level, and any delta change based on brackets starting or ending the previous or current line. The indent level is in increments of tabSz for spaces, and in tabs for tabs. Operates on rune source with markup lex tags per line.
func DigitValue ¶
func FirstNonSpaceRune ¶
FirstNonSpaceRune returns the index of the first non-space rune, or -1 if not found
func FirstWord ¶
FirstWord returns the first contiguous sequence of purely unicode.IsLetter runes within the given string. It skips over any leading non-letters until a letter is found. Note that this function does not include numbers. For that, you can use the FirstWordDigits function.
func FirstWordApostrophe ¶
FirstWordApostrophe returns the first contiguous sequence of purely unicode.IsLetter runes that can also contain an apostrophe within the word but not at the end.
func FirstWordDigits ¶
FirstWordDigits returns the first contiguous sequence of purely IsLetterOrDigit runes within the given string. It skips over any leading non-letters until a letter (not digit) is found.
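A sketch contrasting FirstWord and FirstWordDigits (import path assumed; the expected outputs in the comments follow from the descriptions above):

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	// leading digits are skipped; the word ends at the first digit
	fmt.Println(lexer.FirstWord("123 abc42 def")) // expect: abc
	// digits are allowed after the first letter
	fmt.Println(lexer.FirstWordDigits("123 abc42 def")) // expect: abc42
}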
func HasUpperCase ¶
HasUpperCase returns true if string has an upper-case letter
func InnerBracketScope ¶
InnerBracketScope returns the inner scope for a given bracket type if it is imbalanced. It is important to do completion based on just that inner scope if that is where the user currently is.
func IsLetterOrDigit ¶
func IsWhiteSpace ¶
func LastNonSpaceRune ¶
LastNonSpaceRune returns the index of the last non-space rune, or -1 if not found
func LastScopedString ¶
LastScopedString returns the last white-space-separated and bracket-enclosed string from the given string.
func LineIndent ¶
LineIndent returns the number of tabs or spaces at the start of the given rune-line, based on the target tab-size (only relevant for spaces). If the line starts with tabs, those are counted; otherwise spaces are. Combinations of tabs and spaces won't produce sensible results.
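A sketch on tab- and space-indented lines (import path assumed; ichr identifies whether tabs or spaces were counted):

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	// two tabs => indent level 2, tab indent character
	ind, ichr := lexer.LineIndent([]rune("\t\tx := 1"), 4)
	fmt.Println(ind, ichr)

	// eight spaces at tabSz = 4 => indent level 2, space indent character
	ind, ichr = lexer.LineIndent([]rune("        y := 2"), 4)
	fmt.Println(ind, ichr)
}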
func LineStartEndBracket ¶
LineStartEndBracket checks if line starts with a closing bracket or ends with an opening bracket. This is used for auto-indent for example. Bracket is Paren, Bracket, or Brace.
func MarkupPathsAsLinks ¶
MarkupPathsAsLinks checks for strings that look like file paths / URLs and returns the original fields as a byte slice, along with a marked-up version with HTML link markup for the files (as <a href="file:///..."). Input is already field-parsed, and maxFlds is the maximum number of fields to look for file paths in (e.g., 2 is a reasonable default, to avoid false-alarm matches later in the text). This is mainly used for marking up output from commands, for example.
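A hedged sketch of marking up a compiler-style output line; the exact link markup produced is as described above, and the import path is assumed:

package main

import (
	"fmt"
	"strings"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	flds := strings.Fields("main.go:12:3: undefined: foo")
	orig, link := lexer.MarkupPathsAsLinks(flds, 2) // only check the first 2 fields
	fmt.Println(string(orig))
	fmt.Println(string(link))
}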
func MatchCase ¶
MatchCase uses the source string's case (upper / lower) to set the corresponding case in the target string, returning that string.
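A sketch of the intended use; the exact per-rune behavior is inferred from the description above, so treat the expected output as illustrative:

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	// transfer the case pattern of "Hello" onto "there"
	fmt.Println(lexer.MatchCase("Hello", "there")) // expect: There
}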
func OpenFileBytes ¶
OpenFileBytes returns the bytes in the given file, logging any errors as well
func PrevLineIndent ¶
func PrevLineIndent(src [][]rune, tags []Line, ln int, tabSz int) (ind, pln int, ichr indent.Character)
PrevLineIndent returns the indentation level of the previous line that has indentation, starting from the given line and skipping blank lines. Returns the indent level, the previous line number, and the indent character. The indent level is in increments of tabSz for spaces, and in tabs for tabs. Operates on rune source with markup lex tags per line.
func PrintError ¶
PrintError is a utility function that prints a list of errors to w, one error per line, if the err parameter is an ErrorList. Otherwise it prints the err string.
func RunesFromBytes ¶
RunesFromBytes returns the lines of runes from a basic byte array
func RunesFromString ¶
RunesFromString returns the lines of runes from a string (more efficient than converting to bytes)
func TrimLeftToAlpha ¶
TrimLeftToAlpha returns string without any leading non-alpha runes.
Types ¶
type Actions ¶
type Actions int32 //enums:enum
Actions are lexing actions to perform
const (
	// Next means advance input position to the next character(s) after the matched characters
	Next Actions = iota

	// Name means read in an entire name, which is letters, _ and digits after first letter --
	// position will be advanced to just after
	Name

	// Number means read in an entire number -- the token type will automatically be
	// set to the actual type of number that was read in, and position advanced to just after
	Number

	// Quoted means read in an entire string enclosed in the quote delimiter
	// that is present at the current position, with proper skipping of escaped quotes.
	// Position advanced to just after
	Quoted

	// QuotedRaw means read in an entire string enclosed in the quote delimiter
	// that is present at the start position, with proper skipping of escaped quotes.
	// Position advanced to just after.
	// Raw version supports multi-line and includes CR etc at end of lines (e.g., back-tick
	// in various languages)
	QuotedRaw

	// EOL means read till the end of the line (e.g., for single-line comments)
	EOL

	// ReadUntil reads until string(s) in the Until field are found,
	// or until the EOL if none are found
	ReadUntil

	// PushState means push the given state value onto the state stack
	PushState

	// PopState means pop the given state value off the state stack
	PopState

	// SetGuestLex means install the Name (must be a prior action) as the guest
	// lexer -- it will take over lexing until PopGuestLex is called
	SetGuestLex

	// PopGuestLex removes the current guest lexer and returns to the original
	// language lexer
	PopGuestLex
)
The lexical acts
const ActionsN Actions = 11
ActionsN is the highest valid value for type Actions, plus one.
func ActionsValues ¶
func ActionsValues() []Actions
ActionsValues returns all possible values for the type Actions.
func (Actions) MarshalText ¶
MarshalText implements the encoding.TextMarshaler interface.
func (*Actions) SetString ¶
SetString sets the Actions value from its string representation, and returns an error if the string is invalid.
func (*Actions) UnmarshalText ¶
UnmarshalText implements the encoding.TextUnmarshaler interface.
type EosPos ¶
type EosPos []int
EosPos is a line of EOS token positions, always sorted low-to-high
type Error ¶
type Error struct {

	// position where the error occurred in the source
	Pos Pos

	// full filename with path
	Filename string

	// brief error message
	Msg string

	// line of source where error was
	Src string

	// lexer or parser rule that emitted the error
	Rule tree.Node
}
In an ErrorList, an error is represented by an *Error. The position Pos, if valid, points to the beginning of the offending token, and the error condition is described by Msg.
func (Error) Error ¶
Error implements the error interface -- gives the minimal version of the error string
type ErrorList ¶
type ErrorList []*Error
ErrorList is a list of *Errors. The zero value for an ErrorList is an empty ErrorList ready to use.
func (ErrorList) Err ¶
Err returns an error equivalent to this error list. If the list is empty, Err returns nil.
func (*ErrorList) RemoveMultiples ¶
func (p *ErrorList) RemoveMultiples()
RemoveMultiples sorts an ErrorList and removes all but the first error per line.
func (ErrorList) Report ¶
Report returns all (or up to maxN if > 0) errors in the list in one string, with customizable output options for viewing errors:
- basepath, if non-empty, shows filenames relative to that path.
- showSrc shows the source line on a second line, truncated to 30 chars around the err.
- showRule prints the rule name.
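A sketch of accumulating and reporting errors. The Pos field names Ln and Ch are assumed (consistent with the two-field PosErr variable above), and a nil rule is passed since no lexer rule is involved:

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	var errs lexer.ErrorList
	// lexer.Pos{Ln: ..., Ch: ...} assumes the field names Ln and Ch.
	errs.Add(lexer.Pos{Ln: 3, Ch: 7}, "main.go", "unexpected rune", "x := $", nil)
	errs.Add(lexer.Pos{Ln: 1, Ch: 0}, "main.go", "unknown token", "???", nil)
	errs.Sort()
	// all errors (maxN = 0), with source lines, without rule names
	fmt.Println(errs.Report(0, "", true, false))
}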
type File ¶
type File struct {

	// the current file being lex'd
	Filename string

	// the known file type, if known (typically only known files are processed)
	Known fileinfo.Known

	// base path for reporting file names -- this must be set externally,
	// e.g., by gide for the project root path
	BasePath string

	// lex'd version of the lines -- allocated to size of Lines
	Lexs []Line

	// comment tokens are stored separately here, so parser doesn't need to worry
	// about them, but they are available for highlighting and other uses
	Comments []Line

	// stack present at the end of each line -- needed for contextualizing
	// line-at-time lexing while editing
	LastStacks []Stack

	// token positions per line for the EOS (end of statement) tokens --
	// very important for scoping top-down parsing
	EosPos []EosPos

	// contents of the file as lines of runes
	Lines [][]rune
}
File contains the contents of the file being parsed -- all kept in memory, and represented by Line as runes, so that positions in the file are directly convertible to indexes in the Lines structure
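A sketch of initializing a File directly from rune lines (import paths assumed; fileinfo.Go assumed to be a valid Known value):

package main

import (
	"fmt"

	"cogentcore.org/core/base/fileinfo" // assumed import path
	"cogentcore.org/core/parse/lexer"   // assumed import path
)

func main() {
	src := lexer.RunesFromString("a := 1\nb := 2\n")
	var fl lexer.File
	// an empty basepath defaults to the path of the filename
	fl.SetSrc(src, "demo.go", "", fileinfo.Go)
	for ln := 0; ln < fl.NLines(); ln++ {
		fmt.Println(ln, fl.SrcLine(ln))
	}
}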
func (*File) AllocLines ¶
func (fl *File) AllocLines()
AllocLines allocates the data per line: lex outputs and stack. We reset state so stale state is not hanging around.
func (*File) EnsureFinalEos ¶
EnsureFinalEos makes sure that the given line ends with an EOS (if it has tokens). Used for line-at-a-time parsing, just to make sure the line matches even if you haven't gotten to the end of it yet.
func (*File) InitFromLine ¶
InitFromLine initializes from one line of source file
func (*File) InitFromString ¶
InitFromString initializes from given string. Returns false if string is empty
func (*File) InsertEos ¶
InsertEos inserts an EOS just after the given token position (e.g., cp = last token in line)
func (*File) IsLexPosValid ¶
IsLexPosValid returns true if given lexical token position is valid
func (*File) LexAtSafe ¶
LexAtSafe returns the Lex item at given position, or last lex item if beyond end
func (*File) LexLine ¶
LexLine returns the lexing output for the given line, combining comments and all other tokens, and allocating new memory using Clone
func (*File) LexTagSrcLn ¶
LexTagSrcLn returns the lex'd tagged source line for given line
func (*File) LinesDeleted ¶
LinesDeleted deletes lines -- called e.g., by core.TextBuf to sync the markup with ongoing edits
func (*File) LinesInserted ¶
LinesInserted inserts new lines -- called e.g., by core.TextBuf to sync the markup with ongoing edits
func (*File) NextEosAnyDepth ¶
NextEosAnyDepth finds the next EOS at any depth
func (*File) NextTokenPos ¶
NextTokenPos returns the next token position, false if at end of tokens
func (*File) PrevTokenPos ¶
PrevTokenPos returns the previous token position, false if at the beginning of tokens
func (*File) ReplaceEos ¶
ReplaceEos replaces given token with an EOS
func (*File) SetLineSrc ¶
SetLineSrc sets source runes from given line of runes. Returns false if out of range.
func (*File) SetSrc ¶
SetSrc sets the source to the given content, and allocates Lexs. If basepath is empty, it is set to the path of the filename.
func (*File) TokenMapReg ¶
TokenMapReg creates a TokenMap of tokens in the region, including their Cat and SubCat levels -- errs on the side of inclusiveness -- used for optimizing token matching
func (*File) TokenRegSrc ¶
TokenRegSrc returns the source code associated with the given token region
func (*File) TokenSrcPos ¶
TokenSrcPos returns source reg associated with lex token at given token position
func (*File) TokenSrcReg ¶
TokenSrcReg translates a region of tokens into a region of source
type LanguageLexer ¶ added in v0.2.3
type LanguageLexer interface {

	// LexerByName returns the top-level [Rule] for given language (case invariant lookup)
	LexerByName(lang string) *Rule
}
LanguageLexer looks up the lexer for a given language; the implementation is in the parent parse package, so we need the interface here
var TheLanguageLexer LanguageLexer
TheLanguageLexer is the instance of the LanguageLexer interface used to look up lexers for languages -- it is set in parse/languages.go
type Lex ¶
type Lex struct {

	// Token includes cache of keyword for keyword types, and also has nesting depth:
	// starting at 0 at start of file and going up for every increment in bracket /
	// paren / start tag and down for every decrement. Is computed once and used
	// extensively in parsing.
	Token token.KeyToken

	// start rune index within original source line for this token
	St int

	// end rune index within original source line for this token (exclusive -- ends one before this)
	Ed int

	// time when region was set -- used for updating locations in the text based
	// on time stamp (using efficient non-pointer time)
	Time nptime.Time
}
Lex represents a single lexical element, with a token, and start and end rune positions within a line of a file. Critically it also contains the nesting depth computed from all the parens, brackets, braces. Todo: also support XML < > </ > tag depth.
func LastLexIgnoreComment ¶
LastLexIgnoreComment returns the last lex of the tags, ignoring any final comment at the end
func ObjPathAt ¶
ObjPathAt returns the starting Lex, before the given lex, that includes sequences of PunctSepPeriod and NameTag, which are used for object paths (e.g., field.field.field)
func (*Lex) ContainsPos ¶
ContainsPos returns true if the Lex element contains given character position
func (*Lex) OverlapsReg ¶
OverlapsReg returns true if the two regions overlap
type Line ¶
type Line []Lex
Line is one line of Lex'd text
func MergeLines ¶
MergeLines merges the two lines of lex regions into a combined list properly ordered by sequence of tags within the line.
func RuneFields ¶
RuneFields returns a Line of Lex's defining the non-white-space "fields" in the given rune string
func (*Line) AddLex ¶
AddLex adds one element to the lex line with the given params, returning a pointer to the new lex
func (*Line) AddSort ¶
AddSort adds a new lex element in sorted order to list, sorted by start position, and if at the same start position, then sorted *decreasing* by end position -- this allows outer tags to be processed before inner tags which fits a stack-based tag markup logic.
func (*Line) AtPos ¶
AtPos returns the Lex in place for the given position, along with its index, or nil, -1 if none
func (*Line) DeleteToken ¶
DeleteToken deletes a specific token type from list
func (*Line) NonCodeWords ¶
NonCodeWords returns a Line of white-space separated word tokens in given tagged source that ignores token.IsCode token regions -- i.e., the "regular" words present in the source line -- this is useful for things like spell checking or manual parsing.
func (*Line) RuneStrings ¶
RuneStrings returns array of strings for Lex regions defined in Line, for given rune source string
func (*Line) Sort ¶
func (ll *Line) Sort()
Sort sorts the lex elements by starting pos, and ending pos *decreasing* if a tie
type MatchPos ¶
type MatchPos int32 //enums:enum
MatchPos are special positions for a match to occur
const (
	// AnyPos matches at any position
	AnyPos MatchPos = iota

	// StartOfLine matches at start of line
	StartOfLine

	// EndOfLine matches at end of line
	EndOfLine

	// MiddleOfLine matches not at the start or end
	MiddleOfLine

	// StartOfWord matches at start of word
	StartOfWord

	// EndOfWord matches at end of word
	EndOfWord

	// MiddleOfWord matches not at the start or end
	MiddleOfWord
)
Matching position rules
const MatchPosN MatchPos = 7
MatchPosN is the highest valid value for type MatchPos, plus one.
func MatchPosValues ¶
func MatchPosValues() []MatchPos
MatchPosValues returns all possible values for the type MatchPos.
func (MatchPos) MarshalText ¶
MarshalText implements the encoding.TextMarshaler interface.
func (*MatchPos) SetString ¶
SetString sets the MatchPos value from its string representation, and returns an error if the string is invalid.
func (*MatchPos) UnmarshalText ¶
UnmarshalText implements the encoding.TextUnmarshaler interface.
type Matches ¶
type Matches int32 //enums:enum
Matches are what kind of lexing matches to make
const (
	// String means match a specific string as given in the rule.
	// Note: this only looks for the string with no constraints on
	// what happens after this string -- use StrName to match entire names
	String Matches = iota

	// StrName means match a specific string that is a complete alpha-numeric
	// string (including underbar _) with some other char at the end --
	// must use this for all keyword matches to ensure that it isn't just
	// the start of a longer name
	StrName

	// Letter means match any letter, including underscore
	Letter

	// Digit means match digit 0-9
	Digit

	// WhiteSpace means match any white space (space, tab) -- input is already broken into lines
	WhiteSpace

	// CurState means match current state value set by a PushState action, using the
	// String value in the rule -- all CurState cases must generally be first in the
	// list of rules so they can preempt other rules when the state is active
	CurState

	// AnyRune means match any rune -- use this as the last condition where other
	// terminators come first!
	AnyRune
)
Matching rules
const MatchesN Matches = 7
MatchesN is the highest valid value for type Matches, plus one.
func MatchesValues ¶
func MatchesValues() []Matches
MatchesValues returns all possible values for the type Matches.
func (Matches) MarshalText ¶
MarshalText implements the encoding.TextMarshaler interface.
func (*Matches) SetString ¶
SetString sets the Matches value from its string representation, and returns an error if the string is invalid.
func (*Matches) UnmarshalText ¶
UnmarshalText implements the encoding.TextUnmarshaler interface.
type PassTwo ¶
type PassTwo struct {

	// should we perform EOS detection on this type of file?
	DoEos bool

	// use end-of-line as a default EOS, if nesting depth is same as start of line (python) -- see also EolToks
	Eol bool

	// replace all semicolons with EOS to keep it consistent (C, Go..)
	Semi bool

	// use backslash as a line continuer (python)
	Backslash bool

	// if a right-brace } is detected anywhere in the line, insert an EOS *before* RBrace
	// AND after it (needed for Go) -- do not include RBrace in EolToks in this case
	RBraceEos bool

	// specific tokens to recognize at the end of a line that trigger an EOS (Go)
	EolToks token.KeyTokenList
}
PassTwo performs a second pass (or passes) through the lexicalized version of the source, computing nesting depth for every token once and for all -- this is essential for properly matching tokens and also for colorization in syntax highlighting. Optionally, a subsequent pass finds end-of-statement (EOS) tokens, which are essential for parsing to first break the source down into statement-sized chunks. A separate list of EOS token positions is maintained for very fast access.
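A sketch of a PassTwo configuration for a Go-like language, using only the fields documented above (the EolToks list is left empty here; import path assumed):

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	pt := lexer.PassTwo{
		DoEos:     true, // perform EOS detection
		Semi:      true, // semicolons become EOS
		RBraceEos: true, // insert EOS around right-braces (needed for Go)
	}
	fmt.Println(pt.DoEos, pt.Semi, pt.RBraceEos)
}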
func (*PassTwo) EosDetectPos ¶
EosDetectPos performs EOS detection at the given starting position, for the given number of lines
func (*PassTwo) MismatchError ¶
MismatchError reports a mismatch for given type of parentheses / bracket
func (*PassTwo) NestDepthLine ¶
NestDepthLine performs the nesting depth computation on only one line, starting at the given initial depth -- updates the given line
type Pos ¶
Pos is a position within the source file -- it is always recorded in 0, 0 offset positions, but is converted into 1, 1 offset for public consumption. Ch positions are always in runes, not bytes. Also used for lex token indexes.
func BraceMatch ¶
BraceMatch finds the brace, bracket, or paren that is the partner of the one passed to function, within maxLns lines of start. Operates on rune source with markup lex tags per line (tags exclude comments).
func (*Pos) FromString ¶
FromString decodes a text position from a string representation of the form [#]LxxCxx (used in, e.g., URL links) -- returns true if successful
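A sketch of decoding a link-style position (import path assumed; whether the decoded Pos is 0-base or 1-base follows the conversion rule described under Pos, so it is not asserted here):

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	var p lexer.Pos
	if p.FromString("L10C5") { // form: [#]LxxCxx
		fmt.Println(p)
	}
}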
type Reg ¶
Reg is a contiguous region within the source file
type Rule ¶
type Rule struct {
	tree.NodeBase

	// disable this rule -- useful for testing and exploration
	Off bool `json:",omitempty"`

	// description / comments about this rule
	Desc string `json:",omitempty"`

	// the token value that this rule generates -- use None for non-terminals
	Token token.Tokens

	// the lexical match that we look for to engage this rule
	Match Matches

	// position where match can occur
	Pos MatchPos

	// if action is LexMatch, this is the string we match
	String string

	// offset into the input to look for a match: 0 = current char, 1 = next one, etc
	Offset int `json:",omitempty"`

	// adjusts the size of the region (plus or minus) that is processed for the Next
	// action -- allows broader and narrower matching relative to tagging
	SizeAdj int `json:",omitempty"`

	// the action(s) to perform, in order, if there is a match -- these are performed
	// prior to iterating over child nodes
	Acts []Actions

	// string(s) for ReadUntil action -- will read until any of these strings are
	// found -- separate different options with | -- if you need to read until a
	// literal |, just put two || in a row and that will show up as a blank, which
	// is interpreted as a literal |
	Until string `json:",omitempty"`

	// the state to push if our action is PushState -- note that State matching
	// is on String, not this value
	PushState string `json:",omitempty"`

	// create an optimization map for this rule, which must be a parent with children
	// that all match against a Name string -- this reads the Name and directly
	// activates the associated rule with that String, without having to iterate
	// through them -- use this for keywords etc -- produces a SIGNIFICANT speedup
	// for long lists of keywords.
	NameMap bool `json:",omitempty"`

	// length of source that matched -- if Next is called, this is what will be skipped to
	MatchLen int `display:"-" json:"-" xml:"-"`

	// NameMap lookup map -- created during Compile
	NmMap map[string]*Rule `edit:"-" json:"-" xml:"-"`
}
Rule operates on the text input to produce the lexical tokens.
Lexing is done line-by-line -- you must push and pop states to coordinate across multiple lines, e.g., for multi-line comments.
There is full access to the entire line, and you can decide based on future (offset) characters.
In general it is best to keep lexing as simple as possible and leave the more complex things for the parsing step.
func NewRule ¶
NewRule returns a new Rule with the given optional parent: Rule operates on the text input to produce the lexical tokens.
Lexing is done line-by-line -- you must push and pop states to coordinate across multiple lines, e.g., for multi-line comments.
There is full access to the entire line, and you can decide based on future (offset) characters.
In general it is best to keep lexing as simple as possible and leave the more complex things for the parsing step.
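A hedged sketch of declaring a couple of rules with the generated Set* builders. The child-rule wiring via NewRule's optional parent argument follows the tree package conventions, and token.Comment / token.Name are assumed token values; this is not a verified grammar:

package main

import (
	"cogentcore.org/core/parse/lexer" // assumed import path
	"cogentcore.org/core/parse/token" // assumed import path
)

func buildRules() *lexer.Rule {
	top := lexer.NewRule() // top-level rule; children are tried in order

	// line comment: match "//" and read to the end of the line
	lexer.NewRule(top).SetMatch(lexer.String).SetString("//").
		SetToken(token.Comment).SetActs(lexer.EOL)

	// identifiers: on any letter, read a whole name
	lexer.NewRule(top).SetMatch(lexer.Letter).
		SetToken(token.Name).SetActs(lexer.Name)

	return top
}

func main() {
	_ = buildRules()
}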
func (*Rule) Compile ¶
Compile performs any one-time compilation steps on the rule; returns false if there are any problems.
func (*Rule) CompileAll ¶
CompileAll is called on the top-level Rule to compile all nodes; returns true if everything is OK
func (*Rule) CompileNameMap ¶
CompileNameMap compiles name map -- returns false if there are problems.
func (*Rule) ComputeMatchLen ¶
ComputeMatchLen computes MatchLen based on match type
func (*Rule) Find ¶
Find looks for rules in the tree that contain given string in String or Name fields
func (*Rule) IsMatch ¶
IsMatch tests if the rule matches for current input state, returns true if so, false if not
func (*Rule) IsMatchPos ¶
IsMatchPos tests if the rule matches position
func (*Rule) Lex ¶
Lex tries to apply the rule to the given input state; returns the lowest-level rule that matched, or nil if none
func (*Rule) LexStart ¶
LexStart is called on the top-level lex node to start lexing process for one step
func (*Rule) SetActs ¶
SetActs sets the [Rule.Acts]: the action(s) to perform, in order, if there is a match -- these are performed prior to iterating over child nodes
func (*Rule) SetMatch ¶
SetMatch sets the [Rule.Match]: the lexical match that we look for to engage this rule
func (*Rule) SetMatchLen ¶
SetMatchLen sets the [Rule.MatchLen]: length of source that matched -- if Next is called, this is what will be skipped to
func (*Rule) SetNameMap ¶
SetNameMap sets the [Rule.NameMap]: create an optimization map for this rule, which must be a parent with children that all match against a Name string -- this reads the Name and directly activates the associated rule with that String, without having to iterate through them -- use this for keywords etc -- produces a SIGNIFICANT speedup for long lists of keywords.
func (*Rule) SetNmMap ¶
SetNmMap sets the [Rule.NmMap]: NameMap lookup map -- created during Compile
func (*Rule) SetOff ¶
SetOff sets the [Rule.Off]: disable this rule -- useful for testing and exploration
func (*Rule) SetOffset ¶
SetOffset sets the [Rule.Offset]: offset into the input to look for a match: 0 = current char, 1 = next one, etc
func (*Rule) SetPushState ¶
SetPushState sets the [Rule.PushState]: the state to push if our action is PushState -- note that State matching is on String, not this value
func (*Rule) SetSizeAdj ¶
SetSizeAdj sets the [Rule.SizeAdj]: adjusts the size of the region (plus or minus) that is processed for the Next action -- allows broader and narrower matching relative to tagging
func (*Rule) SetString ¶
SetString sets the [Rule.String]: if action is LexMatch, this is the string we match
func (*Rule) SetToken ¶
SetToken sets the [Rule.Token]: the token value that this rule generates -- use None for non-terminals
func (*Rule) SetUntil ¶
SetUntil sets the [Rule.Until]: string(s) for ReadUntil action -- will read until any of these strings are found -- separate different options with | -- if you need to read until a literal | just put two || in a row and that will show up as a blank, which is interpreted as a literal |
type State ¶
type State struct {

	// the current file being lex'd
	Filename string

	// if true, record whitespace tokens -- else ignore
	KeepWS bool

	// the current line of source being processed
	Src []rune

	// the lex output for this line
	Lex Line

	// the comments output for this line -- kept separately
	Comments Line

	// the current rune char position within the line
	Pos int

	// the line within overall source that we're operating on (0 indexed)
	Ln int

	// the current rune read by NextRune
	Ch rune

	// state stack
	Stack Stack

	// the last name that was read
	LastName string

	// a guest lexer that can be installed for managing a different language type,
	// e.g., quoted text in markdown files
	GuestLex *Rule

	// copy of stack at point when guest lexer was installed -- restore when popped
	SaveStack Stack

	// time stamp for lexing -- set at start of new lex process
	Time nptime.Time

	// any error messages accumulated during lexing specifically
	Errs ErrorList
}
State is the state maintained for lexing
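A sketch of stepping through one line with State, using only the methods documented on this page (import path assumed):

package main

import (
	"fmt"

	"cogentcore.org/core/parse/lexer" // assumed import path
)

func main() {
	ls := &lexer.State{}
	ls.Init()
	ls.SetLine([]rune("count := 42"))
	for !ls.AtEol() {
		r, ok := ls.Rune(0) // rune at the current position
		if !ok {
			break
		}
		if lexer.IsDigit(r) {
			tok := ls.ReadNumber() // advances past the number, returns its token type
			fmt.Println("number token:", tok)
			continue
		}
		ls.Next(1) // advance one rune
	}
}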
func (*State) LineString ¶
LineString returns the current lex output as tagged source
func (*State) MatchState ¶
MatchState returns true if the current state matches the string
func (*State) Next ¶
Next moves to the next position in the source line using the given increment -- returns false if at the end
func (*State) NextSrcLine ¶
NextSrcLine returns the next line of text
func (*State) ReadEscape ¶
ReadEscape parses an escape sequence where rune is the accepted escaped quote. In case of a syntax error, it stops at the offending character (without consuming it) and returns false. Otherwise it returns true.
func (*State) ReadName ¶
func (ls *State) ReadName()
ReadName reads a standard alpha-numeric_ name -- saves in LastName
func (*State) ReadNameTmp ¶
ReadNameTmp reads a standard alpha-numeric_ name and returns it. Does not update the lexing position -- a "lookahead" name read
func (*State) ReadNumber ¶
ReadNumber reads a number of any sort, returning the type of the number
func (*State) ReadQuoted ¶
func (ls *State) ReadQuoted()
func (*State) ReadUntil ¶
ReadUntil reads until the given string(s) are found -- does depth tracking if looking for a bracket open / close kind of symbol. For multiple "until" string options, separate each by |, and use empty to match a single | or || in combination with other options. Terminates at the end of the line without error.
func (*State) Rune ¶
Rune gets the rune at given offset from current position, returns false if out of range
func (*State) ScanMantissa ¶
type TokenMap ¶
TokenMap is a token map, for optimizing token exclusion
type TwoState ¶
type TwoState struct {

	// position in lex tokens we're on
	Pos Pos

	// file that we're operating on
	Src *File

	// stack of nesting tokens
	NestStack []token.Tokens

	// any error messages accumulated during lexing specifically
	Errs ErrorList
}
TwoState is the state maintained for the PassTwo process
func (*TwoState) Init ¶
func (ts *TwoState) Init()
Init initializes state for a new pass -- called at start of NestDepth
func (*TwoState) NestStackStr ¶
NestStackStr returns the token stack as strings