Documentation ¶
Overview ¶
Package prattle implements a general purpose, unicode-aware lexical scanner and top down operator precedence parser suitable for parsing LL(1) grammars. The scanner and parser can be used independently from each other if desired.
Example (Interpreter) ¶
This example demonstrates parsing a simple programming language that consists of a sequence of statements.
package main import ( "fmt" "strconv" "unicode" "github.com/askeladdk/prattle" ) const ( ksemicolon = 1 + iota kassign kplus kident knumber ) func testScan(s *prattle.Scanner) int { s.ExpectAny(unicode.IsSpace) s.Skip() switch { case s.Done(): return 0 case s.ExpectOne(unicode.IsLetter): s.ExpectAny(unicode.IsLetter) return kident case s.ExpectOne(unicode.IsDigit): s.ExpectAny(unicode.IsDigit) return knumber case s.Expect('='): return kassign case s.Expect('+'): return kplus case s.Expect(';'): return ksemicolon } s.Advance() return -1 } type testDriver struct { stack []prattle.Token idents map[string]int } func (d *testDriver) pop() (v prattle.Token) { if n := len(d.stack); n > 0 { v, d.stack = d.stack[n-1], d.stack[:n-1] } return v } func (d *testDriver) push(v prattle.Token) { d.stack = append(d.stack, v) } func (d *testDriver) Precedence(kind int) int { return kind } func (d *testDriver) tonumber(t prattle.Token) int { if t.Kind == kident { return d.idents[t.Text] } num, _ := strconv.Atoi(t.Text) return num } func (d *testDriver) plus(p *prattle.Parser, t prattle.Token) error { if err := p.Parse(d.Precedence(t.Kind)); err != nil { return err } right := d.tonumber(d.pop()) left := d.tonumber(d.pop()) d.push(prattle.Token{ Kind: knumber, Text: strconv.Itoa(left + right), }) return nil } func (d *testDriver) assign(p *prattle.Parser, t prattle.Token) error { if err := p.Parse(d.Precedence(kassign)); err != nil { return err } right := d.pop() left := d.pop() d.idents[left.Text] = d.tonumber(right) return nil } func (d *testDriver) primitive(p *prattle.Parser, t prattle.Token) error { d.push(t) return nil } func (d *testDriver) Prefix(kind int) prattle.ParseFunc { switch kind { default: return nil case kident, knumber: return d.primitive } } func (d *testDriver) Infix(kind int) prattle.ParseFunc { switch kind { default: return nil case kplus: return d.plus case kassign: return d.assign } } func (d *testDriver) ParseError(t prattle.Token) error { return fmt.Errorf("%s: unexpected '%s'", t.Position, t.Text) } // This example demonstrates parsing a simple programming language that consists of a sequence of statements. func main() { c := testDriver{ idents: make(map[string]int), } source := "a = 1;\nb = 2;\nc = a+b+b+a;\n" s := prattle.Scanner{Scan: testScan} p := prattle.Parser{Driver: &c} p.Init(s.InitWithString(source)) // Parse expressions separated by semicolons. for p.Peek().Kind != 0 { if err := p.Parse(ksemicolon); err != nil { fmt.Println(err) return } else if !p.Expect(ksemicolon) { fmt.Println("expected semicolon") return } } fmt.Printf("c = %d\n", c.idents["c"]) }
Output: c = 6
Example (Readme) ¶
This example is described in the readme.
package main import ( "fmt" "strconv" "unicode" "github.com/askeladdk/prattle" ) type driver struct { stack []int } func (d *driver) push(i int) { d.stack = append(d.stack, i) } func (d *driver) pop() (i int) { n := len(d.stack) i, d.stack = d.stack[n-1], d.stack[:n-1] return } func (d *driver) number(p *prattle.Parser, t prattle.Token) error { n, _ := strconv.Atoi(t.Text) d.push(n) return nil } func (d *driver) add(p *prattle.Parser, t prattle.Token) error { // First parse the right hand operator. if err := p.Parse(d.Precedence(t.Kind)); err != nil { return err } right := d.pop() left := d.pop() acc := left + right fmt.Printf("%d + %d = %d\n", left, right, acc) d.push(acc) return nil } func (d *driver) Prefix(k int) prattle.ParseFunc { if k == 2 { return d.number } return nil } func (d *driver) Infix(k int) prattle.ParseFunc { if k == 1 { return d.add } return nil } func (d *driver) Precedence(k int) int { return k } func (d *driver) ParseError(t prattle.Token) error { return fmt.Errorf("%s", t) } // This example is described in the readme. func main() { scanner := prattle.Scanner{ Scan: func(s *prattle.Scanner) int { // Skip whitespaces. s.ExpectAny(unicode.IsSpace) s.Skip() switch { case s.Done(): // Stop if the entire input has been consumed. return 0 case s.Expect('+'): return 1 case s.ExpectOne(unicode.IsDigit): // Read a number consisting of one or more digits. s.ExpectAny(unicode.IsDigit) return 2 } s.Advance() return -1 }, } parser := prattle.Parser{ Driver: &driver{}, } source := "1 + 23 + 456 + 7890" scanner.InitWithString(source) if err := parser.Init(&scanner).Parse(0); err != nil { fmt.Println(err) } }
Output: 1 + 23 = 24 24 + 456 = 480 480 + 7890 = 8370
Example (Sentence) ¶
This example demonstrates tokenising a sentence into words and punctuation.
package main import ( "fmt" "unicode" "github.com/askeladdk/prattle" ) func main() { scan := func(s *prattle.Scanner) int { // Skip any whitespace. s.ExpectAny(unicode.IsSpace) s.Skip() // Return an appropriate token kind. switch { case s.Done(): // EOF return 0 case s.ExpectOne(unicode.IsLetter): // Word s.ExpectAny(unicode.IsLetter) return 1 case s.ExpectOne(unicode.IsPunct): // Punctuation return 2 } // Unrecognized character. s.Advance() return -1 } sentence := "I love it when a plan comes together!" s := (&prattle.Scanner{ Scan: scan, }).InitWithString(sentence) for tok, ok := s.Next(); ok; tok, ok = s.Next() { fmt.Printf("[%d] %s\n", tok.Kind, tok.Text) } }
Output: [1] I [1] love [1] it [1] when [1] a [1] plan [1] comes [1] together [2] !
Index ¶
- Variables
- type AcceptFunc
- type Driver
- type Iterator
- type ParseFunc
- type Parser
- type Position
- type ScanFunc
- type Scanner
- func (s *Scanner) Advance()
- func (s *Scanner) Done() bool
- func (s *Scanner) Expect(r rune) bool
- func (s *Scanner) ExpectAny(accept AcceptFunc)
- func (s *Scanner) ExpectOne(accept AcceptFunc) bool
- func (s *Scanner) InitWithReader(r io.RuneReader) *Scanner
- func (s *Scanner) InitWithString(source string) *Scanner
- func (s *Scanner) Next() (Token, bool)
- func (s *Scanner) Peek() rune
- func (s *Scanner) Skip()
- func (s *Scanner) Text() string
- type Token
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var NonAssoc error = errors.New("non-associative operator")
NonAssoc is returned by infix ParseFuncs to indicate that an operator is non-associative.
Functions ¶
This section is empty.
Types ¶
type AcceptFunc ¶
AcceptFunc accepts a rune.
func OneOf ¶
func OneOf(chars string) AcceptFunc
OneOf returns an AcceptFunc that reports whether a rune appears in chars.
type Driver ¶ added in v0.0.3
type Driver interface { // Infix associates an infix ParseFunc with a token. // Returning nil is a parse error. Infix(kind int) ParseFunc // Prefix associates a prefix ParseFunc with a token. // Returning nil is a parse error. Prefix(kind int) ParseFunc // Precedence associates an operator precedence with a token. // The higher the precedence, the tighter the token binds. Precedence(kind int) (precedence int) // ParseError is called by the Parser when it encounters a token that it cannot parse. ParseError(Token) error }
Driver drives the parsing algorithm by associating tokens to parser functions. It is expected to hold the parse state and results like the syntax tree.
type Iterator ¶ added in v0.0.6
type Iterator interface { // Next returns the next token in the stream // and yields true until the stream ends. Next() (token Token, ok bool) }
Iterator represents a stream of tokens.
type Parser ¶
type Parser struct { // Driver drives the Parser. Driver // contains filtered or unexported fields }
Parser implements the Pratt parsing algorithm, also known as the top down operator precedence (TDOP) algorithm. This is a recursive descent algorithm that handles operator precedence in a simple and flexible manner.
Parser consumes tokens from an Iterator and uses a Driver to determine precedence and executing parsing functions.
type Position ¶
type Position struct { // Filename is the filename of the input, if any. Filename string // Offset is the byte offset, starting at 0. Offset int // Line is the line number, starting at 1. Line int // Column is the column number, starting at 1 (character count per line). Column int }
Position represents the position of a Token in the input string.
type ScanFunc ¶
ScanFunc scans the next token and returns its kind. Zero is reserved to signal end-of-input.
type Scanner ¶
type Scanner struct { // Position of the last read token. // The Filename field will never be modified by Scanner. Position // Scan scans tokens. Scan ScanFunc // contains filtered or unexported fields }
Scanner produces a stream of tokens from a string or an io.RuneReader.
func (*Scanner) ExpectAny ¶
func (s *Scanner) ExpectAny(accept AcceptFunc)
ExpectAny advances the cursor zero or more times.
func (*Scanner) ExpectOne ¶
func (s *Scanner) ExpectOne(accept AcceptFunc) bool
ExpectOne advances the cursor if the current rune is accepted.
func (*Scanner) InitWithReader ¶ added in v0.0.4
func (s *Scanner) InitWithReader(r io.RuneReader) *Scanner
Init initializes a Scanner with an input reader and returns it. When initialized this way, the Scanner tokenizes unbounded streams but must allocate.
func (*Scanner) InitWithString ¶ added in v0.0.4
Init initializes a Scanner with an input string and returns it. When initialized this way, the Scanner tokenizes without allocations.