prattle

package module

v1.0.0 Latest Latest Go to latest Published: Oct 27, 2023 License: ISC Imports: 5 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/askeladdk/prattle

Links

Open Source Insights

README ¶

Prattle

Overview

Package prattle implements a general purpose, unicode-aware lexical scanner and top down operator precedence parser suitable for parsing LL(1) grammars. The scanner and parser can be used independently from each other if desired.

Install

go get -u github.com/askeladdk/prattle

Quickstart

Use Scanner to produce a sequence of Tokens by scanning a source text using a ScanFunc. Use the Expect*, Skip and Advance methods to scan tokens.

// Prepare the scanner.
scanner := prattle.Scanner{
	// Scan scans the next token and returns its kind.
	Scan: func(s *prattle.Scanner) (kind int) {
		// Skip any whitespace.
		s.ExpectAny(unicode.IsSpace)
		s.Skip()

		// Scan the next token.
		switch {
		case s.Done(): // Stop when the entire input has been consumed.
			return 0
		case s.Expect('+'): // Scan the addition operator.
			return 1
		case s.ExpectOne(unicode.IsDigit): // Scan a number consisting of one or more digits.
			s.ExpectAny(unicode.IsDigit)
			return 2
		}

		// Invalid token.
		s.Advance()
		return -1
	},
}

Use Parser and Driver to associate tokens produced by Scanner with ParseFuncs. Define the Driver first.

// Define the parsing Driver.
type driver struct {
	stack []int
}

func (d *driver) push(i int) {
	d.stack = append(d.stack, i)
}

func (d *driver) pop() (i int) {
	n := len(d.stack)
	i, d.stack = d.stack[n-1], d.stack[:n-1]
	return
}

func (d *driver) number(p *prattle.Parser, t prattle.Token) error {
	n, _ := strconv.Atoi(t.Text)
	d.push(n)
	return nil
}

func (d *driver) add(p *prattle.Parser, t prattle.Token) error {
	// Parse the right hand operator.
	_ = p.Parse(d.Precedence(t.Kind))

	right := d.pop()
	left := d.pop()
	sum := left + right
	fmt.Printf("%d + %d = %d\n", left, right, sum)
	d.push(sum)
	return nil
}

func (d *driver) Prefix(kind int) prattle.ParseFunc {
	return d.number
}

func (d *driver) Infix(kind int) prattle.ParseFunc {
	return d.add
}

func (d *driver) Precedence(kind int) int {
	return kind
}

func (d *driver) ParseError(t prattle.Token) error {
	return fmt.Errorf("%s", t)
}

// Prepare the parser.
parser := prattle.Parser{
	Driver: &driver{},
}

Finally, Init the scanner and parser, and parse an expression.

source := "1 + 23 + 456 + 7890"
scanner.InitWithString(source)
parser.Init(&scanner).Parse(0)

// Output:
// 1 + 23 = 24
// 24 + 456 = 480
// 480 + 7890 = 8370

See the calculator for a complete example featuring prefix, postfix, infix, left, right and non-associative operators. Also read the documentation on pkg.go.dev.

License

Package prattle is released under the terms of the ISC license.

Documentation ¶

Overview ¶

Package prattle implements a general purpose, unicode-aware lexical scanner and top down operator precedence parser suitable for parsing LL(1) grammars. The scanner and parser can be used independently from each other if desired.

Example (Interpreter) ¶

This example demonstrates parsing a simple programming language that consists of a sequence of statements.

package main

import (
	"fmt"
	"strconv"
	"unicode"

	"github.com/askeladdk/prattle"
)

const (
	ksemicolon = 1 + iota
	kassign
	kplus
	kident
	knumber
)

func testScan(s *prattle.Scanner) int {
	s.ExpectAny(unicode.IsSpace)
	s.Skip()

	switch {
	case s.Done():
		return 0
	case s.ExpectOne(unicode.IsLetter):
		s.ExpectAny(unicode.IsLetter)
		return kident
	case s.ExpectOne(unicode.IsDigit):
		s.ExpectAny(unicode.IsDigit)
		return knumber
	case s.Expect('='):
		return kassign
	case s.Expect('+'):
		return kplus
	case s.Expect(';'):
		return ksemicolon
	}

	s.Advance()
	return -1
}

type testDriver struct {
	stack  []prattle.Token
	idents map[string]int
}

func (d *testDriver) pop() (v prattle.Token) {
	if n := len(d.stack); n > 0 {
		v, d.stack = d.stack[n-1], d.stack[:n-1]
	}
	return v
}

func (d *testDriver) push(v prattle.Token) {
	d.stack = append(d.stack, v)
}

func (d *testDriver) Precedence(kind int) int {
	return kind
}

func (d *testDriver) tonumber(t prattle.Token) int {
	if t.Kind == kident {
		return d.idents[t.Text]
	}
	num, _ := strconv.Atoi(t.Text)
	return num
}

func (d *testDriver) plus(p *prattle.Parser, t prattle.Token) error {
	if err := p.Parse(d.Precedence(t.Kind)); err != nil {
		return err
	}
	right := d.tonumber(d.pop())
	left := d.tonumber(d.pop())
	d.push(prattle.Token{
		Kind: knumber,
		Text: strconv.Itoa(left + right),
	})
	return nil
}

func (d *testDriver) assign(p *prattle.Parser, t prattle.Token) error {
	if err := p.Parse(d.Precedence(kassign)); err != nil {
		return err
	}

	right := d.pop()
	left := d.pop()
	d.idents[left.Text] = d.tonumber(right)
	return nil
}

func (d *testDriver) primitive(p *prattle.Parser, t prattle.Token) error {
	d.push(t)
	return nil
}

func (d *testDriver) Prefix(kind int) prattle.ParseFunc {
	switch kind {
	default:
		return nil
	case kident, knumber:
		return d.primitive
	}
}

func (d *testDriver) Infix(kind int) prattle.ParseFunc {
	switch kind {
	default:
		return nil
	case kplus:
		return d.plus
	case kassign:
		return d.assign
	}
}

func (d *testDriver) ParseError(t prattle.Token) error {
	return fmt.Errorf("%s: unexpected '%s'", t.Position, t.Text)
}

// This example demonstrates parsing a simple programming language that consists of a sequence of statements.
func main() {
	c := testDriver{
		idents: make(map[string]int),
	}

	source := "a = 1;\nb = 2;\nc = a+b+b+a;\n"

	s := prattle.Scanner{Scan: testScan}
	p := prattle.Parser{Driver: &c}
	p.Init(s.InitWithString(source))

	// Parse expressions separated by semicolons.
	for p.Peek().Kind != 0 {
		if err := p.Parse(ksemicolon); err != nil {
			fmt.Println(err)
			return
		} else if !p.Expect(ksemicolon) {
			fmt.Println("expected semicolon")
			return
		}
	}

	fmt.Printf("c = %d\n", c.idents["c"])

}

Output:

c = 6

Example (Readme) ¶

This example is described in the readme.

package main

import (
	"fmt"
	"strconv"
	"unicode"

	"github.com/askeladdk/prattle"
)

type driver struct {
	stack []int
}

func (d *driver) push(i int) {
	d.stack = append(d.stack, i)
}

func (d *driver) pop() (i int) {
	n := len(d.stack)
	i, d.stack = d.stack[n-1], d.stack[:n-1]
	return
}

func (d *driver) number(p *prattle.Parser, t prattle.Token) error {
	n, _ := strconv.Atoi(t.Text)
	d.push(n)
	return nil
}

func (d *driver) add(p *prattle.Parser, t prattle.Token) error {
	// First parse the right hand operator.
	if err := p.Parse(d.Precedence(t.Kind)); err != nil {
		return err
	}

	right := d.pop()
	left := d.pop()
	acc := left + right
	fmt.Printf("%d + %d = %d\n", left, right, acc)
	d.push(acc)
	return nil
}

func (d *driver) Prefix(k int) prattle.ParseFunc {
	if k == 2 {
		return d.number
	}
	return nil
}

func (d *driver) Infix(k int) prattle.ParseFunc {
	if k == 1 {
		return d.add
	}
	return nil
}

func (d *driver) Precedence(k int) int {
	return k
}

func (d *driver) ParseError(t prattle.Token) error {
	return fmt.Errorf("%s", t)
}

// This example is described in the readme.
func main() {
	scanner := prattle.Scanner{
		Scan: func(s *prattle.Scanner) int {
			// Skip whitespaces.
			s.ExpectAny(unicode.IsSpace)
			s.Skip()
			switch {
			case s.Done(): // Stop if the entire input has been consumed.
				return 0
			case s.Expect('+'):
				return 1
			case s.ExpectOne(unicode.IsDigit): // Read a number consisting of one or more digits.
				s.ExpectAny(unicode.IsDigit)
				return 2
			}

			s.Advance()
			return -1
		},
	}

	parser := prattle.Parser{
		Driver: &driver{},
	}

	source := "1 + 23 + 456 + 7890"
	scanner.InitWithString(source)
	if err := parser.Init(&scanner).Parse(0); err != nil {
		fmt.Println(err)
	}

}

Output:

1 + 23 = 24
24 + 456 = 480
480 + 7890 = 8370

Example (Sentence) ¶

This example demonstrates tokenising a sentence into words and punctuation.

package main

import (
	"fmt"
	"unicode"

	"github.com/askeladdk/prattle"
)

func main() {
	scan := func(s *prattle.Scanner) int {
		// Skip any whitespace.
		s.ExpectAny(unicode.IsSpace)
		s.Skip()

		// Return an appropriate token kind.
		switch {
		case s.Done(): // EOF
			return 0
		case s.ExpectOne(unicode.IsLetter): // Word
			s.ExpectAny(unicode.IsLetter)
			return 1
		case s.ExpectOne(unicode.IsPunct): // Punctuation
			return 2
		}

		// Unrecognized character.
		s.Advance()
		return -1
	}

	sentence := "I love it when a plan comes together!"

	s := (&prattle.Scanner{
		Scan: scan,
	}).InitWithString(sentence)

	for tok, ok := s.Next(); ok; tok, ok = s.Next() {
		fmt.Printf("[%d] %s\n", tok.Kind, tok.Text)
	}

}

Output:

[1] I
[1] love
[1] it
[1] when
[1] a
[1] plan
[1] comes
[1] together
[2] !

Index ¶

Variables
type AcceptFunc
- func OneOf(chars string) AcceptFunc
type Driver
type Iterator
type ParseFunc
type Parser
type Position
- func (p Position) IsValid() bool
- func (p Position) String() string
type ScanFunc
type Scanner
type Token
- func (t Token) String() string

Constants ¶

This section is empty.

Variables ¶

View Source

var NonAssoc error = errors.New("non-associative operator")

NonAssoc is returned by infix ParseFuncs to indicate that an operator is non-associative.

Functions ¶

This section is empty.

Types ¶

type AcceptFunc ¶

type AcceptFunc func(rune) bool

AcceptFunc accepts a rune.

func OneOf ¶

func OneOf(chars string) AcceptFunc

OneOf returns an AcceptFunc that reports whether a rune appears in chars.

type Driver ¶ added in v0.0.3

type Driver interface {
	// Infix associates an infix ParseFunc with a token.
	// Returning nil is a parse error.
	Infix(kind int) ParseFunc

	// Prefix associates a prefix ParseFunc with a token.
	// Returning nil is a parse error.
	Prefix(kind int) ParseFunc

	// Precedence associates an operator precedence with a token.
	// The higher the precedence, the tighter the token binds.
	Precedence(kind int) (precedence int)

	// ParseError is called by the Parser when it encounters a token that it cannot parse.
	ParseError(Token) error
}

Driver drives the parsing algorithm by associating tokens to parser functions. It is expected to hold the parse state and results like the syntax tree.

type Iterator ¶ added in v0.0.6

type Iterator interface {
	// Next returns the next token in the stream
	// and yields true until the stream ends.
	Next() (token Token, ok bool)
}

Iterator represents a stream of tokens.

type ParseFunc ¶

type ParseFunc func(*Parser, Token) error

ParseFunc parses an expression.

type Parser ¶

type Parser struct {
	// Driver drives the Parser.
	Driver
	// contains filtered or unexported fields
}

Parser implements the Pratt parsing algorithm, also known as the top down operator precedence (TDOP) algorithm. This is a recursive descent algorithm that handles operator precedence in a simple and flexible manner.

Parser consumes tokens from an Iterator and uses a Driver to determine precedence and executing parsing functions.

func (*Parser) Advance ¶

func (p *Parser) Advance()

Advance reads the next token from the Iterator.

func (*Parser) Expect ¶

func (p *Parser) Expect(kind int) bool

Expect advances to the next token if the current token kind matches.

func (*Parser) Init ¶

func (p *Parser) Init(iter Iterator) *Parser

Init initializes the Parser with an Iterator and returns it.

func (*Parser) Parse ¶ added in v0.0.7

func (p *Parser) Parse(least int) error

Parse parses using the TDOP algorithm until it encounters a token with an equal or lower precedence than least. It may be called in a mutual recursive manner by the parsing functions provided by the Driver.

func (*Parser) Peek ¶

func (p *Parser) Peek() Token

Peek returns the last read token.

type Position ¶

type Position struct {
	// Filename is the filename of the input, if any.
	Filename string

	// Offset is the byte offset, starting at 0.
	Offset int

	// Line is the line number, starting at 1.
	Line int

	// Column is the column number, starting at 1 (character count per line).
	Column int
}

Position represents the position of a Token in the input string.

func (Position) IsValid ¶

func (p Position) IsValid() bool

IsValid reports whether a Position is valid. A Position is valid if its Line field is greater than zero.

func (Position) String ¶

func (p Position) String() string

String implements fmt.Stringer.

type ScanFunc ¶

type ScanFunc func(*Scanner) (kind int)

ScanFunc scans the next token and returns its kind. Zero is reserved to signal end-of-input.

type Scanner ¶

type Scanner struct {
	// Position of the last read token.
	// The Filename field will never be modified by Scanner.
	Position

	// Scan scans tokens.
	Scan ScanFunc
	// contains filtered or unexported fields
}

Scanner produces a stream of tokens from a string or an io.RuneReader.

func (*Scanner) Advance ¶

func (s *Scanner) Advance()

Advance advances the cursor by one rune.

func (*Scanner) Done ¶

func (s *Scanner) Done() bool

Done reports whether the entire input has been consumed.

func (*Scanner) Expect ¶

func (s *Scanner) Expect(r rune) bool

Expect advances the cursor if the current rune matches.

func (*Scanner) ExpectAny ¶

func (s *Scanner) ExpectAny(accept AcceptFunc)

ExpectAny advances the cursor zero or more times.

func (*Scanner) ExpectOne ¶

func (s *Scanner) ExpectOne(accept AcceptFunc) bool

ExpectOne advances the cursor if the current rune is accepted.

func (*Scanner) InitWithReader ¶ added in v0.0.4

func (s *Scanner) InitWithReader(r io.RuneReader) *Scanner

Init initializes a Scanner with an input reader and returns it. When initialized this way, the Scanner tokenizes unbounded streams but must allocate.

func (*Scanner) InitWithString ¶ added in v0.0.4

func (s *Scanner) InitWithString(source string) *Scanner

Init initializes a Scanner with an input string and returns it. When initialized this way, the Scanner tokenizes without allocations.

func (*Scanner) Next ¶

func (s *Scanner) Next() (Token, bool)

Next implements Iterator.

func (*Scanner) Peek ¶

func (s *Scanner) Peek() rune

Peek returns the current rune.

func (*Scanner) Skip ¶

func (s *Scanner) Skip()

Skip swallows the next token.

func (*Scanner) Text ¶ added in v0.0.3

func (s *Scanner) Text() string

Text returns the token string that has been scanned so far.

type Token ¶

type Token struct {
	Position

	// Kind identifies the kind of token.
	Kind int

	// Text is the token value.
	Text string
}

Token is a fragment of tokenised text.

func (Token) String ¶

func (t Token) String() string

String implements fmt.Stringer.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
_examples
calculator

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL