textscan

package

v0.6.13 Latest Latest Go to latest Published: Oct 13, 2023 License: MIT Imports: 10 Imported by: 4

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/gookit/goutil

Links

Open Source Insights

README ¶

TextScan

Package textscan implements a text scanner for quickly parsing text contents. It can be used to parse formats such as INI and Properties.

Install

go get github.com/gookit/goutil/strutil/textscan

Examples

package main

import (
	"fmt"

	"github.com/gookit/goutil/dump"
	"github.com/gookit/goutil/strutil/textscan"
	"github.com/gookit/goutil/testutil/assert"
)

// main scans a small INI-like text using the comments and key-value
// matchers, prints every token it meets, and collects the key-value pairs
// into a map that is dumped at the end together with the scan error.
func main() {
	ts := textscan.TextScanner{}
	ts.AddMatchers(
		&textscan.CommentsMatcher{},
		&textscan.KeyValueMatcher{},
	)

	ts.SetInput(`
# comments 1
name = inhere

// comments 2
age = 28

/*
multi line
comments 3
*/
desc = '''
a multi
line string
'''
`)

	data := make(map[string]string)
	err := ts.Each(func(t textscan.Token) {
		fmt.Println("====> Token kind:", t.Kind())
		fmt.Println(t.String())

		if t.Kind() != textscan.TokValue {
			return
		}

		// A token of kind TokValue is not guaranteed to be a *ValueToken
		// (NewStringToken accepts any kind), so use the checked form of the
		// type assertion rather than risking a panic.
		if v, ok := t.(*textscan.ValueToken); ok {
			data[v.Key()] = v.Value()
		}
	})

	// Header shown in the documented output before the dumped map.
	fmt.Println("==== Collected data:")
	dump.P(data, err)
}

Output:

====> Token kind: Comments
# comments 1
====> Token kind: Value
key: name
value: "inhere"
comments: 
====> Token kind: Comments
// comments 2
====> Token kind: Value
key: age
value: "28"
comments: 
====> Token kind: Comments
/*
multi line
comments 3
*/
====> Token kind: Value
key: desc
value: "\n\na multi\nline string\n"
comments: 

==== Collected data:
map[string]string { #len=3
  "desc": string("
a multi
line string
"), #len=22
  "name": string("inhere"), #len=6
  "age": string("28"), #len=2
},

Projects using `textscan`

textscan is used in these projects:

Documentation ¶

Overview ¶

Package textscan implements a parser that quickly scans and analyzes text content. It can be used to parse INI, Properties and other formats.

Index ¶

Constants
Variables
func AddKind(k Kind, name string)
func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error)
func CommentsDetectEnd(line string) bool
func HasKind(k Kind) bool
func IsKindToken(k Kind, tok Token) bool
func KindString(k Kind) string
type BaseToken
- func (t *BaseToken) IsValid() bool
- func (t *BaseToken) Kind() Kind
- func (t *BaseToken) String() string
- func (t *BaseToken) Value() string
type CommentToken
- func NewCommentToken(val string) *CommentToken
- func (t *CommentToken) HasMore() bool
- func (t *CommentToken) MergeSame(tok Token) error
- func (t *CommentToken) ScanMore(ts *TextScanner) error
- func (t *CommentToken) String() string
- func (t *CommentToken) Value() string
type CommentsMatcher
- func (m *CommentsMatcher) Match(text string, prev Token) (Token, error)
- func (m *CommentsMatcher) MatchEnd(text string) bool
type ErrScan
- func (e ErrScan) Error() string
type HandleFn
type KeyValueMatcher
- func (m *KeyValueMatcher) DetectEnd(mark, text string) (ok bool, val string)
- func (m *KeyValueMatcher) Match(text string, prev Token) (Token, error)
type Kind
- func (k Kind) String() string
type LiteToken
type Matcher
type Parser
- func NewParser(fn HandleFn) *Parser
- func (p *Parser) AddMatchers(ms ...Matcher)
- func (p *Parser) Parse(bs []byte) error
- func (p *Parser) ParseFrom(r io.Reader) error
- func (p *Parser) ParseText(text string) error
type StringToken
- func NewEmptyToken() *StringToken
- func NewStringToken(k Kind, val string) *StringToken
- func (t *StringToken) HasMore() bool
- func (t *StringToken) MergeSame(_ Token) error
- func (t *StringToken) ScanMore(_ *TextScanner) error
type TextScanner
- func NewScanner(in any) *TextScanner
- func (s *TextScanner) AddKind(k Kind, name string)
- func (s *TextScanner) AddMatchers(ms ...Matcher)
- func (s *TextScanner) Each(fn func(t Token)) error
- func (s *TextScanner) Err() error
- func (s *TextScanner) Line() int
- func (s *TextScanner) PrevToken() Token
- func (s *TextScanner) Scan() bool
- func (s *TextScanner) ScanNext() (ok bool, text string)
- func (s *TextScanner) SetInput(in any)
- func (s *TextScanner) SetNext(text string)
- func (s *TextScanner) SetSplit(fn bufio.SplitFunc)
- func (s *TextScanner) Token() Token
type Token
type ValueToken
- func (t *ValueToken) Comment() string
- func (t *ValueToken) HasComment() bool
- func (t *ValueToken) HasMore() bool
- func (t *ValueToken) Key() string
- func (t *ValueToken) Mark() string
- func (t *ValueToken) MergeSame(_ Token) error
- func (t *ValueToken) ScanMore(ts *TextScanner) error
- func (t *ValueToken) String() string
- func (t *ValueToken) Value() string
- func (t *ValueToken) Values() []string

Examples ¶

NewScanner

Constants ¶

View Source

// Special marker strings used for multi-line values and multi-line comments.
const (
	// MultiLineValMarkS is the triple single-quote mark; the README example
	// wraps its multi-line value in it.
	MultiLineValMarkS = "'''"
	// MultiLineValMarkD is the triple double-quote mark.
	MultiLineValMarkD = `"""`
	MultiLineValMarkH = "<<<" // heredoc at start. <<<TXT ... TXT
	MultiLineValMarkQ = "\\"  // at end. eg: properties contents
	// MultiLineCmtEnd is presumably the closing mark of a /* ... */
	// multi-line comment — confirm against the implementation.
	MultiLineCmtEnd   = "*/"
)

Defines the special character constants.

Variables ¶

View Source

// ErrCommentsNotEnd is the sentinel error whose message says that a
// multi-line comment has not ended.
var ErrCommentsNotEnd = errors.New("not end of multi-line comments")

ErrCommentsNotEnd error

View Source

// ErrMLineValueNotEnd is the sentinel error whose message says that a
// multi-line value has not ended.
var ErrMLineValueNotEnd = errors.New("not end of multi line value")

ErrMLineValueNotEnd error

Functions ¶

func AddKind ¶

func AddKind(k Kind, name string)

AddKind add global kind to kinds

func CommentsDetect ¶

func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error)

CommentsDetect check.

- inlineChars: #

default match:

inline #, //
multi line: /*

func CommentsDetectEnd ¶

func CommentsDetectEnd(line string) bool

CommentsDetectEnd multi line comments end

func HasKind ¶

func HasKind(k Kind) bool

HasKind check

func IsKindToken ¶

func IsKindToken(k Kind, tok Token) bool

IsKindToken check

func KindString ¶

func KindString(k Kind) string

KindString name

Types ¶

type BaseToken ¶

type BaseToken struct {
	// contains filtered or unexported fields
}

BaseToken struct

func (*BaseToken) IsValid ¶

func (t *BaseToken) IsValid() bool

IsValid token

func (*BaseToken) Kind ¶

func (t *BaseToken) Kind() Kind

Kind type

func (*BaseToken) String ¶

func (t *BaseToken) String() string

String of token

func (*BaseToken) Value ¶

func (t *BaseToken) Value() string

Value of token

type CommentToken ¶

type CommentToken struct {
	BaseToken
	// contains filtered or unexported fields
}

CommentToken struct

func NewCommentToken ¶

func NewCommentToken(val string) *CommentToken

NewCommentToken instance.

func (*CommentToken) HasMore ¶

func (t *CommentToken) HasMore() bool

HasMore is multi line values

func (*CommentToken) MergeSame ¶

func (t *CommentToken) MergeSame(tok Token) error

MergeSame comments token

func (*CommentToken) ScanMore ¶

func (t *CommentToken) ScanMore(ts *TextScanner) error

ScanMore scan multi line values

func (*CommentToken) String ¶

func (t *CommentToken) String() string

String for token

func (*CommentToken) Value ¶

func (t *CommentToken) Value() string

Value of token

type CommentsMatcher ¶

// CommentsMatcher matches comments lines.
type CommentsMatcher struct {
	// InlineChars holds the characters used to match inline comments.
	// Default is: #
	InlineChars []byte
	// MatchFn is the function used to match a comments line. Its results
	// (ok, more, err) have the same shape as those of CommentsDetect.
	// NOTE(review): the previous note mentioned a "mark" value useful for
	// multi-line comments, but the signature has no such parameter — confirm.
	MatchFn func(text string) (ok, more bool, err error)
	// DetectEnd is the function used to detect the end of multi-line comments.
	DetectEnd func(text string) bool
}

CommentsMatcher match comments lines. will auto merge prev comments token

func (*CommentsMatcher) Match ¶

func (m *CommentsMatcher) Match(text string, prev Token) (Token, error)

Match comments token

func (*CommentsMatcher) MatchEnd ¶

func (m *CommentsMatcher) MatchEnd(text string) bool

MatchEnd for multi line comments

type ErrScan ¶

// ErrScan describes an error met while scanning or parsing contents.
type ErrScan struct {
	Msg  string // error message
	Line int    // line number of the error, starting from 1
	Text string // text contents at the point of the error
}

ErrScan error on scan or parse contents

func (ErrScan) Error ¶

func (e ErrScan) Error() string

Error string

type HandleFn ¶

// HandleFn is a function that handles a single token. It is the type of
// Parser.Func and of the argument accepted by NewParser.
type HandleFn func(t Token)

HandleFn for token

type KeyValueMatcher ¶

// KeyValueMatcher matches key-value tokens in `KEY=VALUE` line text contents.
type KeyValueMatcher struct {
	// Separator is the string that splits key and value. Default is "=".
	Separator string
	// MergeComments collects the previous comments token into the value token.
	// If set to true, remember to skip TokComments tokens on each s.Scan().
	MergeComments bool
	// InlineComment enables parsing and splitting of an inline comment.
	InlineComment bool
	// DisableMultiLine disables parsing of multi-line values.
	DisableMultiLine bool
	// KeyCheckFn is a function that checks whether a key string is valid.
	KeyCheckFn func(key string) error
}

KeyValueMatcher match key-value token. Support parse `KEY=VALUE` line text contents.

func (*KeyValueMatcher) DetectEnd ¶

func (m *KeyValueMatcher) DetectEnd(mark, text string) (ok bool, val string)

DetectEnd for multi line value

func (*KeyValueMatcher) Match ¶

func (m *KeyValueMatcher) Match(text string, prev Token) (Token, error)

Match text line.

type Kind ¶

// Kind identifies the kind of a token; it is stored as a uint8.
type Kind uint8

Kind type

// Built-in token kinds, numbered from zero via iota.
const (
	// TokInvalid is the zero value of Kind.
	TokInvalid Kind = iota
	TokKey
	// TokValue is the kind the examples check before treating a token as a
	// *ValueToken.
	TokValue
	// TokComments is the kind that must be skipped when
	// KeyValueMatcher.MergeComments is enabled.
	TokComments
)

builtin defined kinds

func (Kind) String ¶

func (k Kind) String() string

String name for kind

type LiteToken ¶

// LiteToken is the minimal token interface; Token embeds it.
type LiteToken interface {
	// Kind returns the kind of the token.
	Kind() Kind
	// Value returns the value of the token as a string.
	Value() string
	// IsValid reports whether the token is valid; the NewScanner example
	// skips tokens for which it returns false.
	IsValid() bool
}

LiteToken interface

type Matcher ¶

// Matcher is the interface implemented by token matchers such as
// CommentsMatcher and KeyValueMatcher.
type Matcher interface {
	// Match matches a line of text and, on success, returns a new Token.
	// prev is the previous token.
	// NOTE(review): the result for a non-matching line is not shown here
	// (presumably a nil token) — confirm against the implementation.
	Match(line string, prev Token) (tok Token, err error)
}

Matcher interface

type Parser ¶

type Parser struct {

	// Func for handle tokens
	Func HandleFn
	// contains filtered or unexported fields
}

Parser struct

func NewParser ¶

func NewParser(fn HandleFn) *Parser

NewParser instance

func (*Parser) AddMatchers ¶

func (p *Parser) AddMatchers(ms ...Matcher)

AddMatchers register token matchers

func (*Parser) Parse ¶

func (p *Parser) Parse(bs []byte) error

Parse input bytes

func (*Parser) ParseFrom ¶

func (p *Parser) ParseFrom(r io.Reader) error

ParseFrom input reader

func (*Parser) ParseText ¶

func (p *Parser) ParseText(text string) error

ParseText input string

type StringToken ¶

// StringToken is a token that embeds BaseToken and adds no fields of its own.
// Instances are created with NewStringToken or NewEmptyToken.
type StringToken struct {
	BaseToken
}

StringToken struct

func NewEmptyToken ¶

func NewEmptyToken() *StringToken

NewEmptyToken creates a new empty token instance. It can be used when you want to skip parsing some contents.

func NewStringToken ¶

func NewStringToken(k Kind, val string) *StringToken

NewStringToken instance.

func (*StringToken) HasMore ¶

func (t *StringToken) HasMore() bool

HasMore is multi line values

func (*StringToken) MergeSame ¶

func (t *StringToken) MergeSame(_ Token) error

MergeSame implements

func (*StringToken) ScanMore ¶

func (t *StringToken) ScanMore(_ *TextScanner) error

ScanMore implements

type TextScanner ¶

type TextScanner struct {
	// contains filtered or unexported fields
}

TextScanner struct.

func NewScanner ¶

func NewScanner(in any) *TextScanner

NewScanner instance

Example ¶

package main

import (
	"fmt"

	"github.com/gookit/goutil/strutil/textscan"
)

// main creates a scanner over the source text, registers the comments and
// key-value matchers, prints every value token found, and reports the scan
// error, if any, once scanning stops.
func main() {
	ts := textscan.NewScanner(`source code`)
	// add token matcher, can add your custom matcher
	ts.AddMatchers(
		&textscan.CommentsMatcher{
			InlineChars: []byte{'#'},
		},
		&textscan.KeyValueMatcher{
			MergeComments: true,
		},
	)

	// scan and parsing
	for ts.Scan() {
		tok := ts.Token()

		// Skip invalid tokens and every kind other than value tokens.
		if !tok.IsValid() || tok.Kind() != textscan.TokValue {
			continue
		}

		// Custom handle the parsed token. The checked assertion avoids a
		// panic if a matcher produced a TokValue token that is not a
		// *ValueToken (NewStringToken accepts any kind).
		if vt, ok := tok.(*textscan.ValueToken); ok {
			fmt.Println(vt)
		}
	}

	if err := ts.Err(); err != nil {
		fmt.Println("ERROR:", err)
	}
}

Output:

func (*TextScanner) AddKind ¶

func (s *TextScanner) AddKind(k Kind, name string)

AddKind register new kind

func (*TextScanner) AddMatchers ¶

func (s *TextScanner) AddMatchers(ms ...Matcher)

AddMatchers register token matchers

func (*TextScanner) Each ¶

func (s *TextScanner) Each(fn func(t Token)) error

Each calls the given func for every scanned token.

func (*TextScanner) Err ¶

func (s *TextScanner) Err() error

Err get

func (*TextScanner) Line ¶

func (s *TextScanner) Line() int

Line on current

func (*TextScanner) PrevToken ¶

func (s *TextScanner) PrevToken() Token

PrevToken get of previous scan.

func (*TextScanner) Scan ¶

func (s *TextScanner) Scan() bool

Scan scans the source input and parses it. Use Token() to get the currently parsed token.

Usage:

// Create a scanner over the source text.
ts := textscan.NewScanner(`source ...`)
for ts.Scan() {
	// Token returns the token of the current scan.
	tok := ts.Token()
	// do something...
}
// Print the scanner's error value once the loop has finished.
fmt.Println(ts.Err())

func (*TextScanner) ScanNext ¶

func (s *TextScanner) ScanNext() (ok bool, text string)

ScanNext advance and fetch next line text

func (*TextScanner) SetInput ¶

func (s *TextScanner) SetInput(in any)

SetInput for scan and parse

func (*TextScanner) SetNext ¶

func (s *TextScanner) SetNext(text string)

SetNext text for scan and parse

func (*TextScanner) SetSplit ¶

func (s *TextScanner) SetSplit(fn bufio.SplitFunc)

SetSplit set split func on scan

func (*TextScanner) Token ¶

func (s *TextScanner) Token() Token

Token get of current scan.

type Token ¶

// Token is the full token interface. It extends LiteToken with a string
// form and with the hooks used for multi-line values and merging.
type Token interface {
	LiteToken
	// String returns the string form of the token.
	String() string
	// HasMore reports whether the token is a multi-line value.
	HasMore() bool
	// ScanMore scans multi-line values using the given scanner.
	ScanMore(ts *TextScanner) error
	// MergeSame merges the given token into this one.
	// NOTE(review): presumably tok is expected to be of the same kind — confirm.
	MergeSame(tok Token) error
}

Token parser

type ValueToken ¶

type ValueToken struct {
	BaseToken
	// contains filtered or unexported fields
}

ValueToken contains key and value contents

func (*ValueToken) Comment ¶

func (t *ValueToken) Comment() string

Comment lines string

func (*ValueToken) HasComment ¶

func (t *ValueToken) HasComment() bool

HasComment for the value

func (*ValueToken) HasMore ¶

func (t *ValueToken) HasMore() bool

HasMore is multi line values

func (*ValueToken) Key ¶

func (t *ValueToken) Key() string

Key name

func (*ValueToken) Mark ¶ added in v0.5.15

func (t *ValueToken) Mark() string

Mark for multi line values

func (*ValueToken) MergeSame ¶

func (t *ValueToken) MergeSame(_ Token) error

MergeSame comments token

func (*ValueToken) ScanMore ¶

func (t *ValueToken) ScanMore(ts *TextScanner) error

ScanMore scan multi line values

func (*ValueToken) String ¶

func (t *ValueToken) String() string

String of token

func (*ValueToken) Value ¶

func (t *ValueToken) Value() string

Value text string.

func (*ValueToken) Values ¶ added in v0.5.15

func (t *ValueToken) Values() []string

Values for multi line values

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

README ¶

TextScan

Install

Examples

Projects using textscan

Documentation ¶

Overview ¶

Index ¶

Examples ¶

Constants ¶

Variables ¶

Functions ¶

func AddKind ¶

func CommentsDetect ¶

func CommentsDetectEnd ¶

func HasKind ¶

func IsKindToken ¶

func KindString ¶

Types ¶

type BaseToken ¶

func (*BaseToken) IsValid ¶

func (*BaseToken) Kind ¶

func (*BaseToken) String ¶

func (*BaseToken) Value ¶

type CommentToken ¶

func NewCommentToken ¶

func (*CommentToken) HasMore ¶

func (*CommentToken) MergeSame ¶

func (*CommentToken) ScanMore ¶

func (*CommentToken) String ¶

func (*CommentToken) Value ¶

type CommentsMatcher ¶

func (*CommentsMatcher) Match ¶

func (*CommentsMatcher) MatchEnd ¶

type ErrScan ¶

func (ErrScan) Error ¶

type HandleFn ¶

type KeyValueMatcher ¶

func (*KeyValueMatcher) DetectEnd ¶

func (*KeyValueMatcher) Match ¶

type Kind ¶

func (Kind) String ¶

type LiteToken ¶

type Matcher ¶

type Parser ¶

func NewParser ¶

func (*Parser) AddMatchers ¶

func (*Parser) Parse ¶

func (*Parser) ParseFrom ¶

func (*Parser) ParseText ¶

type StringToken ¶

func NewEmptyToken ¶

func NewStringToken ¶

func (*StringToken) HasMore ¶

func (*StringToken) MergeSame ¶

func (*StringToken) ScanMore ¶

type TextScanner ¶

func NewScanner ¶

func (*TextScanner) AddKind ¶

func (*TextScanner) AddMatchers ¶

func (*TextScanner) Each ¶

func (*TextScanner) Err ¶

func (*TextScanner) Line ¶

func (*TextScanner) PrevToken ¶

func (*TextScanner) Scan ¶

func (*TextScanner) ScanNext ¶

func (*TextScanner) SetInput ¶

func (*TextScanner) SetNext ¶

func (*TextScanner) SetSplit ¶

func (*TextScanner) Token ¶

type Token ¶

type ValueToken ¶

func (*ValueToken) Comment ¶

func (*ValueToken) HasComment ¶

func (*ValueToken) HasMore ¶

func (*ValueToken) Key ¶

func (*ValueToken) Mark ¶ added in v0.5.15

func (*ValueToken) MergeSame ¶

func (*ValueToken) ScanMore ¶

func (*ValueToken) String ¶

Projects using `textscan`