Documentation
¶
Overview ¶
Package textscan implements a parser that quickly scans and analyzes text content. It can be used to parse INI, Properties and other formats.
Index ¶
- Constants
- Variables
- func AddKind(k Kind, name string)
- func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error)
- func CommentsDetectEnd(line string) bool
- func HasKind(k Kind) bool
- func IsKindToken(k Kind, tok Token) bool
- func KindString(k Kind) string
- type BaseToken
- type CommentToken
- type CommentsMatcher
- type ErrScan
- type HandleFn
- type KeyValueMatcher
- type Kind
- type LiteToken
- type Matcher
- type Parser
- type StringToken
- type TextScanner
- func (s *TextScanner) AddKind(k Kind, name string)
- func (s *TextScanner) AddMatchers(ms ...Matcher)
- func (s *TextScanner) Each(fn func(t Token)) error
- func (s *TextScanner) Err() error
- func (s *TextScanner) Line() int
- func (s *TextScanner) PrevToken() Token
- func (s *TextScanner) Scan() bool
- func (s *TextScanner) ScanNext() (ok bool, text string)
- func (s *TextScanner) SetInput(in any)
- func (s *TextScanner) SetNext(text string)
- func (s *TextScanner) SetSplit(fn bufio.SplitFunc)
- func (s *TextScanner) Token() Token
- type Token
- type ValueToken
- func (t *ValueToken) Comment() string
- func (t *ValueToken) HasComment() bool
- func (t *ValueToken) HasMore() bool
- func (t *ValueToken) Key() string
- func (t *ValueToken) Mark() string
- func (t *ValueToken) MergeSame(_ Token) error
- func (t *ValueToken) ScanMore(ts *TextScanner) error
- func (t *ValueToken) String() string
- func (t *ValueToken) Value() string
- func (t *ValueToken) Values() []string
Examples ¶
Constants ¶
const ( MultiLineValMarkS = "'''" MultiLineValMarkD = `"""` MultiLineValMarkH = "<<<" // heredoc at start. <<<TXT ... TXT MultiLineValMarkQ = "\\" // at end. eg: properties contents MultiLineCmtEnd = "*/" )
Defines the special marker constants used for multi-line values and multi-line comments.
Variables ¶
var ErrCommentsNotEnd = errors.New("not end of multi-line comments")
ErrCommentsNotEnd is the error for multi-line comments that have not ended.
var ErrMLineValueNotEnd = errors.New("not end of multi line value")
ErrMLineValueNotEnd is the error for a multi-line value that has not ended.
Functions ¶
func CommentsDetect ¶
CommentsDetect checks whether the given string is a comment.
- inlineChars: #
default match:
- inline #, //
- multi line: /*
func CommentsDetectEnd ¶
CommentsDetectEnd detects the end of a multi-line comment.
Types ¶
type BaseToken ¶
type BaseToken struct {
// contains filtered or unexported fields
}
BaseToken struct
type CommentToken ¶
type CommentToken struct { BaseToken // contains filtered or unexported fields }
CommentToken struct
func (*CommentToken) MergeSame ¶
func (t *CommentToken) MergeSame(tok Token) error
MergeSame comments token
func (*CommentToken) ScanMore ¶
func (t *CommentToken) ScanMore(ts *TextScanner) error
ScanMore scan multi line values
type CommentsMatcher ¶
type CommentsMatcher struct { // InlineChars for match inline comments. default is: # InlineChars []byte // MatchFn for comments line // - mark useful on multi line comments MatchFn func(text string) (ok, more bool, err error) // DetectEnd for multi line comments DetectEnd func(text string) bool }
CommentsMatcher matches comment lines. It automatically merges with the previous comments token.
func (*CommentsMatcher) Match ¶
func (m *CommentsMatcher) Match(text string, prev Token) (Token, error)
Match comments token
func (*CommentsMatcher) MatchEnd ¶
func (m *CommentsMatcher) MatchEnd(text string) bool
MatchEnd for multi line comments
type ErrScan ¶
type ErrScan struct { Msg string // error message Line int // error line number, start 1 Text string // text contents on error }
ErrScan is the error reported when scanning or parsing contents fails.
type KeyValueMatcher ¶
type KeyValueMatcher struct { // Separator string for split key and value, default is "=" Separator string // MergeComments collect previous comments token to value token. // If set as True, on each s.Scan() please notice skip TokComments MergeComments bool // InlineComment parse and split inline comment InlineComment bool // DisableMultiLine value parse DisableMultiLine bool // KeyCheckFn set func check key string is valid KeyCheckFn func(key string) error }
KeyValueMatcher matches key-value tokens. It supports parsing `KEY=VALUE` line text contents.
type Matcher ¶
type Matcher interface { // Match text line by kind, if success returns a new Token Match(line string, prev Token) (tok Token, err error) }
Matcher interface
type Parser ¶
type Parser struct { // Func for handle tokens Func HandleFn // contains filtered or unexported fields }
Parser struct
func (*Parser) AddMatchers ¶
AddMatchers register token matchers
type StringToken ¶
type StringToken struct {
BaseToken
}
StringToken struct
func NewEmptyToken ¶
func NewEmptyToken() *StringToken
NewEmptyToken creates an empty token instance. It can be used when you want to skip parsing some contents.
func (*StringToken) ScanMore ¶
func (t *StringToken) ScanMore(_ *TextScanner) error
ScanMore implements
type TextScanner ¶
type TextScanner struct {
// contains filtered or unexported fields
}
TextScanner struct.
func NewScanner ¶
func NewScanner(in any) *TextScanner
NewScanner instance
Example ¶
package main import ( "fmt" "github.com/gookit/goutil/strutil/textscan" ) func main() { ts := textscan.NewScanner(`source code`) // add token matcher, can add your custom matcher ts.AddMatchers( &textscan.CommentsMatcher{ InlineChars: []byte{'#'}, }, &textscan.KeyValueMatcher{ MergeComments: true, }, ) // scan and parsing for ts.Scan() { tok := ts.Token() if !tok.IsValid() { continue } // Custom handle the parsed token if tok.Kind() == textscan.TokValue { vt := tok.(*textscan.ValueToken) fmt.Println(vt) } } if ts.Err() != nil { fmt.Println("ERROR:", ts.Err()) } }
Output:
func (*TextScanner) AddKind ¶
func (s *TextScanner) AddKind(k Kind, name string)
AddKind register new kind
func (*TextScanner) AddMatchers ¶
func (s *TextScanner) AddMatchers(ms ...Matcher)
AddMatchers register token matchers
func (*TextScanner) Each ¶
func (s *TextScanner) Each(fn func(t Token)) error
Each calls the given func for every token.
func (*TextScanner) PrevToken ¶
func (s *TextScanner) PrevToken() Token
PrevToken returns the token from the previous scan.
func (*TextScanner) Scan ¶
func (s *TextScanner) Scan() bool
Scan scans and parses the source input. Use Token() to get the current parsed token.
Usage:
ts := textscan.NewScanner(`source ...`) for ts.Scan() { tok := ts.Token() // do something... } fmt.Println(ts.Err())
func (*TextScanner) ScanNext ¶
func (s *TextScanner) ScanNext() (ok bool, text string)
ScanNext advances and fetches the next line of text.
func (*TextScanner) SetNext ¶
func (s *TextScanner) SetNext(text string)
SetNext text for scan and parse
func (*TextScanner) SetSplit ¶
func (s *TextScanner) SetSplit(fn bufio.SplitFunc)
SetSplit set split func on scan
type Token ¶
type Token interface { LiteToken String() string // HasMore is multi line values HasMore() bool // ScanMore scan multi line values ScanMore(ts *TextScanner) error MergeSame(tok Token) error }
Token parser
type ValueToken ¶
type ValueToken struct { BaseToken // contains filtered or unexported fields }
ValueToken contains key and value contents
func (*ValueToken) Mark ¶ added in v0.5.15
func (t *ValueToken) Mark() string
Mark for multi line values
func (*ValueToken) MergeSame ¶
func (t *ValueToken) MergeSame(_ Token) error
MergeSame comments token
func (*ValueToken) ScanMore ¶
func (t *ValueToken) ScanMore(ts *TextScanner) error
ScanMore scan multi line values
func (*ValueToken) Values ¶ added in v0.5.15
func (t *ValueToken) Values() []string
Values for multi line values