Versions in this module

v1

v1.1.7 Aug 25, 2024

Changes in this version

+ var IdeographRegexp = regexp.MustCompile(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`)
+ func MakeToken(input []byte) *analysis.Token
+ func MakeTokenStream(input []byte) analysis.TokenStream
+ type CharacterTokenizer struct
    + func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer
    + func NewLetterTokenizer() *CharacterTokenizer
    + func NewWhitespaceTokenizer() *CharacterTokenizer
    + func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream
+ type ExceptionsTokenizer struct
    + func NewExceptionsTokenizer(exception *regexp.Regexp, remaining analysis.Tokenizer) *ExceptionsTokenizer
    + func NewWebTokenizer() *ExceptionsTokenizer
    + func (t *ExceptionsTokenizer) Tokenize(input []byte) analysis.TokenStream
+ type IsTokenRune func(r rune) bool
+ type RegexpTokenizer struct
    + func NewRegexpTokenizer(r *regexp.Regexp) *RegexpTokenizer
    + func (rt *RegexpTokenizer) Tokenize(input []byte) analysis.TokenStream
+ type SingleTokenTokenizer struct
    + func NewSingleTokenTokenizer() *SingleTokenTokenizer
    + func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream
+ type UnicodeTokenizer struct
    + func NewUnicodeTokenizer() *UnicodeTokenizer
    + func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream