Versions in this module

v0

v0.1.0 · Sep 25, 2021

Changes in this version:

+ func NewIterTokenizer(opts ...TokenizerOptFunc) (*iterTokenizer, error)
+ type PragmaticSegmenter struct
    + func NewPragmaticSegmenter(lang string) (*PragmaticSegmenter, error)
    + func (p *PragmaticSegmenter) Tokenize(text string) []string
+ type PunktSentenceTokenizer struct
    + func NewPunktSentenceTokenizer() (*PunktSentenceTokenizer, error)
    + func (p PunktSentenceTokenizer) Split(text string) []string
+ type RegexpTokenizer struct
    + func NewBlanklineTokenizer() (*RegexpTokenizer, error)
    + func NewRegexpTokenizer(pattern string, gaps, discard bool) (*RegexpTokenizer, error)
    + func NewWordBoundaryTokenizer() (*RegexpTokenizer, error)
    + func NewWordPunctTokenizer() (*RegexpTokenizer, error)
    + func (r RegexpTokenizer) Split(text string) []string
+ type Splitter interface
    + Split func(s string) []string
+ type TokenTester func(string) bool
+ type TokenizerOptFunc func(*iterTokenizer)
    + func UsingContractions(x []string) TokenizerOptFunc
    + func UsingEmoticons(x map[string]int) TokenizerOptFunc
    + func UsingIsUnsplittable(x TokenTester) TokenizerOptFunc
    + func UsingPrefixes(x []string) TokenizerOptFunc
    + func UsingSanitizer(x *strings.Replacer) TokenizerOptFunc
    + func UsingSpecialRE(x *regexp.Regexp) TokenizerOptFunc
    + func UsingSplitCases(x []string) TokenizerOptFunc
    + func UsingSuffixes(x []string) TokenizerOptFunc
+ type TreebankWordTokenizer struct
    + func NewTreebankWordTokenizer() (*TreebankWordTokenizer, error)
    + func (t TreebankWordTokenizer) Split(text string) []string