Versions in this module Expand all Collapse all v0 v0.0.2 Sep 18, 2023 v0.0.1 Sep 18, 2023 Changes in this version + func IsCjkPunct(r rune) bool + type AnnotateTokens interface + Annotate func([]*Token) []*Token + func NewAnnotations(s *Storage, p PunctStrings, word WordTokenizer) []AnnotateTokens + type DefaultPunctStrings struct + func NewPunctStrings() *DefaultPunctStrings + func (p *DefaultPunctStrings) HasSentencePunct(text string) bool + func (p *DefaultPunctStrings) NonPunct() string + func (p *DefaultPunctStrings) Punctuation() string + type DefaultSentenceTokenizer struct + Annotations []AnnotateTokens + func NewSentenceTokenizer(s *Storage) *DefaultSentenceTokenizer + func NewTokenizer(s *Storage, word WordTokenizer, lang PunctStrings) *DefaultSentenceTokenizer + func (s *DefaultSentenceTokenizer) AnnotateTokens(tokens []*Token, annotate ...AnnotateTokens) []*Token + func (s *DefaultSentenceTokenizer) AnnotatedTokens(text string) []*Token + func (s *DefaultSentenceTokenizer) SentencePositions(text string) []int + func (s *DefaultSentenceTokenizer) Tokenize(text string) []*Sentence + func (s *DefaultSentenceTokenizer) Tokenize2String(text string) []string + type DefaultTokenGrouper struct + func (p *DefaultTokenGrouper) Group(tokens []*Token) [][2]*Token + type DefaultWordTokenizer struct + func NewWordTokenizer(p PunctStrings) *DefaultWordTokenizer + func (p *DefaultWordTokenizer) FirstLower(t *Token) bool + func (p *DefaultWordTokenizer) FirstUpper(t *Token) bool + func (p *DefaultWordTokenizer) HasPeriodFinal(t *Token) bool + func (p *DefaultWordTokenizer) HasSentEndChars(t *Token) bool + func (p *DefaultWordTokenizer) HasUnreliableEndChars(t *Token) bool + func (p *DefaultWordTokenizer) IsAlpha(t *Token) bool + func (p *DefaultWordTokenizer) IsCoordinatePartOne(t *Token) bool + func (p *DefaultWordTokenizer) IsCoordinatePartTwo(t *Token) bool + func (p *DefaultWordTokenizer) IsEllipsis(t *Token) bool + 
func (p *DefaultWordTokenizer) IsInitial(t *Token) bool + func (p *DefaultWordTokenizer) IsListNumber(t *Token) bool + func (p *DefaultWordTokenizer) IsNonPunct(t *Token) bool + func (p *DefaultWordTokenizer) IsNumber(t *Token) bool + func (p *DefaultWordTokenizer) Tokenize(text string, onlyPeriodContext bool) []*Token + func (p *DefaultWordTokenizer) Type(t *Token) string + func (p *DefaultWordTokenizer) TypeNoPeriod(t *Token) string + func (p *DefaultWordTokenizer) TypeNoSentPeriod(t *Token) string + type Ortho interface + Heuristic func(*Token) int + type OrthoContext struct + func (o *OrthoContext) Heuristic(token *Token) int + type PunctStrings interface + HasSentencePunct func(string) bool + NonPunct func() string + Punctuation func() string + type Sentence struct + End int + Start int + Text string + func (s Sentence) String() string + type SentenceTokenizer interface + AnnotateTokens func([]*Token, ...AnnotateTokens) []*Token + Tokenize func(string) []*Sentence + type SetString map[string]int + func (ss SetString) Add(str string) + func (ss SetString) Array() []string + func (ss SetString) Has(str string) bool + func (ss SetString) Remove(str string) + type Storage struct + AbbrevTypes SetString + Collocations SetString + OrthoContext SetString + SentStarters SetString + func LoadTraining(data []byte) (*Storage, error) + func NewStorage() *Storage + func (p *Storage) IsAbbr(tokens ...string) bool + type Token struct + Abbr bool + LineStart bool + ParaStart bool + Position int + SentBreak bool + Tok string + func NewToken(token string) *Token + func (p *Token) String() string + type TokenBasedAnnotation struct + func (a *TokenBasedAnnotation) Annotate(tokens []*Token) []*Token + type TokenExistential interface + HasPeriodFinal func(*Token) bool + HasSentEndChars func(*Token) bool + HasUnreliableEndChars func(*Token) bool + IsAlpha func(*Token) bool + IsCoordinatePartOne func(*Token) bool + IsCoordinatePartTwo func(*Token) bool + IsEllipsis func(*Token) bool + IsInitial func(*Token) bool + 
IsListNumber func(*Token) bool + IsNonPunct func(*Token) bool + IsNumber func(*Token) bool + type TokenFirst interface + FirstLower func(*Token) bool + FirstUpper func(*Token) bool + type TokenGrouper interface + Group func([]*Token) [][2]*Token + type TokenParser interface + type TokenType interface + Type func(*Token) string + TypeNoPeriod func(*Token) string + TypeNoSentPeriod func(*Token) string + type TypeBasedAnnotation struct + func NewTypeBasedAnnotation(s *Storage, p PunctStrings, e TokenExistential) *TypeBasedAnnotation + func (a *TypeBasedAnnotation) Annotate(tokens []*Token) []*Token + type WordTokenizer interface + Tokenize func(string, bool) []*Token