Versions in this module

v1

v1.0.0 Oct 28, 2023

Changes in this version

+ const ClassToken
+ const DefaultMaxWordChars
+ const DefaultUnknownToken
+ const SeparatorToken
+ const SequenceSeparator
+ type BasicTokenizer struct
    + Lower bool
    + func NewBasicTokenizer(lower bool) *BasicTokenizer
    + func (bt *BasicTokenizer) Tokenize(text string) []string
+ type Encode struct
    + ID int32
    + MaskIDs []int32
    + Text string
    + TokenIDs []int32
    + Tokens []string
    + TypeIDs []int32
+ type FullTokenizer struct
    + Basic *BasicTokenizer
    + SeqLen int
    + Wordpiece *WordpieceTokenizer
    + func NewFullTokenizer(voc *Vocab, seqLen int, lower bool) *FullTokenizer
    + func (tkz *FullTokenizer) Tokenize(text string) *Encode
+ type ID int32
    + func (id ID) Int32() int32
+ type Vocab struct
    + func FromFile(path string) (*Vocab, error)
    + func New(tokens []string) *Vocab
    + func (v *Vocab) Add(token string)
    + func (v *Vocab) GetID(token string) ID
    + func (v *Vocab) GetToken() map[string]ID
    + func (v *Vocab) Size() int
+ type WordpieceTokenizer struct
    + func NewWordpieceTokenizer(voc *Vocab) *WordpieceTokenizer
    + func (wp *WordpieceTokenizer) SetMaxWordChars(c int)
    + func (wp *WordpieceTokenizer) SetUnknownToken(tok string)
    + func (wp *WordpieceTokenizer) Tokenize(text string) []string