Documentation ¶
Index ¶
- func NewWordLevel(vocab map[string]int, unkToken string) *wordlevel.WordLevel
- type RuneLevel
- type RuneLevelVocab
- type Tokenizer
- type Vocab
- func (v *Vocab[T]) IDToToken(id T) (string, error)
- func (v *Vocab[T]) IDsToTokens(ids []T) (tokens []string, err error)
- func (v *Vocab[T]) TokenToID(token string) (T, error)
- func (v *Vocab[T]) TokensToIDs(tokens []string) (ids []T, err error)
- func (v *Vocab[T]) UnkToken() string
- func (v *Vocab[T]) Vocab() map[string]T
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func NewWordLevel ¶
func NewWordLevel(vocab map[string]int, unkToken string) *wordlevel.WordLevel
Types ¶
type RuneLevel ¶
RuneLevel is a model tokenizer that splits each word into runes and maps runes to IDs.
func NewRuneLevel ¶
func NewRuneLevel(vocab RuneLevelVocab) *RuneLevel
type RuneLevelVocab ¶
type Tokenizer ¶
func (*Tokenizer) EncodeBatchSerially ¶
func (t *Tokenizer) EncodeBatchSerially(inputs []tokenizer.EncodeInput, addSpecialTokens bool) ([]tokenizer.Encoding, error)
EncodeBatchSerially encodes all sentences serially.
type Vocab ¶
type Vocab[T constraints.Integer] struct {
	// contains filtered or unexported fields
}
func NewVocabFromFile ¶
func NewVocabFromFile[T constraints.Integer](filename, separator, unkToken string) (*Vocab[T], error)
func NewVocabFromSlice ¶
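func (*Vocab[T]) IDsToTokens ¶
func (v *Vocab[T]) IDsToTokens(ids []T) (tokens []string, err error)
func (*Vocab[T]) TokensToIDs ¶
func (v *Vocab[T]) TokensToIDs(tokens []string) (ids []T, err error)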
Click to show internal directories.
Click to hide internal directories.