bart

package

v0.0.0-...-53a6fda Latest Latest Go to latest Published: Aug 6, 2024 License: BSD-2-Clause Imports: 15 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/yinziyang/cybertron

Links

Open Source Insights

Documentation ¶

Index ¶

type BPETokenizer
- func (m *BPETokenizer) Detokenize(tokenIds []int, stripPaddingTokens bool) string
- func (m *BPETokenizer) Tokenize(text string) ([]int, error)
type SentencePieceTokenizer
- func (m *SentencePieceTokenizer) Detokenize(tokenIds []int, stripPaddingTokens bool) string
- func (m *SentencePieceTokenizer) Tokenize(text string) ([]int, error)
type TextGeneration
- func LoadTextGeneration(modelPath string) (*TextGeneration, error)
- func (m *TextGeneration) Generate(ctx context.Context, text string, opts *textgeneration.Options) (textgeneration.Response, error)
type Tokenizer

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type BPETokenizer ¶

// BPETokenizer embeds a *bpetokenizer.BPETokenizer and stores a set of
// special-token IDs next to it. Its Tokenize and Detokenize methods have the
// signatures declared by the Tokenizer interface.
//
// NOTE(review): the per-field notes below are inferred from the field names
// only; confirm them against the package source.
type BPETokenizer struct {
	*bpetokenizer.BPETokenizer
	// presumably the end-of-sequence token ID — confirm
	EosTokenID           int
	// presumably the beginning-of-sequence token ID — confirm
	BosTokenID           int
	// presumably the padding token ID — confirm
	PadTokenID           int
	// presumably the token ID that starts decoder input — confirm
	DecoderStartTokenID  int
	// keyed by int, valued by string; presumably token ID -> token text — confirm
	ExtraSpecialTokenIDs map[int]string
}

func (*BPETokenizer) Detokenize ¶

func (m *BPETokenizer) Detokenize(tokenIds []int, stripPaddingTokens bool) string

Detokenize returns the text of the input token IDs, removing the padding token.

func (*BPETokenizer) Tokenize ¶

func (m *BPETokenizer) Tokenize(text string) ([]int, error)

Tokenize returns the token IDs of the input text, applying the EOS pad token.

type SentencePieceTokenizer ¶

// SentencePieceTokenizer embeds a *sentencepiece.Tokenizer and stores a set of
// special-token IDs next to it. Its Tokenize and Detokenize methods have the
// signatures declared by the Tokenizer interface.
//
// NOTE(review): the per-field notes below are inferred from the field names
// only; confirm them against the package source.
type SentencePieceTokenizer struct {
	*sentencepiece.Tokenizer
	// presumably the end-of-sequence token ID — confirm
	EosTokenID          int
	// presumably the beginning-of-sequence token ID — confirm
	BosTokenID          int
	// presumably the padding token ID — confirm
	PadTokenID          int
	// presumably the token ID that starts decoder input — confirm
	DecoderStartTokenID int
}

func (*SentencePieceTokenizer) Detokenize ¶

func (m *SentencePieceTokenizer) Detokenize(tokenIds []int, stripPaddingTokens bool) string

Detokenize returns the text of the input token IDs, removing the padding token.

func (*SentencePieceTokenizer) Tokenize ¶

func (m *SentencePieceTokenizer) Tokenize(text string) ([]int, error)

Tokenize returns the token IDs of the input text, applying the EOS pad token.

type TextGeneration ¶

// TextGeneration pairs a BART conditional-generation model with the
// Tokenizer that is used together with it.
type TextGeneration struct {
	// Model is the BART model (*bart.ModelForConditionalGeneration) used for
	// conditional generation.
	Model *bart.ModelForConditionalGeneration
	// Tokenizer is the tokenizer used for conditional generation; any value
	// implementing the Tokenizer interface (Tokenize / Detokenize) fits here.
	Tokenizer Tokenizer
}

TextGeneration contains the ModelForConditionalGeneration and the Tokenizer used for conditional generation tasks, such as machine translation and summarization.

func LoadTextGeneration ¶

func LoadTextGeneration(modelPath string) (*TextGeneration, error)

LoadTextGeneration returns a TextGeneration, loading the model, the embeddings, and the tokenizer from the directory at modelPath.

func (*TextGeneration) Generate ¶

func (m *TextGeneration) Generate(ctx context.Context, text string, opts *textgeneration.Options) (textgeneration.Response, error)

Generate generates text from the input.

type Tokenizer ¶

// Tokenizer is the interface type of the TextGeneration.Tokenizer field.
// Both *BPETokenizer and *SentencePieceTokenizer declare methods with these
// signatures.
type Tokenizer interface {
	// Tokenize returns the token IDs of the input text, or an error.
	Tokenize(text string) ([]int, error)
	// Detokenize returns the text for the given token IDs.
	// NOTE(review): stripPaddingTokens presumably controls whether padding
	// tokens are removed before decoding — confirm against the implementations.
	Detokenize(tokenIds []int, stripPaddingTokens bool) string
}

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL