Documentation
¶
Index ¶
- Constants
- func GetStrings(tokens []StringOffsetsPair) []string
- func IsDefaultSpecial(word string) bool
- type BaseTokenizer
- type Dict
- type GenerateModelInferOutputRequest
- type GenerateModelInferRequest
- type HTTPBatchInput
- type HTTPOutput
- type HTTPRequestBody
- type ID
- type InferOutputParameter
- type InputFeature
- type InputObjects
- type ModelService
- func (m *ModelService) CheckModelReady(modelName, modelVersion string, requestTimeout time.Duration) (bool, error)
- func (m *ModelService) CheckServerAlive(requestTimeout time.Duration) (bool, error)
- func (m *ModelService) CheckServerReady(requestTimeout time.Duration) (bool, error)
- func (m *ModelService) GetAllModelInfo(repoName string, isReady bool, requestTimeout time.Duration) (*nvidia_inferenceserver.RepositoryIndexResponse, error)
- func (m *ModelService) GetModelConfig(modelName, modelVersion string, requestTimeout time.Duration) (interface{}, error)
- func (m *ModelService) GetModelInferIsGRPC() bool
- func (m *ModelService) GetModelInferStats(modelName, modelVersion string, requestTimeout time.Duration) (*nvidia_inferenceserver.ModelStatisticsResponse, error)
- func (m *ModelService) GetModelMeta(modelName, modelVersion string, requestTimeout time.Duration) (*nvidia_inferenceserver.ModelMetadataResponse, error)
- func (m *ModelService) GetModelName() string
- func (m *ModelService) GetServerMeta(requestTimeout time.Duration) (*nvidia_inferenceserver.ServerMetadataResponse, error)
- func (m *ModelService) GetTokenizerIsChineseMode() bool
- func (m *ModelService) ModelInfer(inferData []string, modelName, modelVersion string, ...) ([]interface{}, error)
- func (m *ModelService) SetChineseTokenize(isCharMode bool) *ModelService
- func (m *ModelService) SetJsonDecoder(decoder utils.JSONUnmarshal) *ModelService
- func (m *ModelService) SetJsonEncoder(encoder utils.JSONMarshal) *ModelService
- func (m *ModelService) SetMaxSeqLength(maxSeqLen int) *ModelService
- func (m *ModelService) SetModelInferWithGRPC() *ModelService
- func (m *ModelService) SetModelName(modelPrefix, modelName string) *ModelService
- func (m *ModelService) SetSecondaryServerURL(url string) *ModelService
- func (m *ModelService) SetTokenizerReturnPosInfo() *ModelService
- func (m *ModelService) UnsetChineseTokenize() *ModelService
- func (m *ModelService) UnsetModelInferWithGRPC() *ModelService
- func (m *ModelService) UnsetTokenizerReturnPosInfo() *ModelService
- type OffsetsType
- type OptionV1
- type Provider
- type StringOffsetsPair
- type TokenizerV1
- type TokensRange
- type WordPieceTokenizer
- func (t *WordPieceTokenizer) Tokenize(text string) []StringOffsetsPair
- func (t *WordPieceTokenizer) TokenizeChinese(text string) []StringOffsetsPair
- func (t *WordPieceTokenizer) TokenizeChineseCharMode(text string) []StringOffsetsPair
- func (t *WordPieceTokenizer) WordPieceTokenize(tokens []StringOffsetsPair) []StringOffsetsPair
Constants ¶
const ( DefaultMaxSeqLength int = 48 ModelRespBodyOutputBinaryDataKey string = "binary_data" ModelRespBodyOutputClassificationDataKey string = "classification" ModelBertModelSegmentIdsKey string = "segment_ids" ModelBertModelInputIdsKey string = "input_ids" ModelBertModelInputMaskKey string = "input_mask" ModelInt32DataType string = "INT32" ModelInt64DataType string = "INT64" )
Variables ¶
This section is empty.
Functions ¶
func GetStrings ¶
func GetStrings(tokens []StringOffsetsPair) []string
GetStrings returns a sequence of string values from the given slice of StringOffsetsPair.
func IsDefaultSpecial ¶
IsDefaultSpecial returns whether the word matches a special token.
Types ¶
type BaseTokenizer ¶
type BaseTokenizer struct {
// contains filtered or unexported fields
}
BaseTokenizer is a straightforward tokenizer implementation, which splits by whitespace and punctuation characters.
func NewBaseTokenizer ¶
func NewBaseTokenizer(opts ...OptionV1) *BaseTokenizer
NewBaseTokenizer returns a new base tokenizer ready to use.
func (*BaseTokenizer) Tokenize ¶
func (t *BaseTokenizer) Tokenize(text string) []StringOffsetsPair
Tokenize converts the input text to a slice of tokens, where each token is a white-separated word, a number or a punctuation sign. The resulting tokens preserve the alignment with the portion of the original text they belong to.
func (*BaseTokenizer) TokenizeChinese ¶
func (t *BaseTokenizer) TokenizeChinese(text string) []StringOffsetsPair
TokenizeChinese is like Tokenize but focuses on Chinese.
func (*BaseTokenizer) TokenizeChineseCharMode ¶ added in v1.4.4
func (t *BaseTokenizer) TokenizeChineseCharMode(text string) []StringOffsetsPair
TokenizeChineseCharMode is like TokenizeChinese but focuses on Chinese NER.
type Dict ¶
type Dict struct {
// contains filtered or unexported fields
}
Dict is a container for tokens. NOTE: Python uses an OrderedDict here; the implications of that difference are unclear.
func VocabFromFile ¶
VocabFromFile will read a newline delimited file into a Dict.
func VocabFromSlice ¶ added in v1.3.0
VocabFromSlice will read vocab from config into a Dict.
func (Dict) ConvertItems ¶
ConvertItems converts items to ids.
func (Dict) ConvertTokens ¶
ConvertTokens converts a token to an id.
func (Dict) GetID ¶
GetID will return the ID of the token in the vocab. Will be negative if it doesn't exist.
func (Dict) LongestSubstring ¶
LongestSubstring returns the longest token that is a substring of the token.
type GenerateModelInferOutputRequest ¶
type GenerateModelInferOutputRequest func(params ...interface{}) []*nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor
GenerateModelInferOutputRequest model output callback.
type GenerateModelInferRequest ¶
type GenerateModelInferRequest func(batchSize, maxSeqLength int) []*nvidia_inferenceserver.ModelInferRequest_InferInputTensor
GenerateModelInferRequest model input callback.
type HTTPBatchInput ¶
type HTTPBatchInput struct { Name string `json:"name"` Shape []int64 `json:"shape"` DataType string `json:"datatype"` Data [][]int32 `json:"data"` }
HTTPBatchInput is the model HTTP batch request input struct (supports batch size 1).
type HTTPOutput ¶
type HTTPOutput struct { Name string `json:"name"` Parameters InferOutputParameter `json:"parameters"` }
HTTPOutput Model HTTP Request Output Struct.
type HTTPRequestBody ¶
type HTTPRequestBody struct { Inputs []HTTPBatchInput `json:"inputs"` Outputs []HTTPOutput `json:"outputs"` }
HTTPRequestBody Model HTTP Request Body.
type InferOutputParameter ¶
type InferOutputParameter struct { BinaryData bool `json:"binary_data"` Classification int64 `json:"classification"` }
InferOutputParameter triton inference server infer parameters.
type InputFeature ¶
type InputFeature struct { Text string // origin text Tokens []string // token. like CLS/SEP after tokenizer TokenIDs []int32 // input_ids Mask []int32 // input_mask TypeIDs []int32 // segment_ids }
InputFeature Bert InputFeature.
type InputObjects ¶
type InputObjects struct { Input string Tokens []string PosArray []OffsetsType }
InputObjects bert input objects for position record.
type ModelService ¶
type ModelService struct { BertVocab Dict BertTokenizer *WordPieceTokenizer // contains filtered or unexported fields }
func NewModelService ¶
func NewModelService( bertVocabPath, httpAddr string, httpClient *fasthttp.Client, grpcConn *grpc.ClientConn, modelInputCallback GenerateModelInferRequest, modelOutputCallback GenerateModelInferOutputRequest, modelInferCallback nvidia_inferenceserver.DecoderFunc, ) (*ModelService, error)
func (*ModelService) CheckModelReady ¶
func (m *ModelService) CheckModelReady( modelName, modelVersion string, requestTimeout time.Duration, ) (bool, error)
CheckModelReady check model is ready.
func (*ModelService) CheckServerAlive ¶
func (m *ModelService) CheckServerAlive(requestTimeout time.Duration) (bool, error)
CheckServerAlive check server is alive.
func (*ModelService) CheckServerReady ¶
func (m *ModelService) CheckServerReady(requestTimeout time.Duration) (bool, error)
CheckServerReady check server is ready.
func (*ModelService) GetAllModelInfo ¶
func (m *ModelService) GetAllModelInfo( repoName string, isReady bool, requestTimeout time.Duration, ) (*nvidia_inferenceserver.RepositoryIndexResponse, error)
GetAllModelInfo get all model info.
func (*ModelService) GetModelConfig ¶
func (m *ModelService) GetModelConfig( modelName, modelVersion string, requestTimeout time.Duration, ) (interface{}, error)
GetModelConfig get model config.
func (*ModelService) GetModelInferIsGRPC ¶ added in v1.2.7
func (m *ModelService) GetModelInferIsGRPC() bool
GetModelInferIsGRPC returns the isGRPC flag.
func (*ModelService) GetModelInferStats ¶
func (m *ModelService) GetModelInferStats( modelName, modelVersion string, requestTimeout time.Duration, ) (*nvidia_inferenceserver.ModelStatisticsResponse, error)
GetModelInferStats get model infer stats.
func (*ModelService) GetModelMeta ¶
func (m *ModelService) GetModelMeta( modelName, modelVersion string, requestTimeout time.Duration, ) (*nvidia_inferenceserver.ModelMetadataResponse, error)
GetModelMeta get model meta.
func (*ModelService) GetModelName ¶
func (m *ModelService) GetModelName() string
GetModelName returns the model name.
func (*ModelService) GetServerMeta ¶
func (m *ModelService) GetServerMeta( requestTimeout time.Duration, ) (*nvidia_inferenceserver.ServerMetadataResponse, error)
GetServerMeta get server meta.
func (*ModelService) GetTokenizerIsChineseMode ¶ added in v1.2.7
func (m *ModelService) GetTokenizerIsChineseMode() bool
GetTokenizerIsChineseMode returns the isChinese flag.
func (*ModelService) ModelInfer ¶
func (m *ModelService) ModelInfer( inferData []string, modelName, modelVersion string, requestTimeout time.Duration, params ...interface{}, ) ([]interface{}, error)
ModelInfer API to call Triton Inference Server.
func (*ModelService) SetChineseTokenize ¶
func (m *ModelService) SetChineseTokenize(isCharMode bool) *ModelService
SetChineseTokenize enables Chinese tokenization when tokenizing infer data.
func (*ModelService) SetJsonDecoder ¶ added in v1.4.6
func (m *ModelService) SetJsonDecoder(decoder utils.JSONUnmarshal) *ModelService
SetJsonDecoder sets the JSON decoder.
func (*ModelService) SetJsonEncoder ¶ added in v1.4.6
func (m *ModelService) SetJsonEncoder(encoder utils.JSONMarshal) *ModelService
SetJsonEncoder sets the JSON encoder.
func (*ModelService) SetMaxSeqLength ¶
func (m *ModelService) SetMaxSeqLength(maxSeqLen int) *ModelService
SetMaxSeqLength sets the maximum sequence length for model inference.
func (*ModelService) SetModelInferWithGRPC ¶
func (m *ModelService) SetModelInferWithGRPC() *ModelService
SetModelInferWithGRPC enables using gRPC to call Triton.
func (*ModelService) SetModelName ¶
func (m *ModelService) SetModelName(modelPrefix, modelName string) *ModelService
SetModelName sets the model name; it must equal the model name in Triton's config.pbtxt.
func (*ModelService) SetSecondaryServerURL ¶ added in v1.4.2
func (m *ModelService) SetSecondaryServerURL(url string) *ModelService
SetSecondaryServerURL sets the secondary server URL (HTTP only).
func (*ModelService) SetTokenizerReturnPosInfo ¶ added in v1.3.3
func (m *ModelService) SetTokenizerReturnPosInfo() *ModelService
SetTokenizerReturnPosInfo makes the tokenizer return position info.
func (*ModelService) UnsetChineseTokenize ¶
func (m *ModelService) UnsetChineseTokenize() *ModelService
UnsetChineseTokenize disables Chinese tokenization when tokenizing infer data.
func (*ModelService) UnsetModelInferWithGRPC ¶
func (m *ModelService) UnsetModelInferWithGRPC() *ModelService
UnsetModelInferWithGRPC disables using gRPC to call Triton.
func (*ModelService) UnsetTokenizerReturnPosInfo ¶ added in v1.3.3
func (m *ModelService) UnsetTokenizerReturnPosInfo() *ModelService
UnsetTokenizerReturnPosInfo stops the tokenizer from returning position info.
type OffsetsType ¶
OffsetsType represents a (start, end) offsets pair. It usually represents a lower inclusive index position, and an upper exclusive position.
func GetOffsets ¶
func GetOffsets(tokens []StringOffsetsPair) []OffsetsType
GetOffsets returns a sequence of offsets values from the given slice of StringOffsetsPair.
type OptionV1 ¶
type OptionV1 func(*BaseTokenizer)
OptionV1 allows to configure a new BaseTokenizer with your specific needs.
func RegisterSpecialWords ¶
RegisterSpecialWords is an option to register a special word.
type Provider ¶
type Provider interface {
Vocab() Dict
}
Provider is an interface for exposing a vocab.
type StringOffsetsPair ¶
type StringOffsetsPair struct { String string Offsets OffsetsType }
StringOffsetsPair represents a string value paired with offsets bounds. It usually represents a token string and its offsets positions in the original string.
func MakeOffsetPairsFromGroups ¶
func MakeOffsetPairsFromGroups(text string, tokens []StringOffsetsPair, groups []TokensRange) []StringOffsetsPair
MakeOffsetPairsFromGroups creates a sequence of tokenizers.StringOffsetsPair elements from the given groups.
type TokenizerV1 ¶
type TokenizerV1 interface { Tokenize(text string) []StringOffsetsPair TokenizeChinese(text string) []StringOffsetsPair TokenizeChineseCharMode(text string) []StringOffsetsPair }
TokenizerV1 is implemented by any value that has the Tokenize method.
type TokensRange ¶
TokensRange represents an index offsets pair of a token.
func GroupPieces ¶
func GroupPieces(tokens []StringOffsetsPair) []TokensRange
GroupPieces returns a list of tokens range each of which represents the start and the end index of the tokens that form a complete word.
type WordPieceTokenizer ¶
type WordPieceTokenizer struct {
// contains filtered or unexported fields
}
WordPieceTokenizer is a tokenizer that breaks tokens into sub-word units based on a supplied vocabulary. See https://arxiv.org/pdf/1609.08144.pdf Section 4.1 for details. WordPieceTokenizers uses BaseTokenizer to preprocess the input text.
func NewWordPieceTokenizer ¶
func NewWordPieceTokenizer(vocabulary Dict) *WordPieceTokenizer
NewWordPieceTokenizer returns a new WordPieceTokenizer.
func (*WordPieceTokenizer) Tokenize ¶
func (t *WordPieceTokenizer) Tokenize(text string) []StringOffsetsPair
Tokenize converts the input text to a slice of words or sub-words token units based on the supplied vocabulary. The resulting tokens preserve the alignment with the portion of the original text they belong to.
func (*WordPieceTokenizer) TokenizeChinese ¶
func (t *WordPieceTokenizer) TokenizeChinese(text string) []StringOffsetsPair
TokenizeChinese is like Tokenize but focuses on Chinese.
func (*WordPieceTokenizer) TokenizeChineseCharMode ¶ added in v1.4.4
func (t *WordPieceTokenizer) TokenizeChineseCharMode(text string) []StringOffsetsPair
TokenizeChineseCharMode is like TokenizeChinese but focuses on Chinese NER.
func (*WordPieceTokenizer) WordPieceTokenize ¶
func (t *WordPieceTokenizer) WordPieceTokenize(tokens []StringOffsetsPair) []StringOffsetsPair
WordPieceTokenize transforms the input token in a new slice of words or sub-words units based on the supplied vocabulary. The resulting tokens preserve the alignment with the portion of the original text they belong to.