bert

package
v0.1.15
Published: Aug 19, 2021 License: Apache-2.0 Imports: 18 Imported by: 0

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func BertJapaneseTokenizerFromPretrained

func BertJapaneseTokenizerFromPretrained(pretrainedModelNameOrPath string, customParams map[string]interface{}) *tokenizer.Tokenizer

BertJapaneseTokenizerFromPretrained initiates a BERT tokenizer for Japanese from a pretrained file.
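
A minimal usage sketch, reusing the EncodeBatch API shown in the example further down this page (the vocab path is illustrative):

tk := bert.BertJapaneseTokenizerFromPretrained("../data/bert-japanese/vocab.txt", nil)

input := []tokenizer.EncodeInput{
	tokenizer.NewSingleEncodeInput(tokenizer.NewInputSequence("こんにちは、世界")),
}
encodings, err := tk.EncodeBatch(input, true)
if err != nil {
	log.Fatal(err)
}
fmt.Println(encodings[0].Ids)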

Types

type BertAttention

type BertAttention struct {
	Bsa    *BertSelfAttention
	Output *BertSelfOutput
}

func NewBertAttention

func NewBertAttention(p *nn.Path, config *BertConfig) *BertAttention

func (*BertAttention) ForwardT

func (ba *BertAttention) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask *ts.Tensor, train bool) (retVal, retValOpt *ts.Tensor)

type BertConfig

type BertConfig struct {
	HiddenAct                 string           `json:"hidden_act"`
	AttentionProbsDropoutProb float64          `json:"attention_probs_dropout_prob"`
	HiddenDropoutProb         float64          `json:"hidden_dropout_prob"`
	HiddenSize                int64            `json:"hidden_size"`
	InitializerRange          float32          `json:"initializer_range"`
	IntermediateSize          int64            `json:"intermediate_size"`
	MaxPositionEmbeddings     int64            `json:"max_position_embeddings"`
	NumAttentionHeads         int64            `json:"num_attention_heads"`
	NumHiddenLayers           int64            `json:"num_hidden_layers"`
	TypeVocabSize             int64            `json:"type_vocab_size"`
	VocabSize                 int64            `json:"vocab_size"`
	OutputAttentions          bool             `json:"output_attentions"`
	OutputHiddenStates        bool             `json:"output_hidden_states"`
	IsDecoder                 bool             `json:"is_decoder"`
	Id2Label                  map[int64]string `json:"id_2_label"`
	Label2Id                  map[string]int64 `json:"label_2_id"`
	NumLabels                 int64            `json:"num_labels"`
}

BertConfig defines the BERT model architecture (i.e., number of layers, hidden layer size, label mapping...)

func ConfigFromFile

func ConfigFromFile(filename string) (*BertConfig, error)

func NewConfig

func NewConfig(customParams map[string]interface{}) *BertConfig

NewConfig initiates a BertConfig with the given input parameters or default values.

func (*BertConfig) GetVocabSize

func (c *BertConfig) GetVocabSize() int64

func (*BertConfig) Load

func (c *BertConfig) Load(modelNameOrPath string, params map[string]interface{}) error

Load loads a model configuration from a file or model name. It also updates default configuration parameters if provided. This method implements the `pretrained.Config` interface.
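
A short sketch of both loading paths (file paths are illustrative; a nil params map keeps the defaults):

// From an explicit JSON file:
config, err := bert.ConfigFromFile("../data/bert/config.json")
if err != nil {
	log.Fatal(err)
}

// Or in place, via the `pretrained.Config` interface:
config2 := new(bert.BertConfig)
if err := config2.Load("../data/bert/config.json", nil); err != nil {
	log.Fatal(err)
}
fmt.Println(config.VocabSize, config2.HiddenSize)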

type BertEmbedding

type BertEmbedding interface {
	ForwardT(inputIds, tokenTypeIds, positionIds, inputEmbeds ts.Tensor, train bool) (ts.Tensor, error)
}

BertEmbedding defines an interface for BertModel or RoBertaModel.

type BertEmbeddings

type BertEmbeddings struct {
	WordEmbeddings      *nn.Embedding
	PositionEmbeddings  *nn.Embedding
	TokenTypeEmbeddings *nn.Embedding
	LayerNorm           *nn.LayerNorm
	Dropout             *util.Dropout
}

func NewBertEmbeddings

func NewBertEmbeddings(p *nn.Path, config *BertConfig) *BertEmbeddings

NewBertEmbeddings builds a new BertEmbeddings

func (*BertEmbeddings) ForwardT

func (be *BertEmbeddings) ForwardT(inputIds, tokenTypeIds, positionIds, inputEmbeds *ts.Tensor, train bool) (retVal *ts.Tensor, err error)

ForwardT implements the BertEmbedding interface; it passes input through the embedding layer.
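
A minimal sketch of a standalone embedding forward pass on a freshly initialized (randomly weighted) variable store; `config` is assumed loaded as above, the sub-path name and token ids are illustrative, and `ts.None` stands in for the optional tensors:

vs := nn.NewVarStore(gotch.CPU)
embeddings := bert.NewBertEmbeddings(vs.Root().Sub("embeddings"), config)

// Token ids for a single sequence, shaped (1, 4).
inputIds := ts.MustStack([]ts.Tensor{*ts.TensorFrom([]int64{101, 2023, 2003, 102})}, 0)
out, err := embeddings.ForwardT(inputIds, ts.None, ts.None, ts.None, false)
if err != nil {
	log.Fatal(err)
}
fmt.Println(out.MustSize()) // (1, 4, hidden size)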

type BertEncoder

type BertEncoder struct {
	OutputAttentions   bool
	OutputHiddenStates bool
	Layers             []BertLayer
}

BertEncoder defines an encoder for the BERT model.

func NewBertEncoder

func NewBertEncoder(p *nn.Path, config *BertConfig) *BertEncoder

NewBertEncoder creates a new BertEncoder.

func (*BertEncoder) ForwardT

func (be *BertEncoder) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask *ts.Tensor, train bool) (retVal *ts.Tensor, retValOpt1, retValOpt2 []*ts.Tensor)

ForwardT performs a forward pass through the model.

type BertForMaskedLM

type BertForMaskedLM struct {
	// contains filtered or unexported fields
}

BertForMaskedLM is a BERT model with a masked language modeling head.

Example
// Config
config := new(bert.BertConfig)
err := config.Load("../data/bert/config.json", nil)
if err != nil {
	log.Fatal(err)
}

// Model
device := gotch.CPU

model := new(bert.BertForMaskedLM)
err = model.Load("../data/bert/model.ot", config, nil, device)
if err != nil {
	log.Fatal(err)
}

tk := getBertTokenizer()
sentence1 := "Looks like one [MASK] is missing"
sentence2 := "It was a very nice and [MASK] day"

var input []tokenizer.EncodeInput
input = append(input, tokenizer.NewSingleEncodeInput(tokenizer.NewInputSequence(sentence1)))
input = append(input, tokenizer.NewSingleEncodeInput(tokenizer.NewInputSequence(sentence2)))

encodings, err := tk.EncodeBatch(input, true)
if err != nil {
	log.Fatal(err)
}

// Find the longest encoding so all inputs can be padded to the same length.
maxLen := 0
for _, en := range encodings {
	if len(en.Ids) > maxLen {
		maxLen = len(en.Ids)
	}
}

// Convert each encoding's ids to an int64 tensor, zero-padding to maxLen.
var tensors []ts.Tensor
for _, en := range encodings {
	tokInput := make([]int64, maxLen)
	for i := 0; i < len(en.Ids); i++ {
		tokInput[i] = int64(en.Ids[i])
	}

	tensors = append(tensors, *ts.TensorFrom(tokInput))
}

inputTensor := ts.MustStack(tensors, 0).MustTo(device, true)

var output *ts.Tensor
ts.NoGrad(func() {
	output, _, _ = model.ForwardT(inputTensor, ts.None, ts.None, ts.None, ts.None, ts.None, ts.None, false)
})

// Pick the most likely token id at each [MASK] position
// (index 4 in sentence1, index 7 in sentence2).
index1 := output.MustGet(0).MustGet(4).MustArgmax([]int64{0}, false, false).Int64Values()[0]
index2 := output.MustGet(1).MustGet(7).MustArgmax([]int64{0}, false, false).Int64Values()[0]

got1, ok := tk.IdToToken(int(index1))
if !ok {
	fmt.Printf("Cannot find a corresponding word for the given id (%v) in vocab.\n", index1)
}
got2, ok := tk.IdToToken(int(index2))
if !ok {
	fmt.Printf("Cannot find a corresponding word for the given id (%v) in vocab.\n", index2)
}

fmt.Println(got1)
fmt.Println(got2)
// Output:
// person
// pleasant

func NewBertForMaskedLM

func NewBertForMaskedLM(p *nn.Path, config *BertConfig) *BertForMaskedLM

NewBertForMaskedLM creates BertForMaskedLM.

func (*BertForMaskedLM) ForwardT

func (mlm *BertForMaskedLM) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds, encoderHiddenStates, encoderMask *ts.Tensor, train bool) (retVal1 *ts.Tensor, optRetVal1, optRetVal2 []*ts.Tensor)

ForwardT performs a forward pass through the model.

Params:

  • `inputIds`: optional input tensor of shape (batch size, sequence length). If None, pre-computed embeddings must be provided (see `inputEmbeds`).
  • `mask`: optional mask of shape (batch size, sequence length). Masked positions have value 0, non-masked positions value 1. If None, defaults to 1.
  • `tokenTypeIds`: optional segment ids of shape (batch size, sequence length). By convention, 0 for the first sentence (including [SEP]) and 1 for the second sentence. If None, defaults to 0.
  • `positionIds`: optional position ids of shape (batch size, sequence length). If None, positions are assigned incrementally from 0.
  • `inputEmbeds`: optional pre-computed input embeddings of shape (batch size, sequence length, hidden size). If None, input ids must be provided (see `inputIds`).
  • `encoderHiddenStates`: optional encoder hidden states of shape (batch size, encoder sequence length, hidden size). If the model is defined as a decoder and `encoderHiddenStates` is not None, they are used as keys and values in the cross-attention layer (the query comes from the decoder).
  • `encoderMask`: optional encoder attention mask of shape (batch size, encoder sequence length). If the model is defined as a decoder and `encoderHiddenStates` is not None, it is used to mask encoder values. Positions with value 0 will be masked.
  • `train`: boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output`: tensor of shape (batch size, sequence length, vocab size) with the prediction scores for each vocabulary token
  • `hiddenStates`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
  • `attentions`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)

func (*BertForMaskedLM) Load

func (mlm *BertForMaskedLM) Load(modelNameOrPath string, config interface{ pretrained.Config }, params map[string]interface{}, device gotch.Device) error

Load loads a model from a file or model name. It also updates default configuration parameters if provided. This method implements the `PretrainedModel` interface.

type BertForMultipleChoice

type BertForMultipleChoice struct {
	// contains filtered or unexported fields
}

BertForMultipleChoice constructs a multiple-choice model from a BERT base model and a linear classifier. Input should be in the form `[CLS] Context [SEP] Possible choice [SEP]`. The choice is made along the batch axis, assuming all elements of the batch are alternatives to be chosen from for a given context.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `classifier`: Linear layer for multiple choices

func NewBertForMultipleChoice

func NewBertForMultipleChoice(p *nn.Path, config *BertConfig) *BertForMultipleChoice

NewBertForMultipleChoice creates a new `BertForMultipleChoice`.

Params:

  • `p`: Variable store path for the root of the BertForMultipleChoice model
  • `config`: `BertConfig` object defining the model architecture
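
A construction and inference sketch, assuming a `config` loaded as shown earlier; the sub-path name is illustrative and `inputIds` is a (choices, sequence length) int64 tensor built as in the masked-LM example:

vs := nn.NewVarStore(gotch.CPU)
mc := bert.NewBertForMultipleChoice(vs.Root().Sub("bert"), config)

// Each row of inputIds encodes "[CLS] Context [SEP] Possible choice [SEP]";
// alternatives for one context share the batch axis.
logits, _, _ := mc.ForwardT(inputIds, ts.None, ts.None, ts.None, false)
fmt.Println(logits.MustSize())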

func (*BertForMultipleChoice) ForwardT

func (mc *BertForMultipleChoice) ForwardT(inputIds, mask, tokenTypeIds, positionIds *ts.Tensor, train bool) (retVal *ts.Tensor, retValOpt1, retValOpt2 []*ts.Tensor)

ForwardT performs a forward pass through the model.

Params:

  • `inputIds`: optional input tensor of shape (batch size, sequence length). If None, pre-computed embeddings must be provided (see `inputEmbeds`).
  • `mask`: optional mask of shape (batch size, sequence length). Masked positions have value 0, non-masked positions value 1. If None, defaults to 1.
  • `tokenTypeIds`: optional segment ids of shape (batch size, sequence length). By convention, 0 for the first sentence (including [SEP]) and 1 for the second sentence. If None, defaults to 0.
  • `positionIds`: optional position ids of shape (batch size, sequence length). If None, positions are assigned incrementally from 0.
  • `train`: boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output`: tensor with a classification logit for each alternative (alternatives are taken along the batch axis)
  • `hiddenStates`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
  • `attentions`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)

type BertForQuestionAnswering

type BertForQuestionAnswering struct {
	// contains filtered or unexported fields
}

BertForQuestionAnswering constructs an extractive question-answering model based on a BERT language model. It identifies the segment of a context that answers a provided question.

Please note that a significant amount of pre- and post-processing is required to perform end-to-end question answering. See the question answering pipeline (also provided in this package) for more details.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `qa_outputs`: Linear layer for question answering

func NewForBertQuestionAnswering

func NewForBertQuestionAnswering(p *nn.Path, config *BertConfig) *BertForQuestionAnswering

NewForBertQuestionAnswering creates a new `BertForQuestionAnswering`.

Params:

  • `p`: Variable store path for the root of the BertForQuestionAnswering model
  • `config`: `BertConfig` object defining the model architecture

func (*BertForQuestionAnswering) ForwardT

func (qa *BertForQuestionAnswering) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds *ts.Tensor, train bool) (retVal1, retVal2 *ts.Tensor, retValOpt1, retValOpt2 []*ts.Tensor)

ForwardT performs a forward pass through the model.

Params:

  • `inputIds`: optional input tensor of shape (batch size, sequence length). If None, pre-computed embeddings must be provided (see `inputEmbeds`).
  • `mask`: optional mask of shape (batch size, sequence length). Masked positions have value 0, non-masked positions value 1. If None, defaults to 1.
  • `tokenTypeIds`: optional segment ids of shape (batch size, sequence length). By convention, 0 for the first sentence (including [SEP]) and 1 for the second sentence. If None, defaults to 0.
  • `positionIds`: optional position ids of shape (batch size, sequence length). If None, positions are assigned incrementally from 0.
  • `inputEmbeds`: optional pre-computed input embeddings of shape (batch size, sequence length, hidden size). If None, input ids must be provided (see `inputIds`).
  • `train`: boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `startScores`: tensor of shape (batch size, sequence length) with the logits for the answer start position
  • `endScores`: tensor of shape (batch size, sequence length) with the logits for the answer end position
  • `hiddenStates`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
  • `attentions`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
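
A minimal inference sketch; `qa` is assumed built with NewForBertQuestionAnswering, and `inputIds` encodes "[CLS] question [SEP] context [SEP]" as in the earlier examples:

var startScores, endScores *ts.Tensor
ts.NoGrad(func() {
	startScores, endScores, _, _ = qa.ForwardT(inputIds, ts.None, ts.None, ts.None, ts.None, false)
})

// Most likely answer span for the first batch element.
start := startScores.MustGet(0).MustArgmax([]int64{0}, false, false).Int64Values()[0]
end := endScores.MustGet(0).MustArgmax([]int64{0}, false, false).Int64Values()[0]
fmt.Println(start, end)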

type BertForSequenceClassification

type BertForSequenceClassification struct {
	// contains filtered or unexported fields
}

BertForSequenceClassification is a base BERT model with a classifier head, used for sentence- or document-level classification.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `classifier`: BERT linear layer for classification

func NewBertForSequenceClassification

func NewBertForSequenceClassification(p *nn.Path, config *BertConfig) *BertForSequenceClassification

NewBertForSequenceClassification creates a new `BertForSequenceClassification`.

Params:

  • `p`: Variable store path for the root of the BertForSequenceClassification model
  • `config`: `BertConfig` object defining the model architecture and number of classes

Example:

device := gotch.CPU
vs := nn.NewVarStore(device)
config, err := bert.ConfigFromFile("path/to/config.json")
if err != nil {
	log.Fatal(err)
}
p := vs.Root()
model := bert.NewBertForSequenceClassification(p.Sub("bert"), config)

func (*BertForSequenceClassification) ForwardT

func (bsc *BertForSequenceClassification) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds *ts.Tensor, train bool) (retVal *ts.Tensor, retValOpt1, retValOpt2 []*ts.Tensor)

ForwardT performs a forward pass through the model.

Params:

  • `inputIds`: optional input tensor of shape (batch size, sequence length). If None, pre-computed embeddings must be provided (see `inputEmbeds`).
  • `mask`: optional mask of shape (batch size, sequence length). Masked positions have value 0, non-masked positions value 1. If None, defaults to 1.
  • `tokenTypeIds`: optional segment ids of shape (batch size, sequence length). By convention, 0 for the first sentence (including [SEP]) and 1 for the second sentence. If None, defaults to 0.
  • `positionIds`: optional position ids of shape (batch size, sequence length). If None, positions are assigned incrementally from 0.
  • `inputEmbeds`: optional pre-computed input embeddings of shape (batch size, sequence length, hidden size). If None, input ids must be provided (see `inputIds`).
  • `train`: boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output`: tensor of shape (batch size, number of labels) with the classification logits
  • `hiddenStates`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
  • `attentions`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
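
An inference sketch continuing the example above; `inputTensor` is a padded (batch size, sequence length) id tensor built as in the masked-LM example, and `config.Id2Label` must be populated for the final lookup:

var logits *ts.Tensor
ts.NoGrad(func() {
	logits, _, _ = model.ForwardT(inputTensor, ts.None, ts.None, ts.None, ts.None, false)
})

// Most likely class for the first batch element.
pred := logits.MustGet(0).MustArgmax([]int64{0}, false, false).Int64Values()[0]
fmt.Println(config.Id2Label[pred])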

type BertForTokenClassification

type BertForTokenClassification struct {
	// contains filtered or unexported fields
}

BertForTokenClassification constructs a token-level classifier, predicting a label for each token provided. Note that, because of wordpiece tokenization, the predicted labels are not necessarily aligned with words in the sentence.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `classifier`: Linear layer for token classification

func NewBertForTokenClassification

func NewBertForTokenClassification(p *nn.Path, config *BertConfig) *BertForTokenClassification

NewBertForTokenClassification creates a new `BertForTokenClassification`

Params:

  • `p`: Variable store path for the root of the BertForTokenClassification model
  • `config`: `BertConfig` object defining the model architecture, number of output labels and label mapping

func (*BertForTokenClassification) ForwardT

func (tc *BertForTokenClassification) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds *ts.Tensor, train bool) (retVal *ts.Tensor, retValOpt1, retValOpt2 []*ts.Tensor)

ForwardT performs a forward pass through the model.

Params:

  • `inputIds`: optional input tensor of shape (batch size, sequence length). If None, pre-computed embeddings must be provided (see `inputEmbeds`).
  • `mask`: optional mask of shape (batch size, sequence length). Masked positions have value 0, non-masked positions value 1. If None, defaults to 1.
  • `tokenTypeIds`: optional segment ids of shape (batch size, sequence length). By convention, 0 for the first sentence (including [SEP]) and 1 for the second sentence. If None, defaults to 0.
  • `positionIds`: optional position ids of shape (batch size, sequence length). If None, positions are assigned incrementally from 0.
  • `inputEmbeds`: optional pre-computed input embeddings of shape (batch size, sequence length, hidden size). If None, input ids must be provided (see `inputIds`).
  • `train`: boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output`: tensor of shape (batch size, sequence length, number of labels) with the classification logits for each token
  • `hiddenStates`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
  • `attentions`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
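
An inference sketch; `tc` and `inputTensor` are assumed built as in the previous examples:

var logits *ts.Tensor
ts.NoGrad(func() {
	logits, _, _ = tc.ForwardT(inputTensor, ts.None, ts.None, ts.None, ts.None, false)
})

// One label id per token position of the first batch element.
preds := logits.MustGet(0).MustArgmax([]int64{1}, false, false)
fmt.Println(preds.Int64Values())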

type BertIntermediate

type BertIntermediate struct {
	Lin        *nn.Linear
	Activation util.ActivationFn // interface
}

func NewBertIntermediate

func NewBertIntermediate(p *nn.Path, config *BertConfig) *BertIntermediate

func (*BertIntermediate) Forward

func (bi *BertIntermediate) Forward(hiddenStates *ts.Tensor) (retVal *ts.Tensor)

type BertLMPredictionHead

type BertLMPredictionHead struct {
	Transform *BertPredictionHeadTransform
	Decoder   *util.LinearNoBias
	Bias      *ts.Tensor
}

BertLMPredictionHead constructs the layers for the BERT prediction head.

func NewBertLMPredictionHead

func NewBertLMPredictionHead(p *nn.Path, config *BertConfig) *BertLMPredictionHead

NewBertLMPredictionHead creates BertLMPredictionHead.

func (*BertLMPredictionHead) Forward

func (ph *BertLMPredictionHead) Forward(hiddenState *ts.Tensor) *ts.Tensor

Forward performs a forward pass through the model.

type BertLayer

type BertLayer struct {
	Attention      *BertAttention
	IsDecoder      bool
	CrossAttention *BertAttention
	Intermediate   *BertIntermediate
	Output         *BertOutput
}

BertLayer defines a layer in the BERT encoder.

func NewBertLayer

func NewBertLayer(p *nn.Path, config *BertConfig) *BertLayer

NewBertLayer creates a new BertLayer.

func (*BertLayer) ForwardT

func (bl *BertLayer) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask *ts.Tensor, train bool) (retVal, retValOpt1, retValOpt2 *ts.Tensor)

ForwardT performs a forward pass through the model.

type BertModel

type BertModel struct {
	Embeddings *BertEmbeddings
	Encoder    *BertEncoder
	Pooler     *BertPooler
	IsDecoder  bool
}

BertModel defines base architecture for BERT models. Task-specific models can be built from this base model.

Fields:

  • Embeddings: `token`, `position` and `segment` embeddings
  • Encoder: a vector of layers; each layer is composed of a `self-attention`, an `intermediate` (linear) and an output (linear + layer norm) sub-layer
  • Pooler: linear layer applied to the first element of the sequence (`[CLS]` token)
  • IsDecoder: whether the model is used as a decoder. If set to `true`, a causal mask is applied to hide future positions that should not be attended to.

func NewBertModel

func NewBertModel(p *nn.Path, config *BertConfig) *BertModel

NewBertModel builds a new `BertModel`.

Params:

  • `p`: Variable store path for the root of the BERT Model
  • `config`: BertConfig configuration for model architecture and decoder status
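
A construction sketch (paths are illustrative):

device := gotch.CPU
vs := nn.NewVarStore(device)
config, err := bert.ConfigFromFile("../data/bert/config.json")
if err != nil {
	log.Fatal(err)
}
model := bert.NewBertModel(vs.Root().Sub("bert"), config)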

func (*BertModel) ForwardT

func (b *BertModel) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds, encoderHiddenStates, encoderMask *ts.Tensor, train bool) (retVal1, retVal2 *ts.Tensor, retValOpt1, retValOpt2 []*ts.Tensor, err error)

ForwardT performs a forward pass through the model.

Params:

  • `inputIds`: optional input tensor of shape (batch size, sequence length). If None, pre-computed embeddings must be provided (see `inputEmbeds`).
  • `mask`: optional mask of shape (batch size, sequence length). Masked positions have value 0, non-masked positions value 1. If None, defaults to 1.
  • `tokenTypeIds`: optional segment ids of shape (batch size, sequence length). By convention, 0 for the first sentence (including [SEP]) and 1 for the second sentence. If None, defaults to 0.
  • `positionIds`: optional position ids of shape (batch size, sequence length). If None, positions are assigned incrementally from 0.
  • `inputEmbeds`: optional pre-computed input embeddings of shape (batch size, sequence length, hidden size). If None, input ids must be provided (see `inputIds`).
  • `encoderHiddenStates`: optional encoder hidden states of shape (batch size, encoder sequence length, hidden size). If the model is defined as a decoder and `encoderHiddenStates` is not None, they are used as keys and values in the cross-attention layer (the query comes from the decoder).
  • `encoderMask`: optional encoder attention mask of shape (batch size, encoder sequence length). If the model is defined as a decoder and `encoderHiddenStates` is not None, it is used to mask encoder values. Positions with value 0 will be masked.
  • `train`: boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output`: tensor of shape (batch size, sequence length, hidden size)
  • `pooledOutput`: tensor of shape (batch size, hidden size)
  • `hiddenStates`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
  • `attentions`: slice of tensors of length numHiddenLayers with shape (batch size, sequence length, hidden size)
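
A forward-pass sketch; `model` and `inputTensor` are assumed built as above, with `ts.None` for all optional inputs:

var output, pooled *ts.Tensor
var err error
ts.NoGrad(func() {
	output, pooled, _, _, err = model.ForwardT(inputTensor, ts.None, ts.None, ts.None, ts.None, ts.None, ts.None, false)
})
if err != nil {
	log.Fatal(err)
}
fmt.Println(output.MustSize(), pooled.MustSize())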

type BertOutput

type BertOutput struct {
	Lin       *nn.Linear
	LayerNorm *nn.LayerNorm
	Dropout   *util.Dropout
}

func NewBertOutput

func NewBertOutput(p *nn.Path, config *BertConfig) *BertOutput

func (*BertOutput) ForwardT

func (bo *BertOutput) ForwardT(hiddenStates, inputTensor *ts.Tensor, train bool) (retVal *ts.Tensor)

type BertPooler

type BertPooler struct {
	Lin *nn.Linear
}

BertPooler defines a linear layer applied to the first element of the sequence (`[CLS]` token).

func NewBertPooler

func NewBertPooler(p *nn.Path, config *BertConfig) *BertPooler

NewBertPooler creates a new BertPooler.

func (*BertPooler) Forward

func (bp *BertPooler) Forward(hiddenStates *ts.Tensor) (retVal *ts.Tensor)

Forward performs a forward pass through the model.

type BertPredictionHeadTransform

type BertPredictionHeadTransform struct {
	Dense      *nn.Linear
	Activation util.ActivationFn
	LayerNorm  *nn.LayerNorm
}

BertPredictionHeadTransform holds layers of BERT prediction head transform.

func NewBertPredictionHeadTransform

func NewBertPredictionHeadTransform(p *nn.Path, config *BertConfig) *BertPredictionHeadTransform

NewBertPredictionHeadTransform creates a BertPredictionHeadTransform.

func (*BertPredictionHeadTransform) Forward

func (bpht *BertPredictionHeadTransform) Forward(hiddenStates *ts.Tensor) (retVal *ts.Tensor)

Forward performs a forward pass through the model.

type BertSelfAttention

type BertSelfAttention struct {
	NumAttentionHeads int64
	AttentionHeadSize int64
	Dropout           *util.Dropout
	OutputAttentions  bool
	Query             *nn.Linear
	Key               *nn.Linear
	Value             *nn.Linear
}

func NewBertSelfAttention

func NewBertSelfAttention(p *nn.Path, config *BertConfig) *BertSelfAttention

NewBertSelfAttention creates a new `BertSelfAttention`

func (*BertSelfAttention) ForwardT

func (bsa *BertSelfAttention) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask *ts.Tensor, train bool) (retVal, retValOpt *ts.Tensor)

ForwardT implements the ModuleT interface for BertSelfAttention.

NOTE: mask, encoderHiddenStates and encoderMask are optional tensors; for a `None` value, `ts.None` can be used.
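
For instance, a self-attention-only call (a sketch; `bsa` and `hiddenStates` are assumed built beforehand, and the optional second return value may be nil unless attention outputs are enabled):

ctx, attn := bsa.ForwardT(hiddenStates, ts.None, ts.None, ts.None, false)
fmt.Println(ctx.MustSize(), attn == nil)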

type BertSelfOutput

type BertSelfOutput struct {
	Linear    *nn.Linear
	LayerNorm *nn.LayerNorm
	Dropout   *util.Dropout
}

func NewBertSelfOutput

func NewBertSelfOutput(p *nn.Path, config *BertConfig) *BertSelfOutput

func (*BertSelfOutput) ForwardT

func (bso *BertSelfOutput) ForwardT(hiddenStates *ts.Tensor, inputTensor *ts.Tensor, train bool) (retVal *ts.Tensor)

type BertTokenizerFast

type BertTokenizerFast = tokenizer.Tokenizer

type TensorOpt

type TensorOpt func() *ts.Tensor

TensorOpt is a function type that creates a pointer to a tensor.

func EncoderHiddenStateTensorOpt

func EncoderHiddenStateTensorOpt(t *ts.Tensor) TensorOpt

func EncoderMaskTensorOpt

func EncoderMaskTensorOpt(t *ts.Tensor) TensorOpt

func MaskTensorOpt

func MaskTensorOpt(t *ts.Tensor) TensorOpt
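
These constructors wrap a tensor in a TensorOpt closure; calling the option later recovers the wrapped tensor. A minimal sketch:

mask := ts.TensorFrom([]int64{1, 1, 1, 0})
opt := bert.MaskTensorOpt(mask)
fmt.Println(opt().Int64Values())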

type Tokenizer

type Tokenizer struct {
	*tokenizer.Tokenizer
}

func NewTokenizer

func NewTokenizer() *Tokenizer

func (*Tokenizer) Load

func (bt *Tokenizer) Load(modelNameOrPath string, params map[string]interface{}) error
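
A loading sketch (the local vocab path is illustrative):

tk := bert.NewTokenizer()
if err := tk.Load("../data/bert/vocab.txt", nil); err != nil {
	log.Fatal(err)
}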
