sequencelabeler

package
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 9, 2020 License: BSD-2-Clause Imports: 33 Imported by: 0

Documentation

Overview

Implementation of a sequence labeling architecture composed of Embeddings -> BiRNN -> Scorer -> CRF.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Convert

func Convert(modelPath string, flairModelName string)

Convert converts the parameters (weights and biases) of a pre-processed Flair model into spaGO structures. At the moment it is not possible to import directly from a Flair model: a simple Python script takes care of pre-processing and exporting the tensors in a format more compatible with spaGO. That script is still a bit chaotic and has not been published yet; it will be made available soon. In the future it would be even better to import directly from Flair.

Types

type Body

// Body pairs a text with its processing options and is (de)serialized as JSON
// under the keys "options" and "text".
// NOTE(review): presumably the request payload of the /analyze endpoint —
// confirm against the HTTP handler.
type Body struct {
	Options OptionsType `json:"options"`
	Text    string      `json:"text"`
}

type Config

// Config holds the settings of a sequence labeler, mapped to and from JSON
// through the struct tags below. It groups the model filename, two word
// embeddings configurations, the contextual string embeddings configuration,
// the input/output sizes of the embeddings projection, recurrent and scorer
// components, and the list of labels.
// NOTE(review): the sizes presumably have to be mutually consistent (e.g. the
// recurrent output feeding the scorer input) — confirm against NewDefaultModel.
type Config struct {
	ModelFilename                  string                     `json:"model_filename"`
	WordEmbeddings                 WordEmbeddingsConfig       `json:"word_embeddings"`
	WordEmbeddings2                WordEmbeddingsConfig       `json:"word_embeddings_2"`
	ContextualStringEmbeddings     ContextualEmbeddingsConfig `json:"contextual_string_embeddings"`
	EmbeddingsProjectionInputSize  int                        `json:"embeddings_projection_input_size"`
	EmbeddingsProjectionOutputSize int                        `json:"embeddings_projection_output_size"`
	RecurrentInputSize             int                        `json:"recurrent_input_size"`
	RecurrentOutputSize            int                        `json:"recurrent_output_size"`
	ScorerInputSize                int                        `json:"scorer_input_size"`
	ScorerOutputSize               int                        `json:"scorer_output_size"`
	Labels                         []string                   `json:"labels"`
}

func LoadConfig

func LoadConfig(file string) Config

type ContextualEmbeddingsConfig

// ContextualEmbeddingsConfig holds the JSON-configurable settings of the
// contextual embeddings: the vocabulary, embedding, hidden and output sizes,
// the sequence separator and unknown token strings, and the vocabulary
// filename.
// NOTE(review): whether VocabularyFilename is resolved relative to the model
// path is not visible here — confirm against LoadVocabulary.
type ContextualEmbeddingsConfig struct {
	VocabularySize     int    `json:"vocabulary_size"`
	EmbeddingSize      int    `json:"embedding_size"`
	HiddenSize         int    `json:"hidden_size"`
	OutputSize         int    `json:"output_size"`
	SequenceSeparator  string `json:"sequence_separator"`
	UnknownToken       string `json:"unknown_token"`
	VocabularyFilename string `json:"vocabulary_filename"`
}

type Model

// Model is the sequence labeler. It holds its Config, an embeddings layer
// (a stackedembeddings.Model), a tagger layer (a birnncrf.Model) and the
// list of labels.
// NOTE(review): Labels presumably mirrors Config.Labels — confirm against
// NewDefaultModel.
type Model struct {
	Config          Config
	EmbeddingsLayer *stackedembeddings.Model
	TaggerLayer     *birnncrf.Model
	Labels          []string
}

func NewDefaultModel

func NewDefaultModel(config Config, path string, readOnlyEmbeddings bool, forceNewEmbeddingsDB bool) *Model

NewDefaultModel returns a new sequence labeler modeled on the Flair architecture. See https://github.com/flairNLP/flair for more information.

func (*Model) LoadParams

func (m *Model) LoadParams(path string)

func (*Model) LoadVocabulary

func (m *Model) LoadVocabulary(path string)

func (*Model) NewProc

func (m *Model) NewProc(ctx nn.Context) nn.Processor

type OptionsType

// OptionsType carries two boolean options, both false by default. They are
// serialized with the camelCase JSON keys "mergeEntities" and
// "filterNotEntities" (unlike the snake_case keys used by Config).
// NOTE(review): presumably MergeEntities joins adjacent tokens of the same
// entity and FilterNotEntities drops tokens without an entity label — confirm
// against the code that consumes these options.
type OptionsType struct {
	MergeEntities     bool `json:"mergeEntities"`     // default false
	FilterNotEntities bool `json:"filterNotEntities"` // default false
}

type Processor

// Processor embeds nn.BaseProcessor and holds one processor for the
// embeddings layer (a stackedembeddings.Processor) and one for the tagger
// layer (a birnncrf.Processor).
// NOTE(review): presumably created by Model.NewProc, which returns an
// nn.Processor — confirm.
type Processor struct {
	nn.BaseProcessor
	EmbeddingsLayer *stackedembeddings.Processor
	TaggerLayer     *birnncrf.Processor
}

func (*Processor) Forward

func (p *Processor) Forward(_ ...ag.Node) []ag.Node

func (*Processor) NegativeLogLoss

func (p *Processor) NegativeLogLoss(targets []int) ag.Node

TODO: it would be more consistent if the targets were the string labels rather than ints.

func (*Processor) Predict

func (p *Processor) Predict(tokens []tokenizers.StringOffsetsPair) []TokenLabel

type Response

// Response is a JSON-serializable result made of the list of tokens and the
// time the server took to execute the request.
type Response struct {
	Tokens []Token `json:"tokens"`
	// Took is the number of milliseconds it took the server to execute the request.
	Took int64 `json:"took"`
}

func (*Response) Dump

func (r *Response) Dump(pretty bool) ([]byte, error)

type Server

// Server embeds grpcapi.UnimplementedSequenceLabelerServer; its remaining
// fields are unexported and not shown here.
// NOTE(review): presumably it wraps the *Model passed to NewServer and serves
// it over both HTTP and gRPC — confirm against NewServer and Start.
type Server struct {

	// UnimplementedSequenceLabelerServer must be embedded to have forward compatible implementations for gRPC.
	grpcapi.UnimplementedSequenceLabelerServer
	// contains filtered or unexported fields
}

func NewServer

func NewServer(model *Model) *Server

func (*Server) Analyze

Sends a request to /analyze. TODO(evanmcclure@gmail.com) Reuse the gRPC message type for HTTP requests.

func (*Server) Start

func (s *Server) Start(address, grpcAddress, tlsCert, tlsKey string, tlsDisable bool)

type Token

// Token is a labeled piece of text, serialized as JSON under the keys
// "text", "start", "end" and "label".
// NOTE(review): Start and End are presumably offsets into the analyzed text;
// whether they count bytes or runes, and whether End is exclusive, is not
// visible here — confirm against the tokenizer.
type Token struct {
	Text  string `json:"text"`
	Start int    `json:"start"`
	End   int    `json:"end"`
	Label string `json:"label"`
}

type TokenLabel

// TokenLabel embeds a tokenizers.StringOffsetsPair and adds the Label
// assigned to it. Processor.Predict returns a slice of these.
type TokenLabel struct {
	tokenizers.StringOffsetsPair
	Label string
}

type WordEmbeddingsConfig

// WordEmbeddingsConfig holds the filename and the size of a set of word
// embeddings. Note that the JSON keys are "embeddings_filename" and
// "embeddings_size", without the "word_" prefix of the Go field names.
type WordEmbeddingsConfig struct {
	WordEmbeddingsFilename string `json:"embeddings_filename"`
	WordEmbeddingsSize     int    `json:"embeddings_size"`
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL