ma

package
v1.2.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 30, 2017 | License: Apache-2.0 | Imports: 15 | Imported by: 0

Documentation

Index

Constants

View Source
// String constants for separators and punctuation.
const (
	// MSR_SEPARATOR is the one-character string "|". The OOVMSRS entries in
	// this package place that character between feature pairs such as
	// "gen=F|num=S"; presumably this constant is the delimiter used to split
	// or join them (use sites are not visible here).
	MSR_SEPARATOR = `|`

	// PUNCTUATION is a single string made up of punctuation characters,
	// including the double quote and the guillemets » and «.
	// NOTE(review): likely used as a character set for membership checks;
	// confirm against callers.
	PUNCTUATION = `,.|?!:;-&»«"[]()<>`
)
View Source
// ESTIMATED_MORPHS_PER_TOKEN is the untyped integer constant 5.
// NOTE(review): the name suggests a per-token sizing estimate for morphemes
// (e.g. a capacity hint); its use sites are not visible here — confirm.
const ESTIMATED_MORPHS_PER_TOKEN = 5

Variables

View Source
// Package-level lookup tables.
var (
	// PUNCT maps a punctuation token (including the three-character "...")
	// to a tag string prefixed with "yy".
	PUNCT = map[string]string{
		".":   "yyDOT",
		"...": "yyELPS",
		",":   "yyCM",
		":":   "yyCLN",
		";":   "yySCLN",
		"!":   "yyEXCL",
		"?":   "yyQM",
		"-":   "yyDASH",
		"(":   "yyLRB",
		")":   "yyRRB",
		`"`:   "yyQUOT",
	}

	// OOVMSRS lists tag strings of the form "<POS>-<features>", where the
	// features are "|"-separated key=value pairs (the first entry has none).
	// NOTE(review): the name suggests these are the candidate analyses for
	// out-of-vocabulary tokens; confirm against the code that reads it.
	OOVMSRS = []string{
		"NNP-",
		"NNP-gen=F|gen=M|num=S",
		"NNP-gen=M|num=S",
		"NNP-gen=F|num=S",
		"NN-gen=M|num=P|num=S",
		"NN-gen=M|num=S",
		"NN-gen=F|num=S",
		"NN-gen=M|num=P",
		"NN-gen=F|num=P",
	}

	// REGEX pairs compiled patterns with a POS tag string.
	// NOTE(review): presumably tried in slice order by the caller — confirm.
	REGEX = []struct {
		RE  *regexp.Regexp
		POS string
	}{
		{
			// Whole-string match: digits with an optional decimal part, or
			// 1-3 digits followed by comma-separated groups of three digits
			// and an optional decimal part.
			RE:  regexp.MustCompile(`^\d+(\.\d+)?$|^\d{1,3}(,\d{3})*(\.\d+)?$`),
			POS: "CD",
		},
		{
			// Unanchored: matches any string that contains a digit.
			RE:  regexp.MustCompile(`\d`),
			POS: "NCD",
		},
	}
)

Functions

This section is empty.

Types

type AnalyzeStats

// AnalyzeStats groups two integer counters with two string-keyed integer maps.
// NOTE(review): judging by the AddToken/AddOOVToken methods declared on this
// type, the counters presumably track total and out-of-vocabulary tokens and
// the maps track per-token counts; the method bodies are not visible here.
type AnalyzeStats struct {
	// TotalTokens is an integer counter.
	TotalTokens int
	// OOVTokens is an integer counter.
	OOVTokens int
	// UniqTokens maps a string to an int. It is nil in the zero value.
	UniqTokens map[string]int
	// UniqOOVTokens maps a string to an int. It is nil in the zero value.
	UniqOOVTokens map[string]int
}

func (*AnalyzeStats) AddOOVToken

func (a *AnalyzeStats) AddOOVToken(token string)

func (*AnalyzeStats) AddToken

func (a *AnalyzeStats) AddToken(token string)

func (*AnalyzeStats) Init

func (a *AnalyzeStats) Init()

type BGULex

// BGULex holds two string-keyed tables of BasicMorphemes slices (Prefixes and
// Lex) together with configuration flags and a statistics pointer.
// NOTE(review): the LoadPrefixes and LoadLex methods declared on this type
// presumably populate Prefixes and Lex from files; their bodies are not
// visible here.
type BGULex struct {
	// MaxPrefixLen is an integer bound associated with the Prefixes table.
	// NOTE(review): presumably the longest prefix key length — confirm.
	MaxPrefixLen int
	// Prefixes maps a string to a slice of BasicMorphemes.
	Prefixes map[string][]BasicMorphemes

	// Lex maps a string to a slice of BasicMorphemes.
	Lex map[string][]BasicMorphemes

	// Files is a list of strings; presumably the paths that were loaded.
	Files []string
	// Stats points at an AnalyzeStats value and may be nil in the zero value.
	Stats *AnalyzeStats

	// Boolean switches; their effect is decided by methods not shown here.
	AlwaysNNP, LogOOV bool
	// MAType is a free-form string selector.
	MAType string
}

func (*BGULex) AddOOVAnalysis

func (l *BGULex) AddOOVAnalysis(lat *Lattice, prefix BasicMorphemes, hostStr string, numToken int)

func (*BGULex) Analyze

func (l *BGULex) Analyze(input []string) (LatticeSentence, interface{})

func (*BGULex) AnalyzeToken

func (l *BGULex) AnalyzeToken(input string, startingNode, indexToken int) (*Lattice, interface{})

func (*BGULex) LoadLex

func (l *BGULex) LoadLex(file string, nnpnofeats bool)

func (*BGULex) LoadPrefixes

func (l *BGULex) LoadPrefixes(file string)

func (*BGULex) OOVForLen

func (l *BGULex) OOVForLen(lat *Lattice, input string, startingNode, numToken, prefixLen int) bool

type MADict

// MADict bundles a token dictionary (Data) with the training files it came
// from, plus the settings and tables in the "OOV" group below.
// NOTE(review): the Read/Write methods declared on this type suggest the
// struct is serialized; field names and order are kept as in the original.
type MADict struct {
	// Language is a string label.
	Language string
	// NumTokens is an integer count.
	NumTokens int

	// OOV-related settings and tables.
	// NOTE(review): presumably filled in by ComputeTopPOS and
	// ComputeOOVMSRs — confirm against those methods.
	MaxTopPOS     int
	MaxMSRsPerPOS int
	TopPOS        []string
	OOVMSRs       []string
	// POSMSRs maps a string to an MSRFreq (itself a map[string]int).
	POSMSRs map[string]MSRFreq

	// Training data: the files and the dictionary.
	Files []TrainingFile
	Data  TokenDictionary

	// Stats points at an AnalyzeStats value.
	Stats *AnalyzeStats

	// TopPOSSet is a string-keyed boolean map.
	// NOTE(review): presumably a set view of TopPOS — confirm.
	TopPOSSet map[string]bool
	// Dope is a boolean switch whose effect is not visible here.
	Dope bool
}

func (*MADict) AddAnalyses

func (m *MADict) AddAnalyses(token string, morphs BasicMorphemes)

func (*MADict) AddMSRs

func (m *MADict) AddMSRs(morphs BasicMorphemes)

MSR: Morpho-Syntactic Representation

func (*MADict) Analyze

func (m *MADict) Analyze(input []string) (LatticeSentence, interface{})

func (*MADict) ApplyOOV

func (m *MADict) ApplyOOV(token string, lat *Lattice, curID *int, curNode, i int)

func (*MADict) ComputeOOVMSRs

func (m *MADict) ComputeOOVMSRs(maxMSRs int)

func (*MADict) ComputeTopPOS

func (m *MADict) ComputeTopPOS()

func (*MADict) Init

func (m *MADict) Init()

func (*MADict) LearnFromConllU

func (m *MADict) LearnFromConllU(conlluFile string, limit int) (int, error)

func (*MADict) LearnFromLat

func (m *MADict) LearnFromLat(latticeFile, rawFile string, limit int) (int, error)

func (*MADict) Read

func (m *MADict) Read(r io.Reader) error

func (*MADict) ReadFile

func (m *MADict) ReadFile(filename string) error

func (*MADict) Write

func (m *MADict) Write(writer io.Writer) error

func (*MADict) WriteFile

func (m *MADict) WriteFile(filename string) error

type MSRFreq

// MSRFreq maps a string key to an integer; MADict.POSMSRs stores one MSRFreq
// per string key.
// NOTE(review): presumably MSR string -> occurrence count — confirm.
type MSRFreq map[string]int

type MorphologicalAnalyzer

// MorphologicalAnalyzer is satisfied by any type that has an Analyze method
// with the signature below; *BGULex and *MADict both declare such a method.
type MorphologicalAnalyzer interface {
	// Analyze takes a slice of strings and returns a LatticeSentence plus an
	// untyped second value.
	// NOTE(review): input is presumably the tokens of one sentence; the
	// meaning of the interface{} result is not visible here — confirm.
	Analyze(input []string) (LatticeSentence, interface{})
}

type TokenDictionary

// TokenDictionary maps a string key to a slice of BasicMorphemes; MADict.Data
// has this type.
// NOTE(review): presumably token -> its known analyses — confirm.
type TokenDictionary map[string][]BasicMorphemes

type TrainingFile

// TrainingFile holds four string fields describing one training input.
// NOTE(review): by their names, Lattice and Raw are presumably file paths and
// LatMD5 and RawMD5 their checksums; nothing visible here confirms it.
type TrainingFile struct {
	// Lattice is a string; presumably the lattice file path.
	Lattice string
	// Raw is a string; presumably the raw-text file path.
	Raw string
	// LatMD5 is a string; presumably the MD5 digest of the lattice file.
	LatMD5 string
	// RawMD5 is a string; presumably the MD5 digest of the raw file.
	RawMD5 string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL