Documentation ¶
Index ¶
- Constants
- Variables
- type AnalyzeStats
- type BGULex
- func (l *BGULex) AddOOVAnalysis(lat *Lattice, prefix BasicMorphemes, hostStr string, numToken int)
- func (l *BGULex) Analyze(input []string) (LatticeSentence, interface{})
- func (l *BGULex) AnalyzeToken(input string, startingNode, indexToken int) (*Lattice, interface{})
- func (l *BGULex) LoadLex(file string, nnpnofeats bool)
- func (l *BGULex) LoadPrefixes(file string)
- func (l *BGULex) OOVForLen(lat *Lattice, input string, startingNode, numToken, prefixLen int) bool
- type MADict
- func (m *MADict) AddAnalyses(token string, morphs BasicMorphemes)
- func (m *MADict) AddMSRs(morphs BasicMorphemes)
- func (m *MADict) Analyze(input []string) (LatticeSentence, interface{})
- func (m *MADict) ApplyOOV(token string, lat *Lattice, curID *int, curNode, i int)
- func (m *MADict) ComputeOOVMSRs(maxMSRs int)
- func (m *MADict) ComputeTopPOS()
- func (m *MADict) Init()
- func (m *MADict) LearnFromConllU(conlluFile string, limit int) (int, error)
- func (m *MADict) LearnFromLat(latticeFile, rawFile string, limit int) (int, error)
- func (m *MADict) Read(r io.Reader) error
- func (m *MADict) ReadFile(filename string) error
- func (m *MADict) Write(writer io.Writer) error
- func (m *MADict) WriteFile(filename string) error
- type MSRFreq
- type MorphologicalAnalyzer
- type TokenDictionary
- type TrainingFile
Constants ¶
const (
    MSR_SEPARATOR = "|"
    PUNCTUATION   = ",.|?!:;-&»«\"[]()<>"
)
const ESTIMATED_MORPHS_PER_TOKEN = 5
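For orientation, MSR_SEPARATOR delimits the feature parts of an MSR string (compare the OOVMSRS values below) and PUNCTUATION is a flat string of punctuation characters, so membership checks reduce to strings.ContainsRune. A standalone sketch, with the constant values copied verbatim from above; nothing else here is part of the package:

package main

import (
    "fmt"
    "strings"
)

const (
    MSR_SEPARATOR = "|"
    PUNCTUATION   = ",.|?!:;-&»«\"[]()<>"
)

func main() {
    // Split a morpho-syntactic representation into its feature parts.
    fmt.Println(strings.Split("gen=F|num=S", MSR_SEPARATOR)) // [gen=F num=S]

    // Check whether a character counts as punctuation.
    fmt.Println(strings.ContainsRune(PUNCTUATION, '?')) // true
}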
Variables ¶
var (
    PUNCT = map[string]string{
        ":":   "yyCLN",
        ",":   "yyCM",
        "-":   "yyDASH",
        ".":   "yyDOT",
        "...": "yyELPS",
        "!":   "yyEXCL",
        "(":   "yyLRB",
        "?":   "yyQM",
        ")":   "yyRRB",
        ";":   "yySCLN",
        "\"":  "yyQUOT",
    }
    OOVMSRS = []string{
        "NNP-",
        "NNP-gen=F|gen=M|num=S",
        "NNP-gen=M|num=S",
        "NNP-gen=F|num=S",
        "NN-gen=M|num=P|num=S",
        "NN-gen=M|num=S",
        "NN-gen=F|num=S",
        "NN-gen=M|num=P",
        "NN-gen=F|num=P",
    }
    REGEX = []struct {
        RE  *regexp.Regexp
        POS string
    }{
        {regexp.MustCompile("^\\d+(\\.\\d+)?$|^\\d{1,3}(,\\d{3})*(\\.\\d+)?$"), "CD"},
        {regexp.MustCompile("\\d"), "NCD"},
    }
)
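REGEX is an ordered fallback table: the first pattern that matches a token decides its POS (CD for well-formed numbers, NCD for anything else containing a digit). A minimal standalone sketch of that lookup, reusing the same two patterns; the function and variable names are illustrative, not part of this package:

package main

import (
    "fmt"
    "regexp"
)

// classifyNumeric mirrors the ordered-regex fallback: the first
// matching pattern wins. Patterns copied from the REGEX variable above.
func classifyNumeric(token string) (string, bool) {
    patterns := []struct {
        re  *regexp.Regexp
        pos string
    }{
        {regexp.MustCompile(`^\d+(\.\d+)?$|^\d{1,3}(,\d{3})*(\.\d+)?$`), "CD"},
        {regexp.MustCompile(`\d`), "NCD"},
    }
    for _, p := range patterns {
        if p.re.MatchString(token) {
            return p.pos, true
        }
    }
    return "", false
}

func main() {
    fmt.Println(classifyNumeric("1,234.5")) // CD true
    fmt.Println(classifyNumeric("4x4"))     // NCD true
    fmt.Println(classifyNumeric("abc"))     // "" false
}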
Functions ¶
This section is empty.
Types ¶
type AnalyzeStats ¶
func (*AnalyzeStats) AddOOVToken ¶
func (a *AnalyzeStats) AddOOVToken(token string)
func (*AnalyzeStats) AddToken ¶
func (a *AnalyzeStats) AddToken(token string)
func (*AnalyzeStats) Init ¶
func (a *AnalyzeStats) Init()
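AnalyzeStats is filled in by the analyzers as they run; its fields are not shown on this page, so the sketch below sticks to the three methods listed. The import alias, the path it points at, and the caller-supplied notion of "known" tokens are all assumptions:

package example

import (
    ma "path/to/ma" // placeholder: substitute this package's real import path
)

// countTokens tallies every token and, per the caller's own lexicon,
// additionally records the ones it considers out-of-vocabulary.
func countTokens(tokens []string, known map[string]bool) *ma.AnalyzeStats {
    stats := new(ma.AnalyzeStats)
    stats.Init()
    for _, tok := range tokens {
        stats.AddToken(tok)
        if !known[tok] {
            stats.AddOOVToken(tok)
        }
    }
    return stats
}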
type BGULex ¶
type BGULex struct {
    MaxPrefixLen int

    Prefixes map[string][]BasicMorphemes
    Lex      map[string][]BasicMorphemes

    Files []string

    Stats *AnalyzeStats

    AlwaysNNP bool
    LogOOV    bool
    MAType    string
}
func (*BGULex) AddOOVAnalysis ¶
func (l *BGULex) AddOOVAnalysis(lat *Lattice, prefix BasicMorphemes, hostStr string, numToken int)
func (*BGULex) AnalyzeToken ¶
func (l *BGULex) AnalyzeToken(input string, startingNode, indexToken int) (*Lattice, interface{})
func (*BGULex) LoadPrefixes ¶
func (l *BGULex) LoadPrefixes(file string)
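Taken together, the BGULex methods suggest a simple setup flow: load the prefix and lexicon files, optionally attach an AnalyzeStats, then call Analyze on a tokenized sentence. A hedged sketch; the file names, the import path, and the assumption that LoadPrefixes/LoadLex fully initialize the internal maps should all be checked against the package itself:

package main

import (
    "fmt"

    ma "path/to/ma" // placeholder: substitute this package's real import path
)

func main() {
    lex := new(ma.BGULex)
    lex.Stats = new(ma.AnalyzeStats)
    lex.Stats.Init()

    // Hypothetical resource paths; use the BGU lexicon files that ship with the project.
    lex.LoadPrefixes("prefixes.utf8.hr")
    lex.LoadLex("lexicon.utf8.hr", false)

    // Analyze a tokenized sentence; the second return value carries
    // analyzer-specific information and is ignored here.
    sent, _ := lex.Analyze([]string{"גנן", "גידל", "דגן", "בגן"})
    fmt.Println(sent)
}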
type MADict ¶
type MADict struct {
    Language  string
    NumTokens int

    // for OOV
    MaxTopPOS, MaxMSRsPerPOS int
    TopPOS                   []string
    OOVMSRs                  []string
    POSMSRs                  map[string]MSRFreq

    // data
    Files []TrainingFile
    Data  TokenDictionary

    Stats *AnalyzeStats

    TopPOSSet map[string]bool

    Dope bool
}
func (*MADict) AddAnalyses ¶
func (m *MADict) AddAnalyses(token string, morphs BasicMorphemes)
func (*MADict) AddMSRs ¶
func (m *MADict) AddMSRs(morphs BasicMorphemes)
MSR: Morpho-Syntactic Representation, i.e. a POS tag together with its morphological feature string (compare the OOVMSRS values above).
func (*MADict) ComputeOOVMSRs ¶
func (m *MADict) ComputeOOVMSRs(maxMSRs int)
func (*MADict) ComputeTopPOS ¶
func (m *MADict) ComputeTopPOS()
func (*MADict) LearnFromConllU ¶
func (m *MADict) LearnFromConllU(conlluFile string, limit int) (int, error)
func (*MADict) LearnFromLat ¶
func (m *MADict) LearnFromLat(latticeFile, rawFile string, limit int) (int, error)
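The MADict methods outline a train-then-serve workflow: learn token-to-analysis mappings from an annotated file, derive top-POS and OOV MSR statistics, persist the dictionary, and later reload it for analysis. A hedged end-to-end sketch; the file names, the limit and maxMSRs values, the serialization format implied by the output name, and the import path are all illustrative, and any fields Init does not set (e.g. Language) may need to be assigned explicitly:

package main

import (
    "log"

    ma "path/to/ma" // placeholder: substitute this package's real import path
)

func main() {
    dict := new(ma.MADict)
    dict.Init()

    // Learn analyses from a CoNLL-U treebank; 0 is used here as "no limit"
    // (an assumption; check how the limit parameter is interpreted).
    if _, err := dict.LearnFromConllU("train.conllu", 0); err != nil {
        log.Fatal(err)
    }

    // Derive the POS/MSR statistics used for out-of-vocabulary tokens.
    dict.ComputeTopPOS()
    dict.ComputeOOVMSRs(10) // assumed cap on MSRs kept per POS

    if err := dict.WriteFile("hebrew.dict"); err != nil {
        log.Fatal(err)
    }

    // Later: reload the dictionary and analyze a tokenized sentence.
    loaded := new(ma.MADict)
    if err := loaded.ReadFile("hebrew.dict"); err != nil {
        log.Fatal(err)
    }
    sent, _ := loaded.Analyze([]string{"בבית", "הספר"})
    _ = sent
}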
type MorphologicalAnalyzer ¶
type MorphologicalAnalyzer interface {
Analyze(input []string) (LatticeSentence, interface{})
}
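Both *BGULex and *MADict expose Analyze with this exact signature, so either can stand behind the interface. A sketch of caller-side code; the ma and nlp import aliases and paths are placeholders (LatticeSentence is defined in a supporting types package, not on this page):

package example

import (
    ma "path/to/ma"         // placeholder for this package
    nlp "path/to/nlp/types" // placeholder for the package defining LatticeSentence
)

// analyzeAll runs any MorphologicalAnalyzer over a batch of pre-tokenized
// sentences, discarding the analyzer-specific second return value.
func analyzeAll(an ma.MorphologicalAnalyzer, sentences [][]string) []nlp.LatticeSentence {
    out := make([]nlp.LatticeSentence, 0, len(sentences))
    for _, tokens := range sentences {
        sent, _ := an.Analyze(tokens)
        out = append(out, sent)
    }
    return out
}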
type TokenDictionary ¶
type TokenDictionary map[string][]BasicMorphemes
type TrainingFile ¶
type TrainingFile struct {
Lattice, Raw, LatMD5, RawMD5 string
}
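TrainingFile appears to pair a lattice file with its raw source and record a checksum for each, presumably so a cached dictionary can detect when its training data changed. A hedged sketch of populating one; the hex-encoded-MD5 assumption, the helper names, and the import path are mine:

package example

import (
    "crypto/md5"
    "encoding/hex"
    "os"

    ma "path/to/ma" // placeholder: substitute this package's real import path
)

// fileMD5 returns the hex MD5 digest of a file's contents.
func fileMD5(path string) (string, error) {
    b, err := os.ReadFile(path)
    if err != nil {
        return "", err
    }
    sum := md5.Sum(b)
    return hex.EncodeToString(sum[:]), nil
}

// newTrainingFile fills a TrainingFile for a lattice/raw pair.
func newTrainingFile(latPath, rawPath string) (ma.TrainingFile, error) {
    latSum, err := fileMD5(latPath)
    if err != nil {
        return ma.TrainingFile{}, err
    }
    rawSum, err := fileMD5(rawPath)
    if err != nil {
        return ma.TrainingFile{}, err
    }
    return ma.TrainingFile{Lattice: latPath, Raw: rawPath, LatMD5: latSum, RawMD5: rawSum}, nil
}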