conllu

package
v1.2.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 30, 2017 License: Apache-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

View Source
// Package-level separators and sizes.
// NOTE(review): the code that consumes these constants is not shown on this
// page; the roles described below are inferred from the names and values and
// should be confirmed against the parser/writer.
const (
	// FIELD_SEPARATOR is the tab rune; presumably the delimiter between the
	// columns of a single row.
	FIELD_SEPARATOR      = '\t'
	// NUM_FIELDS is 10; presumably the number of columns expected in each row
	// (the CoNLL-U format defines ten columns) — confirm.
	NUM_FIELDS           = 10
	// FEATURES_SEPARATOR is "|"; presumably separates one name=value feature
	// pair from the next inside the features column.
	FEATURES_SEPARATOR   = "|"
	// FEATURE_SEPARATOR is "="; presumably separates a feature name from its
	// value.
	FEATURE_SEPARATOR    = "="
	// FEATURE_CONCAT_DELIM is ","; presumably joins multiple values that
	// belong to the same feature — confirm.
	FEATURE_CONCAT_DELIM = ","
)

Variables

View Source
// Mutable package-level settings.
// NOTE(review): how these are read is not visible on this page; the purposes
// noted below are inferred from the names — confirm against the conversion
// functions before relying on them.
var (
	// WORD_TYPE is initialised to "form"; presumably selects which row field
	// (e.g. the surface form) is used as the word.
	WORD_TYPE    = "form"
	// IGNORE_LEMMA has the bool zero value (false) unless set by the caller;
	// presumably makes processing skip the lemma when true.
	IGNORE_LEMMA bool
	// STRIP_VOICE has the bool zero value (false) unless set by the caller;
	// presumably removes a voice feature when true — TODO confirm.
	STRIP_VOICE  bool
)

Functions

func ConllU2Graph

func ConllU2Graph(sent *Sentence, eWord, ePOS, eWPOS, eRel, eMHost, eMSuffix *util.EnumSet) nlp.LabeledDependencyGraph

func ConllU2GraphCorpus

func ConllU2GraphCorpus(corpus []*Sentence, eWord, ePOS, eWPOS, eRel, eMHost, eMSuffix *util.EnumSet) []interface{}

func ConllU2MorphGraph

func ConllU2MorphGraph(sent *Sentence, eWord, ePOS, eWPOS, eRel, eMFeat, eMHost, eMSuffix *util.EnumSet) nlp.MorphDependencyGraph

func ConllU2MorphGraphCorpus

func ConllU2MorphGraphCorpus(corpus []*Sentence, eWord, ePOS, eWPOS, eRel, eMFeat, eMHost, eMSuffix *util.EnumSet) []interface{}

func FormatFeatures

func FormatFeatures(feat map[string]string) string

func GetMorphProperties

func GetMorphProperties(node *transition.TaggedDepNode, eMHost, eMSuffix *util.EnumSet) string

func Graph2ConllUCorpus

func Graph2ConllUCorpus(corpus []interface{}, eMHost, eMSuffix *util.EnumSet) []interface{}

func MergeGraphAndMorph added in v1.2.0

func MergeGraphAndMorph(dep Sentence, morph nlp.MorphDependencyGraph) interface{}

func MergeGraphAndMorphCorpus added in v1.2.0

func MergeGraphAndMorphCorpus(deps, morphs []interface{}) []interface{}

func MorphGraph2ConllCorpus

func MorphGraph2ConllCorpus(corpus []interface{}) []interface{}

func ParseInt

func ParseInt(value string) (int, error)

func ParseString

func ParseString(value string) string

func ParseTokenRow

func ParseTokenRow(record []string) (string, int, error)

func Write

func Write(writer io.Writer, sents []interface{})

func WriteFile

func WriteFile(filename string, sents []interface{}) error

Types

type Features

// Features is a string-to-string map; ParseFeatures builds one from a string,
// and it provides the MorphHost, MorphSuffix and String methods.
// NOTE(review): keys are presumably feature names and values feature values —
// confirm against ParseFeatures.
type Features map[string]string

func ParseFeatures

func ParseFeatures(featuresStr string) (Features, error)

func (Features) MorphHost

func (f Features) MorphHost() string

func (Features) MorphSuffix

func (f Features) MorphSuffix() string

func (Features) String

func (f Features) String() string

type Row

// Row holds the fields of one parsed row; ParseRow builds it from a
// []string record.
// NOTE(review): the field names appear to mirror the ten CoNLL-U columns
// (ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC). FeatStr and
// TokenID have no CoNLL-U column of the same name; how ParseRow fills them is
// not visible on this page — confirm.
type Row struct {
	// ID is the row's integer id (Sentence.Deps is an int-keyed map of Rows).
	ID      int
	// Form and Lemma: presumably the word form and its lemma.
	Form    string
	Lemma   string
	// UPosTag and XPosTag: presumably the universal and language-specific
	// part-of-speech tags.
	UPosTag string
	XPosTag string
	// Feats is the features map; FeatStr is presumably its string form as it
	// appeared in the input — confirm.
	Feats   Features
	FeatStr string
	// Head and DepRel: presumably the id of the head row and the dependency
	// relation to it.
	Head    int
	DepRel  string
	// Deps: presumably the entries of the DEPS column.
	Deps    []string
	// Misc: presumably the MISC column, kept as a single string.
	Misc    string
	// TokenID: presumably the index of the token this row belongs to — TODO
	// confirm against ParseTokenRow/ParseRow.
	TokenID int
}

A Row is a single parsed row of a CoNLL-U data set.

func ParseRow

func ParseRow(record []string) (Row, error)

func (Row) String

func (r Row) String() string

type Sentence

// Sentence groups the rows and tokens of one sentence.
type Sentence struct {
	// Deps maps an int key (the row id, per the package documentation) to its Row.
	Deps     map[int]Row
	// Tokens is a slice of token strings.
	Tokens   []string
	// Mappings is an nlp.Mappings value; its contents are defined in the nlp
	// package and are not visible here.
	Mappings nlp.Mappings
}

A Sentence holds a map of Rows keyed by their IDs, together with a list of the sentence's tokens.

func Graph2ConllU

func Graph2ConllU(graph nlp.LabeledDependencyGraph, eMHost, eMSuffix *util.EnumSet) Sentence

func MorphGraph2ConllU

func MorphGraph2ConllU(graph nlp.MorphDependencyGraph) Sentence

func NewSentence

func NewSentence() *Sentence

func ReadFile

func ReadFile(filename string, limit int) ([]*Sentence, bool, error)

type Sentences

// Sentences is a slice of *Sentence; it is the collection type returned by Read.
type Sentences []*Sentence

func Read

func Read(reader io.Reader, limit int) (Sentences, bool, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL