Documentation ¶
Index ¶
- func NewFile() corpus.File
- func NewLeipzigCorpus() corpus.Corpus
- func NewPayload(line string) corpus.Payload
- type File
- type LeipzigCorpus
- func (c *LeipzigCorpus) FetchCorpusFile() corpus.File
- func (c *LeipzigCorpus) GetIterator(cache corpus.File) corpus.Iterator
- func (c *LeipzigCorpus) Language() string
- func (c *LeipzigCorpus) Size() string
- func (c *LeipzigCorpus) Source() string
- func (c *LeipzigCorpus) URL() string
- func (c *LeipzigCorpus) WithLanguage(lang string) corpus.Corpus
- func (c *LeipzigCorpus) WithSize(size string) corpus.Corpus
- func (c *LeipzigCorpus) WithSource(source string) corpus.Corpus
- func (c *LeipzigCorpus) WithURL(url string) corpus.Corpus
- func (c *LeipzigCorpus) WithYear(year string) corpus.Corpus
- func (c *LeipzigCorpus) Year() string
- type LeipzigIterator
- type Payload
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func NewLeipzigCorpus ¶
NewLeipzigCorpus returns a new Leipzig corpus
func NewPayload ¶
NewPayload returns a new Payload from a line in the corpus.
Types ¶
type File ¶
type File struct {
// contains filtered or unexported fields
}
File implements the corpus.File interface.
func (File) WithCacheDir ¶
WithCacheDir sets the cache directory
type LeipzigCorpus ¶
type LeipzigCorpus struct {
// contains filtered or unexported fields
}
LeipzigCorpus is a corpus of text data. It implements the corpus.Corpus interface.
func (*LeipzigCorpus) FetchCorpusFile ¶
func (c *LeipzigCorpus) FetchCorpusFile() corpus.File
FetchCorpusFile gets the file from the remote URL. We assume that the remote file is compressed in some way, so the corpus file is extracted from that container.
func (*LeipzigCorpus) GetIterator ¶
func (c *LeipzigCorpus) GetIterator(cache corpus.File) corpus.Iterator
GetIterator returns an iterator for the corpus
func (*LeipzigCorpus) Language ¶
func (c *LeipzigCorpus) Language() string
Language returns the language of the corpus
func (*LeipzigCorpus) Size ¶
func (c *LeipzigCorpus) Size() string
Size returns the size of the corpus
func (*LeipzigCorpus) Source ¶
func (c *LeipzigCorpus) Source() string
Source returns the source of the corpus
func (*LeipzigCorpus) WithLanguage ¶
func (c *LeipzigCorpus) WithLanguage(lang string) corpus.Corpus
WithLanguage sets the language of the corpus
func (*LeipzigCorpus) WithSize ¶
func (c *LeipzigCorpus) WithSize(size string) corpus.Corpus
WithSize sets the size of the corpus
func (*LeipzigCorpus) WithSource ¶
func (c *LeipzigCorpus) WithSource(source string) corpus.Corpus
WithSource sets the source of the corpus
func (*LeipzigCorpus) WithURL ¶
func (c *LeipzigCorpus) WithURL(url string) corpus.Corpus
WithURL sets the URL of the corpus. The URL corresponds to the base URI where the corpus is stored; the corpus file is then added to it.
func (*LeipzigCorpus) WithYear ¶
func (c *LeipzigCorpus) WithYear(year string) corpus.Corpus
WithYear sets the year of the corpus
func (*LeipzigCorpus) Year ¶
func (c *LeipzigCorpus) Year() string
Year returns the year of the corpus
type LeipzigIterator ¶
type LeipzigIterator struct {
// contains filtered or unexported fields
}
LeipzigIterator implements the Iterator interface.
func (*LeipzigIterator) HasNext ¶
func (c *LeipzigIterator) HasNext() bool
HasNext returns true if there is another sentence in the corpus
func (*LeipzigIterator) Next ¶
func (c *LeipzigIterator) Next() corpus.Payload
Next returns the next sentence from the corpus
type Payload ¶
type Payload struct {
// contains filtered or unexported fields
}
Payload implements the corpus.Payload interface.
func (*Payload) LineNumber ¶
LineNumber returns the line number of the payload, as taken from a line from the Corpus Iterator. If the line number is not a number, it will return -1.
func (*Payload) SetContent ¶
SetContent sets the content of the payload
func (*Payload) SetLineNumber ¶
SetLineNumber sets the line number of the payload