Documentation ¶
Index ¶
- func CreateGloomyIndex(conf *gconf.IndexBuilderConf, ngramSize int)
- type DummyNgramBuffer
- type IndexBuilder
- func (b *IndexBuilder) CreateIndices()
- func (b *IndexBuilder) GetNgramList() NgramList
- func (b *IndexBuilder) GetOutputFiles() *gconf.OutputFiles
- func (b *IndexBuilder) ProcStruct(vline *vertigo.Structure)
- func (b *IndexBuilder) ProcStructClose(vline *vertigo.StructureClose)
- func (b *IndexBuilder) ProcToken(vline *vertigo.Token)
- type LargeNgramList
- type NgramBuffer
- type NgramList
- type NgramNode
- type NgramRecord
- type RAMNgramList
- type StdNgramBuffer
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CreateGloomyIndex ¶
func CreateGloomyIndex(conf *gconf.IndexBuilderConf, ngramSize int)
CreateGloomyIndex is a high-level function which creates an n-gram index based on the provided configuration.
Types ¶
type DummyNgramBuffer ¶
type DummyNgramBuffer struct { }
func (*DummyNgramBuffer) AddToken ¶
func (n *DummyNgramBuffer) AddToken(token string)
func (*DummyNgramBuffer) GetValue ¶
func (n *DummyNgramBuffer) GetValue() []string
func (*DummyNgramBuffer) IsValid ¶
func (n *DummyNgramBuffer) IsValid() bool
func (*DummyNgramBuffer) Reset ¶
func (n *DummyNgramBuffer) Reset()
func (*DummyNgramBuffer) Stringer ¶
func (n *DummyNgramBuffer) Stringer() string
type IndexBuilder ¶
type IndexBuilder struct {
// contains filtered or unexported fields
}
IndexBuilder is an object for creating n-gram indices.
func CreateIndexBuilder ¶
func CreateIndexBuilder(conf *gconf.IndexBuilderConf, ngramSize int) *IndexBuilder
func (*IndexBuilder) CreateIndices ¶
func (b *IndexBuilder) CreateIndices()
func (*IndexBuilder) GetNgramList ¶
func (b *IndexBuilder) GetNgramList() NgramList
func (*IndexBuilder) GetOutputFiles ¶
func (b *IndexBuilder) GetOutputFiles() *gconf.OutputFiles
func (*IndexBuilder) ProcStruct ¶
func (b *IndexBuilder) ProcStruct(vline *vertigo.Structure)
func (*IndexBuilder) ProcStructClose ¶
func (b *IndexBuilder) ProcStructClose(vline *vertigo.StructureClose)
func (*IndexBuilder) ProcToken ¶
func (b *IndexBuilder) ProcToken(vline *vertigo.Token)
type LargeNgramList ¶
type LargeNgramList struct {
// contains filtered or unexported fields
}
func NewLargeNgramList ¶
func NewLargeNgramList(workingDirPath string, chunkSize int) *LargeNgramList
func (*LargeNgramList) Add ¶
func (nn *LargeNgramList) Add(ngram []string, metadata []column.AttrVal)
func (*LargeNgramList) ForEach ¶
func (nn *LargeNgramList) ForEach(fn func(n *NgramRecord))
func (*LargeNgramList) Size ¶
func (nn *LargeNgramList) Size() int
type NgramBuffer ¶
type NgramList ¶
type NgramList interface {
	ForEach(fn func(n *NgramRecord))
	Size() int
	Add(ngram []string, metadata []column.AttrVal)
}
NgramList specifies a required n-gram list implementation. Gloomy provides a simple in-memory implementation and a more advanced one operating on multiple file chunks for large data.
type RAMNgramList ¶
type RAMNgramList struct {
// contains filtered or unexported fields
}
func (*RAMNgramList) ForEach ¶
func (n *RAMNgramList) ForEach(fn func(n *NgramRecord))
func (*RAMNgramList) Size ¶
func (n *RAMNgramList) Size() int
type StdNgramBuffer ¶
type StdNgramBuffer struct {
	Size int
	// contains filtered or unexported fields
}
StdNgramBuffer is used for continuous "circular" inserting of tokens and their export as n-grams.
func NewStdNgramBuffer ¶
func NewStdNgramBuffer(size int) *StdNgramBuffer
NewStdNgramBuffer is a factory function which creates a properly initialized buffer.
func (*StdNgramBuffer) AddToken ¶
func (n *StdNgramBuffer) AddToken(token string)
AddToken adds a token to the buffer.
func (*StdNgramBuffer) GetValue ¶
func (n *StdNgramBuffer) GetValue() []string
GetValue returns the current contents of the buffer as a slice of tokens.
func (*StdNgramBuffer) IsValid ¶
func (n *StdNgramBuffer) IsValid() bool
IsValid returns true if all the n-gram positions are non-empty. This can be used to filter out incomplete n-grams from the beginning of a sentence.
func (*StdNgramBuffer) Reset ¶
func (n *StdNgramBuffer) Reset()
Reset clears out all the values and also internal pointers to start generating n-grams from scratch.
func (*StdNgramBuffer) Stringer ¶
func (n *StdNgramBuffer) Stringer() string
Stringer produces a user-friendly overview of the buffer where tokens are separated by spaces. Please note that this also works on non-valid tokens, i.e. to be sure the result is a complete n-gram, IsValid must be called.