builder

package
v0.0.0-...-9c66339 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 15, 2018 License: Apache-2.0 Imports: 17 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CreateGloomyIndex

func CreateGloomyIndex(conf *gconf.IndexBuilderConf, ngramSize int)

CreateGloomyIndex is a high-level function which creates an n-gram index based on the provided configuration.

Types

type DummyNgramBuffer

type DummyNgramBuffer struct {
}

func (*DummyNgramBuffer) AddToken

func (n *DummyNgramBuffer) AddToken(token string)

func (*DummyNgramBuffer) GetValue

func (n *DummyNgramBuffer) GetValue() []string

func (*DummyNgramBuffer) IsValid

func (n *DummyNgramBuffer) IsValid() bool

func (*DummyNgramBuffer) Reset

func (n *DummyNgramBuffer) Reset()

func (*DummyNgramBuffer) Stringer

func (n *DummyNgramBuffer) Stringer() string

type IndexBuilder

type IndexBuilder struct {
	// contains filtered or unexported fields
}

IndexBuilder is an object for creating n-gram indices

func CreateIndexBuilder

func CreateIndexBuilder(conf *gconf.IndexBuilderConf, ngramSize int) *IndexBuilder

func (*IndexBuilder) CreateIndices

func (b *IndexBuilder) CreateIndices()

func (*IndexBuilder) GetNgramList

func (b *IndexBuilder) GetNgramList() NgramList

func (*IndexBuilder) GetOutputFiles

func (b *IndexBuilder) GetOutputFiles() *gconf.OutputFiles

func (*IndexBuilder) ProcStruct

func (b *IndexBuilder) ProcStruct(vline *vertigo.Structure)

func (*IndexBuilder) ProcStructClose

func (b *IndexBuilder) ProcStructClose(vline *vertigo.StructureClose)

func (*IndexBuilder) ProcToken

func (b *IndexBuilder) ProcToken(vline *vertigo.Token)

type LargeNgramList

type LargeNgramList struct {
	// contains filtered or unexported fields
}

func NewLargeNgramList

func NewLargeNgramList(workingDirPath string, chunkSize int) *LargeNgramList

func (*LargeNgramList) Add

func (nn *LargeNgramList) Add(ngram []string, metadata []column.AttrVal)

func (*LargeNgramList) ForEach

func (nn *LargeNgramList) ForEach(fn func(n *NgramRecord))

func (*LargeNgramList) Size

func (nn *LargeNgramList) Size() int

type NgramBuffer

type NgramBuffer interface {
	AddToken(token string)
	GetValue() []string
	IsValid() bool
	Reset()
	Stringer() string
}

type NgramList

type NgramList interface {
	ForEach(fn func(n *NgramRecord))

	Size() int

	Add(ngram []string, metadata []column.AttrVal)
}

NgramList specifies a required n-gram list implementation. Gloomy provides a simple in-memory implementation and a more advanced one operating on multiple file chunks for large data.

type NgramNode

type NgramNode struct {
	// contains filtered or unexported fields
}

func (*NgramNode) GetCount

func (n *NgramNode) GetCount() int

func (*NgramNode) GetNgram

func (n *NgramNode) GetNgram() []string

type NgramRecord

type NgramRecord struct {
	Ngram []string
	Count int
	Args  []column.AttrVal
}

type RAMNgramList

type RAMNgramList struct {
	// contains filtered or unexported fields
}

func (*RAMNgramList) Add

func (n *RAMNgramList) Add(ngram []string, metadata []column.AttrVal)

func (*RAMNgramList) ForEach

func (n *RAMNgramList) ForEach(fn func(n *NgramRecord))

func (*RAMNgramList) Size

func (n *RAMNgramList) Size() int

type StdNgramBuffer

type StdNgramBuffer struct {
	Size int
	// contains filtered or unexported fields
}

StdNgramBuffer is used for continuous "circular" inserting of tokens and their export as n-grams

func NewStdNgramBuffer

func NewStdNgramBuffer(size int) *StdNgramBuffer

NewStdNgramBuffer is a factory function which creates a properly initialized buffer.

func (*StdNgramBuffer) AddToken

func (n *StdNgramBuffer) AddToken(token string)

AddToken adds a token to the buffer.

func (*StdNgramBuffer) GetValue

func (n *StdNgramBuffer) GetValue() []string

GetValue returns the current contents of the buffer as a slice of tokens (i.e. the current n-gram).

func (*StdNgramBuffer) IsValid

func (n *StdNgramBuffer) IsValid() bool

IsValid returns true if all the n-gram positions are non-empty. This can be used to filter out incomplete n-grams from the beginning of a sentence.

func (*StdNgramBuffer) Reset

func (n *StdNgramBuffer) Reset()

Reset clears out all the values and also internal pointers to start generating n-grams from scratch.

func (*StdNgramBuffer) Stringer

func (n *StdNgramBuffer) Stringer() string

Stringer produces a user-friendly overview of the buffer in which tokens are separated by spaces. Please note that this also works on non-valid (incomplete) n-grams; to be sure the n-gram is complete, call IsValid first.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL