segment

package module
v1.0.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 25, 2024 License: Apache-2.0 Imports: 4 Imported by: 8

README

Bluge Segment API

PkgGoDev Tests Lint

These interfaces define the relationship between Bluge and its segment file formats.

The expectation is that these interfaces can be well-defined, and evolve slowly. By doing so, Bluge and the file formats themselves can evolve more quickly, and yet remain compatible.

Documentation

Index

Constants

This section is empty.

Variables

View Source
// ErrClosed is a package-level error value whose message is "index closed".
var ErrClosed = fmt.Errorf("index closed")

Functions

This section is empty.

Types

type Automaton

// Automaton is a state machine driven one byte at a time. States are
// plain ints: Start yields the initial state and Accept yields the
// successor of a given state for a given input byte.
type Automaton interface {

	// Start returns the start state
	Start() int

	// IsMatch returns true if and only if the state is a match
	IsMatch(int) bool

	// CanMatch returns true if and only if it is possible to reach a match
	// in zero or more steps
	CanMatch(int) bool

	// WillAlwaysMatch returns true if and only if the current state matches
	// and will always match no matter what steps are taken
	WillAlwaysMatch(int) bool

	// Accept returns the next state given the input to the specified state
	Accept(int, byte) int
}

type CollectionStats

// CollectionStats reports document and token counts for a single field.
type CollectionStats interface {

	// TotalDocumentCount returns the number of documents, regardless of whether or not
	// they have any terms for this field
	TotalDocumentCount() uint64

	// DocumentCount returns the number of documents with at least one term for this field
	DocumentCount() uint64

	// SumTotalTermFrequency returns the total number of tokens across all documents
	SumTotalTermFrequency() uint64

	// Merge takes another CollectionStats and returns nothing.
	// NOTE(review): presumably folds the argument's counts into the
	// receiver; confirm against an implementation.
	Merge(CollectionStats)
}

type Data

// Data is an opaque struct: none of its fields are exported, so callers
// interact with it only through its constructors and methods.
type Data struct {
	// contains filtered or unexported fields
}

Data is an opaque representation of some data. This data could have been read onto the heap, it could be a live memory-mapped region, or it could be loaded on demand using traditional file I/O.

Micro-benchmarking supported using this concrete structure with a simple conditional, rather than an interface with multiple implementations.

func NewDataBytes

func NewDataBytes(b []byte) *Data

func NewDataFile

func NewDataFile(f *os.File) (*Data, error)

func (*Data) Len

func (d *Data) Len() int

func (*Data) Read

func (d *Data) Read(start, end int) ([]byte, error)

func (*Data) Reader

func (d *Data) Reader() *DataReader

func (*Data) Size

func (d *Data) Size() int

func (*Data) Slice

func (d *Data) Slice(start, end int) *Data

func (*Data) WriteTo

func (d *Data) WriteTo(w io.Writer) (int64, error)

type DataReader

// DataReader is the type returned by (*Data).Reader. Its fields are not
// exported; it is used through its Read(p []byte) (n int, err error) method.
type DataReader struct {
	// contains filtered or unexported fields
}

func (*DataReader) Read

func (r *DataReader) Read(p []byte) (n int, err error)

type Dictionary

// Dictionary embeds DictionaryLookup and adds access to postings lists
// and to iteration over dictionary entries.
type Dictionary interface {
	DictionaryLookup

	// PostingsList returns a PostingsList (or an error) for the given term.
	// NOTE(review): except presumably identifies documents to exclude, and
	// prealloc presumably is an instance the implementation may reuse;
	// confirm both against an implementation.
	PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)

	// Iterator returns a DictionaryIterator for the given Automaton and key
	// range. As the parameter names indicate, the start key is inclusive
	// and the end key is exclusive.
	Iterator(a Automaton,
		startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
}

type DictionaryEntry

// DictionaryEntry is the element type produced by DictionaryIterator.Next.
type DictionaryEntry interface {
	// Term returns the entry's term as a string.
	Term() string
	// Count returns a uint64 count for the entry.
	// NOTE(review): presumably the number of documents containing the
	// term; confirm.
	Count() uint64
}

type DictionaryIterator

// DictionaryIterator steps through DictionaryEntry values.
type DictionaryIterator interface {
	// Next returns a DictionaryEntry or an error.
	// NOTE(review): how exhaustion is signalled (e.g. a nil entry) is not
	// visible here; confirm against an implementation.
	Next() (DictionaryEntry, error)
	// Close closes the iterator and reports any error from doing so.
	Close() error
}

type DictionaryLookup

// DictionaryLookup answers membership queries by key. It is embedded in
// Dictionary.
type DictionaryLookup interface {
	// Contains reports whether key is present, or returns an error.
	Contains(key []byte) (bool, error)
	// Close closes the lookup and reports any error from doing so.
	Close() error
}

type DocVisitState

// DocVisitState is an empty interface: it declares no methods, so any
// value satisfies it.
type DocVisitState interface {
}

type Document

// Document is the view of a document exposed through this API.
type Document interface {
	// Analyze takes no arguments and returns nothing.
	// NOTE(review): presumably runs analysis over the document's fields;
	// confirm.
	Analyze()
	// EachField accepts a VisitField callback.
	// NOTE(review): presumably invoked once per field; confirm.
	EachField(vf VisitField)
	// Timestamp returns an int64; its unit and epoch are not specified here.
	Timestamp() int64
}

type DocumentValueReader

// DocumentValueReader is returned by Segment.DocumentValueReader and
// gives callback-based access to document values.
type DocumentValueReader interface {
	// VisitDocumentValues calls back into visitor for the document
	// identified by number, returning any error encountered.
	VisitDocumentValues(number uint64, visitor DocumentValueVisitor) error
}

type DocumentValueVisitor

// DocumentValueVisitor is a callback receiving a field name and a term.
type DocumentValueVisitor func(field string, term []byte)

DocumentValueVisitor is the callback function used by the DocumentValueReader's VisitDocumentValues method.

type Field

// Field is the argument type passed to a VisitField callback.
type Field interface {
	// Name returns the field's name.
	Name() string
	// Length returns an int length for the field.
	// NOTE(review): the unit (tokens vs. bytes) is not visible here; confirm.
	Length() int
	// EachTerm accepts a VisitTerm callback.
	// NOTE(review): presumably invoked once per term; confirm.
	EachTerm(vt VisitTerm)
	// Value returns the field's value as bytes.
	Value() []byte

	// Index, Store and IndexDocValues each return a bool.
	// NOTE(review): presumably per-field options selecting whether the
	// field is indexed, stored, and has doc values indexed; confirm.
	Index() bool
	Store() bool
	IndexDocValues() bool
}

type FieldTerm

// FieldTerm is the argument type passed to a VisitTerm callback.
type FieldTerm interface {
	// Term returns the term as bytes.
	Term() []byte
	// Frequency returns an int frequency for the term.
	Frequency() int
	// EachLocation accepts a VisitLocation callback.
	// NOTE(review): presumably invoked once per occurrence; confirm.
	EachLocation(vl VisitLocation)
}

type Location

// Location is the argument type passed to a VisitLocation callback and
// the element type returned by Posting.Locations.
type Location interface {
	// Field returns a field name.
	Field() string
	// Start, End and Pos each return an int.
	// NOTE(review): presumably start/end offsets and a token position;
	// units are not visible here, so confirm.
	Start() int
	End() int
	Pos() int
	// Size returns an int size.
	// NOTE(review): presumably a memory-footprint estimate; confirm.
	Size() int
}

type Merger

// Merger exposes a WriteTo operation together with a table of document
// numbers.
type Merger interface {
	// WriteTo writes to w and returns n and err.
	// NOTE(review): closeCh presumably lets the caller signal that the
	// write should be abandoned; confirm.
	WriteTo(w io.Writer, closeCh chan struct{}) (n int64, err error)
	// DocumentNumbers returns a slice of uint64 slices.
	// NOTE(review): presumably one mapping per merged input; confirm.
	DocumentNumbers() [][]uint64
}

type Optimizable

// Optimizable is implemented by resources that can take part in an
// optimization identified by a kind string.
type Optimizable interface {
	// Optimize receives the kind of optimization and an OptimizableContext,
	// and returns an OptimizableContext or an error.
	Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
}

type OptimizableContext

// OptimizableContext is the value passed to and returned from
// Optimizable.Optimize.
type OptimizableContext interface {
	// Once all the optimizable resources have been provided the same
	// OptimizableContext instance, the optimization preparations are
	// finished or completed via the Finish() method.
	//
	// Depending on the optimization being performed, the Finish()
	// method might return a non-nil Optimized instance.  For example,
	// the Optimized instance might represent an optimized
	// PostingsIterator instance.
	//
	// NOTE(review): the text above says "Optimized instance", but the
	// declared return type is PostingsIterator.
	Finish() (PostingsIterator, error)
}

type OptimizablePostingsIterator

// OptimizablePostingsIterator exposes a roaring bitmap that can be read
// and replaced.
type OptimizablePostingsIterator interface {
	// ActualBitmap returns a *roaring.Bitmap.
	ActualBitmap() *roaring.Bitmap
	// DocNum1Hit returns a uint64 and a bool.
	// NOTE(review): presumably the single document number and whether the
	// iterator is in a one-hit form; confirm.
	DocNum1Hit() (uint64, bool)
	// ReplaceActual takes a *roaring.Bitmap.
	// NOTE(review): presumably replaces the bitmap that ActualBitmap
	// returns; confirm.
	ReplaceActual(*roaring.Bitmap)
}

type Posting

// Posting is the element type returned by PostingsIterator.Next and
// PostingsIterator.Advance.
type Posting interface {
	// Number returns the posting's uint64 number; SetNumber sets it.
	Number() uint64
	SetNumber(uint64)
	// Frequency returns an int frequency.
	Frequency() int
	// Norm returns a float64 norm.
	Norm() float64
	// Locations returns a slice of Location values.
	Locations() []Location
	// Size returns an int size.
	// NOTE(review): presumably a memory-footprint estimate; confirm.
	Size() int
}

type PostingsIterator

// PostingsIterator steps through Posting values, either sequentially
// with Next or by skipping forward with Advance.
type PostingsIterator interface {
	// The caller is responsible for copying whatever it needs from
	// the returned Posting instance before calling Next(), as some
	// implementations may return a shared instance to reduce memory
	// allocations.
	Next() (Posting, error)

	// Advance will return the posting with the specified doc number
	// or if there is no such posting, the next posting.
	// Callers MUST NOT attempt to pass a docNum that is less than or
	// equal to the currently visited posting doc Num.
	Advance(docNum uint64) (Posting, error)

	// Size returns an int size.
	// NOTE(review): presumably a memory-footprint estimate; confirm.
	Size() int

	// is this postings iterator empty?
	Empty() bool

	// Count returns a uint64 count.
	// NOTE(review): presumably the number of postings; confirm.
	Count() uint64

	// Close closes the iterator and reports any error from doing so.
	Close() error
}

type PostingsList

// PostingsList is returned by Dictionary.PostingsList and hands out
// PostingsIterator values.
type PostingsList interface {
	// Iterator returns a PostingsIterator or an error. The three bool
	// parameters are named for frequency, norm and location data.
	// NOTE(review): prealloc presumably is an iterator the implementation
	// may reuse; confirm.
	Iterator(includeFreq, includeNorm, includeLocations bool, prealloc PostingsIterator) (PostingsIterator, error)

	// Size returns an int size.
	// NOTE(review): presumably a memory-footprint estimate; confirm.
	Size() int

	// Count returns a uint64 count.
	// NOTE(review): presumably the number of postings; confirm.
	Count() uint64
}

type Segment

// Segment is the central interface of this package: it gives access to
// per-field dictionaries, stored fields, statistics and document values,
// and can write itself to an io.Writer.
type Segment interface {
	// Dictionary returns the Dictionary for the named field, or an error.
	Dictionary(field string) (Dictionary, error)

	// VisitStoredFields calls back into visitor for the document
	// identified by num, returning any error encountered.
	VisitStoredFields(num uint64, visitor StoredFieldVisitor) error

	// Count returns a uint64 count.
	// NOTE(review): presumably the number of documents; confirm.
	Count() uint64

	// DocsMatchingTerms takes a slice of Term and returns a roaring
	// bitmap or an error.
	// NOTE(review): whether a document must match any or all of the
	// terms is not visible here; confirm.
	DocsMatchingTerms([]Term) (*roaring.Bitmap, error)

	// Fields returns a slice of field names.
	Fields() []string

	// CollectionStats returns the CollectionStats for the named field,
	// or an error.
	CollectionStats(field string) (CollectionStats, error)

	// Size returns an int size.
	// NOTE(review): presumably a memory-footprint estimate; confirm.
	Size() int

	// DocumentValueReader returns a DocumentValueReader for the given
	// fields, or an error.
	DocumentValueReader(fields []string) (DocumentValueReader, error)

	// WriteTo writes to w and returns an int64 and an error.
	// NOTE(review): closeCh presumably lets the caller signal that the
	// write should be abandoned; confirm.
	WriteTo(w io.Writer, closeCh chan struct{}) (int64, error)

	// Type and Version return a string and a uint32.
	// NOTE(review): presumably they identify the segment file format;
	// confirm.
	Type() string
	Version() uint32

	// Timestamp returns two int64 values.
	// NOTE(review): presumably a min/max pair; order and units are not
	// visible here, so confirm.
	Timestamp() (int64, int64)
}

type StoredFieldVisitor

// StoredFieldVisitor is a callback receiving a field name and a value,
// and returning a bool.
type StoredFieldVisitor func(field string, value []byte) bool

StoredFieldVisitor defines a callback to be visited for each stored field value. The return value determines if the visitor should keep going. Returning true continues visiting, false stops.

type Term

// Term pairs a field name with term bytes. It is the element type
// accepted by Segment.DocsMatchingTerms.
type Term interface {
	// Field returns the field name.
	Field() string
	// Term returns the term as bytes.
	Term() []byte
}

type TermStats

// TermStats reports statistics about a single term.
type TermStats interface {

	// DocumentFrequency returns the number of documents using this term
	DocumentFrequency() uint64
}

type VisitField

// VisitField is a callback receiving a Field. It is the parameter type
// of Document.EachField.
type VisitField func(Field)

type VisitLocation

// VisitLocation is a callback receiving a Location. It is the parameter
// type of FieldTerm.EachLocation.
type VisitLocation func(Location)

type VisitTerm

// VisitTerm is a callback receiving a FieldTerm. It is the parameter
// type of Field.EachTerm.
type VisitTerm func(FieldTerm)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL