Documentation ¶
Index ¶
- Constants
- Variables
- func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64)
- func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64
- func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, chunkMode uint32, ...) (newDocNums [][]uint64, ...)
- func PersistSegmentBase(sb *SegmentBase, path string) error
- func Plugin() segment.Plugin
- func PostingsIteratorFrom1Hit(docNum1Hit uint64, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
- func PostingsIteratorFromBitmap(bm *roaring.Bitmap, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
- func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64)
- type CountHashWriter
- type Dictionary
- func (d *Dictionary) AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator
- func (d *Dictionary) Contains(key []byte) (bool, error)
- func (d *Dictionary) Iterator() segment.DictionaryIterator
- func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, includeCount bool) segment.DictionaryIterator
- func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error)
- func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator
- func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
- type DictionaryIterator
- type Location
- type MetaData
- type Posting
- type PostingsIterator
- func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap
- func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error)
- func (p *PostingsIterator) DocNum1Hit() (uint64, bool)
- func (i *PostingsIterator) Next() (segment.Posting, error)
- func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap)
- func (i *PostingsIterator) Size() int
- type PostingsList
- type Segment
- func (s *Segment) AddRef()
- func (s *Segment) CRC() uint32
- func (s *Segment) ChunkMode() uint32
- func (s *Segment) Close() (err error)
- func (s *Segment) Data() []byte
- func (s *Segment) DecRef() (err error)
- func (s *Segment) DictAddr(field string) (uint64, error)
- func (s *Segment) DocValueOffset() uint64
- func (s *Segment) FieldsIndexOffset() uint64
- func (s *Segment) NumDocs() uint64
- func (s *Segment) Path() string
- func (s *Segment) Size() int
- func (s *Segment) StoredIndexOffset() uint64
- func (s *Segment) Version() uint32
- type SegmentBase
- func (sb *SegmentBase) AddRef()
- func (sb *SegmentBase) Close() (err error)
- func (s *SegmentBase) Count() uint64
- func (sb *SegmentBase) DecRef() (err error)
- func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error)
- func (s *SegmentBase) DocID(num uint64) ([]byte, error)
- func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error)
- func (s *SegmentBase) Fields() []string
- func (sb *SegmentBase) Persist(path string) error
- func (sb *SegmentBase) Size() int
- func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error
- func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor, ...) (segment.DocVisitState, error)
- func (s *SegmentBase) VisitableDocValueFields() ([]string, error)
- type ZapPlugin
- func (*ZapPlugin) Merge(segments []seg.Segment, drops []*roaring.Bitmap, path string, ...) ([][]uint64, uint64, error)
- func (z *ZapPlugin) New(results []*index.AnalysisResult) (segment.Segment, uint64, error)
- func (*ZapPlugin) Open(path string) (segment.Segment, error)
- func (*ZapPlugin) Type() string
- func (*ZapPlugin) Version() uint32
Constants ¶
const DocNum1HitFinished = math.MaxUint64
const FSTValEncoding1Hit = uint64(0x8000000000000000)
const FSTValEncodingGeneral = uint64(0x0000000000000000)
const FSTValEncodingMask = uint64(0xc000000000000000)
FooterSize is the size of the footer record in bytes: crc + ver + chunk + field offset + stored offset + num docs + docValueOffset.
const Type string = "zap"
const Version uint32 = 13
Variables ¶
var DefaultChunkMode uint32 = 1025
DefaultChunkMode is the most recent improvement to chunking and should be used by default.
var DefaultFileMergerBufferSize = 1024 * 1024
var LegacyChunkMode uint32 = 1024
LegacyChunkMode was the original chunk mode (always chunk size 1024). This mode is still used for chunking doc values.
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NormBits1Hit = uint64(math.Float32bits(float32(1)))
var ValidateDocFields = func(field document.Field) error { return nil }
ValidateDocFields can be set by applications to perform additional checks on fields in a document being added to a new segment; by default it does nothing. This API is experimental and may be removed at any time.
Functions ¶
func FSTValDecode1Hit ¶
func FSTValEncode1Hit ¶
func MergeToWriter ¶
func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, chunkMode uint32, cr *CountHashWriter, closeCh chan struct{}) ( newDocNums [][]uint64, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16, err error)
func PersistSegmentBase ¶
func PersistSegmentBase(sb *SegmentBase, path string) error
PersistSegmentBase persists SegmentBase in the zap file format.
func PostingsIteratorFrom1Hit ¶
func PostingsIteratorFrom1Hit(docNum1Hit uint64, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
PostingsIteratorFrom1Hit constructs a PostingsIterator given a 1-hit docNum.
func PostingsIteratorFromBitmap ¶
func PostingsIteratorFromBitmap(bm *roaring.Bitmap, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
PostingsIteratorFromBitmap constructs a PostingsIterator given an "actual" bitmap.
Types ¶
type CountHashWriter ¶
type CountHashWriter struct {
// contains filtered or unexported fields
}
CountHashWriter is a wrapper around a Writer which counts the number of bytes which have been written and computes a crc32 hash
func NewCountHashWriter ¶
func NewCountHashWriter(w io.Writer) *CountHashWriter
NewCountHashWriter returns a CountHashWriter which wraps the provided Writer
func NewCountHashWriterWithStatsReporter ¶
func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter
func (*CountHashWriter) Count ¶
func (c *CountHashWriter) Count() int
Count returns the number of bytes written
func (*CountHashWriter) Sum32 ¶
func (c *CountHashWriter) Sum32() uint32
Sum32 returns the CRC-32 hash of the content written to this writer
type Dictionary ¶
type Dictionary struct {
// contains filtered or unexported fields
}
Dictionary is the zap representation of the term dictionary
func (*Dictionary) AutomatonIterator ¶
func (d *Dictionary) AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator
AutomatonIterator returns an iterator which only visits terms matching the vellum automaton and within the start/end key range
func (*Dictionary) Iterator ¶
func (d *Dictionary) Iterator() segment.DictionaryIterator
Iterator returns an iterator for this dictionary
func (*Dictionary) OnlyIterator ¶
func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, includeCount bool) segment.DictionaryIterator
func (*Dictionary) PostingsList ¶
func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error)
PostingsList returns the postings list for the specified term
func (*Dictionary) PrefixIterator ¶
func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator
PrefixIterator returns an iterator which only visits terms having the specified prefix
func (*Dictionary) RangeIterator ¶
func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
RangeIterator returns an iterator which only visits terms between the start and end terms. NOTE: bleve.index API specifies the end is inclusive.
type DictionaryIterator ¶
type DictionaryIterator struct {
// contains filtered or unexported fields
}
DictionaryIterator is an iterator for term dictionary
type Location ¶
type Location struct {
// contains filtered or unexported fields
}
Location represents the location of a single occurrence
func (*Location) ArrayPositions ¶
ArrayPositions returns the array position vector associated with this occurrence
func (*Location) Field ¶
Field returns the name of the field (useful in composite fields to know which original field the value came from)
type MetaData ¶
type MetaData struct {
	DocNum      uint64 // docNum of the data inside the chunk
	DocDvOffset uint64 // offset of data inside the chunk for the given docid
}
MetaData represents the data information inside a chunk.
type Posting ¶
type Posting struct {
// contains filtered or unexported fields
}
Posting is a single entry in a postings list
func (*Posting) Frequency ¶
Frequency returns the frequency of occurrence of this term in this doc/field
type PostingsIterator ¶
type PostingsIterator struct {
	Actual   roaring.IntPeekable
	ActualBM *roaring.Bitmap
	// contains filtered or unexported fields
}
PostingsIterator provides a way to iterate through the postings list
func (*PostingsIterator) ActualBitmap ¶
func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap
ActualBitmap returns the underlying actual bitmap which can be used up the stack for optimizations
func (*PostingsIterator) Advance ¶
func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error)
Advance returns the posting at the specified docNum or, if it is not present, the next posting; if the end is reached, it returns nil
func (*PostingsIterator) DocNum1Hit ¶
func (p *PostingsIterator) DocNum1Hit() (uint64, bool)
DocNum1Hit returns the docNum and true if this is "1-hit" optimized and the docNum is available.
func (*PostingsIterator) Next ¶
func (i *PostingsIterator) Next() (segment.Posting, error)
Next returns the next posting on the postings list, or nil at the end
func (*PostingsIterator) ReplaceActual ¶
func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap)
ReplaceActual replaces the ActualBM with the provided bitmap
func (*PostingsIterator) Size ¶
func (i *PostingsIterator) Size() int
type PostingsList ¶
type PostingsList struct {
// contains filtered or unexported fields
}
PostingsList is an in-memory representation of a postings list
func (*PostingsList) Count ¶
func (p *PostingsList) Count() uint64
Count returns the number of items on this postings list
func (*PostingsList) Iterator ¶
func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, prealloc segment.PostingsIterator) segment.PostingsIterator
Iterator returns an iterator for this postings list
func (*PostingsList) OrInto ¶
func (p *PostingsList) OrInto(receiver *roaring.Bitmap)
func (*PostingsList) Size ¶
func (p *PostingsList) Size() int
type Segment ¶
type Segment struct {
	SegmentBase
	// contains filtered or unexported fields
}
Segment implements a persisted segment.Segment interface, by embedding an mmap()'ed SegmentBase.
func (*Segment) DictAddr ¶
DictAddr is a helper function to compute the file offset where the dictionary is stored for the specified field.
func (*Segment) DocValueOffset ¶
DocValueOffset returns the docValue offset in the file footer
func (*Segment) FieldsIndexOffset ¶
FieldsIndexOffset returns the fields index offset in the file footer
func (*Segment) StoredIndexOffset ¶
StoredIndexOffset returns the stored value index offset in the file footer
type SegmentBase ¶
type SegmentBase struct {
// contains filtered or unexported fields
}
SegmentBase is a memory only, read-only implementation of the segment.Segment interface, using zap's data representation.
func InitSegmentBase ¶
func (*SegmentBase) AddRef ¶
func (sb *SegmentBase) AddRef()
func (*SegmentBase) Close ¶
func (sb *SegmentBase) Close() (err error)
func (*SegmentBase) Count ¶
func (s *SegmentBase) Count() uint64
Count returns the number of documents in this segment.
func (*SegmentBase) DecRef ¶
func (sb *SegmentBase) DecRef() (err error)
func (*SegmentBase) Dictionary ¶
func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error)
Dictionary returns the term dictionary for the specified field
func (*SegmentBase) DocID ¶
func (s *SegmentBase) DocID(num uint64) ([]byte, error)
DocID returns the value of the _id field for the given docNum
func (*SegmentBase) DocNumbers ¶
func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error)
DocNumbers returns a bitset corresponding to the doc numbers of all the provided _id strings
func (*SegmentBase) Fields ¶
func (s *SegmentBase) Fields() []string
Fields returns the field names used in this segment
func (*SegmentBase) Persist ¶
func (sb *SegmentBase) Persist(path string) error
func (*SegmentBase) Size ¶
func (sb *SegmentBase) Size() int
func (*SegmentBase) VisitDocument ¶
func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error
VisitDocument invokes the DocumentFieldValueVisitor for each stored field for the specified doc number
func (*SegmentBase) VisitDocumentFieldTerms ¶
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( segment.DocVisitState, error)
VisitDocumentFieldTerms is an implementation of the DocumentFieldTermVisitable interface
func (*SegmentBase) VisitableDocValueFields ¶
func (s *SegmentBase) VisitableDocValueFields() ([]string, error)
VisitableDocValueFields returns the list of fields with persisted doc value terms ready to be visitable using the VisitDocumentFieldTerms method.
type ZapPlugin ¶
type ZapPlugin struct{}
ZapPlugin implements the Plugin interface of the blevesearch/bleve/index/scorch/segment pkg
func (*ZapPlugin) Merge ¶
func (*ZapPlugin) Merge(segments []seg.Segment, drops []*roaring.Bitmap, path string, closeCh chan struct{}, s seg.StatsReporter) ( [][]uint64, uint64, error)
Merge takes a slice of segments and bit masks describing which documents may be dropped, and creates a new segment containing the remaining data. This new segment is built at the specified path.
func (*ZapPlugin) New ¶
New produces an in-memory zap-encoded SegmentBase from analysis results