Documentation ¶
Index ¶
- Constants
- Variables
- func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64)
- func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64
- func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, chunkMode uint32, ...) (newDocNums [][]uint64, numDocs, storedIndexOffset uint64, dictLocs []uint64, ...)
- func PersistSegmentBase(sb *SegmentBase, path string) error
- func PostingsIteratorFrom1Hit(docNum1Hit uint64, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
- func PostingsIteratorFromBitmap(bm *roaring.Bitmap, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
- func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64)
- type CountHashWriter
- type Dictionary
- func (d *Dictionary) AutomatonIterator(a segment.Automaton, startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator
- func (d *Dictionary) BytesRead() uint64
- func (d *Dictionary) BytesWritten() uint64
- func (d *Dictionary) Contains(key []byte) (bool, error)
- func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error)
- func (d *Dictionary) ResetBytesRead(val uint64)
- type DictionaryIterator
- type Location
- type MetaData
- type Posting
- type PostingsIterator
- func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap
- func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error)
- func (i *PostingsIterator) BytesRead() uint64
- func (i *PostingsIterator) BytesWritten() uint64
- func (p *PostingsIterator) DocNum1Hit() (uint64, bool)
- func (i *PostingsIterator) Next() (segment.Posting, error)
- func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap)
- func (i *PostingsIterator) ResetBytesRead(val uint64)
- func (i *PostingsIterator) Size() int
- type PostingsList
- func (p *PostingsList) BytesRead() uint64
- func (p *PostingsList) BytesWritten() uint64
- func (p *PostingsList) Count() uint64
- func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, prealloc segment.PostingsIterator) segment.PostingsIterator
- func (p *PostingsList) OrInto(receiver *roaring.Bitmap)
- func (p *PostingsList) ResetBytesRead(val uint64)
- func (p *PostingsList) Size() int
- type Segment
- func (s *Segment) AddRef()
- func (s *Segment) BytesRead() uint64
- func (s *Segment) BytesWritten() uint64
- func (s *Segment) CRC() uint32
- func (s *Segment) ChunkMode() uint32
- func (s *Segment) Close() (err error)
- func (s *Segment) Data() []byte
- func (s *Segment) DecRef() (err error)
- func (s *Segment) DictAddr(field string) (uint64, error)
- func (s *Segment) DocValueOffset() uint64
- func (s *Segment) FieldsIndexOffset() uint64
- func (s *Segment) NumDocs() uint64
- func (s *Segment) Path() string
- func (s *Segment) ResetBytesRead(val uint64)
- func (s *Segment) Size() int
- func (s *Segment) StoredIndexOffset() uint64
- func (s *Segment) Version() uint32
- type SegmentBase
- func (sb *SegmentBase) AddRef()
- func (s *SegmentBase) BytesRead() uint64
- func (s *SegmentBase) BytesWritten() uint64
- func (sb *SegmentBase) Close() (err error)
- func (s *SegmentBase) Count() uint64
- func (sb *SegmentBase) DecRef() (err error)
- func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error)
- func (s *SegmentBase) DocID(num uint64) ([]byte, error)
- func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error)
- func (s *SegmentBase) Fields() []string
- func (sb *SegmentBase) Persist(path string) error
- func (s *SegmentBase) ResetBytesRead(val uint64)
- func (sb *SegmentBase) Size() int
- func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string, visitor index.DocValueVisitor, ...) (segment.DocVisitState, error)
- func (s *SegmentBase) VisitStoredFields(num uint64, visitor segment.StoredFieldValueVisitor) error
- func (s *SegmentBase) VisitableDocValueFields() ([]string, error)
- func (sb *SegmentBase) WriteTo(w io.Writer) (int64, error)
- type ZapPlugin
- func (*ZapPlugin) Merge(segments []seg.Segment, drops []*roaring.Bitmap, path string, ...) ([][]uint64, uint64, error)
- func (z *ZapPlugin) New(results []index.Document) (segment.Segment, uint64, error)
- func (*ZapPlugin) Open(path string) (segment.Segment, error)
- func (*ZapPlugin) Type() string
- func (*ZapPlugin) Version() uint32
Constants ¶
const ( SectionInvertedTextIndex = iota SectionFaissVectorIndex )
const DocNum1HitFinished = math.MaxUint64
const FSTValEncoding1Hit = uint64(0x8000000000000000)
const FSTValEncodingGeneral = uint64(0x0000000000000000)
const FSTValEncodingMask = uint64(0xc000000000000000)
FooterSize is the size of the footer record in bytes: crc + ver + chunk + docValueOffset + sectionsIndexOffset + field offset + stored offset + num docs.
const IndexSectionsVersion uint32 = 16
const Type string = "zap"
const Version uint32 = 16
Variables ¶
var DefaultChunkMode uint32 = 1026
DefaultChunkMode is the most recent improvement to chunking and should be used by default.
var DefaultFileMergerBufferSize = 1024 * 1024
var LegacyChunkMode uint32 = 1024
LegacyChunkMode was the original chunk mode (always chunk size 1024); this mode is still used for chunking doc values.
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NormBits1Hit = uint64(1)
var SizeOfBool int
var SizeOfFloat32 int
var SizeOfFloat64 int
var SizeOfInt int
var SizeOfMap int
var SizeOfPtr int
var SizeOfSlice int
var SizeOfString int
var SizeOfUint16 int
var SizeOfUint32 int
var SizeOfUint64 int
var SizeOfUint8 int
var ValidateDocFields = func(field index.Field) error { return nil }
ValidateDocFields can be set by applications to perform additional checks on fields in a document being added to a new segment; by default it does nothing. This API is experimental and may be removed at any time.
Functions ¶
func FSTValDecode1Hit ¶
func FSTValEncode1Hit ¶
func MergeToWriter ¶
func PersistSegmentBase ¶
func PersistSegmentBase(sb *SegmentBase, path string) error
PersistSegmentBase persists SegmentBase in the zap file format.
func PostingsIteratorFrom1Hit ¶
func PostingsIteratorFrom1Hit(docNum1Hit uint64, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
PostingsIteratorFrom1Hit constructs a PostingsIterator given a 1-hit docNum.
func PostingsIteratorFromBitmap ¶
func PostingsIteratorFromBitmap(bm *roaring.Bitmap, includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error)
PostingsIteratorFromBitmap constructs a PostingsIterator given an "actual" bitmap.
Types ¶
type CountHashWriter ¶
type CountHashWriter struct {
// contains filtered or unexported fields
}
CountHashWriter is a wrapper around a Writer which counts the number of bytes which have been written and computes a crc32 hash
func NewCountHashWriter ¶
func NewCountHashWriter(w io.Writer) *CountHashWriter
NewCountHashWriter returns a CountHashWriter which wraps the provided Writer
func NewCountHashWriterWithStatsReporter ¶
func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter
func (*CountHashWriter) Count ¶
func (c *CountHashWriter) Count() int
Count returns the number of bytes written
func (*CountHashWriter) Sum32 ¶
func (c *CountHashWriter) Sum32() uint32
Sum32 returns the CRC-32 hash of the content written to this writer
type Dictionary ¶
type Dictionary struct {
// contains filtered or unexported fields
}
Dictionary is the zap representation of the term dictionary
func (*Dictionary) AutomatonIterator ¶
func (d *Dictionary) AutomatonIterator(a segment.Automaton, startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator
AutomatonIterator returns an iterator which only visits terms matching the vellum automaton and the start/end key range
func (*Dictionary) BytesRead ¶
func (d *Dictionary) BytesRead() uint64
func (*Dictionary) BytesWritten ¶
func (d *Dictionary) BytesWritten() uint64
func (*Dictionary) PostingsList ¶
func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error)
PostingsList returns the postings list for the specified term
func (*Dictionary) ResetBytesRead ¶
func (d *Dictionary) ResetBytesRead(val uint64)
type DictionaryIterator ¶
type DictionaryIterator struct {
// contains filtered or unexported fields
}
DictionaryIterator is an iterator for term dictionary
type Location ¶
type Location struct {
// contains filtered or unexported fields
}
Location represents the location of a single occurrence
func (*Location) ArrayPositions ¶
ArrayPositions returns the array position vector associated with this occurrence
func (*Location) Field ¶
Field returns the name of the field (useful in composite fields to know which original field the value came from)
type MetaData ¶
type MetaData struct { DocNum uint64 // docNum of the data inside the chunk DocDvOffset uint64 // offset of data inside the chunk for the given docid }
MetaData represents the data information inside a chunk.
type Posting ¶
type Posting struct {
// contains filtered or unexported fields
}
Posting is a single entry in a postings list
func (*Posting) Frequency ¶
Frequency returns the frequencies of occurrence of this term in this doc/field
func (*Posting) NormUint64 ¶
NormUint64 returns the norm value as uint64
type PostingsIterator ¶
type PostingsIterator struct { Actual roaring.IntPeekable ActualBM *roaring.Bitmap // contains filtered or unexported fields }
PostingsIterator provides a way to iterate through the postings list
func (*PostingsIterator) ActualBitmap ¶
func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap
ActualBitmap returns the underlying actual bitmap which can be used up the stack for optimizations
func (*PostingsIterator) Advance ¶
func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error)
Advance returns the posting at the specified docNum or, if it is not present, the next posting, or, if the end is reached, nil
func (*PostingsIterator) BytesRead ¶
func (i *PostingsIterator) BytesRead() uint64
func (*PostingsIterator) BytesWritten ¶
func (i *PostingsIterator) BytesWritten() uint64
func (*PostingsIterator) DocNum1Hit ¶
func (p *PostingsIterator) DocNum1Hit() (uint64, bool)
DocNum1Hit returns the docNum and true if this is "1-hit" optimized and the docNum is available.
func (*PostingsIterator) Next ¶
func (i *PostingsIterator) Next() (segment.Posting, error)
Next returns the next posting on the postings list, or nil at the end
func (*PostingsIterator) ReplaceActual ¶
func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap)
ReplaceActual replaces the ActualBM with the provided bitmap
func (*PostingsIterator) ResetBytesRead ¶
func (i *PostingsIterator) ResetBytesRead(val uint64)
Implements the segment.DiskStatsReporter interface. The purpose of this implementation is to get the bytes read from the disk, which includes the freqNorm and location-specific information of a hit
func (*PostingsIterator) Size ¶
func (i *PostingsIterator) Size() int
type PostingsList ¶
type PostingsList struct {
// contains filtered or unexported fields
}
PostingsList is an in-memory representation of a postings list
func (*PostingsList) BytesRead ¶
func (p *PostingsList) BytesRead() uint64
func (*PostingsList) BytesWritten ¶
func (p *PostingsList) BytesWritten() uint64
func (*PostingsList) Count ¶
func (p *PostingsList) Count() uint64
Count returns the number of items on this postings list
func (*PostingsList) Iterator ¶
func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, prealloc segment.PostingsIterator) segment.PostingsIterator
Iterator returns an iterator for this postings list
func (*PostingsList) OrInto ¶
func (p *PostingsList) OrInto(receiver *roaring.Bitmap)
func (*PostingsList) ResetBytesRead ¶
func (p *PostingsList) ResetBytesRead(val uint64)
Implements the segment.DiskStatsReporter interface. The purpose of this implementation is to get the bytes read from the postings lists stored on disk, while querying
func (*PostingsList) Size ¶
func (p *PostingsList) Size() int
type Segment ¶
type Segment struct { SegmentBase // contains filtered or unexported fields }
Segment implements a persisted segment.Segment interface, by embedding an mmap()'ed SegmentBase.
func (*Segment) BytesWritten ¶
func (*Segment) DictAddr ¶
DictAddr is a helper function to compute the file offset where the dictionary is stored for the specified field.
func (*Segment) DocValueOffset ¶
DocValueOffset returns the docValue offset in the file footer
func (*Segment) FieldsIndexOffset ¶
FieldsIndexOffset returns the fields index offset in the file footer
func (*Segment) ResetBytesRead ¶
Implements the segment.DiskStatsReporter interface. Only the persistedSegment type implements the interface, as the intention is to retrieve the bytes read from the on-disk segment as part of the current query.
func (*Segment) StoredIndexOffset ¶
StoredIndexOffset returns the stored value index offset in the file footer
type SegmentBase ¶
type SegmentBase struct {
// contains filtered or unexported fields
}
SegmentBase is a memory only, read-only implementation of the segment.Segment interface, using zap's data representation.
func InitSegmentBase ¶
func (*SegmentBase) AddRef ¶
func (sb *SegmentBase) AddRef()
func (*SegmentBase) BytesRead ¶
func (s *SegmentBase) BytesRead() uint64
func (*SegmentBase) BytesWritten ¶
func (s *SegmentBase) BytesWritten() uint64
func (*SegmentBase) Close ¶
func (sb *SegmentBase) Close() (err error)
func (*SegmentBase) Count ¶
func (s *SegmentBase) Count() uint64
Count returns the number of documents in this segment.
func (*SegmentBase) DecRef ¶
func (sb *SegmentBase) DecRef() (err error)
func (*SegmentBase) Dictionary ¶
func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error)
Dictionary returns the term dictionary for the specified field
func (*SegmentBase) DocID ¶
func (s *SegmentBase) DocID(num uint64) ([]byte, error)
DocID returns the value of the _id field for the given docNum
func (*SegmentBase) DocNumbers ¶
func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error)
DocNumbers returns a bitset corresponding to the doc numbers of all the provided _id strings
func (*SegmentBase) Fields ¶
func (s *SegmentBase) Fields() []string
Fields returns the field names used in this segment
func (*SegmentBase) Persist ¶
func (sb *SegmentBase) Persist(path string) error
func (*SegmentBase) ResetBytesRead ¶
func (s *SegmentBase) ResetBytesRead(val uint64)
func (*SegmentBase) Size ¶
func (sb *SegmentBase) Size() int
func (*SegmentBase) VisitDocValues ¶
func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string, visitor index.DocValueVisitor, dvsIn segment.DocVisitState) ( segment.DocVisitState, error)
VisitDocValues is an implementation of the DocValueVisitable interface
func (*SegmentBase) VisitStoredFields ¶
func (s *SegmentBase) VisitStoredFields(num uint64, visitor segment.StoredFieldValueVisitor) error
VisitStoredFields invokes the StoredFieldValueVisitor for each stored field for the specified doc number
func (*SegmentBase) VisitableDocValueFields ¶
func (s *SegmentBase) VisitableDocValueFields() ([]string, error)
VisitableDocValueFields returns the list of fields with persisted doc value terms ready to be visited using the VisitDocValues method.
type ZapPlugin ¶
type ZapPlugin struct{}
ZapPlugin implements the Plugin interface of the blevesearch/scorch_segment_api pkg
func (*ZapPlugin) Merge ¶
func (*ZapPlugin) Merge(segments []seg.Segment, drops []*roaring.Bitmap, path string, closeCh chan struct{}, s seg.StatsReporter) ( [][]uint64, uint64, error)
Merge takes a slice of segments and bit masks describing which documents may be dropped, and creates a new segment containing the remaining data. This new segment is built at the specified path.