Documentation ¶
Index ¶
- Constants
- Variables
- func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64)
- func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64
- func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, chunkFactor uint32, ...) ([][]uint64, uint64, error)
- func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, ...) ([][]uint64, uint64, error)
- func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, chunkFactor uint32, ...) (newDocNums [][]uint64, ...)
- func Open(path string) (segment.Segment, error)
- func PersistSegmentBase(sb *SegmentBase, path string) error
- func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64)
- type CountHashWriter
- type Dictionary
- func (d *Dictionary) AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator
- func (d *Dictionary) Contains(key []byte) (bool, error)
- func (d *Dictionary) Iterator() segment.DictionaryIterator
- func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, includeCount bool) segment.DictionaryIterator
- func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error)
- func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator
- func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
- type DictionaryIterator
- type Location
- type MetaData
- type Posting
- type PostingsIterator
- type PostingsList
- type Segment
- func (s *Segment) AddRef()
- func (s *Segment) CRC() uint32
- func (s *Segment) ChunkFactor() uint32
- func (s *Segment) Close() (err error)
- func (s *Segment) Data() []byte
- func (s *Segment) DecRef() (err error)
- func (s *Segment) DictAddr(field string) (uint64, error)
- func (s *Segment) DocValueOffset() uint64
- func (s *Segment) FieldsIndexOffset() uint64
- func (s *Segment) NumDocs() uint64
- func (s *Segment) Path() string
- func (s *Segment) Size() int
- func (s *Segment) StoredIndexOffset() uint64
- func (s *Segment) Version() uint32
- type SegmentBase
- func (sb *SegmentBase) AddRef()
- func (sb *SegmentBase) Close() (err error)
- func (s *SegmentBase) Count() uint64
- func (sb *SegmentBase) DecRef() (err error)
- func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error)
- func (s *SegmentBase) DocID(num uint64) ([]byte, error)
- func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error)
- func (s *SegmentBase) Fields() []string
- func (sb *SegmentBase) Size() int
- func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error
- func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor, ...) (segment.DocVisitState, error)
- func (s *SegmentBase) VisitableDocValueFields() ([]string, error)
Constants ¶
const DocNum1HitFinished = math.MaxUint64
const FSTValEncoding1Hit = uint64(0x8000000000000000)
const FSTValEncodingGeneral = uint64(0x0000000000000000)
const FSTValEncodingMask = uint64(0xc000000000000000)
FooterSize is the size of the footer record in bytes: crc + ver + chunk + field offset + stored offset + num docs + docValueOffset.
const Type string = "zap"
const Version uint32 = 11
Variables ¶
var DefaultFileMergerBufferSize = 1024 * 1024
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NormBits1Hit = uint64(math.Float32bits(float32(1)))
var ValidateDocFields = func(field document.Field) error { return nil }
ValidateDocFields can be set by applications to perform additional checks on fields in a document being added to a new segment, by default it does nothing. This API is experimental and may be removed at any time.
var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error { return nil }
ValidateMerge can be set by applications to perform additional checks on a new segment produced by a merge, by default this does nothing. Caller should provide EITHER segments or memSegments, but not both. This API is experimental and may be removed at any time.
Functions ¶
func FSTValDecode1Hit ¶
func FSTValEncode1Hit ¶
func Merge ¶
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( [][]uint64, uint64, error)
Merge takes a slice of zap segments and bit masks describing which documents may be dropped, and creates a new segment containing the remaining data. This new segment is built at the specified path, with the provided chunkFactor.
func MergeSegmentBases ¶
func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( [][]uint64, uint64, error)
func MergeToWriter ¶
func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, chunkFactor uint32, cr *CountHashWriter, closeCh chan struct{}) ( newDocNums [][]uint64, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16, err error)
func PersistSegmentBase ¶
func PersistSegmentBase(sb *SegmentBase, path string) error
PersistSegmentBase persists SegmentBase in the zap file format.
Types ¶
type CountHashWriter ¶
type CountHashWriter struct {
// contains filtered or unexported fields
}
CountHashWriter is a wrapper around a Writer which counts the number of bytes which have been written and computes a crc32 hash
func NewCountHashWriter ¶
func NewCountHashWriter(w io.Writer) *CountHashWriter
NewCountHashWriter returns a CountHashWriter which wraps the provided Writer
func NewCountHashWriterWithStatsReporter ¶
func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter
func (*CountHashWriter) Count ¶
func (c *CountHashWriter) Count() int
Count returns the number of bytes written
func (*CountHashWriter) Sum32 ¶
func (c *CountHashWriter) Sum32() uint32
Sum32 returns the CRC-32 hash of the content written to this writer
type Dictionary ¶
type Dictionary struct {
// contains filtered or unexported fields
}
Dictionary is the zap representation of the term dictionary
func (*Dictionary) AutomatonIterator ¶
func (d *Dictionary) AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator
AutomatonIterator returns an iterator which only visits terms matching the vellum automaton and falling within the start/end key range
func (*Dictionary) Iterator ¶
func (d *Dictionary) Iterator() segment.DictionaryIterator
Iterator returns an iterator for this dictionary
func (*Dictionary) OnlyIterator ¶
func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, includeCount bool) segment.DictionaryIterator
func (*Dictionary) PostingsList ¶
func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error)
PostingsList returns the postings list for the specified term
func (*Dictionary) PrefixIterator ¶
func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator
PrefixIterator returns an iterator which only visits terms having the specified prefix
func (*Dictionary) RangeIterator ¶
func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
RangeIterator returns an iterator which only visits terms between the start and end terms. NOTE: bleve.index API specifies the end is inclusive.
type DictionaryIterator ¶
type DictionaryIterator struct {
// contains filtered or unexported fields
}
DictionaryIterator is an iterator for term dictionary
type Location ¶
type Location struct {
// contains filtered or unexported fields
}
Location represents the location of a single occurrence
func (*Location) ArrayPositions ¶
ArrayPositions returns the array position vector associated with this occurrence
func (*Location) Field ¶
Field returns the name of the field (useful in composite fields to know which original field the value came from)
type MetaData ¶
type MetaData struct {
	DocNum      uint64 // docNum of the data inside the chunk
	DocDvOffset uint64 // offset of data inside the chunk for the given docid
}
MetaData represents the data information inside a chunk.
type Posting ¶
type Posting struct {
// contains filtered or unexported fields
}
Posting is a single entry in a postings list
func (*Posting) Frequency ¶
Frequency returns the frequencies of occurrence of this term in this doc/field
type PostingsIterator ¶
type PostingsIterator struct {
	Actual   roaring.IntPeekable
	ActualBM *roaring.Bitmap
	// contains filtered or unexported fields
}
PostingsIterator provides a way to iterate through the postings list
func PostingsIteratorFrom1Hit ¶
func PostingsIteratorFrom1Hit(docNum1Hit, normBits1Hit uint64, includeFreqNorm, includeLocs bool) (*PostingsIterator, error)
PostingsIteratorFrom1Hit constructs a PostingsIterator given a 1-hit docNum.
func PostingsIteratorFromBitmap ¶
func PostingsIteratorFromBitmap(bm *roaring.Bitmap, includeFreqNorm, includeLocs bool) (*PostingsIterator, error)
PostingsIteratorFromBitmap constructs a PostingsIterator given an "actual" bitmap.
func (*PostingsIterator) Advance ¶
func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error)
Advance returns the posting at the specified docNum or, if it is not present, the next posting, or, if the end is reached, nil
func (*PostingsIterator) DocNum1Hit ¶
func (p *PostingsIterator) DocNum1Hit() (uint64, bool)
DocNum1Hit returns the docNum and true if this is "1-hit" optimized and the docNum is available.
func (*PostingsIterator) Next ¶
func (i *PostingsIterator) Next() (segment.Posting, error)
Next returns the next posting on the postings list, or nil at the end
func (*PostingsIterator) Size ¶
func (i *PostingsIterator) Size() int
type PostingsList ¶
type PostingsList struct {
// contains filtered or unexported fields
}
PostingsList is an in-memory representation of a postings list
func (*PostingsList) Count ¶
func (p *PostingsList) Count() uint64
Count returns the number of items on this postings list
func (*PostingsList) Iterator ¶
func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, prealloc segment.PostingsIterator) segment.PostingsIterator
Iterator returns an iterator for this postings list
func (*PostingsList) OrInto ¶
func (p *PostingsList) OrInto(receiver *roaring.Bitmap)
func (*PostingsList) Size ¶
func (p *PostingsList) Size() int
type Segment ¶
type Segment struct {
	SegmentBase
	// contains filtered or unexported fields
}
Segment implements a persisted segment.Segment interface, by embedding an mmap()'ed SegmentBase.
func (*Segment) ChunkFactor ¶
ChunkFactor returns the chunk factor in the file footer
func (*Segment) DictAddr ¶
DictAddr is a helper function to compute the file offset where the dictionary is stored for the specified field.
func (*Segment) DocValueOffset ¶
DocValueOffset returns the docValue offset in the file footer
func (*Segment) FieldsIndexOffset ¶
FieldsIndexOffset returns the fields index offset in the file footer
func (*Segment) StoredIndexOffset ¶
StoredIndexOffset returns the stored value index offset in the file footer
type SegmentBase ¶
type SegmentBase struct {
// contains filtered or unexported fields
}
SegmentBase is a memory only, read-only implementation of the segment.Segment interface, using zap's data representation.
func AnalysisResultsToSegmentBase ¶
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, chunkFactor uint32) (*SegmentBase, uint64, error)
AnalysisResultsToSegmentBase produces an in-memory zap-encoded SegmentBase from analysis results
func InitSegmentBase ¶
func (*SegmentBase) AddRef ¶
func (sb *SegmentBase) AddRef()
func (*SegmentBase) Close ¶
func (sb *SegmentBase) Close() (err error)
func (*SegmentBase) Count ¶
func (s *SegmentBase) Count() uint64
Count returns the number of documents in this segment.
func (*SegmentBase) DecRef ¶
func (sb *SegmentBase) DecRef() (err error)
func (*SegmentBase) Dictionary ¶
func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error)
Dictionary returns the term dictionary for the specified field
func (*SegmentBase) DocID ¶
func (s *SegmentBase) DocID(num uint64) ([]byte, error)
DocID returns the value of the _id field for the given docNum
func (*SegmentBase) DocNumbers ¶
func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error)
DocNumbers returns a bitset corresponding to the doc numbers of all the provided _id strings
func (*SegmentBase) Fields ¶
func (s *SegmentBase) Fields() []string
Fields returns the field names used in this segment
func (*SegmentBase) Size ¶
func (sb *SegmentBase) Size() int
func (*SegmentBase) VisitDocument ¶
func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error
VisitDocument invokes the DocumentFieldValueVisitor for each stored field for the specified doc number
func (*SegmentBase) VisitDocumentFieldTerms ¶
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( segment.DocVisitState, error)
VisitDocumentFieldTerms is an implementation of the DocumentFieldTermVisitable interface
func (*SegmentBase) VisitableDocValueFields ¶
func (s *SegmentBase) VisitableDocValueFields() ([]string, error)
VisitableDocValueFields returns the list of fields with persisted doc value terms ready to be visitable using the VisitDocumentFieldTerms method.