Documentation ¶
Index ¶
- Constants
- Variables
- func ConfigFromModel(iicm *models.InvertedIndexConfig) schema.InvertedIndexConfig
- func HasFilterableIndex(prop *models.Property) bool
- func HasInvertedIndex(prop *models.Property) bool
- func HasSearchableIndex(prop *models.Property) bool
- func LexicographicallySortableFloat64(in float64) ([]byte, error)
- func LexicographicallySortableInt64(in int64) ([]byte, error)
- func LexicographicallySortableUint64(in uint64) ([]byte, error)
- func ParseLexicographicallySortableFloat64(in []byte) (float64, error)
- func ParseLexicographicallySortableInt64(in []byte) (int64, error)
- func ParseLexicographicallySortableUint64(in []byte) (uint64, error)
- func PropertyHasSearchableIndex(class *models.Class, tentativePropertyName string) bool
- func ValidateConfig(conf *models.InvertedIndexConfig) error
- func ValidateUserConfigUpdate(initial, updated *models.InvertedIndexConfig) error
- func WriteFile(name string, data []byte, perm os.FileMode) error
- type AllMapPairsAndPropName
- type Analyzer
- func (a *Analyzer) Bool(in bool) ([]Countable, error)
- func (a *Analyzer) BoolArray(in []bool) ([]Countable, error)
- func (a *Analyzer) Float(in float64) ([]Countable, error)
- func (a *Analyzer) FloatArray(in []float64) ([]Countable, error)
- func (a *Analyzer) Int(in int64) ([]Countable, error)
- func (a *Analyzer) IntArray(in []int64) ([]Countable, error)
- func (a *Analyzer) Object(input map[string]any, props []*models.Property, uuid strfmt.UUID) ([]Property, error)
- func (a *Analyzer) Ref(in models.MultipleRef) ([]Countable, error)
- func (a *Analyzer) RefCount(in models.MultipleRef) ([]Countable, error)
- func (a *Analyzer) Text(tokenization, in string) []Countable
- func (a *Analyzer) TextArray(tokenization string, inArr []string) []Countable
- func (a *Analyzer) UUID(in uuid.UUID) ([]Countable, error)
- func (a *Analyzer) UUIDArray(in []uuid.UUID) ([]Countable, error)
- type BM25Searcher
- type ClassSearcher
- type Countable
- type DeltaMergeResult
- type DeltaMerger
- type DeltaNilResults
- type DeltaResults
- type IsFallbackToSearchable
- type JsonPropertyLengthTracker
- func (t *JsonPropertyLengthTracker) Clear()
- func (t *JsonPropertyLengthTracker) Close() error
- func (t *JsonPropertyLengthTracker) Drop() error
- func (t *JsonPropertyLengthTracker) FileName() string
- func (t *JsonPropertyLengthTracker) Flush(flushBackup bool) error
- func (t *JsonPropertyLengthTracker) PropertyMean(propName string) (float32, error)
- func (t *JsonPropertyLengthTracker) PropertyTally(propName string) (int, int, float64, error)
- func (t *JsonPropertyLengthTracker) TrackProperty(propName string, value float32) error
- func (t *JsonPropertyLengthTracker) UnTrackProperty(propName string, value float32) error
- type MapPairsAndPropName
- type MergeDocIDWithFrequency
- type MergeItem
- type MergeProperty
- type NilProperty
- type PropLenData
- type Property
- type PropertyLengthTracker
- func (t *PropertyLengthTracker) BucketCount(propName string, bucket uint16) (uint16, error)
- func (t *PropertyLengthTracker) Close() error
- func (t *PropertyLengthTracker) Drop() error
- func (t *PropertyLengthTracker) FileName() string
- func (t *PropertyLengthTracker) Flush() error
- func (t *PropertyLengthTracker) PropertyMean(propName string) (float32, error)
- func (t *PropertyLengthTracker) PropertyNames() []string
- func (t *PropertyLengthTracker) PropertyTally(propName string) (int, int, float32, error)
- func (t *PropertyLengthTracker) TrackProperty(propName string, value float32) error
- func (t *PropertyLengthTracker) UnTrackProperty(propName string, value float32) error
- type ReadFn
- type RowReader
- type RowReaderFrequency
- type RowReaderRoaringSet
- type Searcher
Constants ¶
const (
	// always
	HasFilterableIndexIdProp = true
	HasSearchableIndexIdProp = false

	// only if index.invertedIndexConfig.IndexTimestamps set
	HasFilterableIndexTimestampProp = true
	HasSearchableIndexTimestampProp = false

	// only if property.indexFilterable or property.indexSearchable set
	HasFilterableIndexMetaCount = true
	HasSearchableIndexMetaCount = false

	// only if index.invertedIndexConfig.IndexNullState set
	// and either property.indexFilterable or property.indexSearchable set
	HasFilterableIndexPropNull = true
	HasSearchableIndexPropNull = false

	// only if index.invertedIndexConfig.IndexPropertyLength set
	// and either property.indexFilterable or property.indexSearchable set
	HasFilterableIndexPropLength = true
	HasSearchableIndexPropLength = false
)
Variables ¶
var MAX_BUCKETS = 64
Functions ¶
func ConfigFromModel ¶
func ConfigFromModel(iicm *models.InvertedIndexConfig) schema.InvertedIndexConfig
func HasFilterableIndex ¶ added in v1.19.0
func HasFilterableIndex(prop *models.Property) bool
Indicates whether the property should be indexed. The index holds document ids with the property of/containing a particular value (index created using a bucket of StrategyRoaringSet).
func HasInvertedIndex ¶ added in v1.19.0
func HasSearchableIndex ¶ added in v1.19.0
func HasSearchableIndex(prop *models.Property) bool
Indicates whether the property should be indexed. The index holds document ids with the property of/containing a particular value, plus the number of its occurrences in that property (index created using a bucket of StrategyMapCollection).
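As a rough illustration (not taken from this package's docs), the two helpers can be used to decide which index buckets to create for a property. The models.Property field names IndexFilterable and IndexSearchable, the property values, and the import path are assumptions for this sketch:

package main

import (
	"fmt"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
	"github.com/weaviate/weaviate/entities/models"
)

func main() {
	// Hypothetical property definition; field names assumed from the schema models.
	filterable, searchable := true, false
	prop := &models.Property{
		Name:            "title",
		DataType:        []string{"text"},
		IndexFilterable: &filterable,
		IndexSearchable: &searchable,
	}

	// HasFilterableIndex -> StrategyRoaringSet bucket,
	// HasSearchableIndex -> StrategyMapCollection bucket (with frequencies).
	fmt.Println("filterable:", inverted.HasFilterableIndex(prop))
	fmt.Println("searchable:", inverted.HasSearchableIndex(prop))
}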
func LexicographicallySortableFloat64 ¶
LexicographicallySortableFloat64 performs a conversion to a lexicographically sortable byte slice. In general, lexicographical sorting requires big endian notation. Additionally, the sign needs to be flipped in any case; if the number is negative, each remaining byte also needs to be flipped.
func LexicographicallySortableInt64 ¶
LexicographicallySortableInt64 performs a conversion to a lexicographically sortable byte slice. For this, big endian notation is required and the sign must be flipped
func LexicographicallySortableUint64 ¶
LexicographicallySortableUint64 performs a conversion to a lexicographically sortable byte slice. For this, big endian notation is required.
func ParseLexicographicallySortableFloat64 ¶
ParseLexicographicallySortableFloat64 reverses the changes in LexicographicallySortableFloat64
func ParseLexicographicallySortableInt64 ¶
ParseLexicographicallySortableInt64 reverses the changes in LexicographicallySortableInt64
func ParseLexicographicallySortableUint64 ¶
ParseLexicographicallySortableUint64 reverses the changes in LexicographicallySortableUint64
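A small sketch (assuming the import path below) of what the sortable encoding buys: the byte-wise order of the encoded slices matches the numeric order of the inputs, and the Parse* functions reverse the transformation:

package main

import (
	"bytes"
	"fmt"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
)

func main() {
	a, _ := inverted.LexicographicallySortableFloat64(-1.5)
	b, _ := inverted.LexicographicallySortableFloat64(2.25)

	// bytes.Compare on the encodings agrees with the numeric order -1.5 < 2.25.
	fmt.Println("byte order matches numeric order:", bytes.Compare(a, b) < 0)

	// ParseLexicographicallySortableFloat64 reverses the encoding.
	back, _ := inverted.ParseLexicographicallySortableFloat64(b)
	fmt.Println("round trip:", back) // 2.25
}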
func PropertyHasSearchableIndex ¶ added in v1.19.0
func ValidateConfig ¶
func ValidateConfig(conf *models.InvertedIndexConfig) error
func ValidateUserConfigUpdate ¶
func ValidateUserConfigUpdate(initial, updated *models.InvertedIndexConfig) error
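A hedged sketch of validating a config and a config update. The models.InvertedIndexConfig field names (Bm25, K1, B, CleanupIntervalSeconds, Stopwords, Preset) and the import paths are assumptions taken from the schema models rather than from this page:

package main

import (
	"fmt"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
	"github.com/weaviate/weaviate/entities/models"
)

func main() {
	current := &models.InvertedIndexConfig{
		Bm25:                   &models.BM25Config{K1: 1.2, B: 0.75},
		Stopwords:              &models.StopwordConfig{Preset: "en"},
		CleanupIntervalSeconds: 60,
	}

	// Validate a standalone config.
	if err := inverted.ValidateConfig(current); err != nil {
		fmt.Println("invalid config:", err)
		return
	}

	// Validate a user-supplied update against the current config.
	updated := &models.InvertedIndexConfig{
		Bm25:                   &models.BM25Config{K1: 1.4, B: 0.6},
		Stopwords:              &models.StopwordConfig{Preset: "en"},
		CleanupIntervalSeconds: 60,
	}
	if err := inverted.ValidateUserConfigUpdate(current, updated); err != nil {
		fmt.Println("invalid update:", err)
		return
	}
	fmt.Println("config and update ok")
}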
Types ¶
type AllMapPairsAndPropName ¶ added in v1.18.0
type AllMapPairsAndPropName []MapPairsAndPropName
func (AllMapPairsAndPropName) Len ¶ added in v1.18.0
func (m AllMapPairsAndPropName) Len() int
provide sort interface
func (AllMapPairsAndPropName) Less ¶ added in v1.18.0
func (m AllMapPairsAndPropName) Less(i, j int) bool
func (AllMapPairsAndPropName) Swap ¶ added in v1.18.0
func (m AllMapPairsAndPropName) Swap(i, j int)
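Since Len, Less and Swap implement sort.Interface, a collected AllMapPairsAndPropName can be handed straight to the standard library; a minimal sketch:

package example

import (
	"sort"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
)

// sortPairs orders the pairs in place using the Len/Less/Swap methods above.
func sortPairs(pairs inverted.AllMapPairsAndPropName) {
	sort.Sort(pairs)
}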
type Analyzer ¶
type Analyzer struct {
// contains filtered or unexported fields
}
func NewAnalyzer ¶
func NewAnalyzer(isFallbackToSearchable IsFallbackToSearchable) *Analyzer
func (*Analyzer) Bool ¶
Bool requires no analysis, so it's actually just a simple conversion to a little-endian ordered byte slice
func (*Analyzer) BoolArray ¶
BoolArray requires no analysis, so it's actually just a simple conversion to a little-endian ordered byte slice
func (*Analyzer) Float ¶
Float requires no analysis, so it's actually just a simple conversion to a lexicographically sortable byte slice.
func (*Analyzer) FloatArray ¶
Float array requires no analysis, so it's actually just a simple conversion to a lexicographically sortable byte slice.
func (*Analyzer) Int ¶
Int requires no analysis, so it's actually just a simple conversion to a string-formatted byte slice of the int
func (*Analyzer) IntArray ¶
Int array requires no analysis, so it's actually just a simple conversion to a string-formatted byte slice of the int
func (*Analyzer) Ref ¶
func (a *Analyzer) Ref(in models.MultipleRef) ([]Countable, error)
Ref indexes references as beacon-strings
func (*Analyzer) RefCount ¶
func (a *Analyzer) RefCount(in models.MultipleRef) ([]Countable, error)
RefCount does not index the content of the refs, but only the count with 0 being an explicitly allowed value as well.
func (*Analyzer) Text ¶
Text tokenizes given input according to selected tokenization, then aggregates duplicates
func (*Analyzer) TextArray ¶
TextArray tokenizes given input according to selected tokenization, then aggregates duplicates
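A hedged sketch of tokenizing text with the Analyzer; the "word" tokenization name and the Countable field names (Data, TermFrequency) are assumptions not shown on this page:

package main

import (
	"fmt"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
)

func main() {
	// No fallback to the searchable index in this sketch.
	a := inverted.NewAnalyzer(func() bool { return false })

	// Tokenize with word tokenization; duplicates are aggregated, so a repeated
	// token shows up once with a higher term frequency.
	for _, c := range a.Text("word", "the quick brown fox is quick") {
		fmt.Printf("%s -> tf %v\n", c.Data, c.TermFrequency)
	}
}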
type BM25Searcher ¶
type BM25Searcher struct {
// contains filtered or unexported fields
}
func NewBM25Searcher ¶
func NewBM25Searcher(config schema.BM25Config, store *lsmkv.Store,
	getClass func(string) *models.Class, propIndices propertyspecific.Indices,
	classSearcher ClassSearcher, propLenTracker propLengthRetriever,
	logger logrus.FieldLogger, shardVersion uint16,
) *BM25Searcher
func (*BM25Searcher) BM25F ¶
func (b *BM25Searcher) BM25F(ctx context.Context, filterDocIds helpers.AllowList,
	className schema.ClassName, limit int, keywordRanking searchparams.KeywordRanking,
) ([]*storobj.Object, []float32, error)
func (*BM25Searcher) GetPropertyLengthTracker ¶ added in v1.23.0
func (b *BM25Searcher) GetPropertyLengthTracker() *JsonPropertyLengthTracker
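A hedged sketch of issuing a BM25F query once a BM25Searcher has been constructed; building the store, class getter and other dependencies is omitted, and the searchparams.KeywordRanking field names (Query, Properties) plus the import paths are assumptions:

package example

import (
	"context"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
	"github.com/weaviate/weaviate/entities/schema"
	"github.com/weaviate/weaviate/entities/searchparams"
	"github.com/weaviate/weaviate/entities/storobj"
)

// bm25Query runs a keyword (BM25F) search over two properties.
// A nil filterDocIds means no pre-filter is applied.
func bm25Query(ctx context.Context, b *inverted.BM25Searcher) ([]*storobj.Object, []float32, error) {
	kw := searchparams.KeywordRanking{
		Query:      "quick brown fox",
		Properties: []string{"title", "description"},
	}
	return b.BM25F(ctx, nil, schema.ClassName("Article"), 10, kw)
}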
type ClassSearcher ¶
type ClassSearcher interface {
	Search(ctx context.Context, params dto.GetParams) ([]search.Result, error)
	GetQueryMaximumResults() int
}
ClassSearcher is anything that allows a root-level ClassSearch
type DeltaMergeResult ¶
type DeltaMergeResult struct {
	Additions []MergeProperty
	Deletions []MergeProperty
}
type DeltaMerger ¶
type DeltaMerger struct {
// contains filtered or unexported fields
}
DeltaMerger can be used to condense many single writes into one big one. Additionally, it removes overlaps between additions and deletions. It is meant to be used in batch situations, where 5 ref objects in a row might each increase the doc count by one. Instead of writing 5 additions and 4 deletions, this can be condensed to a single addition.
func NewDeltaMerger ¶
func NewDeltaMerger() *DeltaMerger
func (*DeltaMerger) AddAdditions ¶
func (dm *DeltaMerger) AddAdditions(props []Property, docID uint64)
func (*DeltaMerger) AddDeletions ¶
func (dm *DeltaMerger) AddDeletions(props []Property, docID uint64)
func (*DeltaMerger) Merge ¶
func (dm *DeltaMerger) Merge() DeltaMergeResult
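A hedged sketch of the batching flow described above: single-property writes for the same doc ID are registered one by one and then condensed into a single DeltaMergeResult. The property name and the Countable Data field are assumptions for illustration:

package main

import (
	"fmt"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
)

func main() {
	dm := inverted.NewDeltaMerger()

	refCount := func(n string) []inverted.Property {
		return []inverted.Property{{
			Name:  "ofArticle__refcount", // hypothetical ref-count property
			Items: []inverted.Countable{{Data: []byte(n)}},
		}}
	}

	// A batch bumps the ref count of doc 7 twice in a row. Instead of writing
	// two additions and one deletion, the merger removes the overlap so only
	// the final state needs to be written.
	dm.AddAdditions(refCount("1"), 7)
	dm.AddDeletions(refCount("1"), 7)
	dm.AddAdditions(refCount("2"), 7)

	res := dm.Merge()
	fmt.Println("additions:", len(res.Additions), "deletions:", len(res.Deletions))
}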
type DeltaNilResults ¶ added in v1.24.0
type DeltaNilResults struct {
	ToDelete []NilProperty
	ToAdd    []NilProperty
}
func DeltaNil ¶ added in v1.24.0
func DeltaNil(previous, next []NilProperty) DeltaNilResults
type DeltaResults ¶
func Delta ¶
func Delta(previous, next []Property) DeltaResults
type IsFallbackToSearchable ¶ added in v1.19.0
type IsFallbackToSearchable func() bool
type JsonPropertyLengthTracker ¶ added in v1.19.0
type JsonPropertyLengthTracker struct {
	sync.Mutex

	UnlimitedBuckets bool
	// contains filtered or unexported fields
}
func NewJsonPropertyLengthTracker ¶ added in v1.19.0
func NewJsonPropertyLengthTracker(path string, logger logrus.FieldLogger) (t *JsonPropertyLengthTracker, err error)
NewJsonPropertyLengthTracker creates a new tracker and loads the data from the given path. If the file is in the old format, it will be converted to the new format.
func (*JsonPropertyLengthTracker) Clear ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) Clear()
func (*JsonPropertyLengthTracker) Close ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) Close() error
Closes the tracker and removes the backup file
func (*JsonPropertyLengthTracker) Drop ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) Drop() error
Drop removes the tracker from disk
func (*JsonPropertyLengthTracker) FileName ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) FileName() string
Path to the file on disk
func (*JsonPropertyLengthTracker) Flush ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) Flush(flushBackup bool) error
Writes the current state of the tracker to disk; with flushBackup = true only the backup file is written.
func (*JsonPropertyLengthTracker) PropertyMean ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) PropertyMean(propName string) (float32, error)
Returns the average length of the given property
func (*JsonPropertyLengthTracker) PropertyTally ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) PropertyTally(propName string) (int, int, float64, error)
PropertyTally returns the total property length, the total count, and the average property length (sum / totalCount) for the given property, plus an error.
func (*JsonPropertyLengthTracker) TrackProperty ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) TrackProperty(propName string, value float32) error
Adds a new value to the tracker
func (*JsonPropertyLengthTracker) UnTrackProperty ¶ added in v1.19.0
func (t *JsonPropertyLengthTracker) UnTrackProperty(propName string, value float32) error
Removes a value from the tracker
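A minimal sketch tying the JsonPropertyLengthTracker methods together; the file path and property name are placeholders:

package main

import (
	"fmt"

	"github.com/sirupsen/logrus"
	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
)

func main() {
	// Loads existing data from the path, converting the old format if needed.
	tracker, err := inverted.NewJsonPropertyLengthTracker("/tmp/prop_lengths.json", logrus.New())
	if err != nil {
		panic(err)
	}
	defer tracker.Close()

	// Record the lengths of two "description" values, then read the mean back.
	_ = tracker.TrackProperty("description", 12)
	_ = tracker.TrackProperty("description", 18)

	mean, _ := tracker.PropertyMean("description")
	fmt.Println("mean length:", mean) // expected around 15

	// Persist the current state (true would write only the backup file).
	_ = tracker.Flush(false)
}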
type MapPairsAndPropName ¶ added in v1.18.0
type MergeDocIDWithFrequency ¶
type MergeItem ¶
type MergeItem struct {
	Data   []byte
	DocIDs []MergeDocIDWithFrequency
}
type MergeProperty ¶
type NilProperty ¶ added in v1.24.0
type PropLenData ¶ added in v1.19.0
type Property ¶
type Property struct {
	Name               string
	Items              []Countable
	Length             int
	HasFilterableIndex bool // roaring set index
	HasSearchableIndex bool // map index (with frequencies)
}
func DedupItems ¶ added in v1.24.0
type PropertyLengthTracker ¶
Page Design

| Bytes     | Description                                     |
| --------- | ----------------------------------------------- |
| start     | page is now 0                                   |
| 0-1       | uint16 pointer to last index byte               |
| 2-3       | uint16 pointer for property name length         |
| 4-n       | property name                                   |
| ...       | repeat length+pointer pattern                   |
| 3584-3840 | second property buckets (64 buckets of float32) |
| 3840-4096 | first property buckets                          |
| repeat    | page is now 1, repeat all of above              |
Fixed Assumptions:
- First two bytes always used to indicate end of index, minimal value is 02, as the first possible value with index length=0 is after the two bytes themselves.
- 64 buckets of float32 per property (=256B per prop), excluding the index
- One index row is always 4+len(propName), consisting of a uint16 prop name length pointer, the name itself and an offset pointer pointing to the start (first byte) of the buckets
The counter to the last index byte is only a uint16, so it can address at most 65535. This will overflow when the 16th page is added (e.g. at page=15). To avoid a crash, an error is returned in this case, but the byte format will need to change to fix this.
func NewPropertyLengthTracker ¶
func NewPropertyLengthTracker(path string) (*PropertyLengthTracker, error)
func (*PropertyLengthTracker) BucketCount ¶ added in v1.19.0
func (t *PropertyLengthTracker) BucketCount(propName string, bucket uint16) (uint16, error)
func (*PropertyLengthTracker) Close ¶
func (t *PropertyLengthTracker) Close() error
func (*PropertyLengthTracker) Drop ¶
func (t *PropertyLengthTracker) Drop() error
func (*PropertyLengthTracker) FileName ¶
func (t *PropertyLengthTracker) FileName() string
func (*PropertyLengthTracker) Flush ¶
func (t *PropertyLengthTracker) Flush() error
func (*PropertyLengthTracker) PropertyMean ¶
func (t *PropertyLengthTracker) PropertyMean(propName string) (float32, error)
func (*PropertyLengthTracker) PropertyNames ¶ added in v1.19.0
func (t *PropertyLengthTracker) PropertyNames() []string
func (*PropertyLengthTracker) PropertyTally ¶ added in v1.19.0
func (*PropertyLengthTracker) TrackProperty ¶
func (t *PropertyLengthTracker) TrackProperty(propName string, value float32) error
func (*PropertyLengthTracker) UnTrackProperty ¶ added in v1.19.0
func (t *PropertyLengthTracker) UnTrackProperty(propName string, value float32) error
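A hedged sketch of the page-based tracker in use; it mirrors the JSON tracker above but also shows PropertyNames, which lists every property currently tracked on the pages:

package main

import (
	"fmt"

	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
)

func main() {
	tracker, err := inverted.NewPropertyLengthTracker("/tmp/prop_lengths")
	if err != nil {
		panic(err)
	}
	defer tracker.Close()

	_ = tracker.TrackProperty("description", 42)

	// Each tracked property owns a 64-bucket row on one of the pages.
	for _, name := range tracker.PropertyNames() {
		mean, _ := tracker.PropertyMean(name)
		fmt.Printf("%s: mean length %.1f\n", name, mean)
	}

	_ = tracker.Flush()
}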
type ReadFn ¶
ReadFn will be called 1..n times per match. This means it will also be called on a non-match, in this case v == empty bitmap. It is up to the caller to decide if that is an error case or not.
Note that because what we are parsing is an inverted index row, it can sometimes become confusing what a key and a value actually represent. The variables k and v are the literal row key and value. This means the data value, as in "less than 17" where 17 would be the "value", is held in the key variable "k". The value will contain a bitmap of the docIDs having value "k".
The boolean return argument is a way to stop iteration (e.g. when a limit is reached) without producing an error. In normal operation always return true, if false is returned once, the loop is broken.
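A hedged sketch of a ReadFn-style callback that collects doc IDs and stops once a limit is reached; the exact ReadFn signature (row key plus a *sroar.Bitmap of doc IDs, returning a continue flag and an error) is an assumption pieced together from the description above:

package example

import "github.com/weaviate/sroar"

// collectLimited returns a callback in the assumed ReadFn shape: k is the
// literal row key (i.e. the indexed value) and ids the bitmap of doc IDs
// having that value. Returning false stops row iteration without an error.
func collectLimited(limit int, out *[]uint64) func(k []byte, ids *sroar.Bitmap) (bool, error) {
	return func(k []byte, ids *sroar.Bitmap) (bool, error) {
		for _, id := range ids.ToArray() {
			if len(*out) >= limit {
				return false, nil // limit reached, stop iterating
			}
			*out = append(*out, id)
		}
		return true, nil // keep reading further rows
	}
}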
type RowReader ¶
type RowReader struct {
// contains filtered or unexported fields
}
RowReader reads one or many row(s) depending on the specified operator
func NewRowReader ¶
func NewRowReader(bucket *lsmkv.Bucket, value []byte, operator filters.Operator,
	keyOnly bool, bitmapFactory *roaringset.BitmapFactory,
) *RowReader
If keyOnly is set, the RowReader will request key-only cursors wherever cursors are used; the value arguments passed to the ReadFn will then always be nil.
type RowReaderFrequency ¶
type RowReaderFrequency struct {
// contains filtered or unexported fields
}
RowReaderFrequency reads one or many row(s) depending on the specified operator
func NewRowReaderFrequency ¶
func NewRowReaderFrequency(bucket *lsmkv.Bucket, value []byte, operator filters.Operator,
	keyOnly bool, shardVersion uint16, bitmapFactory *roaringset.BitmapFactory,
) *RowReaderFrequency
type RowReaderRoaringSet ¶ added in v1.18.0
type RowReaderRoaringSet struct {
// contains filtered or unexported fields
}
RowReaderRoaringSet reads one or many row(s) depending on the specified operator
func NewRowReaderRoaringSet ¶ added in v1.18.0
func NewRowReaderRoaringSet(bucket *lsmkv.Bucket, value []byte, operator filters.Operator,
	keyOnly bool, bitmapFactory *roaringset.BitmapFactory,
) *RowReaderRoaringSet
If keyOnly is set, the RowReaderRoaringSet will request key-only cursors wherever cursors are used; the value arguments passed to the ReadFn will then always be empty.
func (*RowReaderRoaringSet) Read ¶ added in v1.18.0
func (rr *RowReaderRoaringSet) Read(ctx context.Context, readFn ReadFn) error
Read a row using the specified ReadFn. If RowReader was created with keysOnly==true, the values argument in the readFn will always be nil on all requests involving cursors
type Searcher ¶
type Searcher struct {
// contains filtered or unexported fields
}
func NewSearcher ¶
func NewSearcher(logger logrus.FieldLogger, store *lsmkv.Store,
	getClass func(string) *models.Class, propIndices propertyspecific.Indices,
	classSearcher ClassSearcher, stopwords stopwords.StopwordDetector,
	shardVersion uint16, isFallbackToSearchable IsFallbackToSearchable,
	tenant string, nestedCrossRefLimit int64, bitmapFactory *roaringset.BitmapFactory,
) *Searcher
func (*Searcher) DocIDs ¶
func (s *Searcher) DocIDs(ctx context.Context, filter *filters.LocalFilter,
	additional additional.Properties, className schema.ClassName,
) (helpers.AllowList, error)
DocIDs is similar to Objects, but does not actually resolve the docIDs to full objects. Instead it returns the pure object id pointers. They can then be used in a secondary index (e.g. vector index)
DocID queries do not contain a limit by design, as we won't know whether the limit would remove the item that is most important for the follow-up query. Imagine the user sets the limit to 1 and the follow-up is a vector search. If we had already limited the allowList to 1, the vector search would be pointless, as only the first element would be allowed, regardless of which had the shortest distance.
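A hedged sketch of the pre-filtering flow described above: the filter is resolved to an allow list of doc IDs first, and the follow-up search is then restricted to that list. Construction of the Searcher is omitted, the vectorSearch callback is hypothetical, and the import paths are assumptions:

package example

import (
	"context"

	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
	"github.com/weaviate/weaviate/entities/additional"
	"github.com/weaviate/weaviate/entities/filters"
	"github.com/weaviate/weaviate/entities/schema"
)

// preFilteredVectorSearch resolves the filter to doc-ID pointers only (no
// object resolution) and hands them to a follow-up vector search. DocIDs
// intentionally applies no limit, so the allow list cannot accidentally
// exclude the closest vector.
func preFilteredVectorSearch(ctx context.Context, s *inverted.Searcher,
	filter *filters.LocalFilter,
	vectorSearch func(allow helpers.AllowList) error, // hypothetical follow-up
) error {
	allow, err := s.DocIDs(ctx, filter, additional.Properties{}, schema.ClassName("Article"))
	if err != nil {
		return err
	}
	return vectorSearch(allow)
}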
Source Files ¶
- analyzer.go
- bm25_searcher.go
- config.go
- config_update.go
- delta_analyzer.go
- delta_merger.go
- like_regexp.go
- new_prop_length_tracker.go
- objects.go
- prop_length_tracker.go
- prop_value_pairs.go
- row_reader.go
- row_reader_frequency.go
- row_reader_roaring_set.go
- searcher.go
- searcher_doc_bitmap.go
- searcher_ref_filter.go
- searcher_value_extractors.go
- serialization.go