hnsw

package
v0.0.0-...-f09cf9b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 13, 2025 License: BSD-3-Clause Imports: 48 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// DefaultSearchByDistInitialLimit is the default initial limit for
	// search-by-distance. The value of 100 is an arbitrary decision and can
	// be tuned as needed.
	DefaultSearchByDistInitialLimit = 100

	// DefaultSearchByDistLimitMultiplier is the default factor by which the
	// search-by-distance limit is increased. Growing the limit in multiples
	// of 10 is an arbitrary decision and can be tuned as needed.
	DefaultSearchByDistLimitMultiplier = 10
)

Variables

This section is empty.

Functions

func IsHNSWIndex

func IsHNSWIndex(index any) bool

func New

func New(cfg Config, uc ent.UserConfig,
	tombstoneCallbacks cyclemanager.CycleCallbackGroup, store *lsmkv.Store,
) (*hnsw, error)

New creates a new HNSW index. The commit logger is provided through a thunk (a function whose execution can be deferred), because creating a commit logger opens files for writing. However, whether a file is present is the criterion the index uses to decide if it has to recover from disk or if it's a truly new index. So the index is initialized first, with unbiased disk checks, and only then is the commit logger created.

func NewCommitLogger

func NewCommitLogger(rootPath, name string, logger logrus.FieldLogger,
	maintenanceCallbacks cyclemanager.CycleCallbackGroup, opts ...CommitlogOption,
) (*hnswCommitLogger, error)

func NewFromJSONDump

func NewFromJSONDump(dumpBytes []byte, vecForID common.VectorForID[float32]) (*hnsw, error)

func NewFromJSONDumpMap

func NewFromJSONDumpMap(dumpBytes []byte, vecForID common.VectorForID[float32]) (*hnsw, error)

func NewMultiVectorForIDThunk

func NewMultiVectorForIDThunk(targetVector string, fn func(ctx context.Context, id uint64, targetVector string) ([][]float32, error)) common.VectorForID[[]float32]

func NewTempMultiVectorForIDThunk

func NewTempMultiVectorForIDThunk(targetVector string, fn func(ctx context.Context, indexID uint64, container *common.VectorSlice, targetVector string) ([][]float32, error)) common.TempVectorForID[[]float32]

func NewTempVectorForIDThunk

func NewTempVectorForIDThunk[T float32 | []float32](targetVector string, fn func(ctx context.Context, indexID uint64, container *common.VectorSlice, targetVector string) ([]T, error)) common.TempVectorForID[T]

func NewVectorForIDThunk

func NewVectorForIDThunk[T float32 | []float32](targetVector string, fn func(ctx context.Context, id uint64, targetVector string) ([]T, error)) common.VectorForID[T]

func NewWriter

func NewWriter(w *os.File) *bufWriter

NewWriter returns a new Writer whose buffer has the default size.

func NewWriterSize

func NewWriterSize(w *os.File, size int) *bufWriter

NewWriterSize returns a new Writer whose buffer has at least the specified size. If the argument *os.File is already a Writer with large enough size, it returns the underlying Writer.

func ValidateUserConfigUpdate

func ValidateUserConfigUpdate(initial, updated config.VectorIndexConfig) error

Types

type BufferedLinksLogger

// BufferedLinksLogger is the interface for a logger of link changes. It
// offers the two link operations AddLinkAtLevel and ReplaceLinksAtLevel, plus
// Close.
//
// NOTE(review): judging by the name, implementations are expected to buffer
// writes until Close (or an internal flush) — confirm against implementations.
type BufferedLinksLogger interface {
	AddLinkAtLevel(nodeid uint64, level int, target uint64) error
	ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error
	Close() error // Close should Flush and Close
}

type CommitLogCombiner

type CommitLogCombiner struct {
	// contains filtered or unexported fields
}

func NewCommitLogCombiner

func NewCommitLogCombiner(rootPath, id string, threshold int64,
	logger logrus.FieldLogger,
) *CommitLogCombiner

func (*CommitLogCombiner) Do

func (c *CommitLogCombiner) Do() (bool, error)

type CommitLogger

// CommitLogger is the interface through which index mutations (nodes, links,
// tombstones, entrypoint, compression data) are logged, together with
// lifecycle operations such as Flush, Reset, Drop and Shutdown.
// NoopCommitLogger implements it without writing anything to disk.
//
// NOTE(review): the level parameter is an int in SetEntryPointWithMaxLayer,
// AddLinkAtLevel and ReplaceLinksAtLevel, but a uint16 in ClearLinksAtLevel.
// Confirm whether this difference is intentional.
type CommitLogger interface {
	ID() string
	AddNode(node *vertex) error
	SetEntryPointWithMaxLayer(id uint64, level int) error
	AddLinkAtLevel(nodeid uint64, level int, target uint64) error
	ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error
	AddTombstone(nodeid uint64) error
	RemoveTombstone(nodeid uint64) error
	DeleteNode(nodeid uint64) error
	ClearLinks(nodeid uint64) error
	ClearLinksAtLevel(nodeid uint64, level uint16) error
	Reset() error
	Drop(ctx context.Context) error
	Flush() error
	Shutdown(ctx context.Context) error
	RootPath() string
	SwitchCommitLogs(bool) error
	AddPQCompression(compressionhelpers.PQData) error
	AddSQCompression(compressionhelpers.SQData) error
}

func MakeNoopCommitLogger

func MakeNoopCommitLogger() (CommitLogger, error)

type CommitlogOption

type CommitlogOption func(l *hnswCommitLogger) error

func WithAllocChecker

func WithAllocChecker(mm memwatch.AllocChecker) CommitlogOption

func WithCommitlogThreshold

func WithCommitlogThreshold(size int64) CommitlogOption

func WithCommitlogThresholdForCombining

func WithCommitlogThresholdForCombining(size int64) CommitlogOption

func WithCondensor

func WithCondensor(condensor Condensor) CommitlogOption

type Condensor

// Condensor is the interface for anything that can process a single file
// identified by name. MemoryCondensor and MmapCondensor both provide a
// matching Do method, and a Condensor can be passed to the commit logger via
// the WithCondensor option.
type Condensor interface {
	// Do processes the file with the given name and reports any error.
	Do(filename string) error
}

type Config

// Config holds the settings for a new HNSW index that are derived internally
// rather than set by the user. It is passed to New alongside the separate
// ent.UserConfig argument.
type Config struct {
	// internal: values supplied by the surrounding system
	RootPath                  string
	ID                        string
	MakeCommitLoggerThunk     MakeCommitLogger
	VectorForIDThunk          common.VectorForID[float32]
	MultiVectorForIDThunk     common.VectorForID[[]float32]
	TempVectorForIDThunk      common.TempVectorForID[float32]
	TempMultiVectorForIDThunk common.TempVectorForID[[]float32]
	Logger                    logrus.FieldLogger
	DistanceProvider          distancer.Provider
	PrometheusMetrics         *monitoring.PrometheusMetrics
	AllocChecker              memwatch.AllocChecker
	WaitForCachePrefill       bool
	FlatSearchConcurrency     int

	// metadata for monitoring
	ShardName string
	ClassName string

	// NOTE(review): presumably an upper bound on the size of the pool of
	// visited lists — confirm against the code that uses it.
	VisitedListPoolMaxSize int
}

Config for a new HNSW index. It contains information that is derived internally, e.g. by the shard. All user-settable config is specified in Config.UserConfig

func (Config) Validate

func (c Config) Validate() error

type CorruptCommitLogFixer

type CorruptCommitLogFixer struct {
	// contains filtered or unexported fields
}

CorruptCommitLogFixer helps identify potentially corrupt commit logs and tries to mitigate the problem

func NewCorruptedCommitLogFixer

func NewCorruptedCommitLogFixer(logger logrus.FieldLogger) *CorruptCommitLogFixer

func (*CorruptCommitLogFixer) Do

func (fixer *CorruptCommitLogFixer) Do(fileNames []string) ([]string, error)

Do tries to delete files that could be corrupt and removes them from the returned list, indicating that the index should no longer try to read them

A file is considered corrupt if it has the .condensed suffix - yet there is a file with the same name without that suffix. This would indicate that trying to condense the file has somehow failed or been interrupted, as a successful condensing would have been succeeded by the removal of the original file. We thus assume the file must be corrupted, and delete it, so that the original will be used instead.

type DeserializationResult

// DeserializationResult is the state that Deserializer.Do accepts as its
// initial state and returns as its result: nodes, tombstones, the entrypoint
// with its level, optional PQ/SQ compression data, and link-replacement
// bookkeeping.
type DeserializationResult struct {
	Nodes             []*vertex
	NodesDeleted      map[uint64]struct{}
	Entrypoint        uint64
	Level             uint16
	Tombstones        map[uint64]struct{}
	TombstonesDeleted map[uint64]struct{}
	EntrypointChanged bool
	CompressionPQData *compressionhelpers.PQData
	CompressionSQData *compressionhelpers.SQData
	Compressed        bool

	// LinksReplaced is keyed by node id, then by level.
	//
	// If there is no entry for the links at a level to be replaced, we must
	// assume that all links were appended and that prior state must exist.
	// Similarly, if we run into a "Clear" we need to explicitly set the
	// replace flag, so that future appends aren't always appended. Otherwise
	// reading multiple condensed logs in succession leads to too many
	// connections, as discovered in
	// https://github.com/liutizhong/weaviate/issues/1868
	LinksReplaced map[uint64]map[uint16]struct{}
}
func (dr DeserializationResult) ReplaceLinks(node uint64, level uint16) bool

type Deserializer

type Deserializer struct {
	// contains filtered or unexported fields
}

func NewDeserializer

func NewDeserializer(logger logrus.FieldLogger) *Deserializer

func (*Deserializer) Do

func (d *Deserializer) Do(fd *bufio.Reader, initialState *DeserializationResult, keepLinkReplaceInformation bool) (*DeserializationResult, int, error)
func (d *Deserializer) ReadAddLinks(r io.Reader,
	res *DeserializationResult,
) (int, error)

func (*Deserializer) ReadAddTombstone

func (d *Deserializer) ReadAddTombstone(r io.Reader, tombstones map[uint64]struct{}) error
func (d *Deserializer) ReadClearLinks(r io.Reader, res *DeserializationResult,
	keepReplaceInfo bool,
) error

func (*Deserializer) ReadClearLinksAtLevel

func (d *Deserializer) ReadClearLinksAtLevel(r io.Reader, res *DeserializationResult,
	keepReplaceInfo bool,
) error

func (*Deserializer) ReadCommitType

func (d *Deserializer) ReadCommitType(r io.Reader) (HnswCommitType, error)

func (*Deserializer) ReadDeleteNode

func (d *Deserializer) ReadDeleteNode(r io.Reader, res *DeserializationResult, nodesDeleted map[uint64]struct{}) error

func (*Deserializer) ReadEP

func (d *Deserializer) ReadEP(r io.Reader) (uint64, uint16, error)

func (*Deserializer) ReadKMeansEncoder

func (d *Deserializer) ReadLink(r io.Reader, res *DeserializationResult) error
func (d *Deserializer) ReadLinks(r io.Reader, res *DeserializationResult,
	keepReplaceInfo bool,
) (int, error)

func (*Deserializer) ReadNode

func (d *Deserializer) ReadNode(r io.Reader, res *DeserializationResult) error

func (*Deserializer) ReadPQ

func (d *Deserializer) ReadPQ(r io.Reader, res *DeserializationResult) (int, error)

func (*Deserializer) ReadRemoveTombstone

func (d *Deserializer) ReadRemoveTombstone(r io.Reader, tombstones map[uint64]struct{}, tombstonesDeleted map[uint64]struct{}) error

func (*Deserializer) ReadSQ

func (d *Deserializer) ReadSQ(r io.Reader, res *DeserializationResult) error

func (*Deserializer) ReadTileEncoder

type FastSet

type FastSet struct {
	// contains filtered or unexported fields
}

func NewFastSet

func NewFastSet(allow helpers.AllowList) *FastSet

func (*FastSet) Contains

func (s *FastSet) Contains(node uint64) bool

func (*FastSet) DeepCopy

func (s *FastSet) DeepCopy() helpers.AllowList

func (*FastSet) Insert

func (s *FastSet) Insert(ids ...uint64)

func (*FastSet) IsEmpty

func (s *FastSet) IsEmpty() bool

func (*FastSet) Iterator

func (s *FastSet) Iterator() helpers.AllowListIterator

func (*FastSet) Len

func (s *FastSet) Len() int

func (*FastSet) LimitedIterator

func (s *FastSet) LimitedIterator(limit int) helpers.AllowListIterator

func (*FastSet) Max

func (s *FastSet) Max() uint64

func (*FastSet) Min

func (s *FastSet) Min() uint64

func (*FastSet) Size

func (s *FastSet) Size() uint64

func (*FastSet) Slice

func (s *FastSet) Slice() []uint64

func (*FastSet) Truncate

func (s *FastSet) Truncate(uint64) helpers.AllowList

func (*FastSet) WrapOnWrite

func (s *FastSet) WrapOnWrite() helpers.AllowList

type HnswCommitType

// HnswCommitType enumerates the kinds of commit operations. It is the type
// returned by the ReadCommitType methods, which read it from an io.Reader.
type HnswCommitType uint8 // 256 options, plenty of room for future extensions
// The values are assigned sequentially by iota, starting with AddNode = 0.
//
// NOTE(review): because these values are read back from a stream, reordering
// or inserting entries would presumably change the meaning of existing commit
// logs. Append new types at the end; confirm against the on-disk format.
const (
	AddNode HnswCommitType = iota
	SetEntryPointMaxLevel
	AddLinkAtLevel
	ReplaceLinksAtLevel
	AddTombstone
	RemoveTombstone
	ClearLinks
	DeleteNode
	ResetIndex
	ClearLinksAtLevel // added in v1.8.0-rc.1, see https://github.com/liutizhong/weaviate/issues/1701
	AddLinksAtLevel   // added in v1.8.0-rc.1, see https://github.com/liutizhong/weaviate/issues/1705
	AddPQ
	AddSQ
)

func (HnswCommitType) String

func (t HnswCommitType) String() string

type HnswStats

// HnswStats is a JSON-serializable set of statistics about an HNSW index.
// The field names and JSON tags name the reported quantities: dimensions,
// entry point id, per-layer distribution, unreachable points, tombstone
// count, cache size and PQ configuration.
type HnswStats struct {
	Dimensions         int32        `json:"dimensions"`
	EntryPointID       uint64       `json:"entryPointID"`
	DistributionLayers map[int]uint `json:"distributionLayers"`
	UnreachablePoints  []uint64     `json:"unreachablePoints"`
	NumTombstones      int          `json:"numTombstones"`
	CacheSize          int32        `json:"cacheSize"`
	PQConfiguration    ent.PQConfig `json:"pqConfiguration"`
}

func (*HnswStats) IndexType

func (s *HnswStats) IndexType() common.IndexType

type Index

type Index interface {
	CleanUpTombstonedNodes(shouldAbort cyclemanager.ShouldAbortCallback) error
}

This interface exposes public methods of the HNSW index that are not part of the VectorIndex interface. It is a workaround to avoid circular dependencies.

func AsHNSWIndex

func AsHNSWIndex(index any) Index

type JSONDump

// JSONDump is the JSON representation of an index dump whose nodes are
// JSONDumpNode values, i.e. each node's connections are a slice of slices.
//
// NOTE(review): presumably the format consumed by NewFromJSONDump — confirm.
type JSONDump struct {
	Labels              []string            `json:"labels"`
	ID                  string              `json:"id"`
	Entrypoint          uint64              `json:"entrypoint"`
	CurrentMaximumLayer int                 `json:"currentMaximumLayer"`
	Tombstones          map[uint64]struct{} `json:"tombstones"`
	Nodes               []JSONDumpNode      `json:"nodes"`
}

type JSONDumpMap

// JSONDumpMap is the JSON representation of an index dump whose nodes are
// JSONDumpNodeMap values, i.e. each node's connections are a map from an int
// key to a slice of ids.
//
// NOTE(review): presumably the format consumed by NewFromJSONDumpMap —
// confirm.
type JSONDumpMap struct {
	Labels              []string            `json:"labels"`
	ID                  string              `json:"id"`
	Entrypoint          uint64              `json:"entrypoint"`
	CurrentMaximumLayer int                 `json:"currentMaximumLayer"`
	Tombstones          map[uint64]struct{} `json:"tombstones"`
	Nodes               []JSONDumpNodeMap   `json:"nodes"`
}

type JSONDumpNode

// JSONDumpNode is a single node in a JSONDump: its id, its level, and its
// connections as a slice of id slices.
//
// NOTE(review): the outer slice of Connections is presumably indexed by
// level — confirm.
type JSONDumpNode struct {
	ID          uint64     `json:"id"`
	Level       int        `json:"level"`
	Connections [][]uint64 `json:"connections"`
}

type JSONDumpNodeMap

// JSONDumpNodeMap is a single node in a JSONDumpMap: its id, its level, and
// its connections as a map from an int key to a slice of ids.
//
// NOTE(review): the map key of Connections is presumably the level —
// confirm.
type JSONDumpNodeMap struct {
	ID          uint64           `json:"id"`
	Level       int              `json:"level"`
	Connections map[int][]uint64 `json:"connections"`
}

type MakeCommitLogger

type MakeCommitLogger func() (CommitLogger, error)

type MemoryCondensor

type MemoryCondensor struct {
	// contains filtered or unexported fields
}

func NewMemoryCondensor

func NewMemoryCondensor(logger logrus.FieldLogger) *MemoryCondensor

func (*MemoryCondensor) AddLinkAtLevel

func (c *MemoryCondensor) AddLinkAtLevel(nodeid uint64, level uint16, target uint64) error

func (*MemoryCondensor) AddLinksAtLevel

func (c *MemoryCondensor) AddLinksAtLevel(nodeid uint64, level uint16, targets []uint64) error

func (*MemoryCondensor) AddNode

func (c *MemoryCondensor) AddNode(node *vertex) error

AddNode adds an empty node

func (*MemoryCondensor) AddPQCompression

func (c *MemoryCondensor) AddPQCompression(data compressionhelpers.PQData) error

func (*MemoryCondensor) AddSQCompression

func (c *MemoryCondensor) AddSQCompression(data compressionhelpers.SQData) error

func (*MemoryCondensor) AddTombstone

func (c *MemoryCondensor) AddTombstone(nodeid uint64) error

func (*MemoryCondensor) DeleteNode

func (c *MemoryCondensor) DeleteNode(id uint64) error

func (*MemoryCondensor) Do

func (c *MemoryCondensor) Do(fileName string) error

func (*MemoryCondensor) RemoveTombstone

func (c *MemoryCondensor) RemoveTombstone(nodeid uint64) error

func (*MemoryCondensor) SetEntryPointWithMaxLayer

func (c *MemoryCondensor) SetEntryPointWithMaxLayer(id uint64, level int) error

func (*MemoryCondensor) SetLinksAtLevel

func (c *MemoryCondensor) SetLinksAtLevel(nodeid uint64, level int, targets []uint64) error

type Metrics

type Metrics struct {
	// contains filtered or unexported fields
}

func NewMetrics

func NewMetrics(prom *monitoring.PrometheusMetrics,
	className, shardName string,
) *Metrics

func (*Metrics) AddTombstone

func (m *Metrics) AddTombstone()

func (*Metrics) AddUnexpectedTombstone

func (m *Metrics) AddUnexpectedTombstone(operation string)

func (*Metrics) CleanedUp

func (m *Metrics) CleanedUp()

func (*Metrics) DeleteVector

func (m *Metrics) DeleteVector()

func (*Metrics) EndCleanup

func (m *Metrics) EndCleanup(threads int)

func (*Metrics) EndTombstoneCycle

func (m *Metrics) EndTombstoneCycle()

func (*Metrics) GrowDuration

func (m *Metrics) GrowDuration(start time.Time)

func (*Metrics) InsertVector

func (m *Metrics) InsertVector()

func (*Metrics) RemoveTombstone

func (m *Metrics) RemoveTombstone()

func (*Metrics) SetSize

func (m *Metrics) SetSize(size int)

func (*Metrics) SetTombstoneDeleteListSize

func (m *Metrics) SetTombstoneDeleteListSize(size int)

func (*Metrics) StartCleanup

func (m *Metrics) StartCleanup(threads int)

func (*Metrics) StartTombstoneCycle

func (m *Metrics) StartTombstoneCycle()

func (*Metrics) StartupProgress

func (m *Metrics) StartupProgress(ratio float64)

func (*Metrics) TombstoneCycleProgress

func (m *Metrics) TombstoneCycleProgress(progress float64)

func (*Metrics) TombstoneFindGlobalEntrypoint

func (m *Metrics) TombstoneFindGlobalEntrypoint()

func (*Metrics) TombstoneFindLocalEntrypoint

func (m *Metrics) TombstoneFindLocalEntrypoint()

func (*Metrics) TombstoneReassignNeighbor

func (m *Metrics) TombstoneReassignNeighbor()

func (*Metrics) TrackDelete

func (m *Metrics) TrackDelete(start time.Time, step string)

func (*Metrics) TrackInsertObserver

func (m *Metrics) TrackInsertObserver(step string) Observer

func (*Metrics) TrackStartupIndividual

func (m *Metrics) TrackStartupIndividual(start time.Time)

func (*Metrics) TrackStartupReadCommitlogDiskIO

func (m *Metrics) TrackStartupReadCommitlogDiskIO(read int64, nanoseconds int64)

func (*Metrics) TrackStartupTotal

func (m *Metrics) TrackStartupTotal(start time.Time)

type MmapCondensor

type MmapCondensor struct {
	// contains filtered or unexported fields
}

func NewMmapCondensor

func NewMmapCondensor(connectionsPerLevel int) *MmapCondensor

func (*MmapCondensor) Do

func (c *MmapCondensor) Do(fileName string) error

type MmapCondensorAnalyzer

type MmapCondensorAnalyzer struct {
	// contains filtered or unexported fields
}

func (*MmapCondensorAnalyzer) Do

func (a *MmapCondensorAnalyzer) Do(file *os.File) (mmapIndex, error)

func (*MmapCondensorAnalyzer) ReadAddTombstone

func (a *MmapCondensorAnalyzer) ReadAddTombstone(r io.Reader) error
func (a *MmapCondensorAnalyzer) ReadClearLinks(r io.Reader) error

func (*MmapCondensorAnalyzer) ReadCommitType

func (a *MmapCondensorAnalyzer) ReadCommitType(r io.Reader) (HnswCommitType, error)

func (*MmapCondensorAnalyzer) ReadDeleteNode

func (a *MmapCondensorAnalyzer) ReadDeleteNode(r io.Reader) error

func (*MmapCondensorAnalyzer) ReadEP

func (a *MmapCondensorAnalyzer) ReadEP(r io.Reader) error
func (a *MmapCondensorAnalyzer) ReadLink(r io.Reader) error
func (a *MmapCondensorAnalyzer) ReadLinks(r io.Reader) error

func (*MmapCondensorAnalyzer) ReadNode

func (a *MmapCondensorAnalyzer) ReadNode(r io.Reader) error

func (*MmapCondensorAnalyzer) ReadRemoveTombstone

func (a *MmapCondensorAnalyzer) ReadRemoveTombstone(r io.Reader) error

type MmapCondensorReader

type MmapCondensorReader struct {
	// contains filtered or unexported fields
}

func (*MmapCondensorReader) Do

func (r *MmapCondensorReader) Do(source *os.File, index mmapIndex, targetName string) error

type NoopCommitLogger

type NoopCommitLogger struct{}

NoopCommitLogger implements the CommitLogger interface, but does not actually write anything to disk

func (*NoopCommitLogger) AddLinkAtLevel

func (n *NoopCommitLogger) AddLinkAtLevel(nodeid uint64, level int, target uint64) error

func (*NoopCommitLogger) AddNode

func (n *NoopCommitLogger) AddNode(node *vertex) error

func (*NoopCommitLogger) AddPQCompression

func (n *NoopCommitLogger) AddPQCompression(data compressionhelpers.PQData) error

func (*NoopCommitLogger) AddSQCompression

func (n *NoopCommitLogger) AddSQCompression(data compressionhelpers.SQData) error

func (*NoopCommitLogger) AddTombstone

func (n *NoopCommitLogger) AddTombstone(nodeid uint64) error
func (n *NoopCommitLogger) ClearLinks(nodeid uint64) error

func (*NoopCommitLogger) ClearLinksAtLevel

func (n *NoopCommitLogger) ClearLinksAtLevel(nodeid uint64, level uint16) error

func (*NoopCommitLogger) Close

func (n *NoopCommitLogger) Close() error

func (*NoopCommitLogger) DeleteNode

func (n *NoopCommitLogger) DeleteNode(nodeid uint64) error

func (*NoopCommitLogger) Drop

func (n *NoopCommitLogger) Drop(ctx context.Context) error

func (*NoopCommitLogger) Flush

func (n *NoopCommitLogger) Flush() error

func (*NoopCommitLogger) ID

func (n *NoopCommitLogger) ID() string

func (*NoopCommitLogger) NewBufferedLinksLogger

func (n *NoopCommitLogger) NewBufferedLinksLogger() BufferedLinksLogger

func (*NoopCommitLogger) RemoveTombstone

func (n *NoopCommitLogger) RemoveTombstone(nodeid uint64) error

func (*NoopCommitLogger) ReplaceLinksAtLevel

func (n *NoopCommitLogger) ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error

func (*NoopCommitLogger) Reset

func (n *NoopCommitLogger) Reset() error

func (*NoopCommitLogger) RootPath

func (n *NoopCommitLogger) RootPath() string

func (*NoopCommitLogger) SetEntryPointWithMaxLayer

func (n *NoopCommitLogger) SetEntryPointWithMaxLayer(id uint64, level int) error

func (*NoopCommitLogger) Shutdown

func (n *NoopCommitLogger) Shutdown(context.Context) error

func (*NoopCommitLogger) StartSwitchLogs

func (n *NoopCommitLogger) StartSwitchLogs() chan struct{}

func (*NoopCommitLogger) SwitchCommitLogs

func (n *NoopCommitLogger) SwitchCommitLogs(force bool) error

type Observer

type Observer func(start time.Time)

Directories

Path Synopsis
asm
asm only has amd64 specific implementations at the moment
asm only has amd64 specific implementations at the moment

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL