interfaces

package
v0.0.0-...-e6a8dbb Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 26, 2024 License: Apache-2.0 Imports: 3 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var EmptyChildren = []int{}

Functions

This section is empty.

Types

type AllocatedIndex

type AllocatedIndex interface {
	io.Closer
	Ptr() unsafe.Pointer
	Size() int64
}

type AnnoyIndex

type AnnoyIndex[TV VectorType, TIX IndexTypes] interface {
	io.Closer
	// VectorLength returns the vector length of the index.
	VectorLength() TIX
	// GetItem returns the vector of the given _itemIndex_.
	GetItem(itemIndex TIX) []TV
	// AddItem adds an item to the index. The ownership of the vector _v_ is taken
	// by this function. The _itemIndex_ is a numbering index of the _v_ vector and
	// *SHOULD* be incremental. If same _itemIndex_ is added twice, the last one
	// will be the one in the index.
	AddItem(itemIndex TIX, v []TV)
	// Build will build a a new index. The _numberOfTrees_ is the number of trees
	// to build. The _numWorkers_ is the number of workers to use when building
	// the index. If _numWorkers_ is -1, the number of workers will be set to the
	// number of CPU cores. If _numWorkers_ is 0, the number of workers will be
	// set to 1. Hence, run on current goroutine.
	//
	// The _numberOfTrees_ will be split amongst the workers. The more number
	// of trees, the larger the index. But it also will be more precise.
	Build(numberOfTrees, numWorkers int)
	// CreateContext will create a batch context, that should be used in subsequent
	// calls to `GetNnsByVector` and `GetNnsByItem`.
	//
	// Create a new context per goroutine. Whenever a new index is loaded or saved,
	// a new context *must* be created since it contains vital information about the
	// index. Same applies when the index is *built!*
	CreateContext() AnnoyIndexContext[TV, TIX]
	// GetDistance returns the distance between the two given items.
	GetDistance(i, j TIX) TV
	// GetNnsByItem will search for the closest vectors to the given _item_ in the index. When
	// _numReturn_ is -1, it will search number of trees in index * _numReturn_.
	GetNnsByItem(
		item TIX,
		numReturn, numNodesToInspect int,
		ctx AnnoyIndexContext[TV, TIX],
	) (result []TIX, distances []TV)
	// GetAllNns will search for the closest vectors to the given _vector_. When
	// _numReturn_ is -1, it will search number of trees in index * _numReturn_.
	GetNnsByVector(
		vector []TV,
		numReturn, numNodesToInspect int,
		ctx AnnoyIndexContext[TV, TIX],
	) (result []TIX, distances []TV)
	Save(fileName string) error
	Load(fileName string) error
}

type AnnoyIndexBuildPolicy

type AnnoyIndexBuildPolicy interface {
	// Build will build a a new index. The _numberOfTrees_ is the number of trees
	// to build. The _numWorkers_ is the number of workers to use when building
	// the index. If _numWorkers_ is -1, the number of workers will be set to the
	// number of CPU cores. If _numWorkers_ is 0, the number of workers will be
	// set to 1. Hence, run on current goroutine.
	//
	// The _numberOfTrees_ will be split amongst the workers. The more number
	// of trees, the larger the index. But it also will be more precise.
	//
	// This uses the `AnnoyIndexBuilder` to perform the actual work.
	Build(builder AnnoyIndexBuilder, numberOfTrees, numberOfWorker int)
	LockNNodes()
	UnlockNNodes()
	LockNodes()
	UnlockNodes()
	LockSharedNodes()
	UnlockSharedNodes()
	LockRoots()
	UnlockRoots()
}

type AnnoyIndexBuilder

type AnnoyIndexBuilder interface {
	ThreadBuild(treesPerWorker, workerIdx int, threadedBuildPolicy AnnoyIndexBuildPolicy)
}

type AnnoyIndexContext

type AnnoyIndexContext[TV VectorType, TIX IndexTypes] interface {
}

type BuildIndexAllocator

type BuildIndexAllocator interface {
	// Free frees the memory allocated by the allocator.
	Free()
	//Reallocate will allocate/reallocate memory to fit the given size.
	Reallocate(byteSize int) unsafe.Pointer
}

BuildIndexAllocator is an allocator whilst building an index.

type Distance

type Distance[TV VectorType, TIX IndexTypes] interface {
	// PreProcess will pre-process the data before it is used for distance calculations.
	//
	// The _nodes_ is a pointer to the beginning of the memory where the nodes are stored.
	PreProcess(nodes unsafe.Pointer, node_count TIX)
	// Distance calculates the distance from _x_ to _y_ `Node`.
	Distance(x Node[TV, TIX], y Node[TV, TIX]) TV
	// Normalize will normalize the vector for the _node_.
	Normalize(node Node[TV, TIX])
	// Margin will return the margin for the node.
	Margin(n Node[TV, TIX], y []TV) TV
	// CreateSplit will write to split node _m_ based on the _children_ nodes. The _nodeSize_ is the
	// size of the memory a `Node[TV,TIX]` will occupy. The _vectorLength_ is the length of the vector
	// the node will hold.
	CreateSplit(
		children []Node[TV, TIX],
		nodeSize TIX,
		random Random[TIX],
		m Node[TV, TIX],
	)
	// Side determines which side of the children indices to use when a split is made.
	Side(
		m Node[TV, TIX],
		v []TV,
		random Random[TIX],
	) Side
	// MapNodeToMemory will map the node to existing memory and use that for storage.
	MapNodeToMemory(mem unsafe.Pointer, itemIndex TIX) Node[TV, TIX]
	PQDistance(distance, margin TV, side Side) TV
	// NormalizedDistance will normalize the _distance_ and return it.
	NormalizedDistance(distance TV) TV
	PQInitialValue() TV
	// InitNode will initialize the node. Depending on the implementation
	// it will do different things.
	InitNode(node Node[TV, TIX])
	// MaxNumChildren is the max number of descendants to fit into node by overwriting
	// the vector space.
	MaxNumChildren() TIX
	// NodeSize is the size of the allocated memory for the node. Each node occupy the same
	// amount of memory.
	NodeSize() TIX
	// VectorLength is the length of the vector the this distance operates on.
	VectorLength() TIX
}

type IndexAllocator

type IndexAllocator interface {
	Open(fqFile string) (AllocatedIndex, error)
	Get(fqFile string) (AllocatedIndex, bool)
}

type IndexTypes

type IndexTypes interface {
	uint32 | uint64
}

type Node

type Node[TV VectorType, TIX IndexTypes] interface {
	// GetRawVector returns the raw vector that you have to know the length in order
	// to safely access it.
	GetRawVector() *TV
	// GetVector will allocate a slice header and point to the raw vector.
	//
	// CAUTION: This is a wasteful and should be used with care since it will
	// allocate a new slice header every time!
	//
	// It uses the _vectorLength_ to know how many elements to set as length in
	// the slice. Be *careful* to use the correct length, otherwise it may corrupt
	// the memory upon writes in the vector.
	GetVector(vectorLength TIX) []TV
	// SetVector will set the vector to the given slice. It does this by copying
	// the slice contents to the raw vector.
	SetVector(v []TV)
	// GetRawChildren returns the raw children that you have to know the length in order
	// to safely access it.
	GetRawChildren() *TIX
	// GetChildren returns all children indexes (if n_descendants > 1 && n_descendants <= K).
	// This will allocate a new slice header and point to the raw children.
	GetChildren() []TIX
	// SetChildren will copy the children slice to the node.
	SetChildren(children []TIX)
	GetNumberOfDescendants() TIX
	SetNumberOfDescendants(n TIX)
	GetNorm() TV
	SetNorm(norm TV)
}

type Pair

type Pair[T constraints.Ordered, S constraints.Ordered] struct {
	First  T
	Second S
}

func (*Pair[T, S]) Less

func (p *Pair[T, S]) Less(other *Pair[T, S]) bool

type Pairs

type Pairs[T constraints.Ordered, S constraints.Ordered] []*Pair[T, S]

func (Pairs[TV, TIX]) ContainsFirst

func (pq Pairs[TV, TIX]) ContainsFirst(v TV) bool

func (Pairs[TV, TIX]) ContainsSecond

func (pq Pairs[TV, TIX]) ContainsSecond(v TIX) bool

func (Pairs[_, _]) Len

func (pq Pairs[_, _]) Len() int

func (Pairs[_, _]) Less

func (pq Pairs[_, _]) Less(i, j int) bool

func (*Pairs[_, _]) Pop

func (pq *Pairs[_, _]) Pop() interface{}

func (*Pairs[TP, TV]) Push

func (pq *Pairs[TP, TV]) Push(x interface{})

func (Pairs[_, _]) Swap

func (pq Pairs[_, _]) Swap(i, j int)

func (*Pairs[TP, TV]) Top

func (pq *Pairs[TP, TV]) Top() *Pair[TP, TV]

type Random

type Random[TIX IndexTypes] interface {
	Next() TIX
	NextSide() Side
	// NextIndex draw random integer between 0 and n-1 where n is at most the number of data points you have
	NextIndex(n TIX) TIX
	SetSeed(seed TIX)
	GetSeed() TIX
	// CloneAndReset returns a cloned instance and that is reset with the original seed.
	CloneAndReset() Random[TIX]
}

type Side

type Side int
const (
	SideLeft  Side = 0
	SideRight Side = 1
)

type Sorter

type Sorter[TV VectorType, TIX IndexTypes] interface {
	SortSlice(slice []TIX)
	SortPairs(pairs []*Pair[TV, TIX])
	PartialSortSlice(s Pairs[TV, TIX], begin, middle, end int)
}

type SorterFunctions

type SorterFunctions[TV VectorType, TIX IndexTypes] struct {
	SortSliceFunc        func(slice []TIX)
	SortPairsFunc        func(pairs []*Pair[TV, TIX])
	PartialSortSliceFunc func(s Pairs[TV, TIX], begin, middle, end int)
}

func (*SorterFunctions[TV, TIX]) PartialSortSlice

func (sf *SorterFunctions[TV, TIX]) PartialSortSlice(s Pairs[TV, TIX], begin, middle, end int)

func (*SorterFunctions[TV, TIX]) SortPairs

func (sf *SorterFunctions[TV, TIX]) SortPairs(pairs []*Pair[TV, TIX])

func (*SorterFunctions[TV, TIX]) SortSlice

func (sf *SorterFunctions[TV, TIX]) SortSlice(slice []TIX)

type VectorType

type VectorType interface {
	float32 | float64
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL