knn

package module
v0.7.1-alpha Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 21, 2024 License: MIT Imports: 11 Imported by: 0

README

Go KNN

Go Reference

TODO

  • CI for SIMD
  • MIPS multithread and SIMD

For GPU implementations, see:

Installation

go get github.com/cartersusi/go-knn

Usage

Importing
import "github.com/cartersusi/go-knn"
Creating Tensors

Supported Scalars:

  • float32
  • float64

Supported Dimensions/Ranks:

  • 1
  • 2

Matrix:

// Build a rank-2 Tensor from a [][]float32.
matrix := [][]float32{
	{0.1, 0.2, 0.3, 0.4},
	{0.4, 0.5, 0.6, 0.7},
}
m := &knn.Tensor[float32]{}
m.New(matrix) // New returns an error; it is ignored here for brevity.

Vectors:

// Build a rank-1 Tensor from a []float32.
vector := []float32{0.2, 0.3, 0.4, 0.5}
v := &knn.Tensor[float32]{}
v.New(vector) // New returns an error; it is ignored here for brevity.
Searching

Supported SIMD:

New Instance

// Search is generic, so the scalar type must be given explicitly; it has to
// match the element type of the Data and Query tensors.
s := &knn.Search[float32]{
	Data:        m,          // 2D Tensor
	Query:       v,          // 1D Tensor
	Multithread: true,       // Enable multithreading (default = false); not supported for MIPS
	MaxWorkers:  m.Shape[0], // Specify MaxWorkers (default = n_cpu_cores)
	SIMD:        true,       // Use SIMD operations; SIMD works on float32, so float64 values are cast to float32
}

New Query

Search.Query holds a pointer to a 1D Tensor, so it can be swapped quickly for each new query.

// Reuse the same Search for every query; only the Query pointer changes.
// The blank identifier skips the slice index so that query is the element
// itself (assumes all_queries is a slice of 1D Tensor pointers).
for _, query := range all_queries {
	s.Query = query
	nn, _ := s.L1(2) // error ignored for brevity; check it in real code
}

Example using OpenAI Ada (L1)

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/cartersusi/go-knn"
	openai "github.com/sashabaranov/go-openai"
)

// main embeds a fixed list of sentences and one query sentence with the
// OpenAI Ada embedding model, then prints the two sentences whose embeddings
// are nearest to the query according to Search.L1.
func main() {
	openai_token := "my-key"
	client := openai.NewClient(openai_token)

	sentences := []string{
		"The sailor enjoys sailing on a boat in the sea.",
		"The carpenter enjoys building houses with wood.",
		"The athlete enjoys running on the track.",
		"The chef enjoys cooking in the kitchen.",
		"The doctor enjoys helping patients in the hospital.",
		"The scientist enjoys conducting experiments in the laboratory.",
		"The teacher enjoys teaching students in the classroom.",
		"The artist enjoys painting in the studio.",
	}

	query_sentence := "I am a scientist who enjoys fishing when I'm not in the lab."

	var vector []float32
	var matrix [][]float32

	// One embedding request per sentence; each embedding becomes one row of
	// the data matrix, in the same order as sentences.
	for _, sentence := range sentences {
		queryReq := openai.EmbeddingRequest{
			Input: []string{sentence},
			Model: openai.AdaEmbeddingV2,
		}

		queryResponse, err := client.CreateEmbeddings(context.Background(), queryReq)
		if err != nil {
			log.Fatal("Error creating query embedding:", err)
		}

		matrix = append(matrix, queryResponse.Data[0].Embedding)
	}

	// Embed the query sentence the same way.
	queryReq := openai.EmbeddingRequest{
		Input: []string{query_sentence},
		Model: openai.AdaEmbeddingV2,
	}

	queryResponse, err := client.CreateEmbeddings(context.Background(), queryReq)
	if err != nil {
		log.Fatal("Error creating target embedding:", err)
	}

	vector = queryResponse.Data[0].Embedding

	d := &knn.Tensor[float32]{}
	d.New(matrix)

	q := &knn.Tensor[float32]{}
	q.New(vector)

	s := &knn.Search[float32]{
		Data:        d,
		Query:       q,
		SIMD:        true,
		Multithread: true,
	}

	nn, err := s.L1(2)
	if err != nil {
		// Stop here: the result is not usable after a failed search, and
		// indexing nn.Indices below could panic.
		log.Fatal(err)
	}

	// nn.Indices refer to rows of the data matrix, which line up with sentences.
	fmt.Println("Query Sentence:", query_sentence)
	fmt.Println("Nearerst neighbor[0]:", sentences[nn.Indices[0]])
	fmt.Println("Nearerst neighbor[1]:", sentences[nn.Indices[1]])
	fmt.Println("Indices:", nn.Indices)
	fmt.Println("Values:", nn.Values)
}

Output:

Query Sentence: I am a scientist who enjoys fishing when I'm not in the lab.
Nearerst neighbor[0]: The scientist enjoys conducting experiments in the laboratory.
Nearerst neighbor[1]: The sailor enjoys sailing on a boat in the sea.
Indices: [5 0]
Values: [0.877427339553833 0.828193724155426]

Sources:

TPU-KNN: K Nearest Neighbor Search at Peak FLOP/s

Documentation

Index

Constants

View Source
// These names mirror the Search methods L1, L2 and MIPS; presumably they
// identify the corresponding search kinds — confirm against the implementation.
const (
	L1 = iota
	L2
	MIPS
)
View Source
// Presumably the values accepted as the level argument of Log — confirm
// against the implementation.
const (
	Info = iota
	Debug
	Warning
	Error
)

Variables

This section is empty.

Functions

func Abs

func Abs[T float32 | float64](a T) T

func Log

func Log(msg string, level int)

Types

type MaxHeap

type MaxHeap[T float32 | float64] struct {
	// contains filtered or unexported fields
}

func (*MaxHeap[T]) Len

func (h *MaxHeap[T]) Len() int

func (*MaxHeap[T]) Less

func (h *MaxHeap[T]) Less(i, j int) bool

func (*MaxHeap[T]) Peek

func (h *MaxHeap[T]) Peek() interface{}

func (*MaxHeap[T]) Pop

func (h *MaxHeap[T]) Pop() interface{}

func (*MaxHeap[T]) Process

func (h *MaxHeap[T]) Process(i *int, k *int, distance *T)

maybe move ??

func (*MaxHeap[T]) Push

func (h *MaxHeap[T]) Push(x interface{})

func (*MaxHeap[T]) Swap

func (h *MaxHeap[T]) Swap(i, j int)

type Neighbors

// Neighbors is the result type returned by Search.L1, Search.L2 and Search.MIPS.
type Neighbors[T any] struct {
	Indices []int // The README example uses these to index back into the rows given as Data.
	Values  []T   // Presumably the distance/score for each entry in Indices — confirm.
}

type Result

// Result pairs an Index with a Distance of the tensor's scalar type.
// Presumably one candidate neighbor produced during a search — confirm.
type Result[T float32 | float64] struct {
	Index    int
	Distance T
}
// Search bundles the data, the query and the execution options for a
// nearest-neighbor search. Field notes follow the README usage section.
type Search[T float32 | float64] struct {
	Data        *Tensor[T] // 2D Tensor to search.
	Query       *Tensor[T] // 1D Tensor to search for; can be reassigned between calls.
	Multithread bool       // Enables multithreading (default false); not supported for MIPS.
	MaxWorkers  int        // Worker limit; the README gives the default as the number of CPU cores.
	SIMD        bool       // Uses SIMD operations, which work on float32; float64 values are cast.
}

func (*Search[T]) Einsum

func (s *Search[T]) Einsum() []T

func (*Search[T]) EstimateBinSize

func (s *Search[T]) EstimateBinSize() int

func (*Search[T]) GetSize

func (s *Search[T]) GetSize() T

func (*Search[T]) HalfNorm

func (s *Search[T]) HalfNorm() []T

func (*Search[T]) L1

func (s *Search[T]) L1(k int) (Neighbors[T], error)

func (*Search[T]) L2

func (s *Search[T]) L2(k int) (Neighbors[T], error)

func (*Search[T]) MIPS

func (s *Search[T]) MIPS(k int, opts ...interface{}) (Neighbors[T], error)

func (*Search[T]) Manhattan

func (s *Search[T]) Manhattan(i *int) T

func (*Search[T]) Print

func (s *Search[T]) Print()

func (*Search[T]) PrintDistances

func (s *Search[T]) PrintDistances()

func (*Search[T]) PrintTypes

func (s *Search[T]) PrintTypes()

type Tensor

// Tensor wraps the values handed to New. The README lists float32 and
// float64 as supported scalars and 1 and 2 as supported ranks.
type Tensor[T float32 | float64] struct {
	Values interface{}  // Presumably the slice passed to New — confirm.
	Shape  [2]int       // The README uses Shape[0] of a 2D Tensor as a worker count; presumably the row count — confirm.
	Type   reflect.Type // Presumably the reflect.Type of Values — confirm.
	Rank   int          // Presumably 1 for vectors and 2 for matrices — confirm.
}

func (*Tensor[T]) New

func (t *Tensor[T]) New(values interface{}) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL