data

package
v0.0.53 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 6, 2021 License: Apache-2.0 Imports: 27 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AngularDistance added in v0.0.53

func AngularDistance(a []float32, b []float32) float64

AngularDistance computes the angular similarity sim(u, v) = 1 - arccos(cosine_similarity(u, v)) / pi

func CalculateAverage

func CalculateAverage(avg []float32, p []float32, n float32) []float32

CalculateAverage calculates the average of two arrays divided by n

func CloneResult added in v0.0.30

func CloneResult(result []*pb.ScoredDatum) []*pb.ScoredDatum

func CosineSimilarity added in v0.0.23

func CosineSimilarity(a []float32, b []float32) float64

CosineSimilarity for vector similarity

func CosineSimilarity32 added in v0.0.46

func CosineSimilarity32(a []float32, b []float32) float32

CosineSimilarity32 for vector similarity

func CosineSimilarity64 added in v0.0.46

func CosineSimilarity64(a []float64, b []float64) float64

CosineSimilarity64 for vector similarity

func DefaultSearchConfig added in v0.0.22

func DefaultSearchConfig() *pb.SearchConfig

func EncodeSearchConfig added in v0.0.22

func EncodeSearchConfig(sc *pb.SearchConfig) []byte

func GetDefaultConfig added in v0.0.43

func GetDefaultConfig(name string) *pb.DataConfig

func GetKeyAsBytes added in v0.0.22

func GetKeyAsBytes(datum *pb.Datum) ([]byte, error)

func GetRetention added in v0.0.44

func GetRetention(retention uint64) time.Duration

func GetSearchKey added in v0.0.22

func GetSearchKey(datum *pb.Datum, config *pb.SearchConfig) string

func GetValueAsBytes added in v0.0.22

func GetValueAsBytes(datum *pb.Datum) ([]byte, error)

func GetVectorComparisonFunction added in v0.0.23

func GetVectorComparisonFunction(name string) func(arr1 []float32, arr2 []float32) float64

func InsertConfigFromExpireAt added in v0.0.22

func InsertConfigFromExpireAt(expiresAt uint64) *pb.InsertConfig

func NewDatum added in v0.0.22

func NewDatum(feature []float32,
	dim1 uint32,
	dim2 uint32,
	size1 uint32,
	size2 uint32,
	groupLabel []byte,
	label []byte,
	version uint64,
) *pb.Datum

NewDatum is a utility function to initialize the datum type

func QuickVectorDistance added in v0.0.30

func QuickVectorDistance(arr1 []float32, arr2 []float32) float64

func ToDatum added in v0.0.22

func ToDatum(key, value []byte) (*pb.Datum, error)

func ToDatumKey added in v0.0.22

func ToDatumKey(byteArray []byte) (*pb.DatumKey, error)

func ToDatumValue added in v0.0.22

func ToDatumValue(byteArray []byte) (*pb.DatumValue, error)

func VectorDistance

func VectorDistance(arr1 []float32, arr2 []float32) float64

VectorDistance calculates the Euclidean distance between two vectors

func VectorMultiplication added in v0.0.22

func VectorMultiplication(arr1 []float32, arr2 []float32) float64

VectorMultiplication calculates the elementwise multiplication of two vectors

Types

type Aggregator added in v0.0.23

type Aggregator struct {
	Config           *pb.SearchConfig
	List             []*pb.ScoredDatum
	DeDuplicationMap *cache.Cache
	Grouped          bool
	Context          *pb.SearchContext
	ScoreFunc        func(arr1 []float32, arr2 []float32) float64
}

func (*Aggregator) BestScore added in v0.0.23

func (a *Aggregator) BestScore(scoredDatum *pb.ScoredDatum) float64

func (*Aggregator) Insert added in v0.0.23

func (a *Aggregator) Insert(scoredDatum *pb.ScoredDatum) error

func (*Aggregator) InsertToList added in v0.0.23

func (a *Aggregator) InsertToList(scoredDatum *pb.ScoredDatum) error

func (*Aggregator) IsNewScoredBetter added in v0.0.23

func (a *Aggregator) IsNewScoredBetter(old, new float64) bool

func (*Aggregator) One added in v0.0.23

func (a *Aggregator) One() *pb.ScoredDatum

func (*Aggregator) Result added in v0.0.23

func (a *Aggregator) Result() []*pb.ScoredDatum

type AggregatorInterface added in v0.0.23

type AggregatorInterface interface {
	Insert(*pb.ScoredDatum) error
	One() *pb.ScoredDatum
	Result() []*pb.ScoredDatum
}

func NewAggrator added in v0.0.23

func NewAggrator(config *pb.SearchConfig, grouped bool, context *pb.SearchContext) AggregatorInterface

type Annoyer added in v0.0.45

type Annoyer struct {
	sync.RWMutex
	DataIndex  *[]*pb.Datum
	AnnoyIndex annoyindex.AnnoyIndexEuclidean
}

type Collector added in v0.0.22

type Collector struct {
	List           []*pb.ScoredDatum
	ScoreFunc      func(arr1 []float32, arr2 []float32) float64
	MaxScore       float64
	DatumKey       *pb.DatumKey
	N              uint32
	HigherIsBetter bool
	Filters        []string
	GroupFilters   []string
}

Collector collects results

func (*Collector) Insert added in v0.0.22

func (c *Collector) Insert(scoredDatum *pb.ScoredDatum) error

Insert adds a new scored datum to the collector

func (*Collector) PassesFilters added in v0.0.45

func (c *Collector) PassesFilters(datum *pb.Datum) bool

func (*Collector) Send added in v0.0.22

func (c *Collector) Send(buf *z.Buffer) error

Send collects the results

func (*Collector) ToList added in v0.0.30

func (c *Collector) ToList(key []byte, itr *badger.Iterator) (*bpb.KVList, error)

ToList is a default implementation of KeyToList. It picks up all valid versions of the key, skipping over deleted or expired keys. TODO: update to badger/v3 allocators

type Data

type Data struct {
	Config      *pb.DataConfig
	Path        string
	Avg         []float32
	N           uint64
	MaxDistance float64
	Hist        []float32
	Timestamp   uint64
	DB          *badger.DB
	DBPath      string
	Dirty       bool
	Sources     *cache.Cache
	QueryCache  *cache.Cache
	Initialized bool
	Alive       bool
	Annoyer     Annoyer
}

Data represents a dataset with similar structure

func NewData

func NewData(config *pb.DataConfig, dataPath string) (*Data, error)

NewData creates a data struct

func NewPreData added in v0.0.22

func NewPreData(config *pb.DataConfig, dataPath string) *Data

NewPreData creates a data struct

func NewTempData

func NewTempData() (*Data, error)

NewTempData returns an in-memory badger instance

func (*Data) AddSource added in v0.0.22

func (dt *Data) AddSource(dataSource DataSource) error

AddSource adds a source

func (*Data) AggregatedSearch added in v0.0.23

func (dt *Data) AggregatedSearch(datum *pb.Datum, scoredDatumStreamOutput chan<- *pb.ScoredDatum, upperWaitGroup *sync.WaitGroup, config *pb.SearchConfig) error

AggregatedSearch searches and merges other resources

func (*Data) Close added in v0.0.22

func (dt *Data) Close() error

Close currently closes underlying kv store

func (*Data) Delete

func (dt *Data) Delete(datum *pb.Datum) error

Delete deletes data from the internal kv store

func (*Data) DeletePath added in v0.0.25

func (dt *Data) DeletePath() error

DeletePath currently deletes the underlying data folder and ignores errors.

func (*Data) GetConfig added in v0.0.22

func (d *Data) GetConfig() *pb.DataConfig

func (*Data) GetDataInfo added in v0.0.22

func (dt *Data) GetDataInfo() *pb.DataInfo

GetDataInfo returns the DataInfo of the data

func (*Data) GetID added in v0.0.22

func (dt *Data) GetID() string

func (*Data) InitData added in v0.0.22

func (dt *Data) InitData() error

func (*Data) Insert

func (dt *Data) Insert(datum *pb.Datum, config *pb.InsertConfig) error

Insert inserts data to internal kv store

func (*Data) InsertStreamSample added in v0.0.22

func (dt *Data) InsertStreamSample(datumStream chan<- *pb.InsertDatumWithConfig, fraction float64) error

func (*Data) MultiAggregatedSearch added in v0.0.23

func (dt *Data) MultiAggregatedSearch(datumList []*pb.Datum, config *pb.SearchConfig, context *pb.SearchContext) ([]*pb.ScoredDatum, error)

MultiAggregatedSearch searches and merges other resources

func (*Data) Process

func (dt *Data) Process(force bool) error

Process runs through keys and calculates statistics

func (*Data) Run

func (dt *Data) Run() error

Run runs statistical calculation regularly

func (*Data) RunOnRandomSources added in v0.0.46

func (dt *Data) RunOnRandomSources(sourceFunction func(dataSource DataSource) error) error

func (*Data) Search added in v0.0.22

func (dt *Data) Search(datum *pb.Datum, config *pb.SearchConfig) *Collector

Search does a search based on distances of keys

func (*Data) SearchAnnoy added in v0.0.45

func (dt *Data) SearchAnnoy(datum *pb.Datum, config *pb.SearchConfig) *Collector

SearchAnnoy does a search based on distances of keys

func (*Data) StreamAll added in v0.0.22

func (dt *Data) StreamAll(datumStream chan<- *pb.Datum) error

func (*Data) StreamSample added in v0.0.22

func (dt *Data) StreamSample(datumStream chan<- *pb.Datum, fraction float64) error

func (*Data) StreamSearch added in v0.0.22

func (dt *Data) StreamSearch(datum *pb.Datum, scoredDatumStream chan<- *pb.ScoredDatum, queryWaitGroup *sync.WaitGroup, config *pb.SearchConfig) error

StreamSearch does a search based on distances of keys

func (*Data) Sync added in v0.0.22

func (dt *Data) Sync(source DataSource, waitGroup *sync.WaitGroup) error

func (*Data) SyncAll added in v0.0.22

func (dt *Data) SyncAll() error

type DataSource added in v0.0.22

type DataSource interface {
	StreamSearch(datumList *pb.Datum, scoredDatumStream chan<- *pb.ScoredDatum, queryWaitGroup *sync.WaitGroup, config *pb.SearchConfig) error
	Insert(datum *pb.Datum, config *pb.InsertConfig) error
	GetDataInfo() *pb.DataInfo
	GetID() string
}

type Dataset added in v0.0.22

type Dataset struct {
	DataList *cache.Cache
	Path     string
	DataPath string
}

func NewDataset added in v0.0.22

func NewDataset(datasetPath string) *Dataset

func (*Dataset) Close added in v0.0.23

func (dts *Dataset) Close() error

func (*Dataset) CreateIfNotExists added in v0.0.22

func (dts *Dataset) CreateIfNotExists(config *pb.DataConfig) error

func (*Dataset) DataConfigList added in v0.0.22

func (dts *Dataset) DataConfigList() []*pb.DataConfig

func (*Dataset) Delete added in v0.0.22

func (dts *Dataset) Delete(name string) error

func (*Dataset) Get added in v0.0.22

func (dts *Dataset) Get(name string) (*Data, error)

func (*Dataset) GetNoCreate added in v0.0.43

func (dts *Dataset) GetNoCreate(name string) (*Data, error)

func (*Dataset) GetNoOp added in v0.0.43

func (dts *Dataset) GetNoOp(name string) (*Data, error)

func (*Dataset) GetOrCreateIfNotExists added in v0.0.22

func (dts *Dataset) GetOrCreateIfNotExists(config *pb.DataConfig) (*Data, error)

func (*Dataset) List added in v0.0.22

func (dts *Dataset) List() []string

func (*Dataset) LoadIndex added in v0.0.23

func (dts *Dataset) LoadIndex() error

func (*Dataset) SaveIndex added in v0.0.23

func (dts *Dataset) SaveIndex() error

type InsertStreamCollector added in v0.0.22

type InsertStreamCollector struct {
	DatumStream chan<- *pb.InsertDatumWithConfig
}

InsertStreamCollector collects results

func (*InsertStreamCollector) Send added in v0.0.22

func (c *InsertStreamCollector) Send(buf *z.Buffer) error

Send collects the results

type StreamCollector added in v0.0.22

type StreamCollector struct {
	DatumStream chan<- *pb.Datum
}

StreamCollector collects results

func (*StreamCollector) Send added in v0.0.22

func (c *StreamCollector) Send(buf *z.Buffer) error

Send collects the results

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL