bm25

package
v0.0.0-...-a928590 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 9, 2024 License: MIT Imports: 7 Imported by: 1

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CountTermFreq

func CountTermFreq(term string, doc string, tokenizer func(string) []string) (int, error)

CountTermFreq counts the frequency of a term in a document using the provided tokenizer function.

func JoinTokens

func JoinTokens(tokens []string, separator string) string

JoinTokens joins the tokens in a document into a single string using the provided separator.

func Min

func Min(a, b int) int

Min returns the minimum of two integers.

func TopNIndices

func TopNIndices(scores []float64, n int) ([]int, error)

TopNIndices returns the indices of the top N scores in the given slice.

Types

type BM25

type BM25 interface {
	CorpusSize() int
	AvgDocLen() float64
	DocLengths() []int
	IDF(term string) (float64, error)
	GetScores(query []string) ([]float64, error)
	GetBatchScores(query []string, docIDs []int) ([]float64, error)
	GetTopN(query []string, n int) ([]string, error)
}

BM25 is an interface that defines the common methods for all BM25 variants.

type BM25Adpt

type BM25Adpt struct {
	*Bm25Base
	// contains filtered or unexported fields
}

BM25Adpt is an implementation of the BM25-Adpt variant of BM25.

func NewBM25Adpt

func NewBM25Adpt(corpus []string, tokenizer func(string) []string, k1 float64, b float64, delta float64, logger *log.Logger) (*BM25Adpt, error)

NewBM25Adpt creates a new instance of the BM25Adpt struct.

func (*BM25Adpt) GetBatchScores

func (a *BM25Adpt) GetBatchScores(query []string, docIDs []int) ([]float64, error)

GetBatchScores returns the BM25 scores for the given query and a subset of documents.

func (*BM25Adpt) GetScores

func (a *BM25Adpt) GetScores(query []string) ([]float64, error)

GetScores returns the BM25 scores for the given query.

func (*BM25Adpt) GetTopN

func (a *BM25Adpt) GetTopN(query []string, n int) ([]string, error)

GetTopN returns the top N documents for the given query.

type BM25L

type BM25L struct {
	*Bm25Base
	// contains filtered or unexported fields
}

BM25L is an implementation of the BM25L variant.

func NewBM25L

func NewBM25L(corpus []string, tokenizer func(string) []string, k1 float64, b float64, logger *log.Logger) (*BM25L, error)

NewBM25L creates a new instance of the BM25L struct.

func (*BM25L) GetBatchScores

func (l *BM25L) GetBatchScores(query []string, docIDs []int) ([]float64, error)

GetBatchScores returns the BM25 scores for the given query and a subset of documents.

func (*BM25L) GetScores

func (l *BM25L) GetScores(query []string) ([]float64, error)

GetScores returns the BM25 scores for the given query.

func (*BM25L) GetTopN

func (l *BM25L) GetTopN(query []string, n int) ([]string, error)

GetTopN returns the top N documents for the given query.

type BM25Okapi

type BM25Okapi struct {
	*Bm25Base
	// contains filtered or unexported fields
}

BM25Okapi is an implementation of the Okapi BM25 variant.

func NewBM25Okapi

func NewBM25Okapi(corpus []string, tokenizer func(string) []string, k1 float64, b float64, logger *log.Logger) (*BM25Okapi, error)

NewBM25Okapi creates a new instance of the BM25Okapi struct.

func (*BM25Okapi) GetBatchScores

func (o *BM25Okapi) GetBatchScores(query []string, docIDs []int) ([]float64, error)

GetBatchScores returns the BM25 scores for the given query and a subset of documents.

func (*BM25Okapi) GetScores

func (o *BM25Okapi) GetScores(query []string) ([]float64, error)

GetScores returns the BM25 scores for the given query.

func (*BM25Okapi) GetTopN

func (o *BM25Okapi) GetTopN(query []string, n int) ([]string, error)

GetTopN returns the top N documents for the given query.

type BM25Plus

type BM25Plus struct {
	*Bm25Base
	// contains filtered or unexported fields
}

BM25Plus is an implementation of the BM25+ variant of BM25.

func NewBM25Plus

func NewBM25Plus(corpus []string, tokenizer func(string) []string, k1 float64, b float64, delta float64, epsilon float64, logger *log.Logger) (*BM25Plus, error)

NewBM25Plus creates a new instance of the BM25Plus struct.

func (*BM25Plus) GetBatchScores

func (p *BM25Plus) GetBatchScores(query []string, docIDs []int) ([]float64, error)

GetBatchScores returns the BM25 scores for the given query and a subset of documents.

func (*BM25Plus) GetScores

func (p *BM25Plus) GetScores(query []string) ([]float64, error)

GetScores returns the BM25 scores for the given query.

func (*BM25Plus) GetTopN

func (p *BM25Plus) GetTopN(query []string, n int) ([]string, error)

GetTopN returns the top N documents for the given query.

type BM25T

type BM25T struct {
	*Bm25Base
	// contains filtered or unexported fields
}

BM25T is an implementation of the BM25T variant.

func NewBM25T

func NewBM25T(corpus []string, tokenizer func(string) []string, k1 float64, b float64, delta float64, logger *log.Logger) (*BM25T, error)

NewBM25T creates a new instance of the BM25T struct.

func (*BM25T) GetBatchScores

func (t *BM25T) GetBatchScores(query []string, docIDs []int) ([]float64, error)

GetBatchScores returns the BM25 scores for the given query and a subset of documents.

func (*BM25T) GetScores

func (t *BM25T) GetScores(query []string) ([]float64, error)

GetScores returns the BM25 scores for the given query.

func (*BM25T) GetTopN

func (t *BM25T) GetTopN(query []string, n int) ([]string, error)

GetTopN returns the top N documents for the given query.

type Bm25Base

type Bm25Base struct {
	// contains filtered or unexported fields
}

Bm25Base is a base struct that holds common fields and methods for all BM25 variants.

func NewBM25Base

func NewBM25Base(corpus []string, tokenizer func(string) []string, logger *log.Logger) (*Bm25Base, error)

NewBM25Base creates a new instance of the Bm25Base struct.

func (*Bm25Base) AvgDocLen

func (b *Bm25Base) AvgDocLen() float64

AvgDocLen returns the average document length in the corpus.

func (*Bm25Base) CorpusSize

func (b *Bm25Base) CorpusSize() int

CorpusSize returns the size of the corpus.

func (*Bm25Base) DocLengths

func (b *Bm25Base) DocLengths() []int

DocLengths returns the lengths of all documents in the corpus.

func (*Bm25Base) GetBatchScores

func (b *Bm25Base) GetBatchScores(query []string, docIDs []int) ([]float64, error)

GetBatchScores returns the BM25 scores for the given query and a subset of documents.

func (*Bm25Base) GetBatchScoresBatched

func (b *Bm25Base) GetBatchScoresBatched(query []string, docIDs []int, bm25 BM25, batchSize int) ([]float64, error)

GetBatchScoresBatched returns the BM25 scores for the given query and a subset of documents using parallel computation with batching.

func (*Bm25Base) GetBatchScoresParallel

func (b *Bm25Base) GetBatchScoresParallel(query []string, docIDs []int, bm25 BM25) ([]float64, error)

GetBatchScoresParallel returns the BM25 scores for the given query and a subset of documents using parallel computation.

func (*Bm25Base) GetScores

func (b *Bm25Base) GetScores(query []string) ([]float64, error)

GetScores returns the BM25 scores for the given query.

func (*Bm25Base) GetScoresBatched

func (b *Bm25Base) GetScoresBatched(query []string, bm25 BM25, batchSize int) ([]float64, error)

GetScoresBatched returns the BM25 scores for the given query using parallel computation with batching.

func (*Bm25Base) GetScoresParallel

func (b *Bm25Base) GetScoresParallel(query []string, bm25 BM25) ([]float64, error)

GetScoresParallel returns the BM25 scores for the given query using parallel computation.

func (*Bm25Base) GetTopN

func (b *Bm25Base) GetTopN(query []string, n int) ([]string, error)

GetTopN returns the top N documents for the given query.

func (*Bm25Base) GetTopNBatched

func (b *Bm25Base) GetTopNBatched(query []string, n int, bm25 BM25, batchSize int) ([]string, error)

GetTopNBatched returns the top N documents for the given query using parallel computation with batching.

func (*Bm25Base) GetTopNParallel

func (b *Bm25Base) GetTopNParallel(query []string, n int, bm25 BM25) ([]string, error)

GetTopNParallel returns the top N documents for the given query using parallel computation.

func (*Bm25Base) IDF

func (b *Bm25Base) IDF(term string) (float64, error)

IDF returns the inverse document frequency (IDF) of the given term.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL