Documentation ¶
Index ¶
- func CountTermFreq(term string, doc string, tokenizer func(string) []string) (int, error)
- func JoinTokens(tokens []string, separator string) string
- func Min(a, b int) int
- func TopNIndices(scores []float64, n int) ([]int, error)
- type BM25
- type BM25Adpt
- type BM25L
- type BM25Okapi
- type BM25Plus
- type BM25T
- type Bm25Base
- func (b *Bm25Base) AvgDocLen() float64
- func (b *Bm25Base) CorpusSize() int
- func (b *Bm25Base) DocLengths() []int
- func (b *Bm25Base) GetBatchScores(query []string, docIDs []int) ([]float64, error)
- func (b *Bm25Base) GetBatchScoresBatched(query []string, docIDs []int, bm25 BM25, batchSize int) ([]float64, error)
- func (b *Bm25Base) GetBatchScoresParallel(query []string, docIDs []int, bm25 BM25) ([]float64, error)
- func (b *Bm25Base) GetScores(query []string) ([]float64, error)
- func (b *Bm25Base) GetScoresBatched(query []string, bm25 BM25, batchSize int) ([]float64, error)
- func (b *Bm25Base) GetScoresParallel(query []string, bm25 BM25) ([]float64, error)
- func (b *Bm25Base) GetTopN(query []string, n int) ([]string, error)
- func (b *Bm25Base) GetTopNBatched(query []string, n int, bm25 BM25, batchSize int) ([]string, error)
- func (b *Bm25Base) GetTopNParallel(query []string, n int, bm25 BM25) ([]string, error)
- func (b *Bm25Base) IDF(term string) (float64, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CountTermFreq ¶
func CountTermFreq(term string, doc string, tokenizer func(string) []string) (int, error)
CountTermFreq counts the frequency of a term in a document using the provided tokenizer function.
func JoinTokens ¶
func JoinTokens(tokens []string, separator string) string
JoinTokens joins the tokens in a document into a single string using the provided separator.
Types ¶
type BM25 ¶
type BM25 interface {
	CorpusSize() int
	AvgDocLen() float64
	DocLengths() []int
	IDF(term string) (float64, error)
	GetScores(query []string) ([]float64, error)
	GetBatchScores(query []string, docIDs []int) ([]float64, error)
	GetTopN(query []string, n int) ([]string, error)
}
BM25 is an interface that defines the common methods for all BM25 variants.
type BM25Adpt ¶
type BM25Adpt struct {
	*Bm25Base
	// contains filtered or unexported fields
}
BM25Adpt is an implementation of the BM25Adpt variant.
func NewBM25Adpt ¶
func NewBM25Adpt(corpus []string, tokenizer func(string) []string, k1 float64, b float64, delta float64, logger *log.Logger) (*BM25Adpt, error)
NewBM25Adpt creates a new instance of the BM25Adpt struct.
func (*BM25Adpt) GetBatchScores ¶
GetBatchScores returns the BM25 scores for the given query and a subset of documents.
type BM25L ¶
type BM25L struct {
	*Bm25Base
	// contains filtered or unexported fields
}
BM25L is an implementation of the BM25L variant.
func NewBM25L ¶
func NewBM25L(corpus []string, tokenizer func(string) []string, k1 float64, b float64, logger *log.Logger) (*BM25L, error)
NewBM25L creates a new instance of the BM25L struct.
func (*BM25L) GetBatchScores ¶
GetBatchScores returns the BM25 scores for the given query and a subset of documents.
type BM25Okapi ¶
type BM25Okapi struct {
	*Bm25Base
	// contains filtered or unexported fields
}
BM25Okapi is an implementation of the Okapi BM25 variant.
func NewBM25Okapi ¶
func NewBM25Okapi(corpus []string, tokenizer func(string) []string, k1 float64, b float64, logger *log.Logger) (*BM25Okapi, error)
NewBM25Okapi creates a new instance of the BM25Okapi struct.
func (*BM25Okapi) GetBatchScores ¶
GetBatchScores returns the BM25 scores for the given query and a subset of documents.
type BM25Plus ¶
type BM25Plus struct {
	*Bm25Base
	// contains filtered or unexported fields
}
BM25Plus is an implementation of the BM25Plus variant.
func NewBM25Plus ¶
func NewBM25Plus(corpus []string, tokenizer func(string) []string, k1 float64, b float64, delta float64, epsilon float64, logger *log.Logger) (*BM25Plus, error)
NewBM25Plus creates a new instance of the BM25Plus struct.
func (*BM25Plus) GetBatchScores ¶
GetBatchScores returns the BM25 scores for the given query and a subset of documents.
type BM25T ¶
type BM25T struct {
	*Bm25Base
	// contains filtered or unexported fields
}
BM25T is an implementation of the BM25T variant.
func NewBM25T ¶
func NewBM25T(corpus []string, tokenizer func(string) []string, k1 float64, b float64, delta float64, logger *log.Logger) (*BM25T, error)
NewBM25T creates a new instance of the BM25T struct.
func (*BM25T) GetBatchScores ¶
GetBatchScores returns the BM25 scores for the given query and a subset of documents.
type Bm25Base ¶
type Bm25Base struct {
// contains filtered or unexported fields
}
Bm25Base is a base struct that holds common fields and methods for all BM25 variants.
func NewBM25Base ¶
func NewBM25Base(corpus []string, tokenizer func(string) []string, logger *log.Logger) (*Bm25Base, error)
NewBM25Base creates a new instance of the Bm25Base struct.
func (*Bm25Base) CorpusSize ¶
func (b *Bm25Base) CorpusSize() int
CorpusSize returns the size of the corpus.
func (*Bm25Base) DocLengths ¶
func (b *Bm25Base) DocLengths() []int
DocLengths returns the lengths of all documents in the corpus.
func (*Bm25Base) GetBatchScores ¶
func (b *Bm25Base) GetBatchScores(query []string, docIDs []int) ([]float64, error)
GetBatchScores returns the BM25 scores for the given query and a subset of documents.
func (*Bm25Base) GetBatchScoresBatched ¶
func (b *Bm25Base) GetBatchScoresBatched(query []string, docIDs []int, bm25 BM25, batchSize int) ([]float64, error)
GetBatchScoresBatched returns the BM25 scores for the given query and a subset of documents using parallel computation with batching.
func (*Bm25Base) GetBatchScoresParallel ¶
func (b *Bm25Base) GetBatchScoresParallel(query []string, docIDs []int, bm25 BM25) ([]float64, error)
GetBatchScoresParallel returns the BM25 scores for the given query and a subset of documents using parallel computation.
func (*Bm25Base) GetScoresBatched ¶
func (b *Bm25Base) GetScoresBatched(query []string, bm25 BM25, batchSize int) ([]float64, error)
GetScoresBatched returns the BM25 scores for the given query using parallel computation with batching.
func (*Bm25Base) GetScoresParallel ¶
func (b *Bm25Base) GetScoresParallel(query []string, bm25 BM25) ([]float64, error)
GetScoresParallel returns the BM25 scores for the given query using parallel computation.
func (*Bm25Base) GetTopNBatched ¶
func (b *Bm25Base) GetTopNBatched(query []string, n int, bm25 BM25, batchSize int) ([]string, error)
GetTopNBatched returns the top N documents for the given query using parallel computation with batching.
func (*Bm25Base) GetTopNParallel ¶
func (b *Bm25Base) GetTopNParallel(query []string, n int, bm25 BM25) ([]string, error)
GetTopNParallel returns the top N documents for the given query using parallel computation.