Documentation ¶
Overview ¶
Functions for finding documents by full text search
Functions for parsing a search query
Index ¶
- Variables
- func Bigrams(terms []string) []string
- func LoadColMap(r io.Reader) (map[string]string, error)
- func LoadDocInfo(r io.Reader) (map[string]DocInfo, map[string]DocInfo)
- type BM25Score
- type Collection
- type DictQueryParser
- type DocFinder
- type DocInfo
- type DocTitleRecord
- type Document
- type QueryParser
- type QueryResults
- type TermFreqDocFinder
- type TextSegment
- type TitleFinder
- func NewFileTitleFinder(colMap map[string]string, dInfoCN, docMap map[string]DocInfo) TitleFinder
- func NewFirestoreTitleFinder(client fsClient, corpus string, generation int, colMap map[string]string, ...) TitleFinder
- func NewMysqlTitleFinder(ctx context.Context, database *sql.DB, colMap map[string]string, ...) (TitleFinder, error)
Constants ¶
This section is empty.
Variables ¶
var WEIGHT = []float64{0.3606522, 2.4427158, 3.84494291, 2.74137199} // [BM25 words, BM25 bigrams, bit vector, similar title]
WEIGHT holds the weights of the scoring components, obtained from logistic regression.
Functions ¶
func LoadColMap ¶ added in v0.0.112
LoadColMap gets the list of titles of collections in the corpus. Key: gloss_file, value: title.
Types ¶
type Collection ¶
type Collection struct {
GlossFile, Title string
}
type DictQueryParser ¶
type DictQueryParser struct {
Tokenizer *tokenizer.DictTokenizer[*dicttypes.Word]
}
func (DictQueryParser) ParseQuery ¶
func (parser DictQueryParser) ParseQuery(query string) []TextSegment
ParseQuery parses the query text into text segments based on dictionary lookups.
type DocFinder ¶ added in v0.0.17
type DocFinder interface { FindDocuments(ctx context.Context, dictSearcher dictionary.ReverseIndex, parser QueryParser, query string, advanced bool) (*QueryResults, error) FindDocumentsInCol(ctx context.Context, dictSearcher dictionary.ReverseIndex, parser QueryParser, query, col_gloss_file string) (*QueryResults, error) }
DocFinder finds documents.
func NewDocFinder ¶ added in v0.0.17
func NewDocFinder(tfDocFinder TermFreqDocFinder, titleFinder TitleFinder) DocFinder
NewDocFinder creates and initializes an implementation of the DocFinder interface
type DocInfo ¶ added in v0.0.60
type DocInfo struct {
CorpusFile, GlossFile, Title, TitleCN, TitleEN, CollectionFile, CollectionTitle string
}
type DocTitleRecord ¶ added in v0.0.123
type DocTitleRecord struct { RawFile string `firestore:"plain_text_file"` GlossFile string `firestore:"gloss_file"` DocTitle string `firestore:"title"` DocTitleZh string `firestore:"title_zh"` DocTitleEn string `firestore:"title_en"` ColGlossFile string `firestore:"col_gloss_file"` ColTitle string `firestore:"col_title"` ColPlusDocTitle string `firestore:"col_plus_doc_title"` Substrings []string `firestore:"substrings"` }
DocTitleRecord holds expanded document title information: "plain_text_file", "gloss_file", "title", "title_cn", "title_en", "col_gloss_file", "col_title", "col_plus_doc_title".
type Document ¶
type QueryParser ¶
type QueryParser interface {
ParseQuery(query string) []TextSegment
}
QueryParser parses input queries into a slice of text segments.
func NewQueryParser ¶ added in v0.0.101
func NewQueryParser(dict map[string]*dicttypes.Word) QueryParser
NewQueryParser creates a QueryParser.
type QueryResults ¶
type QueryResults struct {
Query, CollectionFile string
NumCollections, NumDocuments int
Collections []Collection
Documents []Document
Terms []TextSegment
SimilarTerms []TextSegment
}
type TermFreqDocFinder ¶ added in v0.0.105
type TermFreqDocFinder interface { FindDocsTermFreq(ctx context.Context, terms []string) ([]BM25Score, error) FindDocsBigramFreq(ctx context.Context, bigrams []string) ([]BM25Score, error) FindDocsTermCo(ctx context.Context, terms []string, col string) ([]BM25Score, error) FindDocsBigramCo(ctx context.Context, bigrams []string, col string) ([]BM25Score, error) }
TermFreqDocFinder finds documents by term and bigram frequency, returning BM25 scores.
type TextSegment ¶
A text segment contains the QueryText searched for and possibly a matching dictionary entry. There will only be matching dictionary entries for Chinese words in the dictionary. Non-Chinese text, punctuation, and unknown Chinese words will have nil DictEntry values and matching values will be included in the Senses field.
type TitleFinder ¶ added in v0.0.105
type TitleFinder interface { CountCollections(ctx context.Context, query string) (int, error) FindCollections(ctx context.Context, query string) []Collection FindDocsByTitle(ctx context.Context, query string) ([]Document, error) FindDocsByTitleInCol(ctx context.Context, query, col_gloss_file string) ([]Document, error) ColMap() map[string]string DocMap() map[string]DocInfo }
func NewFileTitleFinder ¶ added in v0.0.107
func NewFileTitleFinder(colMap map[string]string, dInfoCN, docMap map[string]DocInfo) TitleFinder
NewFileTitleFinder initializes a TitleFinder implementation.
func NewFirestoreTitleFinder ¶ added in v0.0.138
func NewFirestoreTitleFinder(client fsClient, corpus string, generation int, colMap map[string]string, dInfoCN, docMap map[string]DocInfo) TitleFinder
NewFirestoreTitleFinder initializes a TitleFinder implementation using Firestore queries.