Documentation ¶
Index ¶
- type DocStore
- type DocVector
- type InvertedIndex
- type Metadata
- type PipeIndexProcessor
- func (p *PipeIndexProcessor) BuildQueryVector(concordance map[string]uint64) *QueryVector
- func (p *PipeIndexProcessor) BuildTFIDF()
- func (p *PipeIndexProcessor) Dump()
- func (p *PipeIndexProcessor) GetDoc() uint64
- func (p *PipeIndexProcessor) GetDocCapacity() uint64
- func (p *PipeIndexProcessor) GetVocabulary() uint64
- func (p *PipeIndexProcessor) GetVocabularyCapacity() uint64
- func (p *PipeIndexProcessor) Load()
- func (p *PipeIndexProcessor) MarkServiceAvailable()
- func (p *PipeIndexProcessor) MarkServiceUnavailable()
- func (p *PipeIndexProcessor) ServiceAvailable() bool
- func (p *PipeIndexProcessor) TermsIndexing(pGroup *sync.WaitGroup, input common.ConcordanceChannel)
- func (p *PipeIndexProcessor) TopK(k uint32, q *QueryVector) []string
- type Posting
- type PostingList
- type PriorityQueue
- type QueryVector
- type Shard
- type SimilarObject
- type TFIDF
- type VocabularyStore
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type DocStore ¶
type DocStore struct {
	BitSet [][]byte `json:"bit_set"`
	// contains filtered or unexported fields
}
DocStore 用于存储文档记录
type InvertedIndex ¶
InvertedIndex 倒排索引数据结构
type Metadata ¶
type Metadata struct {
	Doc              uint64           `json:"doc"`
	DocStore         *DocStore        `json:"doc_store"`
	Vocabulary       uint64           `json:"vocabulary"`
	VocabularyStore  *VocabularyStore `json:"vocabulary_store"`
	MaxTermFrequency uint64           `json:"max_term_frequency"`
}
Metadata 倒排索引数据结构的元数据
type PipeIndexProcessor ¶
type PipeIndexProcessor struct {
// contains filtered or unexported fields
}
PipeIndexProcessor 索引器, 目前规划最大支持 10 万词汇量、1 万文档.
func NewPipeIndexProcessor ¶
func NewPipeIndexProcessor(cfg *conf.IndexerConfig, storage storage.Persister) *PipeIndexProcessor
NewPipeIndexProcessor 新建索引器.
func (*PipeIndexProcessor) BuildQueryVector ¶
func (p *PipeIndexProcessor) BuildQueryVector(concordance map[string]uint64) *QueryVector
BuildQueryVector 构造查询向量.
func (*PipeIndexProcessor) BuildTFIDF ¶
func (p *PipeIndexProcessor) BuildTFIDF()
BuildTFIDF 构造TF-IDF数据结构.
func (*PipeIndexProcessor) GetDocCapacity ¶
func (p *PipeIndexProcessor) GetDocCapacity() uint64
GetDocCapacity 返回文档总量上限.
func (*PipeIndexProcessor) GetVocabulary ¶
func (p *PipeIndexProcessor) GetVocabulary() uint64
GetVocabulary 返回词汇总量.
func (*PipeIndexProcessor) GetVocabularyCapacity ¶
func (p *PipeIndexProcessor) GetVocabularyCapacity() uint64
GetVocabularyCapacity 返回词汇总量上限.
func (*PipeIndexProcessor) MarkServiceAvailable ¶
func (p *PipeIndexProcessor) MarkServiceAvailable()
MarkServiceAvailable 将服务标记为可用.
func (*PipeIndexProcessor) MarkServiceUnavailable ¶
func (p *PipeIndexProcessor) MarkServiceUnavailable()
MarkServiceUnavailable 将服务标记为不可用.
func (*PipeIndexProcessor) ServiceAvailable ¶
func (p *PipeIndexProcessor) ServiceAvailable() bool
ServiceAvailable 服务是否可用.
func (*PipeIndexProcessor) TermsIndexing ¶
func (p *PipeIndexProcessor) TermsIndexing(pGroup *sync.WaitGroup, input common.ConcordanceChannel)
TermsIndexing 为词条建立索引结构.
func (*PipeIndexProcessor) TopK ¶
func (p *PipeIndexProcessor) TopK(k uint32, q *QueryVector) []string
TopK 计算查询向量与文档向量集合中各个向量的相似度, 并返回最相似的 k 个文档.
type Posting ¶
type Posting struct {
	TermFrequency uint64   `json:"term_frequency"`
	DocIdx        uint64   `json:"doc_idx"`
	DocID         string   `json:"doc_id"`
	Next          *Posting `json:"next"`
}
Posting 信息单元
type PostingList ¶
type PostingList struct {
	TermID       string   `json:"term_id"`
	DocFrequency uint64   `json:"doc_frequency"`
	Postings     *Posting `json:"postings"`
}
PostingList 信息列表
type PriorityQueue ¶
type PriorityQueue []*SimilarObject
PriorityQueue 用于筛选TopK文档的优先队列
func (PriorityQueue) Len ¶
func (pq PriorityQueue) Len() int
func (PriorityQueue) Less ¶
func (pq PriorityQueue) Less(i, j int) bool
func (*PriorityQueue) Pop ¶
func (pq *PriorityQueue) Pop() interface{}
func (*PriorityQueue) Push ¶
func (pq *PriorityQueue) Push(x interface{})
func (PriorityQueue) Swap ¶
func (pq PriorityQueue) Swap(i, j int)
type Shard ¶
type Shard struct {
	Backend map[string]*PostingList `json:"backend"`
	// contains filtered or unexported fields
}
Shard 倒排索引数据结构的局部字典
type SimilarObject ¶
SimilarObject 相似文档记录
type VocabularyStore ¶
type VocabularyStore struct {
	BitSet [][]byte `json:"bit_set"`
	// contains filtered or unexported fields
}
VocabularyStore 用于存储词汇量记录
Click to show internal directories.
Click to hide internal directories.