search

package
v0.0.0-...-6f23c6b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 25, 2022 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
const SpiltThresholdDocNum int = 50000

Variables

This section is empty.

Functions

func Index

func Index(c config.Config)

func Merge

func Merge(srcPath, dstPath string)

func MergeAll

func MergeAll(c config.Config, files []string)

func Remove

func Remove(dir string, reg *regexp.Regexp) error

func Spilt

func Spilt(c config.Config, filePrefix string) (files []string)

func Walk

func Walk(dir string, re *regexp.Regexp) ([]string, error)

Types

type DoubleBuffer

type DoubleBuffer struct {
	CurrentIdx uint32 //current write index

	Indices []*index.HashMapIndex
	Queues  []chan index.Document
	// contains filtered or unexported fields
}

func NewDoubleBuffer

func NewDoubleBuffer() *DoubleBuffer

func (*DoubleBuffer) Add

func (b *DoubleBuffer) Add(doc index.Document)

func (*DoubleBuffer) Clear

func (b *DoubleBuffer) Clear()

func (*DoubleBuffer) DoAdd

func (b *DoubleBuffer) DoAdd()

func (*DoubleBuffer) DoFlush

func (b *DoubleBuffer) DoFlush()

DoFlush is unsafe.

func (*DoubleBuffer) Flush

func (b *DoubleBuffer) Flush()

func (*DoubleBuffer) ReadIndex

func (b *DoubleBuffer) ReadIndex() *index.HashMapIndex

func (*DoubleBuffer) Start

func (b *DoubleBuffer) Start() chan Message

func (*DoubleBuffer) Stop

func (b *DoubleBuffer) Stop()

func (*DoubleBuffer) WithDataRange

func (b *DoubleBuffer) WithDataRange(timestamp int64) *DoubleBuffer

type IndexArray

type IndexArray struct {
	// contains filtered or unexported fields
}

func NewIndexArray

func NewIndexArray() *IndexArray

func (*IndexArray) Add

func (b *IndexArray) Add(idx *index.BTreeIndex)

func (*IndexArray) Evict

func (b *IndexArray) Evict(dr index.DataRange) []*index.BTreeIndex

Evict evicts the indices that fall within the range dr.

func (*IndexArray) Hit

Hit finds the index that contains dr.

func (*IndexArray) Indices

func (b *IndexArray) Indices() []*index.BTreeIndex

func (*IndexArray) Swap

func (b *IndexArray) Swap(old *index.BTreeIndex, new *index.BTreeIndex) bool

func (*IndexArray) WithFile

func (b *IndexArray) WithFile(file string) *IndexArray

type IndexType

type IndexType int
const (
	FullIndex IndexType = iota
	AuxIndex
)

type Indexer

type Indexer interface {
	// Drain writes data to file, sorted by key.
	Drain(file string)
	Merge(file string)
}

type Message

type Message struct {
	MsgType MsgType
	Msg     string
}

type MsgType

type MsgType int
const (
	STOP MsgType = iota
	FLUSH
)

type Searcher

type Searcher struct {
	// contains filtered or unexported fields
}

func NewSearcher

func NewSearcher(file string) *Searcher

func (*Searcher) Add

func (srh *Searcher) Add(doc index.Document)

Add adds doc to the index. Writes go through the double buffer asynchronously and need a lock; reads do not.

func (*Searcher) Clear

func (srh *Searcher) Clear()

func (*Searcher) Count

func (srh *Searcher) Count() int

func (*Searcher) Del

func (srh *Searcher) Del(doc index.Document)

Del deletes doc from the index.

func (*Searcher) Drain

func (srh *Searcher) Drain(timestamp int)

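Drain writes the incremental index to disk. A true in-place update strategy requires reserving enough space at the end of each PostingList; otherwise a large number of PostingLists have to be moved, which is even less efficient. When disk space is sufficient, the re-merge strategy is used instead: it is simple to implement and does not affect concurrency, but it needs enough memory.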

func (*Searcher) Filter

func (srh *Searcher) Filter(docs []index.Doc) []index.Doc

Filter filters deleted docs out of docs.

func (*Searcher) InitParaphrase

func (srh *Searcher) InitParaphrase(file string)

func (*Searcher) Load

func (srh *Searcher) Load(file string, flag IndexType)

Load loads an index; it is used for rebuilding the index.

func (*Searcher) Paraphrase

func (srh *Searcher) Paraphrase(texts []string, n int) []string

func (*Searcher) Retrieval

func (srh *Searcher) Retrieval(terms []string, ext []string, model index.SearchModel) []index.Doc

func (*Searcher) Search

func (srh *Searcher) Search(query string) []index.Doc

Search queries the index for the given text. TODO: retrieval/recall (multi-channel recall) -> coarse ranking (CTR by LR) -> fine ranking (CVR by DNN) -> top-N (heap sort).

func (*Searcher) SearchTips

func (srh *Searcher) SearchTips() []string

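SearchTips TODO: support search suggestions. A Trie suits English dictionaries; if the system holds a large number of strings that share almost no common prefix, the corresponding trie consumes a great deal of memory (see "Data Structures: the Trie"). A Double-Array Trie suits Chinese dictionaries and has a small memory footprint.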

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL