Documentation ¶
Overview ¶
Package query provides a simple query DSL on top of sorted arrays of integers.
Index ¶
- Constants
- Variables
- func AppendFileNameTerm(fn string, docs []int32) error
- func AppendFilePayload(f *os.File, size int64, b []byte) error
- func AppendFileTerm(f *os.File, docs []int32) error
- type AndQuery
- func (q *AndQuery) AddSubQuery(sub Query) Query
- func (q *AndQuery) Advance(target int32) int32
- func (q *AndQuery) Cost() int
- func (q *AndQuery) GetDocId() int32
- func (q *AndQuery) Next() int32
- func (q *AndQuery) PayloadDecode(p Payload)
- func (q *AndQuery) Score() float32
- func (q *AndQuery) SetBoost(b float32) Query
- func (q *AndQuery) SetNot(not Query) *AndQuery
- func (q *AndQuery) String() string
- type ConstantQuery
- func (q *ConstantQuery) AddSubQuery(Query) Query
- func (q *ConstantQuery) Advance(target int32) int32
- func (q *ConstantQuery) Cost() int
- func (q *ConstantQuery) GetDocId() int32
- func (q *ConstantQuery) Next() int32
- func (q *ConstantQuery) PayloadDecode(p Payload)
- func (q *ConstantQuery) Score() float32
- func (q *ConstantQuery) SetBoost(b float32) Query
- func (q *ConstantQuery) String() string
- type DisMaxQuery
- func (q *DisMaxQuery) AddSubQuery(sub Query) Query
- func (q *DisMaxQuery) Advance(target int32) int32
- func (q *DisMaxQuery) Cost() int
- func (q *DisMaxQuery) GetDocId() int32
- func (q *DisMaxQuery) Next() int32
- func (q *DisMaxQuery) PayloadDecode(p Payload)
- func (q *DisMaxQuery) Score() float32
- func (q *DisMaxQuery) SetBoost(b float32) Query
- func (q *DisMaxQuery) String() string
- type FileTermData
- func (t *FileTermData) AddSubQuery(Query) Query
- func (t *FileTermData) Advance(target int32) int32
- func (t *FileTermData) Close()
- func (t *FileTermData) Cost() int
- func (t *FileTermData) GetDocId() int32
- func (t *FileTermData) Next() int32
- func (t *FileTermData) PayloadDecode(p Payload)
- func (t *FileTermData) Score() float32
- func (t *FileTermData) SetBoost(b float32) Query
- func (t *FileTermData) String() string
- type OrQuery
- func (q *OrQuery) AddSubQuery(sub Query) Query
- func (q *OrQuery) Advance(target int32) int32
- func (q *OrQuery) Cost() int
- func (q *OrQuery) GetDocId() int32
- func (q *OrQuery) Next() int32
- func (q *OrQuery) PayloadDecode(p Payload)
- func (q *OrQuery) Score() float32
- func (q *OrQuery) SetBoost(b float32) Query
- func (q *OrQuery) String() string
- type Payload
- type PayloadTermQuery
- func (t *PayloadTermQuery) AddSubQuery(Query) Query
- func (t *PayloadTermQuery) Advance(target int32) int32
- func (t *PayloadTermQuery) Cost() int
- func (t *PayloadTermQuery) GetDocId() int32
- func (t *PayloadTermQuery) Next() int32
- func (t *PayloadTermQuery) PayloadDecode(p Payload)
- func (t *PayloadTermQuery) Score() float32
- func (t *PayloadTermQuery) SetBoost(b float32) Query
- func (t *PayloadTermQuery) String() string
- type Query
- type TermQuery
- func (t *TermQuery) AddSubQuery(Query) Query
- func (t *TermQuery) Advance(target int32) int32
- func (t *TermQuery) Cost() int
- func (t *TermQuery) GetDocId() int32
- func (t *TermQuery) Next() int32
- func (t *TermQuery) PayloadDecode(p Payload)
- func (t *TermQuery) Score() float32
- func (t *TermQuery) SetBoost(b float32) Query
- func (t *TermQuery) String() string
- type TermTFQuery
- func (t *TermTFQuery) AddSubQuery(Query) Query
- func (t *TermTFQuery) Advance(target int32) int32
- func (t *TermTFQuery) Cost() int
- func (t *TermTFQuery) GetDocId() int32
- func (t *TermTFQuery) Next() int32
- func (t *TermTFQuery) PayloadDecode(p Payload)
- func (t *TermTFQuery) Score() float32
- func (t *TermTFQuery) SetBoost(b float32) Query
- func (t *TermTFQuery) String() string
Constants ¶
const ( NO_MORE = int32(math.MaxInt32) NOT_READY = int32(-1) )
Variables ¶
var ByteOrder = binary.LittleEndian
var TERM_CHUNK_SIZE = 4096
Splits the postings list into chunks that are binary searched; inside each chunk, the next advance() target is found by linear search.
Functions ¶
func AppendFileNameTerm ¶
Types ¶
type AndQuery ¶
type AndQuery struct {
// contains filtered or unexported fields
}
func (*AndQuery) AddSubQuery ¶
func (*AndQuery) PayloadDecode ¶
type ConstantQuery ¶
type ConstantQuery struct {
// contains filtered or unexported fields
}
func Constant ¶
func Constant(boost float32, q Query) *ConstantQuery
func (*ConstantQuery) AddSubQuery ¶
func (q *ConstantQuery) AddSubQuery(Query) Query
func (*ConstantQuery) Advance ¶
func (q *ConstantQuery) Advance(target int32) int32
func (*ConstantQuery) Cost ¶
func (q *ConstantQuery) Cost() int
func (*ConstantQuery) GetDocId ¶
func (q *ConstantQuery) GetDocId() int32
func (*ConstantQuery) Next ¶
func (q *ConstantQuery) Next() int32
func (*ConstantQuery) PayloadDecode ¶
func (q *ConstantQuery) PayloadDecode(p Payload)
func (*ConstantQuery) Score ¶
func (q *ConstantQuery) Score() float32
func (*ConstantQuery) SetBoost ¶
func (q *ConstantQuery) SetBoost(b float32) Query
func (*ConstantQuery) String ¶
func (q *ConstantQuery) String() string
type DisMaxQuery ¶
type DisMaxQuery struct {
// contains filtered or unexported fields
}
func DisMax ¶
func DisMax(tieBreaker float32, queries ...Query) *DisMaxQuery
Creates DisMax query, for example if the query is:
DisMax(0.5, "name:amsterdam","name:university","name:free")
Let's say we have an index with the following IDF values: amsterdam: 1.3, free: 0.2, university: 2.1. The score is computed as:
  max(score(amsterdam), score(university), score(free)) = 2.1 (university)
  + score(free) * tieBreaker      = 0.2 * 0.5 = 0.1
  + score(amsterdam) * tieBreaker = 1.3 * 0.5 = 0.65
  = 2.85
func (*DisMaxQuery) AddSubQuery ¶
func (q *DisMaxQuery) AddSubQuery(sub Query) Query
func (*DisMaxQuery) Advance ¶
func (q *DisMaxQuery) Advance(target int32) int32
func (*DisMaxQuery) Cost ¶
func (q *DisMaxQuery) Cost() int
func (*DisMaxQuery) GetDocId ¶
func (q *DisMaxQuery) GetDocId() int32
func (*DisMaxQuery) Next ¶
func (q *DisMaxQuery) Next() int32
func (*DisMaxQuery) PayloadDecode ¶
func (q *DisMaxQuery) PayloadDecode(p Payload)
func (*DisMaxQuery) Score ¶
func (q *DisMaxQuery) Score() float32
func (*DisMaxQuery) SetBoost ¶
func (q *DisMaxQuery) SetBoost(b float32) Query
func (*DisMaxQuery) String ¶
func (q *DisMaxQuery) String() string
type FileTermData ¶
type FileTermData struct {
// contains filtered or unexported fields
}
func FileTerm ¶
func FileTerm(totalDocumentsInIndex int, fn string) *FileTermData
Creates a new lazy term from a stored array of integers encoded with ByteOrder (little endian by default).
The file will be closed automatically when the query is exhausted (reaches the end)
WARNING: you must exhaust the query, otherwise you will leak file descriptors.
func (*FileTermData) AddSubQuery ¶
func (t *FileTermData) AddSubQuery(Query) Query
func (*FileTermData) Advance ¶
func (t *FileTermData) Advance(target int32) int32
func (*FileTermData) Close ¶
func (t *FileTermData) Close()
func (*FileTermData) Cost ¶
func (t *FileTermData) Cost() int
func (*FileTermData) GetDocId ¶
func (t *FileTermData) GetDocId() int32
func (*FileTermData) Next ¶
func (t *FileTermData) Next() int32
func (*FileTermData) PayloadDecode ¶
func (t *FileTermData) PayloadDecode(p Payload)
func (*FileTermData) Score ¶
func (t *FileTermData) Score() float32
func (*FileTermData) SetBoost ¶
func (t *FileTermData) SetBoost(b float32) Query
func (*FileTermData) String ¶
func (t *FileTermData) String() string
type OrQuery ¶
type OrQuery struct {
// contains filtered or unexported fields
}
func (*OrQuery) AddSubQuery ¶
func (*OrQuery) PayloadDecode ¶
type PayloadTermQuery ¶
type PayloadTermQuery struct {
// contains filtered or unexported fields
}
func PayloadTerm ¶
func PayloadTerm(totalDocumentsInIndex int, t string, postings []int32, payload []byte) *PayloadTermQuery
func (*PayloadTermQuery) AddSubQuery ¶
func (t *PayloadTermQuery) AddSubQuery(Query) Query
func (*PayloadTermQuery) Advance ¶
func (t *PayloadTermQuery) Advance(target int32) int32
func (*PayloadTermQuery) Cost ¶
func (t *PayloadTermQuery) Cost() int
func (*PayloadTermQuery) GetDocId ¶
func (t *PayloadTermQuery) GetDocId() int32
func (*PayloadTermQuery) Next ¶
func (t *PayloadTermQuery) Next() int32
func (*PayloadTermQuery) PayloadDecode ¶
func (t *PayloadTermQuery) PayloadDecode(p Payload)
func (*PayloadTermQuery) Score ¶
func (t *PayloadTermQuery) Score() float32
func (*PayloadTermQuery) SetBoost ¶
func (t *PayloadTermQuery) SetBoost(b float32) Query
func (*PayloadTermQuery) String ¶
func (t *PayloadTermQuery) String() string
type Query ¶
type Query interface { Advance(int32) int32 Next() int32 GetDocId() int32 Score() float32 SetBoost(float32) Query Cost() int String() string AddSubQuery(Query) Query PayloadDecode(p Payload) }
Reuse/Concurrency: None of the queries are safe to be re-used. WARNING: the query *can not* be reused. WARNING: the query is not thread safe.
Example Iteration:
q := Term([]int32{1,2,3})
for q.Next() != query.NO_MORE {
	did := q.GetDocId()
	score := q.Score()
	fmt.Printf("matching %d, score: %f\n", did, score)
}
type TermQuery ¶
type TermQuery struct {
// contains filtered or unexported fields
}
func Term ¶
Basic []int32{} that the whole interface works on top of. The score is IDF (not tf*idf, just 1*idf, since we don't store the term frequency for now). If you don't know totalDocumentsInIndex, which could be the case sometimes, pass any constant > 0. WARNING: the query *can not* be reused. WARNING: the query is not thread safe.
func (*TermQuery) AddSubQuery ¶
func (*TermQuery) PayloadDecode ¶
type TermTFQuery ¶
type TermTFQuery struct {
// contains filtered or unexported fields
}
func TermTF ¶
func TermTF(totalDocumentsInIndex int, freqBits int32, t string, postings []int32) *TermTFQuery
Splits the postings list into chunks that are binary searched; inside each chunk, the next advance() target is found by linear search. Basic []int32{} that the whole interface works on top of. The score is TF*IDF. You have to specify how many bits of the docID are actually term frequency, e.g. if you want to store the frequency in 4 bits, then document id 999 with term frequency 2 for this specific term could be stored as (999 << 4) | 2. Usually you just store the floored sqrt(frequency), so 3-4 bits are enough. It is zero based, so 0 is frequency 1.
If you don't know totalDocumentsInIndex, which could be the case sometimes, pass any constant > 0. WARNING: the query *can not* be reused. WARNING: the query is not thread safe.
func (*TermTFQuery) AddSubQuery ¶
func (t *TermTFQuery) AddSubQuery(Query) Query
func (*TermTFQuery) Advance ¶
func (t *TermTFQuery) Advance(target int32) int32
func (*TermTFQuery) Cost ¶
func (t *TermTFQuery) Cost() int
func (*TermTFQuery) GetDocId ¶
func (t *TermTFQuery) GetDocId() int32
func (*TermTFQuery) Next ¶
func (t *TermTFQuery) Next() int32
func (*TermTFQuery) PayloadDecode ¶
func (t *TermTFQuery) PayloadDecode(p Payload)
func (*TermTFQuery) Score ¶
func (t *TermTFQuery) Score() float32
func (*TermTFQuery) SetBoost ¶
func (t *TermTFQuery) SetBoost(b float32) Query
func (*TermTFQuery) String ¶
func (t *TermTFQuery) String() string