Documentation
¶
Index ¶
- Constants
- func NewBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info *SegmentInfo, ...) (p FieldsProducer, err error)
- type BlockTreeTermsReader
- type BlockTreeTermsWriter
- type BlockTreeTermsWriterSPI
- type FieldMetaData
- type FieldReader
- type PendingBlock
- type PendingEntry
- type PendingTerm
- type PostingsWriterBase
- type SegmentTermsEnum
- func (e *SegmentTermsEnum) Comparator() sort.Interface
- func (e *SegmentTermsEnum) DocFreq() (df int, err error)
- func (e *SegmentTermsEnum) DocsAndPositionsByFlags(skipDocs util.Bits, reuse DocsAndPositionsEnum, flags int) DocsAndPositionsEnum
- func (e *SegmentTermsEnum) DocsByFlags(skipDocs util.Bits, reuse DocsEnum, flags int) (de DocsEnum, err error)
- func (e *SegmentTermsEnum) Next() (buf []byte, err error)
- func (e *SegmentTermsEnum) Ord() int64
- func (e *SegmentTermsEnum) SeekCeil(text []byte) SeekStatus
- func (e *SegmentTermsEnum) SeekExact(target []byte) (ok bool, err error)
- func (e *SegmentTermsEnum) SeekExactByPosition(ord int64) error
- func (e *SegmentTermsEnum) SeekExactFromLast(target []byte, otherState TermState) error
- func (e *SegmentTermsEnum) String() string
- func (e *SegmentTermsEnum) Term() []byte
- func (e *SegmentTermsEnum) TermState() (ts TermState, err error)
- func (e *SegmentTermsEnum) TotalTermFreq() (tf int64, err error)
- type TermsWriter
- func (w *TermsWriter) Comparator() func(a, b []byte) bool
- func (w *TermsWriter) Finish(sumTotalTermFreq, sumDocFreq int64, docCount int) (err error)
- func (w *TermsWriter) FinishTerm(text []byte, stats *codec.TermStats) (err error)
- func (w *TermsWriter) StartTerm(text []byte) (codec.PostingsConsumer, error)
Constants ¶
const ( BTT_OUTPUT_FLAGS_NUM_BITS = 2 BTT_OUTPUT_FLAG_IS_FLOOR = 1 BTT_OUTPUT_FLAG_HAS_TERMS = 2 )
const ( /* Suggested default value for the minItemsInBlock parameter. */ DEFAULT_MIN_BLOCK_SIZE = 25 /* Suggested default value for the maxItemsInBlock parameter. */ DEFAULT_MAX_BLOCK_SIZE = 48 /* Extension of terms file */ TERMS_EXTENSION = "tim" TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT" TERMS_VERSION_START = 0 /* Append-only */ TERMS_VERSION_APPEND_ONLY = 1 TERMS_VERSION_META_ARRAY = 2 TERMS_VERSION_CHECKSUM = 3 TERMS_VERSION_MIN_MAX_TERMS = 4 /* Current terms format. */ TERMS_VERSION_CURRENT = TERMS_VERSION_MIN_MAX_TERMS /* Extension of terms index file */ TERMS_INDEX_EXTENSION = "tip" TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX" )
codec/BlockTreeTermsWriter.java
Variables ¶
This section is empty.
Functions ¶
Types ¶
type BlockTreeTermsReader ¶
type BlockTreeTermsReader struct {
// contains filtered or unexported fields
}
A block-based terms index and dictionary that assigns
terms to variable length blocks according to how they share prefixes. The terms index is a prefix trie whose leaves are term blocks. The advantage of this approach is that seekExact is often able to determine a term cannot exist without doing any IO, and intersection with Automata is very fast. Note that this terms dictionary has its own fixed terms index (i.e., it does not support a pluggable terms index implementation).
NOTE: this terms dictionary does not support index divisor when opening an IndexReader. Instead, you can change the min/maxItemsPerBlock during indexing.
The data structure used by this implementation is very similar to a [burst trie](http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499), but with added logic to break up too-large blocks of all terms sharing a given prefix into smaller ones.
Use CheckIndex with the -verbose option to see summary statistics on the blocks in the dictionary.
func (*BlockTreeTermsReader) Close ¶
func (r *BlockTreeTermsReader) Close() error
func (*BlockTreeTermsReader) Terms ¶
func (r *BlockTreeTermsReader) Terms(field string) Terms
type BlockTreeTermsWriter ¶
type BlockTreeTermsWriter struct {
// contains filtered or unexported fields
}
func NewBlockTreeTermsWriter ¶
func NewBlockTreeTermsWriter(state *SegmentWriteState, postingsWriter PostingsWriterBase, minItemsInBlock, maxItemsInBlock int) (*BlockTreeTermsWriter, error)
Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between minItemsInBlock and maxItemsInBlock, though in some cases, the blocks may be smaller than the min.
func (*BlockTreeTermsWriter) AddField ¶
func (w *BlockTreeTermsWriter) AddField(field *FieldInfo) (TermsConsumer, error)
func (*BlockTreeTermsWriter) Close ¶
func (w *BlockTreeTermsWriter) Close() (err error)
func (*BlockTreeTermsWriter) WriteHeader ¶
func (w *BlockTreeTermsWriter) WriteHeader(out store.IndexOutput) error
func (*BlockTreeTermsWriter) WriteIndexHeader ¶
func (w *BlockTreeTermsWriter) WriteIndexHeader(out store.IndexOutput) error
type BlockTreeTermsWriterSPI ¶
type BlockTreeTermsWriterSPI interface { WriteHeader(store.IndexOutput) error WriteIndexHeader(store.IndexOutput) error }
type FieldMetaData ¶
type FieldMetaData struct {
// contains filtered or unexported fields
}
type FieldReader ¶
type FieldReader struct {
// contains filtered or unexported fields
}
func (*FieldReader) DocCount ¶
func (r *FieldReader) DocCount() int
func (*FieldReader) Iterator ¶
func (r *FieldReader) Iterator(reuse TermsEnum) TermsEnum
func (*FieldReader) SumDocFreq ¶
func (r *FieldReader) SumDocFreq() int64
func (*FieldReader) SumTotalTermFreq ¶
func (r *FieldReader) SumTotalTermFreq() int64
type PendingBlock ¶
type PendingBlock struct {
// contains filtered or unexported fields
}
func (*PendingBlock) String ¶
func (b *PendingBlock) String() string
type PendingEntry ¶
type PendingEntry interface {
// contains filtered or unexported methods
}
type PendingTerm ¶
type PendingTerm struct {
// contains filtered or unexported fields
}
func (*PendingTerm) String ¶
func (t *PendingTerm) String() string
type PostingsWriterBase ¶
type PostingsWriterBase interface { codec.PostingsConsumer io.Closer // Called once after startup, before any terms have been added. // Implementations typically write a header to the provided termsOut. Init(store.IndexOutput) error NewTermState() *BlockTermState // Start a new term. Note that a matching call to FinishTerm() is // done, only if the term has at least one document. StartTerm() error // Finishes the current term. The provided BlockTermState contains the // term's summary statistics. FinishTerm(*BlockTermState) error EncodeTerm([]int64, util.DataOutput, *FieldInfo, *BlockTermState, bool) error // Called when the writing switches to another field. SetField(fieldInfo *FieldInfo) int }
Extension of PostingsConsumer to support pluggable term dictionaries.
This interface contains additional hooks to interact with the provided term dictionaries such as BlockTreeTermsWriter. If you want to re-use an existing implementation and are only interested in customizing the format of the postings list, implement this interface instead.
type SegmentTermsEnum ¶
type SegmentTermsEnum struct { *TermsEnumImpl // contains filtered or unexported fields }
Iterates through terms in this field
func (*SegmentTermsEnum) Comparator ¶
func (e *SegmentTermsEnum) Comparator() sort.Interface
func (*SegmentTermsEnum) DocFreq ¶
func (e *SegmentTermsEnum) DocFreq() (df int, err error)
func (*SegmentTermsEnum) DocsAndPositionsByFlags ¶
func (e *SegmentTermsEnum) DocsAndPositionsByFlags(skipDocs util.Bits, reuse DocsAndPositionsEnum, flags int) DocsAndPositionsEnum
func (*SegmentTermsEnum) DocsByFlags ¶
func (e *SegmentTermsEnum) DocsByFlags(skipDocs util.Bits, reuse DocsEnum, flags int) (de DocsEnum, err error)
func (*SegmentTermsEnum) Next ¶
func (e *SegmentTermsEnum) Next() (buf []byte, err error)
func (*SegmentTermsEnum) Ord ¶
func (e *SegmentTermsEnum) Ord() int64
func (*SegmentTermsEnum) SeekCeil ¶
func (e *SegmentTermsEnum) SeekCeil(text []byte) SeekStatus
func (*SegmentTermsEnum) SeekExact ¶
func (e *SegmentTermsEnum) SeekExact(target []byte) (ok bool, err error)
func (*SegmentTermsEnum) SeekExactByPosition ¶
func (e *SegmentTermsEnum) SeekExactByPosition(ord int64) error
func (*SegmentTermsEnum) SeekExactFromLast ¶
func (e *SegmentTermsEnum) SeekExactFromLast(target []byte, otherState TermState) error
func (*SegmentTermsEnum) String ¶
func (e *SegmentTermsEnum) String() string
func (*SegmentTermsEnum) Term ¶
func (e *SegmentTermsEnum) Term() []byte
func (*SegmentTermsEnum) TermState ¶
func (e *SegmentTermsEnum) TermState() (ts TermState, err error)
func (*SegmentTermsEnum) TotalTermFreq ¶
func (e *SegmentTermsEnum) TotalTermFreq() (tf int64, err error)
type TermsWriter ¶
type TermsWriter struct {
// contains filtered or unexported fields
}
func (*TermsWriter) Comparator ¶
func (w *TermsWriter) Comparator() func(a, b []byte) bool
func (*TermsWriter) Finish ¶
func (w *TermsWriter) Finish(sumTotalTermFreq, sumDocFreq int64, docCount int) (err error)
func (*TermsWriter) FinishTerm ¶
func (w *TermsWriter) FinishTerm(text []byte, stats *codec.TermStats) (err error)
func (*TermsWriter) StartTerm ¶
func (w *TermsWriter) StartTerm(text []byte) (codec.PostingsConsumer, error)