blocktree

package

v0.0.0-...-d0be9ee Latest Latest Go to latest Published: Dec 10, 2015 License: Apache-2.0 Imports: 14 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/shaybix/golucene

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func NewBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info *SegmentInfo, ...) (p FieldsProducer, err error)
type BlockTreeTermsReader
- func (r *BlockTreeTermsReader) Close() error
- func (r *BlockTreeTermsReader) Terms(field string) Terms
type BlockTreeTermsWriter
- func NewBlockTreeTermsWriter(state *SegmentWriteState, postingsWriter PostingsWriterBase, ...) (*BlockTreeTermsWriter, error)
- func (w *BlockTreeTermsWriter) AddField(field *FieldInfo) (TermsConsumer, error)
- func (w *BlockTreeTermsWriter) Close() (err error)
- func (w *BlockTreeTermsWriter) WriteHeader(out store.IndexOutput) error
- func (w *BlockTreeTermsWriter) WriteIndexHeader(out store.IndexOutput) error
type BlockTreeTermsWriterSPI
type FieldMetaData
type FieldReader
- func (r *FieldReader) DocCount() int
- func (r *FieldReader) Iterator(reuse TermsEnum) TermsEnum
- func (r *FieldReader) SumDocFreq() int64
- func (r *FieldReader) SumTotalTermFreq() int64
type PendingBlock
- func (b *PendingBlock) String() string
type PendingEntry
type PendingTerm
- func (t *PendingTerm) String() string
type PostingsWriterBase
type SegmentTermsEnum
- func (e *SegmentTermsEnum) Comparator() sort.Interface
- func (e *SegmentTermsEnum) DocFreq() (df int, err error)
- func (e *SegmentTermsEnum) DocsAndPositionsByFlags(skipDocs util.Bits, reuse DocsAndPositionsEnum, flags int) DocsAndPositionsEnum
- func (e *SegmentTermsEnum) DocsByFlags(skipDocs util.Bits, reuse DocsEnum, flags int) (de DocsEnum, err error)
- func (e *SegmentTermsEnum) Next() (buf []byte, err error)
- func (e *SegmentTermsEnum) Ord() int64
- func (e *SegmentTermsEnum) SeekCeil(text []byte) SeekStatus
- func (e *SegmentTermsEnum) SeekExact(target []byte) (ok bool, err error)
- func (e *SegmentTermsEnum) SeekExactByPosition(ord int64) error
- func (e *SegmentTermsEnum) SeekExactFromLast(target []byte, otherState TermState) error
- func (e *SegmentTermsEnum) String() string
- func (e *SegmentTermsEnum) Term() []byte
- func (e *SegmentTermsEnum) TermState() (ts TermState, err error)
- func (e *SegmentTermsEnum) TotalTermFreq() (tf int64, err error)
type TermsWriter
- func (w *TermsWriter) Comparator() func(a, b []byte) bool
- func (w *TermsWriter) Finish(sumTotalTermFreq, sumDocFreq int64, docCount int) (err error)
- func (w *TermsWriter) FinishTerm(text []byte, stats *codec.TermStats) (err error)
- func (w *TermsWriter) StartTerm(text []byte) (codec.PostingsConsumer, error)

Constants ¶

View Source

const (
	BTT_OUTPUT_FLAGS_NUM_BITS = 2
	BTT_OUTPUT_FLAG_IS_FLOOR  = 1
	BTT_OUTPUT_FLAG_HAS_TERMS = 2
)

View Source

const (
	/* Suggested default value for the minItemsInBlock parameter. */
	DEFAULT_MIN_BLOCK_SIZE = 25

	/* Suggested default value for the maxItemsInBlock parameter. */
	DEFAULT_MAX_BLOCK_SIZE = 48

	/* Extension of terms file */
	TERMS_EXTENSION  = "tim"
	TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT"

	TERMS_VERSION_START = 0
	/* Append-only */
	TERMS_VERSION_APPEND_ONLY   = 1
	TERMS_VERSION_META_ARRAY    = 2
	TERMS_VERSION_CHECKSUM      = 3
	TERMS_VERSION_MIN_MAX_TERMS = 4
	/* Current terms format. */
	TERMS_VERSION_CURRENT = TERMS_VERSION_MIN_MAX_TERMS

	/* Extension of terms index file */
	TERMS_INDEX_EXTENSION  = "tip"
	TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX"
)

codec/BlockTreeTermsWriter.java

Variables ¶

This section is empty.

Functions ¶

func NewBlockTreeTermsReader ¶

func NewBlockTreeTermsReader(dir store.Directory,
	fieldInfos FieldInfos, info *SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error)

Types ¶

type BlockTreeTermsReader ¶

type BlockTreeTermsReader struct {
	// contains filtered or unexported fields
}

A block-based terms index and dictionary that assigns terms to variable length blocks according to how they share prefixes. The terms index is a prefix trie whose leaves are term blocks. The advantage of this approach is that seekExact is often able to determine a term cannot exist without doing any IO, and intersection with Automata is very fast. Note that this terms dictionary has its own fixed terms index (i.e., it does not support a pluggable terms index implementation).

NOTE: this terms dictionary does not support index divisor when opening an IndexReader. Instead, you can change minItemsInBlock/maxItemsInBlock during indexing.

The data structure used by this implementation is very similar to a [burst trie](http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499), but with added logic to break up too-large blocks of all terms sharing a given prefix into smaller ones.

Use CheckIndex with the -verbose option to see summary statistics on the blocks in the dictionary.

func (*BlockTreeTermsReader) Close ¶

func (r *BlockTreeTermsReader) Close() error

func (*BlockTreeTermsReader) Terms ¶

func (r *BlockTreeTermsReader) Terms(field string) Terms

type BlockTreeTermsWriter ¶

type BlockTreeTermsWriter struct {
	// contains filtered or unexported fields
}

func NewBlockTreeTermsWriter ¶

func NewBlockTreeTermsWriter(state *SegmentWriteState,
	postingsWriter PostingsWriterBase,
	minItemsInBlock, maxItemsInBlock int) (*BlockTreeTermsWriter, error)

Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between minItemsInBlock and maxItemsInBlock, though in some cases the blocks may be smaller than the min.

func (*BlockTreeTermsWriter) AddField ¶

func (w *BlockTreeTermsWriter) AddField(field *FieldInfo) (TermsConsumer, error)

func (*BlockTreeTermsWriter) Close ¶

func (w *BlockTreeTermsWriter) Close() (err error)

func (*BlockTreeTermsWriter) WriteHeader ¶

func (w *BlockTreeTermsWriter) WriteHeader(out store.IndexOutput) error

func (*BlockTreeTermsWriter) WriteIndexHeader ¶

func (w *BlockTreeTermsWriter) WriteIndexHeader(out store.IndexOutput) error

type BlockTreeTermsWriterSPI ¶

type BlockTreeTermsWriterSPI interface {
	WriteHeader(store.IndexOutput) error
	WriteIndexHeader(store.IndexOutput) error
}

type FieldMetaData ¶

type FieldMetaData struct {
	// contains filtered or unexported fields
}

type FieldReader ¶

type FieldReader struct {
	// contains filtered or unexported fields
}

func (*FieldReader) DocCount ¶

func (r *FieldReader) DocCount() int

func (*FieldReader) Iterator ¶

func (r *FieldReader) Iterator(reuse TermsEnum) TermsEnum

func (*FieldReader) SumDocFreq ¶

func (r *FieldReader) SumDocFreq() int64

func (*FieldReader) SumTotalTermFreq ¶

func (r *FieldReader) SumTotalTermFreq() int64

type PendingBlock ¶

type PendingBlock struct {
	// contains filtered or unexported fields
}

func (*PendingBlock) String ¶

func (b *PendingBlock) String() string

type PendingEntry ¶

type PendingEntry interface {
	// contains filtered or unexported methods
}

type PendingTerm ¶

type PendingTerm struct {
	// contains filtered or unexported fields
}

func (*PendingTerm) String ¶

func (t *PendingTerm) String() string

type PostingsWriterBase ¶

type PostingsWriterBase interface {
	codec.PostingsConsumer
	io.Closer

	// Called once after startup, before any terms have been added.
	// Implementations typically write a header to the provided termsOut.
	Init(store.IndexOutput) error
	NewTermState() *BlockTermState
	// Start a new term. Note that a matching call to FinishTerm() is
	// made only if the term has at least one document.
	StartTerm() error
	// Finishes the current term. The provided BlockTermState contains
	// the term's summary statistics.
	FinishTerm(*BlockTermState) error
	EncodeTerm([]int64, util.DataOutput, *FieldInfo, *BlockTermState, bool) error
	// Called when the writing switches to another field.
	SetField(fieldInfo *FieldInfo) int
}

Extension of PostingsConsumer to support pluggable term dictionaries.

This class contains additional hooks to interact with the provided term dictionaries such as BlockTreeTermsWriter. If you want to re-use an existing implementation and are only interested in customizing the format of the postings list, extend this class instead.

type SegmentTermsEnum ¶

type SegmentTermsEnum struct {
	*TermsEnumImpl
	// contains filtered or unexported fields
}

Iterates through terms in this field.

func (*SegmentTermsEnum) Comparator ¶

func (e *SegmentTermsEnum) Comparator() sort.Interface

func (*SegmentTermsEnum) DocFreq ¶

func (e *SegmentTermsEnum) DocFreq() (df int, err error)

func (*SegmentTermsEnum) DocsAndPositionsByFlags ¶

func (e *SegmentTermsEnum) DocsAndPositionsByFlags(skipDocs util.Bits, reuse DocsAndPositionsEnum, flags int) DocsAndPositionsEnum

func (*SegmentTermsEnum) DocsByFlags ¶

func (e *SegmentTermsEnum) DocsByFlags(skipDocs util.Bits, reuse DocsEnum, flags int) (de DocsEnum, err error)

func (*SegmentTermsEnum) Next ¶

func (e *SegmentTermsEnum) Next() (buf []byte, err error)

func (*SegmentTermsEnum) Ord ¶

func (e *SegmentTermsEnum) Ord() int64

func (*SegmentTermsEnum) SeekCeil ¶

func (e *SegmentTermsEnum) SeekCeil(text []byte) SeekStatus

func (*SegmentTermsEnum) SeekExact ¶

func (e *SegmentTermsEnum) SeekExact(target []byte) (ok bool, err error)

func (*SegmentTermsEnum) SeekExactByPosition ¶

func (e *SegmentTermsEnum) SeekExactByPosition(ord int64) error

func (*SegmentTermsEnum) SeekExactFromLast ¶

func (e *SegmentTermsEnum) SeekExactFromLast(target []byte, otherState TermState) error

func (*SegmentTermsEnum) String ¶

func (e *SegmentTermsEnum) String() string

func (*SegmentTermsEnum) Term ¶

func (e *SegmentTermsEnum) Term() []byte

func (*SegmentTermsEnum) TermState ¶

func (e *SegmentTermsEnum) TermState() (ts TermState, err error)

func (*SegmentTermsEnum) TotalTermFreq ¶

func (e *SegmentTermsEnum) TotalTermFreq() (tf int64, err error)

type TermsWriter ¶

type TermsWriter struct {
	// contains filtered or unexported fields
}

func (*TermsWriter) Comparator ¶

func (w *TermsWriter) Comparator() func(a, b []byte) bool

func (*TermsWriter) Finish ¶

func (w *TermsWriter) Finish(sumTotalTermFreq, sumDocFreq int64, docCount int) (err error)

func (*TermsWriter) FinishTerm ¶

func (w *TermsWriter) FinishTerm(text []byte, stats *codec.TermStats) (err error)

func (*TermsWriter) StartTerm ¶

func (w *TermsWriter) StartTerm(text []byte) (codec.PostingsConsumer, error)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL