Documentation ¶
Index ¶
- Constants
- Variables
- func GenerationFromSegmentsFileName(fileName string) int64
- func GetMultiFields(r IndexReader) Fields
- func GetMultiTerms(r IndexReader, field string) Terms
- func IsIndexExists(directory store.Directory) (ok bool, err error)
- func IsIndexFileExists(files []string) bool
- func LastCommitGeneration(files []string) int64
- func MergeTriggerName(trigger MergeTrigger) string
- func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error)
- func SubIndex(n int, leaves []*AtomicReaderContext) int
- type ARFieldsReader
- type ApplyDeletesResult
- type AtomicReader
- type AtomicReaderContext
- type AtomicReaderImpl
- func (r *AtomicReaderImpl) Context() IndexReaderContext
- func (r *AtomicReaderImpl) DocCount(field string) (n int, err error)
- func (r *AtomicReaderImpl) DocFreq(term *Term) (int, error)
- func (r *AtomicReaderImpl) SumDocFreq(field string) (n int64, err error)
- func (r *AtomicReaderImpl) SumTotalTermFreq(field string) (n int64, err error)
- func (r *AtomicReaderImpl) Terms(field string) Terms
- func (r *AtomicReaderImpl) TotalTermFreq(term *Term) (n int64, err error)
- type AtomicReaderImplSPI
- type BaseCompositeReader
- func (r *BaseCompositeReader) DocCount(field string) int
- func (r *BaseCompositeReader) DocFreq(term *Term) (int, error)
- func (r *BaseCompositeReader) MaxDoc() int
- func (r *BaseCompositeReader) NumDocs() int
- func (r *BaseCompositeReader) SumDocFreq(field string) int64
- func (r *BaseCompositeReader) SumTotalTermFreq(field string) int64
- func (r *BaseCompositeReader) TermVectors(docID int) error
- func (r *BaseCompositeReader) TotalTermFreq(term *Term) int64
- func (r *BaseCompositeReader) VisitDocument(docID int, visitor StoredFieldVisitor) error
- type BaseCompositeReaderSPI
- type BlockedFlush
- type BufferedUpdates
- type BufferedUpdatesStream
- type BySizeDescendingSegments
- type ByteSliceReader
- type CheckAbort
- type CheckAbortNone
- type CheckIndex
- type CheckIndexStatus
- type ClosingControl
- type CoalescedUpdates
- type CommitPoint
- func (cp *CommitPoint) Delete()
- func (cp *CommitPoint) Directory() store.Directory
- func (cp *CommitPoint) FileNames() []string
- func (cp *CommitPoint) Generation() int64
- func (cp *CommitPoint) IsDeleted() bool
- func (cp *CommitPoint) SegmentCount() int
- func (cp *CommitPoint) SegmentsFileName() string
- func (cp *CommitPoint) String() string
- func (cp *CommitPoint) UserData() map[string]string
- type CompositeReader
- type CompositeReaderContext
- type CompositeReaderContextBuilder
- type CompositeReaderImpl
- type CompositeReaderSPI
- type ConcurrentMergeScheduler
- func (cms *ConcurrentMergeScheduler) Close() error
- func (cms *ConcurrentMergeScheduler) Merge(writer *IndexWriter, trigger MergeTrigger, newMergesFound bool) error
- func (cms *ConcurrentMergeScheduler) SetMaxMergesAndRoutines(maxMergeCount, maxRoutineCount int)
- func (cms *ConcurrentMergeScheduler) String() string
- type CoreClosedListener
- type DefaultIndexingChain
- type DeleteSlice
- type DirectoryReader
- type DirectoryReaderImpl
- type DocConsumer
- type DocValuesFieldUpdates
- type DocValuesFieldUpdatesContainer
- type DocValuesStatus
- type DocValuesUpdate
- type DocValuesWriter
- type DocumentsWriter
- type DocumentsWriterDeleteQueue
- type DocumentsWriterFlushControl
- type DocumentsWriterFlushQueue
- type DocumentsWriterPerThread
- type DocumentsWriterPerThreadPool
- type DocumentsWriterStallControl
- type Event
- type FieldInvertState
- type FieldNormStatus
- type FindSegmentsFile
- type FlushByRamOrCountsPolicy
- type FlushPolicy
- type FlushPolicyImpl
- type FlushPolicyImplSPI
- type FlushTicket
- type FlushTicketImpl
- type FlushedSegment
- type FreqProxPostingsArray
- type FreqProxTermsWriter
- type FreqProxTermsWriterPerField
- type FreqProxTermsWriterPerFields
- type FrozenBufferedUpdates
- type IndexCommit
- type IndexCommits
- type IndexDeletionPolicy
- type IndexFileDeleter
- type IndexReader
- type IndexReaderContext
- type IndexReaderContextImpl
- type IndexReaderImpl
- type IndexReaderImplSPI
- type IndexReaderWarmer
- type IndexWriter
- func (w *IndexWriter) AddDocument(doc []IndexableField) error
- func (w *IndexWriter) AddDocumentWithAnalyzer(doc []IndexableField, analyzer analysis.Analyzer) error
- func (w *IndexWriter) Close() error
- func (w *IndexWriter) Commit() error
- func (w *IndexWriter) Directory() store.Directory
- func (w *IndexWriter) MergingSegments() map[*SegmentCommitInfo]bool
- func (w *IndexWriter) Rollback() error
- func (w *IndexWriter) UpdateDocument(term *Term, doc []IndexableField, analyzer analysis.Analyzer) error
- type IndexWriterConfig
- func (conf *IndexWriterConfig) InfoStream() util.InfoStream
- func (conf *IndexWriterConfig) MergePolicy() MergePolicy
- func (conf *IndexWriterConfig) SetIndexDeletionPolicy(delPolicy IndexDeletionPolicy) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetInfoStream(infoStream util.InfoStream) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetMaxBufferedDocs(maxBufferedDocs int) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetMergeScheduler(mergeScheduler MergeScheduler) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetReaderPooling(readerPooling bool) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetReaderTermsIndexDivisor(divisor int) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetSimilarity(similarity Similarity) *IndexWriterConfig
- func (conf *IndexWriterConfig) SetUseCompoundFile(useCompoundFile bool) *IndexWriterConfig
- func (conf *IndexWriterConfig) String() string
- type IndexingChain
- type IntBlockAllocator
- type KeepOnlyLastCommitDeletionPolicy
- type LiveIndexWriterConfig
- type LiveIndexWriterConfigImpl
- func (conf *LiveIndexWriterConfigImpl) Codec() Codec
- func (conf *LiveIndexWriterConfigImpl) InfoStream() util.InfoStream
- func (conf *LiveIndexWriterConfigImpl) MaxBufferedDocs() int
- func (conf *LiveIndexWriterConfigImpl) MergePolicy() MergePolicy
- func (conf *LiveIndexWriterConfigImpl) RAMBufferSizeMB() float64
- func (conf *LiveIndexWriterConfigImpl) RAMPerThreadHardLimitMB() int
- func (conf *LiveIndexWriterConfigImpl) SetMaxBufferedDocs(maxBufferedDocs int) *LiveIndexWriterConfigImpl
- func (conf *LiveIndexWriterConfigImpl) SetMergePolicy(mergePolicy MergePolicy) *LiveIndexWriterConfigImpl
- func (conf *LiveIndexWriterConfigImpl) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *LiveIndexWriterConfigImpl
- func (conf *LiveIndexWriterConfigImpl) SetReaderTermsIndexDivisor(divisor int) *LiveIndexWriterConfigImpl
- func (conf *LiveIndexWriterConfigImpl) SetUseCompoundFile(useCompoundFile bool) *LiveIndexWriterConfigImpl
- func (conf *LiveIndexWriterConfigImpl) Similarity() Similarity
- func (conf *LiveIndexWriterConfigImpl) String() string
- func (conf *LiveIndexWriterConfigImpl) TermIndexInterval() int
- func (conf *LiveIndexWriterConfigImpl) UseCompoundFile() bool
- type LogByteSizeMergePolicy
- type LogDocMergePolicy
- type LogMergePolicy
- func (mp *LogMergePolicy) FindForcedMerges(infos *SegmentInfos, maxSegmentCount int, ...) (MergeSpecification, error)
- func (mp *LogMergePolicy) FindMerges(mergeTrigger MergeTrigger, infos *SegmentInfos, w *IndexWriter) (spec MergeSpecification, err error)
- func (mp *LogMergePolicy) SetCalbrateSizeByDeletes(calibrateSizeByDeletes bool)
- func (mp *LogMergePolicy) SetMergeFactor(mergeFactor int)
- func (mp *LogMergePolicy) String() string
- type MergeAbortedError
- type MergeControl
- type MergeJob
- type MergePolicy
- type MergePolicyImpl
- type MergePolicyImplSPI
- type MergeScheduler
- type MergeScore
- type MergeSpecification
- type MergeSpecifier
- type MergeTrigger
- type MultiFields
- type MultiTerms
- type NoDeletionPolicy
- type Node
- type NumericDocValuesWriter
- type NumericIterator
- type OneMerge
- type OpenMode
- type ParallelPostingsArray
- type PerField
- type PostingsArray
- type PostingsBytesStartArray
- type PrefixCodedTerms
- type PrefixCodedTermsBuilder
- type Query
- type QueryAndLimit
- type RandomCodec
- type ReaderClosedListener
- type ReaderPool
- type ReaderSlice
- type ReadersAndUpdates
- type RefCount
- type SegInfoByDelGen
- type SegmentCoreReaders
- type SegmentFlushTicket
- type SegmentInfoAndLevel
- type SegmentInfoAndLevels
- type SegmentInfoStatus
- type SegmentInfos
- type SegmentReader
- func (r *SegmentReader) BinaryDocValues(field string) (v BinaryDocValues, err error)
- func (r *SegmentReader) CombinedCoreAndDeletesKey() interface{}
- func (r *SegmentReader) CoreCacheKey() interface{}
- func (r *SegmentReader) Directory() store.Directory
- func (r *SegmentReader) FieldInfos() FieldInfos
- func (r *SegmentReader) Fields() Fields
- func (r *SegmentReader) FieldsReader() StoredFieldsReader
- func (r *SegmentReader) LiveDocs() util.Bits
- func (r *SegmentReader) MaxDoc() int
- func (r *SegmentReader) NormValues(field string) (v NumericDocValues, err error)
- func (r *SegmentReader) NumDocs() int
- func (r *SegmentReader) NumericDocValues(field string) (v NumericDocValues, err error)
- func (r *SegmentReader) SegmentInfos() *SegmentCommitInfo
- func (r *SegmentReader) SegmentName() string
- func (r *SegmentReader) SortedDocValues(field string) (v SortedDocValues, err error)
- func (r *SegmentReader) SortedSetDocValues(field string) (v SortedSetDocValues, err error)
- func (r *SegmentReader) String() string
- func (r *SegmentReader) TermInfosIndexDivisor() int
- func (r *SegmentReader) TermVectors(docID int) (fs Fields, err error)
- func (r *SegmentReader) TermVectorsReader() TermVectorsReader
- func (r *SegmentReader) VisitDocument(docID int, visitor StoredFieldVisitor) error
- type SerialMergeScheduler
- type Similarity
- type SimpleMergedSegmentWarmer
- type StandardDirectoryReader
- type StoredFieldStatus
- type Term
- type TermContext
- type TermIndexStatus
- type TermSorter
- type TermVectorStatus
- type TermVectorsConsumer
- type TermVectorsConsumerPerField
- type TermVectorsConsumerPerFields
- type TermVectorsPostingArray
- type TermsHash
- type TermsHashImpl
- type TermsHashImplSPI
- type TermsHashPerField
- type TermsHashPerFieldImpl
- type TermsHashPerFieldSPI
- type ThreadState
- type TieredMergePolicy
- func (tmp *TieredMergePolicy) FindForcedMerges(infos *SegmentInfos, maxSegmentCount int, ...) (MergeSpecification, error)
- func (tmp *TieredMergePolicy) FindMerges(mergeTrigger MergeTrigger, infos *SegmentInfos, w *IndexWriter) (spec MergeSpecification, err error)
- func (tmp *TieredMergePolicy) SetFloorSegmentMB(v float64) *TieredMergePolicy
- func (tmp *TieredMergePolicy) SetForceMergeDeletesPctAllowed(v float64) *TieredMergePolicy
- func (tmp *TieredMergePolicy) SetMaxMergeAtOnce(v int) *TieredMergePolicy
- func (tmp *TieredMergePolicy) SetMaxMergeAtOnceExplicit(v int) *TieredMergePolicy
- func (tmp *TieredMergePolicy) SetMaxMergedSegmentMB(v float64) *TieredMergePolicy
- func (tmp *TieredMergePolicy) SetReclaimDeletesWeight(v float64) *TieredMergePolicy
- func (tmp *TieredMergePolicy) SetSegmentsPerTier(v float64) *TieredMergePolicy
- func (tmp *TieredMergePolicy) String() string
Constants ¶
const (
    // Creates a new index or overwrites an existing one.
    OPEN_MODE_CREATE = OpenMode(1)
    // Opens an existing index.
    OPEN_MODE_APPEND = OpenMode(2)
    // Creates a new index if one does not exist,
    // otherwise it opens the index and documents will be appended.
    OPEN_MODE_CREATE_OR_APPEND = OpenMode(3)
)
const (
    INDEX_FILENAME_SEGMENTS     = "segments"
    INDEX_FILENAME_SEGMENTS_GEN = "segments.gen"
)
const (
    // Merge was triggered by a segment flush.
    MERGE_TRIGGER_SEGMENT_FLUSH = MergeTrigger(1)
    // Merge was triggered by a full flush. Full flushes can be caused
    // by a commit, an NRT reader reopen, or a close call on the index writer.
    MERGE_TRIGGER_FULL_FLUSH = MergeTrigger(2)
    // Merge has been triggered explicitly by the user.
    MERGE_TRIGGER_EXPLICIT = MergeTrigger(3)
    // Merge was triggered by a successfully finished merge.
    MERGE_FINISHED = MergeTrigger(4)
    // Merge was triggered by a closing IndexWriter.
    MERGE_CLOSING = MergeTrigger(5)
)
const (
    VERSION_40 = 0
    VERSION_46 = 1
    VERSION_48 = 2
    VERSION_49 = 3

    // Used for the segments.gen file only!
    // Whenever you add a new format, make it 1 smaller (negative version logic)!
    FORMAT_SEGMENTS_GEN_47       = -2
    FORMAT_SEGMENTS_GEN_CHECKSUM = -3
    FORMAT_SEGMENTS_GEN_START    = FORMAT_SEGMENTS_GEN_47
    // Current format of segments.gen
    FORMAT_SEGMENTS_GEN_CURRENT = FORMAT_SEGMENTS_GEN_CHECKSUM
)
const BYTES_PER_DEL_DOCID = 2 * util.NUM_BYTES_INT
A Go slice consumes two ints for an extra doc ID, assuming 50% pre-allocation.
const BYTES_PER_DEL_QUERY = 40 + util.NUM_BYTES_OBJECT_REF + util.NUM_BYTES_INT
Go map (amd64) consumes about 40 bytes for an extra entry.
const BYTES_PER_POSTING = 3 * util.NUM_BYTES_INT
const DEFAULT_CHECK_INTEGRITY_AT_MERGE = false
Default value for calling checkIntegrity() before merging segments (set to false). You can set this to true for additional safety.
const DEFAULT_DELETION_POLICY = KeepOnlyLastCommitDeletionPolicy(true)
const DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH
Disabled by default (because IndexWriter flushes by RAM usage by default).
const DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH
Disabled by default (because IndexWriter flushes by RAM usage by default).
const DEFAULT_MAX_CFS_SEGMENT_SIZE = math.MaxInt64
Default max segment size in order to use compound file system. Set to maxInt64.
const DEFAULT_MAX_MERGE_COUNT = 2
Default maxMergeCount.
const DEFAULT_MAX_MERGE_MB = 2048
Default maximum segment size. A segment of this size or larger will never be merged.
const DEFAULT_MAX_ROUTINE_COUNT = 1
Default maxRoutineCount. We default to 1: tests on spinning-magnet drives showed slower indexing performance if more than one merge routine runs at once (though on an SSD it was faster).
const DEFAULT_MAX_THREAD_STATES = 8
The maximum number of simultaneous threads that may be indexing documents at once in IndexWriter; if more than this many threads arrive they will wait for others to finish. Default value is 8.
const DEFAULT_MERGE_FACTOR = 10
Default merge factor, which is how many segments are merged at a time
const DEFAULT_MIN_MERGE_DOCS = 1000
Default minimum segment size.
const DEFAULT_NO_CFS_RATIO = 0.1
Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it.
const DEFAULT_RAM_BUFFER_SIZE_MB = 16
Default value is 16 MB (which means flush when buffered docs consume approximately 16 MB RAM)
const DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945
Default value is 1945.
const DEFAULT_READER_POOLING = false
const DEFAULT_READER_TERMS_INDEX_DIVISOR = DEFAULT_TERMS_INDEX_DIVISOR
Default value is 1.
const DEFAULT_TERMS_INDEX_DIVISOR = 1
const DEFAULT_TERM_INDEX_INTERVAL = 32 // TODO: this should be private to the codec, not settable here
Default value is 32.
const DEFAULT_USE_COMPOUND_FILE_SYSTEM = true
Default value for compound file system for newly written segments (set to true). For batch indexing with very large ram buffers use false.
const DISABLE_AUTO_FLUSH = -1
Denotes a flush trigger is disabled.
const DWPT_VERBOSE = false
const HASH_INIT_SIZE = 4
const LEVEL_LOG_SPAN = 0.75
Defines the allowed range of log(size) for each level. A level is computed by taking the max segment log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range.
const MAX_DOCS = math.MaxInt32 - 128
Hard limit on the maximum number of documents that may be added to the index. Adding more than this will cause a panic.
const MAX_INT = int(math.MaxInt32)
const MAX_TERM_LENGTH = MAX_TERM_LENGTH_UTF8
Absolute hard maximum length for a term, in bytes once encoded as UTF8. If a term arrives from the analyzer that is longer than this length, a panic is raised and a message is printed to infoStream, if set (see SetInfoStream()).
const MAX_TERM_LENGTH_UTF8 = util.BYTE_BLOCK_SIZE - 2
L600: if you increase this, you must also fix the field cache implementation, since Terms/TermsIndex requires values <= 32768.
const MISSING int64 = 0
const NO_DELETION_POLICY = NoDeletionPolicy(true)
const SOURCE_FLUSH = "flush"
Source of a segment which results from a flush.
const UNBOUNDED_MAX_MERGE_SEGMENTS = -1
const VERBOSE = false
const VERBOSE_REF_COUNT = false
const WRITE_LOCK_NAME = "write.lock"
Name of the write lock in the index.
const WRITE_LOCK_TIMEOUT = 1000
Default value for the write lock timeout (1,000 ms)
Variables ¶
var DEFAULT_MAX_MERGE_MB_FOR_FORCED_MERGE int64 = math.MaxInt64
Default maximum segment size. A segment of this size or larger will never be merged during forceMerge.
var DEFAULT_MIN_MERGE_MB = 1.6
Default minimum segment size.
var DefaultSimilarity func() Similarity
Used by the search package to assign a default similarity.
var (
EMPTY_ARRAY = []ReaderSlice{}
)
Functions ¶
func GetMultiFields ¶
func GetMultiFields(r IndexReader) Fields
func GetMultiTerms ¶
func GetMultiTerms(r IndexReader, field string) Terms
func IsIndexExists ¶
func IsIndexExists(directory store.Directory) (ok bool, err error)
Returns true if an index likely exists at the specified directory. Note that if a corrupt index exists, or if an index is in the process of committing, this method may still return true.
func LastCommitGeneration ¶
func LastCommitGeneration(files []string) int64
func MergeTriggerName ¶
func MergeTriggerName(trigger MergeTrigger) string
func ReadFieldInfos ¶
func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error)
Reads the most recent FieldInfos of the given segment info.
func SubIndex ¶
func SubIndex(n int, leaves []*AtomicReaderContext) int
Returns index of the searcher/reader for document n in the slice used to construct this searcher/reader.
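For example, a minimal sketch (reader and docID are assumed to be an open IndexReader and a top-level document number) that locates the leaf holding a document:

leaves := reader.Leaves()
i := SubIndex(docID, leaves)
leaf := leaves[i]
localDocID := docID - leaf.DocBase // document number within that leaf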
Types ¶
type ARFieldsReader ¶
type ARFieldsReader interface {
    Terms(field string) Terms
    Fields() Fields
    LiveDocs() util.Bits
    // Returns NumericDocValues representing norms for this field, or nil
    // if no NumericDocValues were indexed. The returned instance should
    // only be used by a single thread.
    NormValues(field string) (ndv NumericDocValues, err error)
}
type ApplyDeletesResult ¶
type ApplyDeletesResult struct {
// contains filtered or unexported fields
}
type AtomicReader ¶
type AtomicReader interface { IndexReader ARFieldsReader }
type AtomicReaderContext ¶
type AtomicReaderContext struct {
    *IndexReaderContextImpl
    Ord, DocBase int
    // contains filtered or unexported fields
}
func (*AtomicReaderContext) Children ¶
func (ctx *AtomicReaderContext) Children() []IndexReaderContext
func (*AtomicReaderContext) Leaves ¶
func (ctx *AtomicReaderContext) Leaves() []*AtomicReaderContext
func (*AtomicReaderContext) Reader ¶
func (ctx *AtomicReaderContext) Reader() IndexReader
func (*AtomicReaderContext) String ¶
func (ctx *AtomicReaderContext) String() string
type AtomicReaderImpl ¶
type AtomicReaderImpl struct {
    *IndexReaderImpl
    ARFieldsReader
    // contains filtered or unexported fields
}
func (*AtomicReaderImpl) Context ¶
func (r *AtomicReaderImpl) Context() IndexReaderContext
func (*AtomicReaderImpl) DocCount ¶
func (r *AtomicReaderImpl) DocCount(field string) (n int, err error)
func (*AtomicReaderImpl) SumDocFreq ¶
func (r *AtomicReaderImpl) SumDocFreq(field string) (n int64, err error)
func (*AtomicReaderImpl) SumTotalTermFreq ¶
func (r *AtomicReaderImpl) SumTotalTermFreq(field string) (n int64, err error)
func (*AtomicReaderImpl) Terms ¶
func (r *AtomicReaderImpl) Terms(field string) Terms
func (*AtomicReaderImpl) TotalTermFreq ¶
func (r *AtomicReaderImpl) TotalTermFreq(term *Term) (n int64, err error)
type AtomicReaderImplSPI ¶
type AtomicReaderImplSPI interface { IndexReaderImplSPI ARFieldsReader }
type BaseCompositeReader ¶
type BaseCompositeReader struct {
    *CompositeReaderImpl
    // contains filtered or unexported fields
}
func (*BaseCompositeReader) DocCount ¶
func (r *BaseCompositeReader) DocCount(field string) int
func (*BaseCompositeReader) DocFreq ¶
func (r *BaseCompositeReader) DocFreq(term *Term) (int, error)
func (*BaseCompositeReader) MaxDoc ¶
func (r *BaseCompositeReader) MaxDoc() int
func (*BaseCompositeReader) NumDocs ¶
func (r *BaseCompositeReader) NumDocs() int
func (*BaseCompositeReader) SumDocFreq ¶
func (r *BaseCompositeReader) SumDocFreq(field string) int64
func (*BaseCompositeReader) SumTotalTermFreq ¶
func (r *BaseCompositeReader) SumTotalTermFreq(field string) int64
func (*BaseCompositeReader) TermVectors ¶
func (r *BaseCompositeReader) TermVectors(docID int) error
func (*BaseCompositeReader) TotalTermFreq ¶
func (r *BaseCompositeReader) TotalTermFreq(term *Term) int64
func (*BaseCompositeReader) VisitDocument ¶
func (r *BaseCompositeReader) VisitDocument(docID int, visitor StoredFieldVisitor) error
type BaseCompositeReaderSPI ¶
type BaseCompositeReaderSPI interface { IndexReaderImplSPI CompositeReaderSPI }
type BlockedFlush ¶
type BlockedFlush struct {
// contains filtered or unexported fields
}
type BufferedUpdates ¶
type BufferedUpdates struct {
// contains filtered or unexported fields
}
Holds buffered deletes, by docID, term or query, for a single segment. This is used to hold buffered pending deletes against the to-be-flushed segment. Once the deletes are pushed (on flush in DocumentsWriter), these deletes are converted to a FrozenBufferedUpdates instance.
NOTE: instances of this class are accessed either via a private instance on DocumentsWriterPerThread, or via sync'd code by DocumentsWriterDeleteQueue
func (*BufferedUpdates) String ¶
func (bd *BufferedUpdates) String() string
type BufferedUpdatesStream ¶
Tracks the stream of BufferedUpdates. When DocumentsWriterPerThread flushes, its buffered deletes and updates are appended to this stream. We later apply them (resolving them to the actual docIDs, per segment) when a merge is started (only to the to-be-merged segments). We also apply them to all segments when an NRT reader is pulled, commit/close is called, or when too many deletes or updates are buffered and must be flushed (by RAM usage or by count).
Each packet is assigned a generation, and each flushed or merged segment is also assigned a generation, so we can track which BufferedUpdates packets apply to any given segment.
func (*BufferedUpdatesStream) RamBytesUsed ¶
func (ds *BufferedUpdatesStream) RamBytesUsed() int64
type BySizeDescendingSegments ¶
type BySizeDescendingSegments struct {
// contains filtered or unexported fields
}
func (*BySizeDescendingSegments) Len ¶
func (a *BySizeDescendingSegments) Len() int
func (*BySizeDescendingSegments) Less ¶
func (a *BySizeDescendingSegments) Less(i, j int) bool
func (*BySizeDescendingSegments) Swap ¶
func (a *BySizeDescendingSegments) Swap(i, j int)
type ByteSliceReader ¶
type ByteSliceReader struct {
    *util.DataInputImpl
    // contains filtered or unexported fields
}
IndexInput that knows how to read the byte slices written by Posting and PostingVector. We read the bytes in each slice until we hit the end of that slice at which point we read the forwarding address of the next slice and then jump to it.
func (*ByteSliceReader) ReadByte ¶
func (r *ByteSliceReader) ReadByte() (byte, error)
func (*ByteSliceReader) ReadBytes ¶
func (r *ByteSliceReader) ReadBytes(buf []byte) error
type CheckAbort ¶
type CheckAbort interface {
// contains filtered or unexported methods
}
Recording units of work when merging segments.
type CheckAbortNone ¶
type CheckAbortNone int
If you use this: IW.close(false) cannot abort your merge!
type CheckIndex ¶
type CheckIndex struct {
// contains filtered or unexported fields
}
Basic tool and API to check the health of an index and write a new segments file that removes reference to problematic segments.
As this tool checks every byte in the index, on a large index it can take a long time to run.
func NewCheckIndex ¶
func (*CheckIndex) CheckIndex ¶
func (ch *CheckIndex) CheckIndex(onlySegments []string) *CheckIndexStatus
Returns a Status instance detailing the state of the index.
As this method checks every byte in the specified segments, on a large index it can take quite a long time to run.
WARNING: make sure you only call this when the index is not opened by any writer.
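A minimal sketch of running the checker (NewCheckIndex's signature is not shown on this page, so the constructor call below is an assumption; dir is an already-open store.Directory):

ci := NewCheckIndex(dir)      // assumed constructor taking the index directory
status := ci.CheckIndex(nil)  // nil is assumed to mean "check all segments"
if !status.Clean {
    // the index has problems; inspect the other status fields
}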
type CheckIndexStatus ¶
type CheckIndexStatus struct {
    // True if no problems found with the index.
    Clean bool
    // True if we were unable to locate and load the segments_N file.
    MissingSegments bool
    // contains filtered or unexported fields
}
Returned from checkIndex() detailing the health and status of the index
type ClosingControl ¶
type ClosingControl struct {
// contains filtered or unexported fields
}
Use a separate goroutine to protect closing control.
type CoalescedUpdates ¶
type CoalescedUpdates struct {
// contains filtered or unexported fields
}
func (*CoalescedUpdates) String ¶
func (cd *CoalescedUpdates) String() string
type CommitPoint ¶
type CommitPoint struct {
// contains filtered or unexported fields
}
Holds details for each commit point. This class is also passed to the deletion policy. Note: this class has a natural ordering that is inconsistent with equals.
func (*CommitPoint) Delete ¶
func (cp *CommitPoint) Delete()
func (*CommitPoint) Directory ¶
func (cp *CommitPoint) Directory() store.Directory
func (*CommitPoint) FileNames ¶
func (cp *CommitPoint) FileNames() []string
func (*CommitPoint) Generation ¶
func (cp *CommitPoint) Generation() int64
func (*CommitPoint) IsDeleted ¶
func (cp *CommitPoint) IsDeleted() bool
func (*CommitPoint) SegmentCount ¶
func (cp *CommitPoint) SegmentCount() int
func (*CommitPoint) SegmentsFileName ¶
func (cp *CommitPoint) SegmentsFileName() string
func (*CommitPoint) String ¶
func (cp *CommitPoint) String() string
func (*CommitPoint) UserData ¶
func (cp *CommitPoint) UserData() map[string]string
type CompositeReader ¶
type CompositeReader interface { IndexReader CompositeReaderSPI }
type CompositeReaderContext ¶
type CompositeReaderContext struct {
    *IndexReaderContextImpl
    // contains filtered or unexported fields
}
func (*CompositeReaderContext) Children ¶
func (ctx *CompositeReaderContext) Children() []IndexReaderContext
func (*CompositeReaderContext) Leaves ¶
func (ctx *CompositeReaderContext) Leaves() []*AtomicReaderContext
func (*CompositeReaderContext) Reader ¶
func (ctx *CompositeReaderContext) Reader() IndexReader
func (*CompositeReaderContext) String ¶
func (ctx *CompositeReaderContext) String() string
type CompositeReaderContextBuilder ¶
type CompositeReaderContextBuilder struct {
// contains filtered or unexported fields
}
type CompositeReaderImpl ¶
type CompositeReaderImpl struct {
    *IndexReaderImpl
    CompositeReaderSPI
    // contains filtered or unexported fields
}
func (*CompositeReaderImpl) Context ¶
func (r *CompositeReaderImpl) Context() IndexReaderContext
func (*CompositeReaderImpl) String ¶
func (r *CompositeReaderImpl) String() string
type CompositeReaderSPI ¶
type CompositeReaderSPI interface {
// contains filtered or unexported methods
}
type ConcurrentMergeScheduler ¶
A MergeScheduler that runs each merge using a separate goroutine.
Specify the max number of goroutines that may run at once, and the maximum number of simultaneous merges with SetMaxMergesAndRoutines().
If the number of merges exceeds the max number of goroutines then the largest merges are paused until one of the smaller merges completes.
If more than MaxMergeCount() merges are requested then this class will forcefully throttle the incoming goroutines by pausing until one or more merges complete.
func NewConcurrentMergeScheduler ¶
func NewConcurrentMergeScheduler() *ConcurrentMergeScheduler
func (*ConcurrentMergeScheduler) Close ¶
func (cms *ConcurrentMergeScheduler) Close() error
func (*ConcurrentMergeScheduler) Merge ¶
func (cms *ConcurrentMergeScheduler) Merge(writer *IndexWriter, trigger MergeTrigger, newMergesFound bool) error
func (*ConcurrentMergeScheduler) SetMaxMergesAndRoutines ¶
func (cms *ConcurrentMergeScheduler) SetMaxMergesAndRoutines(maxMergeCount, maxRoutineCount int)
Sets the maximum number of merge goroutines and simultaneous merges allowed.
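For example, a hedged sketch of wiring a tuned scheduler into a writer config (matchVersion and analyzer are assumed to already be in scope):

cms := NewConcurrentMergeScheduler()
cms.SetMaxMergesAndRoutines(4, 2) // allow up to 4 pending merges, at most 2 running at once
conf := NewIndexWriterConfig(matchVersion, analyzer).SetMergeScheduler(cms)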
func (*ConcurrentMergeScheduler) String ¶
func (cms *ConcurrentMergeScheduler) String() string
type CoreClosedListener ¶
type CoreClosedListener interface {
// contains filtered or unexported methods
}
type DefaultIndexingChain ¶
type DefaultIndexingChain struct {
// contains filtered or unexported fields
}
Default general purpose indexing chain, which handles indexing all types of fields
type DeleteSlice ¶
type DeleteSlice struct {
// contains filtered or unexported fields
}
type DirectoryReader ¶
type DirectoryReader interface {
    IndexReader
    // doOpenIfChanged() error
    // doOpenIfChanged(c IndexCommit) error
    // doOpenIfChanged(w IndexWriter, c IndexCommit) error
    Version() int64
    IsCurrent() bool
}
func OpenDirectoryReader ¶
func OpenDirectoryReader(directory store.Directory) (r DirectoryReader, err error)
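A minimal sketch, using the unqualified names from this page (callers would qualify them with this package's import path), of opening a reader over an existing directory and reporting its document counts:

func printDocCounts(dir store.Directory) error {
    r, err := OpenDirectoryReader(dir)
    if err != nil {
        return err
    }
    defer r.Close()
    // requires importing "fmt" in addition to the store package
    fmt.Printf("%v live docs, maxDoc=%v\n", r.NumDocs(), r.MaxDoc())
    return nil
}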
type DirectoryReaderImpl ¶
type DirectoryReaderImpl struct {
    *BaseCompositeReader
    // contains filtered or unexported fields
}
type DocConsumer ¶
type DocConsumer interface {
// contains filtered or unexported methods
}
type DocValuesFieldUpdates ¶
type DocValuesFieldUpdates struct { }
Holds updates of a single DocValues field, for a set of documents.
type DocValuesFieldUpdatesContainer ¶
type DocValuesFieldUpdatesContainer struct { }
func (*DocValuesFieldUpdatesContainer) String ¶
func (c *DocValuesFieldUpdatesContainer) String() string
type DocValuesStatus ¶
type DocValuesStatus struct {
// contains filtered or unexported fields
}
type DocValuesUpdate ¶
type DocValuesUpdate struct {
// contains filtered or unexported fields
}
An in-place update to a DocValues field.
func (*DocValuesUpdate) String ¶
func (u *DocValuesUpdate) String() string
type DocValuesWriter ¶
type DocValuesWriter interface {
// contains filtered or unexported methods
}
type DocumentsWriter ¶
This class accepts multiple added documents and directly writes segment files.
Each added document is passed to the indexing chain, which in turn processes the document into the different codec formats. Some formats write bytes to files immediately, e.g. stored fields and term vectors, while others are buffered by the indexing chain and written only on flush.
Other consumers, e.g. FreqProxTermsWriter and NormsConsumer, buffer bytes in RAM and flush only when a new segment is produced.
Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory.
Goroutines:
Multiple goroutines are allowed into AddDocument at once. There is an initial synchronized call to ThreadState() which allocates a ThreadState for this goroutine. The same goroutine will get the same ThreadState over time (goroutine affinity) so that if there are consistent patterns (for example each goroutine is indexing a different content source) then we make better use of RAM. Then processDocument() is called on that ThreadState without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized "finishDocument" is called to flush changes to the directory.
When flush is called by IndexWriter we forcefully idle all goroutines and flush only once they are all idle. This means you can call flush with a given goroutine even while other goroutines are actively adding/deleting documents.
Exceptions:
Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors directly to files in the directory, there are certain limited times when an error can corrupt this state. For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, a memory issue while appending to the in-memory posting lists can corrupt that posting list. We call such errors "aborting errors". In these cases we must call abort() to discard all docs added since the last flush.
All other errors ("non-aborting errors") can still partially update the index structures. These updates are consistent, but, they represent only a part of the document seen up until the error was hit. When this happens, we immediately mark the document as deleted so that the document is always atomically ("all or none") added to the index.
type DocumentsWriterDeleteQueue ¶
type DocumentsWriterDeleteQueue struct {
// contains filtered or unexported fields
}
DocumentsWriterDeleteQueue is a non-blocking linked pending-deletes queue. In contrast to other queue implementations, we only maintain the tail of the queue. A delete queue is always used in the context of a set of DWPTs and a global delete pool. Each of the DWPTs and the global pool need to maintain their 'own' head of the queue (as a DeleteSlice instance per DWPT). The difference between the DWPT and the global pool is that the DWPT starts maintaining a head once it has added its first document, since for its segment's private deletes only the deletes after that document are relevant. The global pool instead starts maintaining the head once this instance is created, by taking the sentinel instance as its initial head.
Since each DeleteSlice maintains its own head and the list is only singly linked, the garbage collector takes care of pruning the list for us. All nodes in the list that are still relevant should be either directly or indirectly referenced by one of the DWPTs' private DeleteSlices or by the global BufferedUpdates slice.
Each DWPT as well as the global delete pool maintain their private DeleteSlice instance. In the DWPT case, updating a slice is equivalent to atomically finishing the document. The slice update guarantees a "happens before" relationship to all other updates in the same indexing session. When a DWPT updates a document it:
- consumes a document and finishes its processing
- updates its private DeleteSlice either by calling updateSlice() or addTermToDeleteSlice() (if the document has a delTerm)
- applies all deletes in the slice to its private BufferedUpdates and resets it
- increments its internal document id
The DWPT also doesn't apply its current document's delete term until it has updated its delete slice, which ensures the consistency of the update. If the update fails before the DeleteSlice could have been updated, the delete term will not be added to its private deletes, nor to the global deletes.
func (*DocumentsWriterDeleteQueue) RamBytesUsed ¶
func (q *DocumentsWriterDeleteQueue) RamBytesUsed() int64
func (*DocumentsWriterDeleteQueue) String ¶
func (dq *DocumentsWriterDeleteQueue) String() string
type DocumentsWriterFlushControl ¶
type DocumentsWriterFlushControl struct {
    sync.Locker
    *DocumentsWriterStallControl // mixin
    // contains filtered or unexported fields
}
This class controls DocumentsWriterPerThread (DWPT) flushing during indexing. It tracks the memory consumption per DWPT and uses a configured FlushPolicy to decide if a DWPT must flush.
In addition to the FlushPolicy the flush control might set certain DWPT as flush pending iff a DWPT exceeds the RAMPerThreadHardLimitMB() to prevent address space exhaustion.
func (*DocumentsWriterFlushControl) String ¶
func (fc *DocumentsWriterFlushControl) String() string
type DocumentsWriterPerThread ¶
type DocumentsWriterPerThread struct {
// contains filtered or unexported fields
}
func (*DocumentsWriterPerThread) String ¶
func (w *DocumentsWriterPerThread) String() string
type DocumentsWriterPerThreadPool ¶
DocumentsWriterPerThreadPool controls ThreadState instances and their goroutine assignment during indexing. Each ThreadState holds a reference to a DocumentsWriterPerThread that, once obtained from the pool, is used exclusively for indexing a single document by the obtaining goroutine. Each indexing goroutine must obtain such a ThreadState to make progress. Depending on the DocumentsWriterPerThreadPool implementation, ThreadState assignments might differ from document to document.
Once a DocumentsWriterPerThread is selected for flush, the pool reuses the flushing DocumentsWriterPerThread's ThreadState with a new DocumentsWriterPerThread instance.
A goroutine is different from a Java thread. So instead of thread affinity, I will use channels and concurrently running goroutines to hold individual DocumentsWriterPerThread instances and states.
func NewDocumentsWriterPerThreadPool ¶
func NewDocumentsWriterPerThreadPool(maxNumThreadStates int) *DocumentsWriterPerThreadPool
type DocumentsWriterStallControl ¶
type DocumentsWriterStallControl struct {
    sync.Locker
    *sync.Cond
    // contains filtered or unexported fields
}
Controls the health status of a DocumentsWriter session. This class is used to block incoming indexing goroutines if flushing is significantly slower than indexing, to ensure the DocumentsWriter's health. If flushing is significantly slower than indexing, the net memory used within an IndexWriter session can increase very quickly and easily exceed the available memory.
To prevent OOM errors and ensure IndexWriter's stability, this class blocks incoming goroutines from indexing once 2 x the number of available ThreadStates in DocumentsWriterPerThreadPool is exceeded. Once flushing catches up and the number of flushing DWPTs is equal to or lower than the number of active ThreadStates, goroutines are released and can continue indexing.
type Event ¶
type Event func(writer *IndexWriter, triggerMerge, clearBuffers bool) error
Interface for internal atomic events. See DocumentsWriter for details. Events are executed concurrently and no order is guaranteed. Each event should only rely on the serializability within its process method. All actions that must happen before or after a certain action must be encoded inside the process() method.
type FieldInvertState ¶
type FieldInvertState struct {
// contains filtered or unexported fields
}
Tracks the number and position / offset parameters of terms being added to the index. The information collected in this class is also used to calculate the normalization factor for a field
func (*FieldInvertState) Boost ¶
func (st *FieldInvertState) Boost() float32
Get boost value. This is the cumulative product of document boost and field boost for all field instances sharing the same field name.
func (*FieldInvertState) Length ¶
func (st *FieldInvertState) Length() int
Get total number of terms in this field.
func (*FieldInvertState) NumOverlap ¶
func (st *FieldInvertState) NumOverlap() int
Get the number of terms with positionIncrement == 0.
type FieldNormStatus ¶
type FieldNormStatus struct {
// contains filtered or unexported fields
}
type FindSegmentsFile ¶
type FindSegmentsFile struct {
// contains filtered or unexported fields
}
func NewFindSegmentsFile ¶
func NewFindSegmentsFile(directory store.Directory, doBody func(segmentFileName string) (interface{}, error)) *FindSegmentsFile
type FlushByRamOrCountsPolicy ¶
type FlushByRamOrCountsPolicy struct {
*FlushPolicyImpl
}
Default FlushPolicy implementation that flushes new segments based on RAM used and document count depending on the IndexWriter's IndexWriterConfig. It also applies pending deletes based on the number of buffered delete terms.
1. onDelete() - applies pending delete operations based on the global number of buffered delete terms, iff MaxBufferedDeleteTerms() is enabled
2. onInsert() - flushes either on the number of documents per DocumentsWriterPerThread (NumDocsInRAM()) or on the global active memory consumption in the current indexing session, iff MaxBufferedDocs() or RAMBufferSizeMB() is enabled respectively
3. onUpdate() - calls onInsert() and onDelete() in order
All IndexWriterConfig settings are used to mark DocumentsWriterPerThread as flush pending during indexing with respect to their live updates.
If SetRAMBufferSizeMB() is enabled, the largest ram consuming DocumentsWriterPerThread will be marked as pending iff the global active RAM consumption is >= the configured max RAM buffer.
type FlushPolicy ¶
type FlushPolicy interface {
// contains filtered or unexported methods
}
FlushPolicy controls when segments are flushed from a RAM-resident internal data structure to the IndexWriter's Directory.
Segments are traditionally flushed by:
1. RAM consumption - configured via IndexWriterConfig.SetRAMBufferSizeMB()
2. Number of RAM-resident documents - configured via IndexWriterConfig.SetMaxBufferedDocs()
The policy also applies pending delete operations (by term and/or query), given the threshold set in IndexWriterConfig.SetMaxBufferedDeleteTerms().
IndexWriter consults the provided FlushPolicy to control the flushing process. The policy is informed for each added or updated document as well as for each delete term. Based on the information provided via ThreadState and DocumentsWriterFlushControl, the FlushPolicy decides if a DocumentsWriterPerThread needs flushing, marking it as flush-pending via DocumentsWriterFlushControl.SetFlushingPending(), or if deletes need to be applied.
type FlushPolicyImpl ¶
type FlushPolicyImplSPI ¶
type FlushPolicyImplSPI interface {
// contains filtered or unexported methods
}
type FlushTicket ¶
type FlushTicket interface {
// contains filtered or unexported methods
}
type FlushTicketImpl ¶
type FlushTicketImpl struct {
// contains filtered or unexported fields
}
type FlushedSegment ¶
type FlushedSegment struct {
// contains filtered or unexported fields
}
type FreqProxPostingsArray ¶
type FreqProxPostingsArray struct {
    *ParallelPostingsArray
    // contains filtered or unexported fields
}
type FreqProxTermsWriter ¶
type FreqProxTermsWriter struct {
*TermsHashImpl
}
type FreqProxTermsWriterPerField ¶
type FreqProxTermsWriterPerField struct {
    *TermsHashPerFieldImpl
    // contains filtered or unexported fields
}
TODO: break into separate freq and prox writers as codecs; make a separate container (tii/tis/skip/*) that can be configured as any number of files 1..N
type FreqProxTermsWriterPerFields ¶
type FreqProxTermsWriterPerFields []*FreqProxTermsWriterPerField
func (FreqProxTermsWriterPerFields) Len ¶
func (a FreqProxTermsWriterPerFields) Len() int
func (FreqProxTermsWriterPerFields) Less ¶
func (a FreqProxTermsWriterPerFields) Less(i, j int) bool
func (FreqProxTermsWriterPerFields) Swap ¶
func (a FreqProxTermsWriterPerFields) Swap(i, j int)
type FrozenBufferedUpdates ¶
type FrozenBufferedUpdates struct {
// contains filtered or unexported fields
}
Holds buffered deletes and updates by term or query, once pushed. Pushed deletes/updates are write-once, so we shift to a more memory-efficient data structure to hold them. We don't hold docIDs because these are applied on flush.
func (*FrozenBufferedUpdates) String ¶
func (bd *FrozenBufferedUpdates) String() string
type IndexCommit ¶
type IndexCommit interface {
    // Get the segments file (segments_N) associated with the commit point.
    SegmentsFileName() string
    // Returns all index files referenced by this commit point.
    FileNames() []string
    // Returns the Directory for the index.
    Directory() store.Directory
    // Delete this commit point. This only applies when using the commit
    // point in the context of IndexWriter's IndexDeletionPolicy. Upon
    // calling this, the writer is notified that this commit point should
    // be deleted. The decision that a commit point should be deleted is
    // taken by the IndexDeletionPolicy in effect, and therefore this
    // should only be called by its onInit() or onCommit() methods.
    Delete()
    // Returns true if this commit should be deleted; this is only used
    // by IndexWriter after invoking the IndexDeletionPolicy.
    IsDeleted() bool
    // Returns the number of segments referenced by this commit.
    SegmentCount() int
    // Returns the generation (the _N in segments_N) for this IndexCommit.
    Generation() int64
    // Returns userData, previously passed to SetCommitData(map) for this commit.
    UserData() map[string]string
}
Expert: represents a single commit into an index as seen by the IndexDeletionPolicy or IndexReader.
Changes to the content of an index are made visible only after the writer who made that change commits by writing a new segments file (segments_N). This point in time, when the action of writing of a new segments file to the directory is completed, is an index commit.
Each index commit point has a unique segments file associated with it. The segments file associated with a later index commit point would have a larger N.
type IndexCommits ¶
type IndexCommits []IndexCommit
func (IndexCommits) Len ¶
func (s IndexCommits) Len() int
func (IndexCommits) Less ¶
func (s IndexCommits) Less(i, j int) bool
func (IndexCommits) Swap ¶
func (s IndexCommits) Swap(i, j int)
type IndexDeletionPolicy ¶
type IndexDeletionPolicy interface {
// contains filtered or unexported methods
}
Expert: policy for deletion of stale index commits.
Implement this interface, and pass it to one of the IndexWriter or IndexReader constructors, to customize when older point-in-time commits are deleted from the index directory. The default deletion policy is KeepOnlyLastCommitDeletionPolicy, which always removes old commits as soon as a new commit is done (this matches the behavior before 2.2).
One expected use case for this (and the reason why it was first created) is to work around problems with an index directory accessed via filesystems like NFS, because NFS does not provide the "delete on last close" semantics that Lucene's "point in time" search normally relies on. By implementing a custom deletion policy, such as "a commit is only removed once it has been stale for more than X minutes", you can give your readers time to refresh to the new commit before IndexWriter removes the old commits. Note that doing so will increase the storage requirements of the index. See [LUCENE-710] for details.
Implementers of sub-classes should make sure that Clone() returns an independent instance able to work with any other IndexWriter or Directory instance.
type IndexFileDeleter ¶
type IndexFileDeleter struct {
// contains filtered or unexported fields
}
This class keeps track of each SegmentInfos instance that is still "live", either because it corresponds to a segments_N file in the Directory (a "commit", i.e. a committed SegmentInfos) or because it's an in-memory SegmentInfos that a writer is actively updating but has not yet committed. This class uses simple reference counting to map the live SegmentInfos instances to individual files in the Directory.
The same directory file may be referenced by more than one IndexCommit, i.e. more than one SegmentInfos. Therefore we count how many commits reference each file. When all the commits referencing a certain file have been deleted, the refcount for that file becomes zero, and the file is deleted.
A separate deletion policy interface (IndexDeletionPolicy) is consulted on creation (onInit) and once per commit (onCommit), to decide when a commit should be removed.
It is the business of the IndexDeletionPolicy to choose when to delete commit points. The actual mechanics of file deletion, retrying, etc, derived from the deletion of commit points is the business of the IndexFileDeleter.
The current default deletion policy is KeepOnlyLastCommitDeletionPolicy, which removes all prior commits when a new commit has completed. This matches the behavior before 2.2.
Note that you must hold the write.lock before instantiating this class. It opens segments_N file(s) directly with no retry logic.
func (*IndexFileDeleter) Close ¶
func (fd *IndexFileDeleter) Close() error
type IndexReader ¶
type IndexReader interface {
    io.Closer
    NumDocs() int
    MaxDoc() int
    // Expert: visits the fields of a stored document, for custom
    // processing/loading of each field. If you simply want to load all
    // fields, use Document(). If you want to load a subset, use
    // DocumentStoredFieldVisitor.
    VisitDocument(docID int, visitor StoredFieldVisitor) error
    // Returns the stored fields of the n-th Document in this index. This
    // is just sugar for using DocumentStoredFieldVisitor.
    //
    // NOTE: for performance reasons, this method does not check if the
    // requested document is deleted, and therefore asking for a deleted
    // document may yield unspecified results. Usually this is not
    // required, however you can test if the doc is deleted by checking
    // the Bits returned from MultiFields' getLiveDocs.
    //
    // NOTE: only the content of a field is returned, if that field was
    // stored during indexing. Metadata like boost, omitNorm,
    // IndexOptions, tokenized, etc., are not preserved.
    //
    // Returns an error if there is a low-level IO error.
    //
    // TODO: we need a separate StoredField, so that the Document
    // returned here contains that class, not model.IndexableField.
    Document(docID int) (doc *docu.Document, err error)
    Context() IndexReaderContext
    Leaves() []*AtomicReaderContext
    // Returns the number of documents containing the term. This method
    // returns 0 if the term or field does not exist. This method does
    // not take into account deleted documents that have not yet been
    // merged away.
    DocFreq(*Term) (int, error)
    // contains filtered or unexported methods
}
type IndexReaderContext ¶
type IndexReaderContext interface {
    Reader() IndexReader
    Parent() *CompositeReaderContext
    Leaves() []*AtomicReaderContext
    Children() []IndexReaderContext
}
func TopLevelContext ¶
func TopLevelContext(ctx IndexReaderContext) IndexReaderContext
Walks up the reader tree and return the given context's top level reader context, or in other words the reader tree's root context.
type IndexReaderContextImpl ¶
type IndexReaderContextImpl struct {
// contains filtered or unexported fields
}
func (*IndexReaderContextImpl) Parent ¶
func (ctx *IndexReaderContextImpl) Parent() *CompositeReaderContext
type IndexReaderImpl ¶
type IndexReaderImpl struct {
    IndexReaderImplSPI
    // contains filtered or unexported fields
}
func (*IndexReaderImpl) Close ¶
func (r *IndexReaderImpl) Close() error
func (*IndexReaderImpl) Document ¶
func (r *IndexReaderImpl) Document(docID int) (doc *docu.Document, err error)
func (*IndexReaderImpl) Leaves ¶
func (r *IndexReaderImpl) Leaves() []*AtomicReaderContext
type IndexReaderImplSPI ¶
type IndexReaderWarmer ¶
type IndexReaderWarmer interface {
// contains filtered or unexported methods
}
If openDirectoryReader() has been called (i.e., this writer is in near real-time mode), then after a merge completes, this class can be invoked to warm the reader on the newly merged segment, before the merge commits. This is not required for near real-time search, but will reduce search latency on opening a new near real-time reader after a merge completes.
NOTE: warm is called before any deletes have been carried over to the merged segment.
type IndexWriter ¶
type IndexWriter struct {
    sync.Locker
    *ClosingControl
    *MergeControl
    // contains filtered or unexported fields
}
An IndexWriter creates and maintains an index.
The OpenMode option on IndexWriterConfig.SetOpenMode() determines whether a new index is created, or whether an existing index is opened. Note that you can open an index with OPEN_MODE_CREATE even while readers are using the index. The old readers will continue to search the "point in time" snapshot they had opened, and won't see the newly created index until they re-open. If OPEN_MODE_CREATE_OR_APPEND is used, IndexWriter will create a new index if there is not already an index at the provided path, and otherwise open the existing index.
In either case, documents are added with AddDocument() and removed with DeleteDocumentsByTerm() or DeleteDocumentsByQuery(). A document can be updated with UpdateDocuments() (which just deletes and then adds the entire document). When finished adding, deleting and updating documents, Close() should be called.
...
func NewIndexWriter ¶
func NewIndexWriter(d store.Directory, conf *IndexWriterConfig) (w *IndexWriter, err error)
Constructs a new IndexWriter per the settings given in conf. If you want to make "live" changes to this writer instance, use Config().
NOTE: after this writer is created, the given configuration instance cannot be passed to another writer. If you intend to do so, you should clone it beforehand.
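A hedged sketch of the typical lifecycle, using the unqualified names from this page (dir, conf and docs are assumed to already exist; each element of docs is a []IndexableField):

w, err := NewIndexWriter(dir, conf)
if err != nil {
    return err
}
for _, doc := range docs {
    if err = w.AddDocument(doc); err != nil {
        w.Rollback() // discard everything since the last commit
        return err
    }
}
if err = w.Commit(); err != nil { // make the changes visible and durable
    return err
}
return w.Close()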
func (*IndexWriter) AddDocument ¶
func (w *IndexWriter) AddDocument(doc []IndexableField) error
L1201
Adds a document to this index.
Note that if an Error is hit (for example disk full) then the index will be consistent, but this document may not have been added. Furthermore, it's possible the index will have one segment in non-compound format even when using compound files (when a merge has partially succeeded).
This method periodically flushes pending documents to the Directory (see flush()), and also periodically triggers segment merges in the index according to the MergePolicy in use.
Merges temporarily consume space in the directory. The amount of space required is up to 1X the size of all segments being merged, when no readers/searchers are open against the index, and up to 2X the size of all segments being merged when readers/searchers are open against the index (see forceMerge() for details). The sequence of primitive merge operations performed is governed by the merge policy.
Note that each term in the document can be no longer than MAX_TERM_LENGTH in bytes, otherwise an error will be returned.
Note that it's possible to create an invalid Unicode string in Java if a UTF16 surrogate pair is malformed. In this case, the invalid characters are silently replaced with the Unicode replacement character U+FFFD.
func (*IndexWriter) AddDocumentWithAnalyzer ¶
func (w *IndexWriter) AddDocumentWithAnalyzer(doc []IndexableField, analyzer analysis.Analyzer) error
Adds a document to this index, using the provided analyzer instead of the value of Analyzer().
See AddDocument() for details on index and IndexWriter state after an error, and flushing/merging temporary free space requirements.
NOTE: if this method hits a memory issue, you should immediately close the writer. See above for details.
func (*IndexWriter) Close ¶
func (w *IndexWriter) Close() error
Commits all changes to an index, waits for pending merges to complete, and closes all associated files.
Note that:
- If you called prepareCommit but failed to call commit, this method will panic and the IndexWriter will not be closed.
- If this method throws any other exception, the IndexWriter will be closed, but changes may have been lost.
Note that this may be a costly operation, so, try to re-use a single writer instead of closing and opening a new one. See commit() for caveats about write caching done by some IO devices.
NOTE: You must ensure no other threads are still making changes at the same time that this method is invoked.
func (*IndexWriter) Commit ¶
func (w *IndexWriter) Commit() error
Commits all pending changes (added & deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss. Note that this does not wait for any running background merges to finish. This may be a costly operation, so you should test the cost in your application and do it only when really necessary.
Note that this operation calls Directory.sync on the index files. That call should not return until the file contents and metadata are on stable storage. For FSDirectory, this calls the OS's fsync. But, beware: some hardware devices may in fact cache writes even during fsync, and return before the bits are actually on stable storage, to give the appearance of faster performance. If you have such a device, and it does not have a battery backup (for example) then on power loss it may still lose data. Lucene cannot guarantee consistency on such devices.
func (*IndexWriter) Directory ¶
func (w *IndexWriter) Directory() store.Directory
Returns the Directory used by this index.
func (*IndexWriter) MergingSegments ¶
func (w *IndexWriter) MergingSegments() map[*SegmentCommitInfo]bool
Expert: to be used by a MergePolicy to avoid selecting merges for segments already being merged. The returned collection is not cloned, and thus is only safe to access if you hold IndexWriter's lock (which you do when IndexWriter invokes the MergePolicy).
func (*IndexWriter) Rollback ¶
func (w *IndexWriter) Rollback() error
Close the IndexWriter without committing any changes that have occurred since the last commit (or since it was opened, if commit hasn't been called). This removes any temporary files that had been created, after which the state of the index will be the same as it was when commit() was last called or when this writer was first opened. This also clears a previous call to prepareCommit()
func (*IndexWriter) UpdateDocument ¶
func (w *IndexWriter) UpdateDocument(term *Term, doc []IndexableField, analyzer analysis.Analyzer) error
L1545
Updates a document by first deleting the document(s) containing term and then adding the new document. The delete and then add are atomic as seen by a reader on the same index (flush may happen only after the add).
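For example, a sketch that re-indexes the document keyed by an "id" field (w is an open *IndexWriter, newDoc a []IndexableField and analyzer an analysis.Analyzer already in scope; NewTerm is assumed to be this package's *Term constructor, so adjust if it is named differently):

term := NewTerm("id", "42") // assumed constructor for *Term
if err := w.UpdateDocument(term, newDoc, analyzer); err != nil {
    return err
}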
type IndexWriterConfig ¶
type IndexWriterConfig struct {
    *LiveIndexWriterConfigImpl
    // contains filtered or unexported fields
}
Holds all the configuration that is used to create an IndexWriter. Once IndexWriter has been created with this object, changes to this object will not affect the IndexWriter instance. For that, use LiveIndexWriterConfig that is returned from IndexWriter.Config().
All setter methods return IndexWriterConfig to allow chaining settings conveniently, for example:
conf := NewIndexWriterConfig(matchVersion, analyzer).setter1().setter2()
func NewIndexWriterConfig ¶
func NewIndexWriterConfig(matchVersion util.Version, analyzer analysis.Analyzer) *IndexWriterConfig
Creates a new config with defaults that match the specified Version, using the provided Analyzer. If matchVersion is >= 3.2, TieredMergePolicy is used for merging; otherwise LogByteSizeMergePolicy. Note that TieredMergePolicy is free to select non-contiguous merges, which means docIDs may not remain monotonic over time. If this is a problem, you should switch to LogByteSizeMergePolicy or LogDocMergePolicy.
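Expanding the chained-setter example above into a concrete sketch (matchVersion and a are supplied by the caller; the chosen values are illustrative only):

    conf := NewIndexWriterConfig(matchVersion, a).
        SetMaxBufferedDocs(1000). // flush every 1,000 buffered documents
        SetUseCompoundFile(false) // favor batch-indexing throughput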
func (*IndexWriterConfig) InfoStream ¶
func (conf *IndexWriterConfig) InfoStream() util.InfoStream
func (*IndexWriterConfig) MergePolicy ¶
func (conf *IndexWriterConfig) MergePolicy() MergePolicy
func (*IndexWriterConfig) SetIndexDeletionPolicy ¶
func (conf *IndexWriterConfig) SetIndexDeletionPolicy(delPolicy IndexDeletionPolicy) *IndexWriterConfig
Expert: allows an optional IndexDeletionPolicy implementation to be specified. You can use this to control when prior commits are deleted from the index. The default policy is KeepOnlyLastCommitDeletionPolicy which removes all prior commits as soon as a new commit is done (this matches behavior before 2.2). Creating your own policy can allow you to explicitly keep previous "point in time" commits alive in the index for some time, to allow readers to refresh to the new commit without having the old commit deleted out from under them. This is necessary on filesystems like NFS that do not support "delete on last close" semantics, which Lucene's "point in time" search normally relies on.
NOTE: the deletion policy can not be nil
func (*IndexWriterConfig) SetInfoStream ¶
func (conf *IndexWriterConfig) SetInfoStream(infoStream util.InfoStream) *IndexWriterConfig
Information about merges, deletes and a message when maxFieldLength is reached will be printed to this stream. Must not be nil, but NO_OUTPUT may be used to suppress output.
func (*IndexWriterConfig) SetMaxBufferedDocs ¶
func (conf *IndexWriterConfig) SetMaxBufferedDocs(maxBufferedDocs int) *IndexWriterConfig
func (*IndexWriterConfig) SetMergeScheduler ¶
func (conf *IndexWriterConfig) SetMergeScheduler(mergeScheduler MergeScheduler) *IndexWriterConfig
Expert: sets the merge scheduler used by this writer. The default is ConcurrentMergeScheduler.
NOTE: the merge scheduler cannot be nil.
Only takes effect when IndexWriter is first created.
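For example, to run merges sequentially on the calling goroutine instead of using the default ConcurrentMergeScheduler (a sketch; conf is an *IndexWriterConfig built as shown earlier):

    conf.SetMergeScheduler(NewSerialMergeScheduler())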
func (*IndexWriterConfig) SetMergedSegmentWarmer ¶
func (conf *IndexWriterConfig) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *IndexWriterConfig
func (*IndexWriterConfig) SetReaderPooling ¶
func (conf *IndexWriterConfig) SetReaderPooling(readerPooling bool) *IndexWriterConfig
By default, IndexWriter does not pool the SegmentReaders it must open for deletions and merging, unless a near-real-time reader has been obtained by calling openDirectoryReader(IndexWriter, bool). This method lets you enable pooling without getting a near-real-time reader. NOTE: if you set this to false, IndexWriter will still pool readers once openDirectoryReader(IndexWriter, bool) is called.
func (*IndexWriterConfig) SetReaderTermsIndexDivisor ¶
func (conf *IndexWriterConfig) SetReaderTermsIndexDivisor(divisor int) *IndexWriterConfig
func (*IndexWriterConfig) SetSimilarity ¶
func (conf *IndexWriterConfig) SetSimilarity(similarity Similarity) *IndexWriterConfig
Expert: set the Similarity implementation used by this IndexWriter.
NOTE: the similarity cannot be nil.
Only takes effect when IndexWriter is first created.
func (*IndexWriterConfig) SetUseCompoundFile ¶
func (conf *IndexWriterConfig) SetUseCompoundFile(useCompoundFile bool) *IndexWriterConfig
func (*IndexWriterConfig) String ¶
func (conf *IndexWriterConfig) String() string
type IndexingChain ¶
type IndexingChain func(documentsWriterPerThread *DocumentsWriterPerThread) DocConsumer
Returns the DocConsumer that the DocumentsWriter calls to process the documents.
type IntBlockAllocator ¶
type IntBlockAllocator struct { *util.IntAllocatorImpl // contains filtered or unexported fields }
func (*IntBlockAllocator) Recycle ¶
func (alloc *IntBlockAllocator) Recycle(blocks [][]int)
type KeepOnlyLastCommitDeletionPolicy ¶
type KeepOnlyLastCommitDeletionPolicy bool
This IndexDeletionPolicy implementation that keeps only the most recent commit and immediately removes all prior commits after a new commit is done. This is the default deletion policy.
func (KeepOnlyLastCommitDeletionPolicy) Clone ¶
func (p KeepOnlyLastCommitDeletionPolicy) Clone() IndexDeletionPolicy
type LiveIndexWriterConfig ¶
type LiveIndexWriterConfig interface {
    TermIndexInterval() int
    MaxBufferedDocs() int
    RAMBufferSizeMB() float64
    Similarity() Similarity
    Codec() Codec
    MergePolicy() MergePolicy
    RAMPerThreadHardLimitMB() int
    InfoStream() util.InfoStream
    UseCompoundFile() bool
    // contains filtered or unexported methods
}
Holds all the configuration used by IndexWriter, with a few setters for settings that can be changed on an IndexWriter instance "live".
All the fields are either readonly or volatile.
type LiveIndexWriterConfigImpl ¶
type LiveIndexWriterConfigImpl struct {
// contains filtered or unexported fields
}
func (*LiveIndexWriterConfigImpl) Codec ¶
func (conf *LiveIndexWriterConfigImpl) Codec() Codec
Returns the current Codec.
func (*LiveIndexWriterConfigImpl) InfoStream ¶
func (conf *LiveIndexWriterConfigImpl) InfoStream() util.InfoStream
Returns InfoStream used for debugging.
func (*LiveIndexWriterConfigImpl) MaxBufferedDocs ¶
func (conf *LiveIndexWriterConfigImpl) MaxBufferedDocs() int
Returns the number of buffered added documents that will trigger a flush if enabled.
func (*LiveIndexWriterConfigImpl) MergePolicy ¶
func (conf *LiveIndexWriterConfigImpl) MergePolicy() MergePolicy
Returns the current MergePolicy in use by this writer.
func (*LiveIndexWriterConfigImpl) RAMBufferSizeMB ¶
func (conf *LiveIndexWriterConfigImpl) RAMBufferSizeMB() float64
func (*LiveIndexWriterConfigImpl) RAMPerThreadHardLimitMB ¶
func (conf *LiveIndexWriterConfigImpl) RAMPerThreadHardLimitMB() int
func (*LiveIndexWriterConfigImpl) SetMaxBufferedDocs ¶
func (conf *LiveIndexWriterConfigImpl) SetMaxBufferedDocs(maxBufferedDocs int) *LiveIndexWriterConfigImpl
Determines the minimal number of documents required before the buffered in-memory documents are flushed as a new Segment. Large values generally give faster indexing.
When this is set, the writer will flush every maxBufferedDocs added documents. Pass in DISABLE_AUTO_FLUSH to prevent triggering a flush due to number of buffered documents. Note that if flushing by RAM usage is also enabled, then the flush will be triggered by whichever comes first.
Disabled by default (writer flushes by RAM usage).
Takes effect immediately, but only the next time a document is added, updated or deleted.
func (*LiveIndexWriterConfigImpl) SetMergePolicy ¶
func (conf *LiveIndexWriterConfigImpl) SetMergePolicy(mergePolicy MergePolicy) *LiveIndexWriterConfigImpl
Expert: MergePolicy is invoked whenever there are changes to the segments in the index. Its role is to select which merges to do, if any, and return a MergeSpecification describing the merges. It also selects merges to do for forceMerge.
func (*LiveIndexWriterConfigImpl) SetMergedSegmentWarmer ¶
func (conf *LiveIndexWriterConfigImpl) SetMergedSegmentWarmer(mergeSegmentWarmer IndexReaderWarmer) *LiveIndexWriterConfigImpl
Sets the merged segment warmer.
Takes effect on the next merge.
func (*LiveIndexWriterConfigImpl) SetReaderTermsIndexDivisor ¶
func (conf *LiveIndexWriterConfigImpl) SetReaderTermsIndexDivisor(divisor int) *LiveIndexWriterConfigImpl
Sets the termsIndexDivisor passed to any readers that IndexWriter opens, for example when applying deletes or creating a near-real-time reader in OpenDirectoryReader(). If you pass -1, the terms index won't be loaded by the readers. This is only useful in advanced situations when you will only .Next() through all terms; attempts to seek will hit an error.
Takes effect immediately, but only applies to readers opened after this call.
NOTE: divisor settings > 1 do not apply to all PostingsFormat implementations, including the default one in this release. It only makes sense for terms indexes that can efficiently re-sample terms at load time.
func (*LiveIndexWriterConfigImpl) SetUseCompoundFile ¶
func (conf *LiveIndexWriterConfigImpl) SetUseCompoundFile(useCompoundFile bool) *LiveIndexWriterConfigImpl
Sets whether the IndexWriter should pack newly written segments in a compound file. Default is true.
Use false for batch indexing with very large RAM buffer settings.
Note: To control compound file usage during segment merges, see SetNoCFSRatio() and SetMaxCFSSegmentSizeMB(). This setting only applies to newly created segments.
func (*LiveIndexWriterConfigImpl) Similarity ¶
func (conf *LiveIndexWriterConfigImpl) Similarity() Similarity
func (*LiveIndexWriterConfigImpl) String ¶
func (conf *LiveIndexWriterConfigImpl) String() string
func (*LiveIndexWriterConfigImpl) TermIndexInterval ¶
func (conf *LiveIndexWriterConfigImpl) TermIndexInterval() int
func (*LiveIndexWriterConfigImpl) UseCompoundFile ¶
func (conf *LiveIndexWriterConfigImpl) UseCompoundFile() bool
type LogByteSizeMergePolicy ¶
type LogByteSizeMergePolicy struct {
*LogMergePolicy
}
This is a LogMergePolicy that measures size of a segment as the total byte size of the segment's files.
func (*LogByteSizeMergePolicy) Size ¶
func (p *LogByteSizeMergePolicy) Size(info *SegmentCommitInfo, w *IndexWriter) (int64, error)
type LogDocMergePolicy ¶
type LogDocMergePolicy struct {
*LogMergePolicy
}
This is a LogMergePolicy that measures size of a segment as the number of documents (not taking deletions into account).
func (*LogDocMergePolicy) Size ¶
func (p *LogDocMergePolicy) Size(info *SegmentCommitInfo, w *IndexWriter) (int64, error)
type LogMergePolicy ¶
type LogMergePolicy struct { *MergePolicyImpl // contains filtered or unexported fields }
This class implements a MergePolicy that tries to merge segments into levels of exponentially increasing size, where each level has fewer segments than the value of the merge factor. Whenever extra segments (beyond the merge factor upper bound) are encountered, all segments within the level are merged. You can get or set the merge factor using MergeFactor() and SetMergeFactor() respectively.
This class is abstract and requires a subclass to define the Size() method, which specifies how a segment's size is determined. LogDocMergePolicy is one subclass that measures size by document count in the segment. LogByteSizeMergePolicy is another subclass that measures size as the total byte size of the file(s) for the segment.
func NewLogByteSizeMergePolicy ¶
func NewLogByteSizeMergePolicy() *LogMergePolicy
func NewLogDocMergePolicy ¶
func NewLogDocMergePolicy() *LogMergePolicy
func NewLogMergePolicy ¶
func NewLogMergePolicy(min, max int64) *LogMergePolicy
func (*LogMergePolicy) FindForcedMerges ¶
func (mp *LogMergePolicy) FindForcedMerges(infos *SegmentInfos, maxSegmentCount int, segmentsToMerge map[*SegmentCommitInfo]bool, w *IndexWriter) (MergeSpecification, error)
func (*LogMergePolicy) FindMerges ¶
func (mp *LogMergePolicy) FindMerges(mergeTrigger MergeTrigger, infos *SegmentInfos, w *IndexWriter) (spec MergeSpecification, err error)
Checks if any merges are now necessary and returns a MergeSpecification if so. A merge is necessary when there are more than SetMergeFactor() segments at a given level. When multiple levels have too many segments, this method will return multiple merges, allowing the MergeScheduler to use concurrency.
func (*LogMergePolicy) SetCalbrateSizeByDeletes ¶
func (mp *LogMergePolicy) SetCalbrateSizeByDeletes(calibrateSizeByDeletes bool)
Sets whether the segment size should be calibrated by the number of deletes when choosing segments to merge.
func (*LogMergePolicy) SetMergeFactor ¶
func (mp *LogMergePolicy) SetMergeFactor(mergeFactor int)
Determines how often segment indices are merged by AddDocument(). With smaller values, less RAM is used while indexing, and searches are faster, but indexing speed is slower. With larger values, more RAM is used during indexing, and while searches are slower, indexing is faster. Thus larger values (> 10) are best for batch index creation, and smaller values (< 10) for indices that are interactively maintained.
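A sketch of tuning a document-count based log merge policy for batch indexing (the values are illustrative, and conf is assumed to be a config built earlier):

    mp := NewLogDocMergePolicy()      // *LogMergePolicy that measures size by doc count
    mp.SetMergeFactor(30)             // larger factor: faster indexing, slower searching
    mp.SetCalbrateSizeByDeletes(true) // pro-rate segment size by deleted documents
    conf.SetMergePolicy(mp)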
func (*LogMergePolicy) String ¶
func (mp *LogMergePolicy) String() string
type MergeAbortedError ¶
type MergeAbortedError string
Thrown when a merge was explicitly aborted because IndexWriter.close() was called with false. Normally this error is privately caught and suppressed by IndexWriter.
func (MergeAbortedError) Error ¶
func (err MergeAbortedError) Error() string
type MergeControl ¶
type MergePolicy ¶
type MergePolicy interface { SetNoCFSRatio(noCFSRatio float64) SetMaxCFSSegmentSizeMB(v float64) MergeSpecifier }
Expert: a MergePolicy determines the sequence of primitive merge operations.
Whenever the segments in an index have been altered by IndexWriter, either the addition of a newly flushed segment, addition of many segments from addIndexes* calls, or a previous merge that may now need to cascade, IndexWriter invokes findMerges() to give the MergePolicy a chance to pick merges that are now required. This method returns a MergeSpecification instance describing the set of merges that should be done, or nil if no merges are necessary. When IndexWriter.forceMerge() is called, it calls findForcedMerges() and the MergePolicy should then return the necessary merges.
Note that the policy can return more than one merge at a time. In this case, if the writer is using SerialMergeScheduler, the merges will be run sequentially but if it is using ConcurrentMergeScheduler they will be run concurrently.
The default MergePolicy is TieredMergePolicy.
type MergePolicyImpl ¶
type MergePolicyImpl struct { SizeSPI MergePolicyImplSPI // contains filtered or unexported fields }
func NewDefaultMergePolicyImpl ¶
func NewDefaultMergePolicyImpl(self MergeSpecifier) *MergePolicyImpl
Creates a new merge policy instance. Note that if you intend to use it without passing it to IndexWriter, you should call SetIndexWriter()
func (*MergePolicyImpl) SetMaxCFSSegmentSizeMB ¶
func (mp *MergePolicyImpl) SetMaxCFSSegmentSizeMB(v float64)
If a merged segment will be more than this value, leave the segment as non-compound file even if compound file is enabled. Set this to math.Inf(1) (default) and noCFSRatio to 1.0 to always use CFS regardless of merge size.
func (*MergePolicyImpl) SetNoCFSRatio ¶
func (mp *MergePolicyImpl) SetNoCFSRatio(noCFSRatio float64)
If a merged segment will be more than this percentage of the total size of the index, leave the segment as non-compound file even if compound file is enabled. Set to 1.0 to always use CFS regardless of merge size.
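For example, to always use the compound file format regardless of merged segment size, as described above (a sketch; mp is any merge policy embedding MergePolicyImpl):

    mp.SetMaxCFSSegmentSizeMB(math.Inf(1)) // no size cap on compound-file segments (the default)
    mp.SetNoCFSRatio(1.0)                  // never opt out of CFS based on relative size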
func (*MergePolicyImpl) Size ¶
func (mp *MergePolicyImpl) Size(info *SegmentCommitInfo, w *IndexWriter) (n int64, err error)
type MergePolicyImplSPI ¶
type MergePolicyImplSPI interface {
    // Return the byte size of the provided SegmentCommitInfo,
    // pro-rated by percentage of non-deleted documents if
    // SetCalibrateSizeByDeletes() is set.
    Size(*SegmentCommitInfo, *IndexWriter) (int64, error)
}
type MergeScheduler ¶
type MergeScheduler interface { io.Closer Merge(*IndexWriter, MergeTrigger, bool) error }
Expert: IndexWriter uses an instance implementing this interface to execute the merges selected by a MergePolicy. The default MergeScheduler is ConcurrentMergeScheduler.
Implementers of sub-classes should make sure that Clone() returns an independent instance able to work with any IndexWriter instance.
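A minimal sketch of the interface shape only; this stub is not the package's SerialMergeScheduler and does not actually run merges, since the mechanism for pulling pending merges from the writer is not shown in this listing:

    type noopMergeScheduler struct{}

    // Merge is called by IndexWriter when the MergePolicy has selected merges.
    // A real scheduler would obtain and execute the pending merges here.
    func (s *noopMergeScheduler) Merge(w *IndexWriter, trigger MergeTrigger, newMergesFound bool) error {
        return nil
    }

    // Close satisfies io.Closer.
    func (s *noopMergeScheduler) Close() error { return nil }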
type MergeScore ¶
type MergeScore interface{}
type MergeSpecification ¶
type MergeSpecification []*OneMerge
A MergeSpecification instance provides the information necessary to perform multiple merges. It simply contains a list of OneMerge instances.
type MergeSpecifier ¶
type MergeSpecifier interface {
    // Determine what set of merge operations are now necessary on the
    // index. IndexWriter calls this whenever there is a change to the
    // segments. This call is always synchronized on the IndexWriter
    // instance so only one thread at a time will call this method.
    FindMerges(MergeTrigger, *SegmentInfos, *IndexWriter) (MergeSpecification, error)
    // Determine what set of merge operations is necessary in order to
    // merge to <= the specified segment count. IndexWriter calls this
    // when its forceMerge() method is called. This call is always
    // synchronized on the IndexWriter instance so only one thread at a
    // time will call this method.
    FindForcedMerges(*SegmentInfos, int, map[*SegmentCommitInfo]bool, *IndexWriter) (MergeSpecification, error)
}
type MergeTrigger ¶
type MergeTrigger int
Passed to MergePolicy.FindMerges(MergeTrigger, SegmentInfos) to indicate the event that triggered the merge
type MultiFields ¶
type MultiFields struct {
// contains filtered or unexported fields
}
func NewMultiFields ¶
func NewMultiFields(subs []Fields, subSlices []ReaderSlice) MultiFields
func (MultiFields) Terms ¶
func (mf MultiFields) Terms(field string) Terms
type MultiTerms ¶
type MultiTerms struct {
// contains filtered or unexported fields
}
func NewMultiTerms ¶
func NewMultiTerms(subs []Terms, subSlices []ReaderSlice) *MultiTerms
func (*MultiTerms) DocCount ¶
func (mt *MultiTerms) DocCount() int
func (*MultiTerms) Iterator ¶
func (mt *MultiTerms) Iterator(reuse TermsEnum) TermsEnum
func (*MultiTerms) SumDocFreq ¶
func (mt *MultiTerms) SumDocFreq() int64
func (*MultiTerms) SumTotalTermFreq ¶
func (mt *MultiTerms) SumTotalTermFreq() int64
type NoDeletionPolicy ¶
type NoDeletionPolicy bool
An IndexDeletionPolicy which keeps all index commits around, never deleting them. This class is a singleton and can be accessed by referencing INSTANCE.
func (NoDeletionPolicy) Clone ¶
func (p NoDeletionPolicy) Clone() IndexDeletionPolicy
type NumericDocValuesWriter ¶
type NumericDocValuesWriter struct {
// contains filtered or unexported fields
}
Buffers up pending numeric (int64) values per doc, then flushes when the segment flushes.
type OneMerge ¶
OneMerge provides the information necessary to perform an individual primitive merge operation, resulting in a single new segment. The merge spec includes the subset of segments to be merged as well as whether the new segment should use the compound file format.
func NewOneMerge ¶
func NewOneMerge(segments []*SegmentCommitInfo) *OneMerge
type ParallelPostingsArray ¶
type ParallelPostingsArray struct { PostingsArray // contains filtered or unexported fields }
type PerField ¶
type PerField struct {
    *DefaultIndexingChain // access at least docState, termsHash.
    // contains filtered or unexported fields
}
type PostingsArray ¶
type PostingsArray interface {
// contains filtered or unexported methods
}
type PostingsBytesStartArray ¶
type PostingsBytesStartArray struct {
// contains filtered or unexported fields
}
func (*PostingsBytesStartArray) BytesUsed ¶
func (ss *PostingsBytesStartArray) BytesUsed() util.Counter
func (*PostingsBytesStartArray) Clear ¶
func (ss *PostingsBytesStartArray) Clear() []int
func (*PostingsBytesStartArray) Grow ¶
func (ss *PostingsBytesStartArray) Grow() []int
func (*PostingsBytesStartArray) Init ¶
func (ss *PostingsBytesStartArray) Init() []int
type PrefixCodedTerms ¶
type PrefixCodedTerms struct {
// contains filtered or unexported fields
}
Prefix codes term instances (prefixes are shared)
func (*PrefixCodedTerms) RamBytesUsed ¶
func (terms *PrefixCodedTerms) RamBytesUsed() int64
type PrefixCodedTermsBuilder ¶
type PrefixCodedTermsBuilder struct {
// contains filtered or unexported fields
}
Builds a PrefixCodedTerms: call add repeatedly, then finish.
type QueryAndLimit ¶
type QueryAndLimit struct { }
type RandomCodec ¶
type RandomCodec struct {
*CodecImpl
}
Codec that assigns per-field random postings format.
The same field/format assignment will happen regardless of order, a hash is computed up front that determines the mapping. This means fields can be put into things like HashSets and added to documents in different orders and the tests will still be deterministic and reproducible.
func NewRandomCodec ¶
func NewRandomCodec(r *rand.Rand, avoidCodecs map[string]bool) *RandomCodec
type ReaderClosedListener ¶
type ReaderClosedListener interface {
// contains filtered or unexported methods
}
A custom listener that's invoked when the IndexReader is closed.
type ReaderPool ¶
func (*ReaderPool) Close ¶
func (pool *ReaderPool) Close() error
type ReaderSlice ¶
type ReaderSlice struct {
// contains filtered or unexported fields
}
func (ReaderSlice) String ¶
func (rs ReaderSlice) String() string
type ReadersAndUpdates ¶
Used by IndexWriter to hold open SegmentReaders (for searching or merging), plus pending deletes and updates, for a given segment.
func (*ReadersAndUpdates) String ¶
func (rld *ReadersAndUpdates) String() string
type RefCount ¶
type RefCount struct {
// contains filtered or unexported fields
}
Tracks the reference count for a single index file:
type SegInfoByDelGen ¶
type SegInfoByDelGen []*SegmentCommitInfo
func (SegInfoByDelGen) Len ¶
func (a SegInfoByDelGen) Len() int
func (SegInfoByDelGen) Less ¶
func (a SegInfoByDelGen) Less(i, j int) bool
func (SegInfoByDelGen) Swap ¶
func (a SegInfoByDelGen) Swap(i, j int)
type SegmentCoreReaders ¶
type SegmentCoreReaders struct {
// contains filtered or unexported fields
}
type SegmentFlushTicket ¶
type SegmentFlushTicket struct { *FlushTicketImpl // contains filtered or unexported fields }
type SegmentInfoAndLevel ¶
type SegmentInfoAndLevel struct {
// contains filtered or unexported fields
}
type SegmentInfoAndLevels ¶
type SegmentInfoAndLevels []SegmentInfoAndLevel
func (SegmentInfoAndLevels) Len ¶
func (ss SegmentInfoAndLevels) Len() int
func (SegmentInfoAndLevels) Less ¶
func (ss SegmentInfoAndLevels) Less(i, j int) bool
func (SegmentInfoAndLevels) Swap ¶
func (ss SegmentInfoAndLevels) Swap(i, j int)
type SegmentInfoStatus ¶
type SegmentInfoStatus struct {
// contains filtered or unexported fields
}
Holds the status of each segment in the index.
type SegmentInfos ¶
type SegmentInfos struct { Segments []*SegmentCommitInfo // contains filtered or unexported fields }
A collection of segmentInfo objects with methods for operating on those segments in relation to the file system.
The active segments in the index are stored in the segment info file, segments_N. There may be one or more segments_N files in the index; however, the one with the largest generation is the active one (when older segments_N files are present it's because they temporarily cannot be deleted, or a writer is in the process of committing, or a custom IndexDeletionPolicy is in use). This file lists each segment by name and has details about the codec and generation of deletes.
There is also a file segments.gen. This file contains the current generation (the _N in segments_N) of the index. This is used only as a fallback in case the current generation cannot be accurately determined by directory listing alone (as is the case for some NFS clients with time-based directory cache expiration). This file simply contains an Int32 version header (FORMAT_SEGMENTS_GEN_CURRENT), followed by the generation recorded as int64, written twice.
Files:
- segments.gen: GenHeader, Generation, Generation, Footer
- segments_N: Header, Version, NameCounter, SegCount, <SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, DocValuesGen, UpdatesFiles>^SegCount, CommitUserData, Footer
Data types:
- Header --> CodecHeader
- GenHeader, NameCounter, SegCount, DeletionCount --> int32
- Generation, Version, DelGen, Checksum --> int64
- SegName, SegCodec --> string
- CommitUserData --> map[string]string
- UpdatesFiles --> map[int32]map[string]bool
- Footer --> CodecFooter
Field Descriptions:
- Version counts how often the index has been changed by adding or deleting documents.
- NameCounter is used to generate names for new segment files.
- SegName is the name of the segment, and is used as the file name prefix for all of the files that compose the segment's index.
- DelGen is the generation count of the deletes file. If this is -1, there are no deletes. Anything above zero means there are deletes stored by LiveDocsFormat.
- DeletionCount records the number of deleted documents in this segment.
- SegCodec is the name of the Codec that encoded this segment.
- CommitUserData stores an optional user-supplied opaque map[string]string that was passed to SetCommitData().
- FieldInfosGen is the generation count of the fieldInfos file. If this is -1, there are no updates to the fieldInfos in that segment. Anything above zero means there are updates to the fieldInfos stored by FieldInfosFormat.
- DocValuesGen is the generation count of the updatable DocValues. If this is -1, there are no updates to DocValues in that segment. Anything above zero means there are updates to DocValues stored by DocValuesFormat.
- UpdatesFiles stores the set of files that were updated in that segment per file.
func (*SegmentInfos) Clear ¶
func (sis *SegmentInfos) Clear()
func (*SegmentInfos) Clone ¶
func (sis *SegmentInfos) Clone() *SegmentInfos
Returns a copy of this instance, also copying each SegmentInfo.
func (*SegmentInfos) Read ¶
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error)
Read a particular segmentFileName. Note that this may return an IO error if a commit is in progress.
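A sketch of loading a particular commit point, assuming dir is a store.Directory, segFile is the segments_N file name discovered elsewhere, and that a zero-value SegmentInfos is usable with Read (an assumption not confirmed by this listing):

    sis := &SegmentInfos{}
    if err := sis.Read(dir, segFile); err != nil {
        return err // may be an IO error if a commit is in progress
    }
    for _, si := range sis.Segments {
        _ = si // each *SegmentCommitInfo describes one active segment of the commit
    }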
func (*SegmentInfos) SegmentsFileName ¶
func (sis *SegmentInfos) SegmentsFileName() string
type SegmentReader ¶
type SegmentReader struct { *AtomicReaderImpl // contains filtered or unexported fields }
IndexReader implementation over a single segment. Instances pointing to the same segment (but with different deletes, etc.) may share the same core data. (lucene.experimental)
func NewSegmentReader ¶
func NewSegmentReader(si *SegmentCommitInfo, termInfosIndexDivisor int, context store.IOContext) (r *SegmentReader, err error)
Constructs a new SegmentReader with a new core. Returns an error if the index is corrupt or if there is a low-level IO error.
TODO: why is this public?
func (*SegmentReader) BinaryDocValues ¶
func (r *SegmentReader) BinaryDocValues(field string) (v BinaryDocValues, err error)
func (*SegmentReader) CombinedCoreAndDeletesKey ¶
func (r *SegmentReader) CombinedCoreAndDeletesKey() interface{}
func (*SegmentReader) CoreCacheKey ¶
func (r *SegmentReader) CoreCacheKey() interface{}
func (*SegmentReader) Directory ¶
func (r *SegmentReader) Directory() store.Directory
func (*SegmentReader) FieldInfos ¶
func (r *SegmentReader) FieldInfos() FieldInfos
func (*SegmentReader) Fields ¶
func (r *SegmentReader) Fields() Fields
func (*SegmentReader) FieldsReader ¶
func (r *SegmentReader) FieldsReader() StoredFieldsReader
Expert: retrieve thread-private StoredFieldsReader
func (*SegmentReader) LiveDocs ¶
func (r *SegmentReader) LiveDocs() util.Bits
func (*SegmentReader) MaxDoc ¶
func (r *SegmentReader) MaxDoc() int
func (*SegmentReader) NormValues ¶
func (r *SegmentReader) NormValues(field string) (v NumericDocValues, err error)
func (*SegmentReader) NumDocs ¶
func (r *SegmentReader) NumDocs() int
func (*SegmentReader) NumericDocValues ¶
func (r *SegmentReader) NumericDocValues(field string) (v NumericDocValues, err error)
func (*SegmentReader) SegmentInfos ¶
func (r *SegmentReader) SegmentInfos() *SegmentCommitInfo
func (*SegmentReader) SegmentName ¶
func (r *SegmentReader) SegmentName() string
func (*SegmentReader) SortedDocValues ¶
func (r *SegmentReader) SortedDocValues(field string) (v SortedDocValues, err error)
func (*SegmentReader) SortedSetDocValues ¶
func (r *SegmentReader) SortedSetDocValues(field string) (v SortedSetDocValues, err error)
func (*SegmentReader) TermInfosIndexDivisor ¶
func (r *SegmentReader) TermInfosIndexDivisor() int
func (*SegmentReader) TermVectors ¶
func (r *SegmentReader) TermVectors(docID int) (fs Fields, err error)
func (*SegmentReader) TermVectorsReader ¶
func (r *SegmentReader) TermVectorsReader() TermVectorsReader
func (*SegmentReader) VisitDocument ¶
func (r *SegmentReader) VisitDocument(docID int, visitor StoredFieldVisitor) error
type SerialMergeScheduler ¶
A MergeScheduler that simply does each merge sequentially, using the current thread.
func NewSerialMergeScheduler ¶
func NewSerialMergeScheduler() *SerialMergeScheduler
func (*SerialMergeScheduler) Close ¶
func (ms *SerialMergeScheduler) Close() error
func (*SerialMergeScheduler) Merge ¶
func (ms *SerialMergeScheduler) Merge(writer *IndexWriter, trigger MergeTrigger, newMergesFound bool) (err error)
type Similarity ¶
type Similarity interface {
ComputeNorm(fs *FieldInvertState) int64
}
type SimpleMergedSegmentWarmer ¶
type SimpleMergedSegmentWarmer struct {
// contains filtered or unexported fields
}
A very simple merged segment warmer that just ensures data structures are initialized.
func NewSimpleMergedSegmentWarmer ¶
func NewSimpleMergedSegmentWarmer(infoStream util.InfoStream) *SimpleMergedSegmentWarmer
Creates a new SimpleMergedSegmentWarmer
type StandardDirectoryReader ¶
type StandardDirectoryReader struct { *DirectoryReaderImpl // contains filtered or unexported fields }
func (*StandardDirectoryReader) IsCurrent ¶
func (r *StandardDirectoryReader) IsCurrent() bool
func (*StandardDirectoryReader) String ¶
func (r *StandardDirectoryReader) String() string
func (*StandardDirectoryReader) Version ¶
func (r *StandardDirectoryReader) Version() int64
type StoredFieldStatus ¶
type StoredFieldStatus struct {
// contains filtered or unexported fields
}
type Term ¶
A Term represents a word from text. This is the unit of search. It is composed of two elements, the text of the word, as a string, and the name of the field that the text occurred in.
Note that terms may represent not only words from text fields, but also things like dates, email addresses, urls, etc.
func NewEmptyTerm ¶
Constructs a Term with the given field and empty text. This serves two purposes: 1) reuse of a Term with the same field. 2) pattern for a query.
func NewTermFromBytes ¶
type TermContext ¶
type TermContext struct {
    TopReaderContext IndexReaderContext
    DocFreq          int
    TotalTermFreq    int64
    // contains filtered or unexported fields
}
func NewTermContext ¶
func NewTermContext(ctx IndexReaderContext) *TermContext
Creates an empty TermContext from an IndexReaderContext.
func NewTermContextFromTerm ¶
func NewTermContextFromTerm(ctx IndexReaderContext, t *Term) (tc *TermContext, err error)
Creates a TermContext from a top-level IndexReaderContext and the given Term. This method will look up the given term in all of the context's leaf readers and register each of the readers containing the term in the returned TermContext, using the leaf reader's ordinal.
Note: the given context must be a top-level context.
func (*TermContext) State ¶
func (tc *TermContext) State(ord int) TermState
type TermIndexStatus ¶
type TermIndexStatus struct {
// contains filtered or unexported fields
}
type TermSorter ¶
type TermSorter []*Term
func (TermSorter) Len ¶
func (s TermSorter) Len() int
func (TermSorter) Less ¶
func (s TermSorter) Less(i, j int) bool
func (TermSorter) Swap ¶
func (s TermSorter) Swap(i, j int)
type TermVectorStatus ¶
type TermVectorStatus struct {
// contains filtered or unexported fields
}
type TermVectorsConsumer ¶
type TermVectorsConsumer struct { *TermsHashImpl // contains filtered or unexported fields }
type TermVectorsConsumerPerField ¶
type TermVectorsConsumerPerField struct { *TermsHashPerFieldImpl // contains filtered or unexported fields }
type TermVectorsConsumerPerFields ¶
type TermVectorsConsumerPerFields []*TermVectorsConsumerPerField
func (TermVectorsConsumerPerFields) Len ¶
func (a TermVectorsConsumerPerFields) Len() int
func (TermVectorsConsumerPerFields) Less ¶
func (a TermVectorsConsumerPerFields) Less(i, j int) bool
func (TermVectorsConsumerPerFields) Swap ¶
func (a TermVectorsConsumerPerFields) Swap(i, j int)
type TermVectorsPostingArray ¶
type TermVectorsPostingArray struct {
// contains filtered or unexported fields
}
type TermsHash ¶
type TermsHash interface { TermsHashImplSPI // contains filtered or unexported methods }
This class is passed each token produced by the analyzer on each field during indexing, and it stores these tokens in a hash table, and allocates separate byte streams per token. Consumers of this class, eg FreqProxTermsWriter and TermVectorsConsumer, write their own byte streams under each term.
type TermsHashImpl ¶
type TermsHashImpl struct {
// contains filtered or unexported fields
}
type TermsHashImplSPI ¶
type TermsHashImplSPI interface {
// contains filtered or unexported methods
}
type TermsHashPerField ¶
type TermsHashPerField interface {
// contains filtered or unexported methods
}
type TermsHashPerFieldImpl ¶
type TermsHashPerFieldImpl struct {
// contains filtered or unexported fields
}
type TermsHashPerFieldSPI ¶
type TermsHashPerFieldSPI interface {
// contains filtered or unexported methods
}
type ThreadState ¶
type ThreadState struct {
// contains filtered or unexported fields
}
ThreadState references and guards a DocumentsWriterPerThread instance that is used during indexing to build an in-memory index segment. ThreadState also holds all flush-related per-thread data controlled by DocumentsWriterFlushControl.
A ThreadState, its methods and members should only be accessed by one goroutine at a time. Users must acquire the lock via lock() before accessing the state and release it via unlock() (for example with defer).
type TieredMergePolicy ¶
type TieredMergePolicy struct { *MergePolicyImpl // contains filtered or unexported fields }
Merges segments of approximately equal size, subject to an allowed number of segments per tier. This is similar to LogByteSizeMergePolicy, except this merge policy is able to merge non-adjacent segments, and separates how many segments are merged at once (SetMaxMergeAtOnce()) from how many segments are allowed per tier (SetSegmentsPerTier()). This merge policy also does not over-merge (i.e. cascade merges).
For normal merging, this policy first computes a "budget" of how many segments are allowed to be in the index. If the index is over-budget, then the policy sorts segments by decreasing size (pro-rating by percent deletes), and then finds the least-cost merge. Merge cost is measured by a combination of the "skew" of the merge (size of the largest segment divided by the smallest segment), total merge size, and percent deletes reclaimed, so that merges with lower skew, smaller size, and those reclaiming more deletes are favored.
If a merge will produce a segment that's larger than SetMaxMergedSegmentMB(), then the policy will merge fewer segments (down to 1 at once, if that one has deletions) to keep the segment size under budget.
NOTE: this policy freely merges non-adjacent segments; if this is a problem, use LogMergePolicy.
NOTE: This policy always merges by byte size of the segments, always pro-rates by percent deletes, and does not apply any maximum segment size during forceMerge (unlike LogByteSizeMergePolicy).
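A configuration sketch; the values restate the documented defaults, and conf is assumed to be a config built earlier:

    tmp := NewTieredMergePolicy()
    tmp.SetMaxMergeAtOnce(10)       // segments merged at once during normal merging
    tmp.SetSegmentsPerTier(10)      // keep this >= the SetMaxMergeAtOnce value
    tmp.SetMaxMergedSegmentMB(5120) // roughly the 5 GB default cap for merged segments
    conf.SetMergePolicy(tmp)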
func NewTieredMergePolicy ¶
func NewTieredMergePolicy() *TieredMergePolicy
func (*TieredMergePolicy) FindForcedMerges ¶
func (tmp *TieredMergePolicy) FindForcedMerges(infos *SegmentInfos, maxSegmentCount int, segmentsToMerge map[*SegmentCommitInfo]bool, w *IndexWriter) (MergeSpecification, error)
func (*TieredMergePolicy) FindMerges ¶
func (tmp *TieredMergePolicy) FindMerges(mergeTrigger MergeTrigger, infos *SegmentInfos, w *IndexWriter) (spec MergeSpecification, err error)
func (*TieredMergePolicy) SetFloorSegmentMB ¶
func (tmp *TieredMergePolicy) SetFloorSegmentMB(v float64) *TieredMergePolicy
Segments smaller than this are "rounded up" to this size, ie treated as equal (floor) size for merge selection. This is to prevent frequent flushing of tiny segments from allowing a long tail in the index. Default is 2 MB.
func (*TieredMergePolicy) SetForceMergeDeletesPctAllowed ¶
func (tmp *TieredMergePolicy) SetForceMergeDeletesPctAllowed(v float64) *TieredMergePolicy
When forceMergeDeletes is called, we only merge away a segment if its delete percentage is over this threshold. Default is 10%.
func (*TieredMergePolicy) SetMaxMergeAtOnce ¶
func (tmp *TieredMergePolicy) SetMaxMergeAtOnce(v int) *TieredMergePolicy
Maximum number of segments to be merged at a time during "normal" merging. For explicit merging (e.g., forceMerge or forceMergeDeletes was called), see SetMaxMergeAtOnceExplicit(). Default is 10.
func (*TieredMergePolicy) SetMaxMergeAtOnceExplicit ¶
func (tmp *TieredMergePolicy) SetMaxMergeAtOnceExplicit(v int) *TieredMergePolicy
Maximum number of segments to be merged at a time, during forceMerge or forceMergeDeletes. Default is 30.
func (*TieredMergePolicy) SetMaxMergedSegmentMB ¶
func (tmp *TieredMergePolicy) SetMaxMergedSegmentMB(v float64) *TieredMergePolicy
Maximum sized segment to produce during normal merging. This setting is approximate: the estimate of the merged segment size is made by summing the sizes of the to-be-merged segments (compensating for percent deleted docs). Default is 5 GB.
func (*TieredMergePolicy) SetReclaimDeletesWeight ¶
func (tmp *TieredMergePolicy) SetReclaimDeletesWeight(v float64) *TieredMergePolicy
Controls how aggressively merges that reclaim more deletions are favored. Higher values will more aggressively target merges that reclaim deletions, but be careful not to go so high that way too much merging takes place; a value of 3.0 is probably nearly too high. A value of 0.0 means deletions don't impact merge selection.
func (*TieredMergePolicy) SetSegmentsPerTier ¶
func (tmp *TieredMergePolicy) SetSegmentsPerTier(v float64) *TieredMergePolicy
Sets the allowed number of segments per tier. Smaller values mean more merging but fewer segments.
NOTE: this value should be >= SetMaxMergeAtOnce(), otherwise you'll force too much merging to occur.
func (*TieredMergePolicy) String ¶
func (tmp *TieredMergePolicy) String() string
Source Files ¶
- bufferedDeletes.go
- byteSliceReader.go
- checkIndex.go
- compositeReader.go
- concurrentMergeScheduler.go
- config.go
- defaultIndexingChain.go
- deleteQueue.go
- deleteStream.go
- deleter.go
- directoryReader.go
- docConsumer.go
- docFieldConsumer.go
- docFieldConsumerPerField.go
- docValuesFieldUpdates.go
- docValuesUpdate.go
- docValuesWriter.go
- documentsWriter.go
- dwpt.go
- event.go
- fields.go
- fileDeleter.go
- flush.go
- flushControl.go
- flushQueue.go
- indexFilenames.go
- invert.go
- invertedDocConsumer.go
- invertedDocConsumerPerField.go
- invertedDocEndConsumer.go
- invertedDocEndConsumerPerField.go
- live.go
- merge.go
- mergeControl.go
- perfield.go
- prefixCodedTerms.go
- random.go
- reader.go
- readerPool.go
- readerUtil.go
- readerUtils.go
- readersAndLiveDocs.go
- segmentInfos.go
- segments.go
- stallControl.go
- storedFieldsConsumer.go
- terms.go
- termsHashConsumer.go
- termsHashConsumerPerField.go
- threadPool.go
- warmer.go
- writer.go