Documentation ¶
Overview ¶
Package sstable implements readers and writers of pebble tables.
Tables are either opened for reading or created for writing but not both.
A reader can create iterators, which allow seeking and next/prev iteration. There may be multiple key/value pairs that have the same key and different sequence numbers.
A reader can be used concurrently. Multiple goroutines can call NewIter concurrently, and each iterator can run concurrently with other iterators. However, any particular iterator should not be used concurrently, and iterators should not be used once a reader is closed.
A writer writes key/value pairs in increasing key order, and cannot be used concurrently. A table cannot be read until the writer has finished.
Readers and writers can be created with various options. Passing a nil Options pointer is valid and means to use the default values.
One such option is to define the 'less than' ordering for keys. The default Comparer uses the natural ordering consistent with bytes.Compare. The same ordering should be used for reading and writing a table.
To return the value for a key:
    r := table.NewReader(file, options)
    defer r.Close()
    i := r.NewIter(nil, nil)
    defer i.Close()
    ikey, value := i.SeekGE(key)
    if options.Comparer.Compare(ikey.UserKey, key) != 0 {
        // not found
    } else {
        // value is the first record containing key
    }
To count the number of entries in a table:
    i, n := r.NewIter(nil, nil), 0
    for key, value := i.First(); key != nil; key, value = i.Next() {
        n++
    }
    if err := i.Close(); err != nil {
        return 0, err
    }
    return n, nil
To write a table with three entries:
    w := table.NewWriter(file, options)
    if err := w.Set([]byte("apple"), []byte("red")); err != nil {
        w.Close()
        return err
    }
    if err := w.Set([]byte("banana"), []byte("yellow")); err != nil {
        w.Close()
        return err
    }
    if err := w.Set([]byte("cherry"), []byte("red")); err != nil {
        w.Close()
        return err
    }
    return w.Close()
Index ¶
- Constants
- Variables
- func CopySpan(ctx context.Context, input objstorage.Readable, r *Reader, rOpts ReaderOptions, ...) (size uint64, _ error)
- func DeterministicReadBlockDurationForTesting() func()
- func NewSimpleReadable(r ReadableFile) (objstorage.Readable, error)
- func ReadAll(r objstorage.Readable, ro ReaderOptions) (points []base.InternalKV, rangeDels, rangeKeys []keyspan.Span)
- func RewriteKeySuffixesAndReturnFormat(sst []byte, rOpts ReaderOptions, out objstorage.Writable, o WriterOptions, ...) (*WriterMetadata, TableFormat, error)
- type AbbreviatedKey
- type BlockInterval
- type BlockIntervalCollector
- func (b *BlockIntervalCollector) AddCollectedWithSuffixReplacement(oldProp []byte, oldSuffix, newSuffix []byte) error
- func (b *BlockIntervalCollector) AddPointKey(key InternalKey, value []byte) error
- func (b *BlockIntervalCollector) AddPrevDataBlockToIndexBlock()
- func (b *BlockIntervalCollector) AddRangeKeys(span Span) error
- func (b *BlockIntervalCollector) FinishDataBlock(buf []byte) ([]byte, error)
- func (b *BlockIntervalCollector) FinishIndexBlock(buf []byte) ([]byte, error)
- func (b *BlockIntervalCollector) FinishTable(buf []byte) ([]byte, error)
- func (b *BlockIntervalCollector) Name() string
- func (b *BlockIntervalCollector) SupportsSuffixReplacement() bool
- type BlockIntervalFilter
- func (b *BlockIntervalFilter) Init(name string, lower, upper uint64, suffixReplacer BlockIntervalSuffixReplacer)
- func (b *BlockIntervalFilter) Intersects(prop []byte) (bool, error)
- func (b *BlockIntervalFilter) Name() string
- func (b *BlockIntervalFilter) SetInterval(lower, upper uint64)
- func (b *BlockIntervalFilter) SyntheticSuffixIntersects(prop []byte, suffix []byte) (bool, error)
- type BlockIntervalSuffixReplacer
- type BlockPropertiesFilterer
- type BlockPropertyCollector
- type BlockPropertyFilter
- type BoundLimitedBlockPropertyFilter
- type BufferPool
- type Category
- type CategoryAndQoS
- type CategoryStats
- type CategoryStatsAggregate
- type CategoryStatsCollector
- type CommonProperties
- type CommonReader
- type Compare
- type Comparer
- type Comparers
- type Equal
- type FilterBlockSizeLimit
- type FilterMetrics
- type FilterMetricsTracker
- type FilterPolicy
- type FilterType
- type FilterWriter
- type FragmentIterTransforms
- type InternalKey
- type IntervalMapper
- type IterStatsAccumulator
- type IterTransforms
- type Iterator
- type KeySchemas
- type Layout
- type Merger
- type Mergers
- type NamedBlockHandle
- type Properties
- type QoSLevel
- type RawColumnWriter
- func (w *RawColumnWriter) AddWithForceObsolete(key InternalKey, value []byte, forceObsolete bool) error
- func (w *RawColumnWriter) Close() (err error)
- func (w *RawColumnWriter) ComparePrev(k []byte) int
- func (w *RawColumnWriter) EncodeSpan(span keyspan.Span) error
- func (w *RawColumnWriter) Error() error
- func (w *RawColumnWriter) EstimatedSize() uint64
- func (w *RawColumnWriter) Metadata() (*WriterMetadata, error)
- func (w *RawColumnWriter) SetSnapshotPinnedProperties(pinnedKeyCount, pinnedKeySize, pinnedValueSize uint64)
- type RawRowWriter
- func (w *RawRowWriter) AddWithForceObsolete(key InternalKey, value []byte, forceObsolete bool) error
- func (w *RawRowWriter) Close() (err error)
- func (w *RawRowWriter) ComparePrev(k []byte) int
- func (w *RawRowWriter) EncodeSpan(span keyspan.Span) error
- func (w *RawRowWriter) Error() error
- func (w *RawRowWriter) EstimatedSize() uint64
- func (w *RawRowWriter) Metadata() (*WriterMetadata, error)
- func (w *RawRowWriter) SetSnapshotPinnedProperties(pinnedKeyCount, pinnedKeySize, pinnedValueSize uint64)
- type RawWriter
- type ReadableFile
- type Reader
- func (r *Reader) Close() error
- func (r *Reader) CommonProperties() *CommonProperties
- func (r *Reader) EstimateDiskUsage(start, end []byte) (uint64, error)
- func (r *Reader) Layout() (*Layout, error)
- func (r *Reader) NewCompactionIter(transforms IterTransforms, statsAccum IterStatsAccumulator, rp ReaderProvider, ...) (Iterator, error)
- func (r *Reader) NewIter(transforms IterTransforms, lower, upper []byte) (Iterator, error)
- func (r *Reader) NewPointIter(ctx context.Context, transforms IterTransforms, lower, upper []byte, ...) (Iterator, error)
- func (r *Reader) NewRawRangeDelIter(ctx context.Context, transforms FragmentIterTransforms) (iter keyspan.FragmentIterator, err error)
- func (r *Reader) NewRawRangeKeyIter(ctx context.Context, transforms FragmentIterTransforms) (iter keyspan.FragmentIterator, err error)
- func (r *Reader) TableFormat() (TableFormat, error)
- func (r *Reader) TryAddBlockPropertyFilterForHideObsoletePoints(snapshotForHideObsoletePoints base.SeqNum, fileLargestSeqNum base.SeqNum, ...) (hideObsoletePoints bool, filters []BlockPropertyFilter)
- func (r *Reader) ValidateBlockChecksums() error
- type ReaderOptions
- type ReaderProvider
- type Separator
- type Span
- type Split
- type Successor
- type SyntheticPrefix
- type SyntheticPrefixAndSuffix
- type SyntheticSeqNum
- type SyntheticSuffix
- type TableFormat
- type TestFixtureInfo
- type TestKeysMaskingFilter
- type UserKeyPrefixBound
- type VirtualReader
- func (v *VirtualReader) CommonProperties() *CommonProperties
- func (v *VirtualReader) EstimateDiskUsage(start, end []byte) (uint64, error)
- func (v *VirtualReader) NewCompactionIter(transforms IterTransforms, statsAccum IterStatsAccumulator, rp ReaderProvider, ...) (Iterator, error)
- func (v *VirtualReader) NewPointIter(ctx context.Context, transforms IterTransforms, lower, upper []byte, ...) (Iterator, error)
- func (v *VirtualReader) NewRawRangeDelIter(ctx context.Context, transforms FragmentIterTransforms) (keyspan.FragmentIterator, error)
- func (v *VirtualReader) NewRawRangeKeyIter(ctx context.Context, transforms FragmentIterTransforms) (keyspan.FragmentIterator, error)
- func (v *VirtualReader) UnsafeReader() *Reader
- func (v *VirtualReader) ValidateBlockChecksumsOnBacking() error
- type VirtualReaderParams
- type Writer
- func (w *Writer) Close() (err error)
- func (w *Writer) Delete(key []byte) error
- func (w *Writer) DeleteRange(start, end []byte) error
- func (w *Writer) Error() error
- func (w *Writer) Merge(key, value []byte) error
- func (w *Writer) Metadata() (*WriterMetadata, error)
- func (w *Writer) RangeKeyDelete(start, end []byte) error
- func (w *Writer) RangeKeySet(start, end, suffix, value []byte) error
- func (w *Writer) RangeKeyUnset(start, end, suffix []byte) error
- func (w *Writer) Raw() RawWriter
- func (w *Writer) Set(key, value []byte) error
- type WriterMetadata
- func (m *WriterMetadata) SetLargestPointKey(k InternalKey)
- func (m *WriterMetadata) SetLargestRangeDelKey(k InternalKey)
- func (m *WriterMetadata) SetLargestRangeKey(k InternalKey)
- func (m *WriterMetadata) SetSmallestPointKey(k InternalKey)
- func (m *WriterMetadata) SetSmallestRangeDelKey(k InternalKey)
- func (m *WriterMetadata) SetSmallestRangeKey(k InternalKey)
- type WriterOptions
Constants ¶
const (
    InternalKeyKindDelete        = base.InternalKeyKindDelete
    InternalKeyKindSet           = base.InternalKeyKindSet
    InternalKeyKindMerge         = base.InternalKeyKindMerge
    InternalKeyKindLogData       = base.InternalKeyKindLogData
    InternalKeyKindSingleDelete  = base.InternalKeyKindSingleDelete
    InternalKeyKindRangeDelete   = base.InternalKeyKindRangeDelete
    InternalKeyKindSetWithDelete = base.InternalKeyKindSetWithDelete
    InternalKeyKindDeleteSized   = base.InternalKeyKindDeleteSized
    InternalKeyKindMax           = base.InternalKeyKindMax
    InternalKeyKindInvalid       = base.InternalKeyKindInvalid
)
These constants are part of the file format, and should not be changed.
const (
    // MaximumBlockSize is the maximum permissible size of a block.
    MaximumBlockSize = rowblk.MaximumSize
    // DefaultNumDeletionsThreshold defines the minimum number of point
    // tombstones that must be present in a data block for it to be
    // considered tombstone-dense.
    DefaultNumDeletionsThreshold = 100
    // DefaultDeletionSizeRatioThreshold defines the minimum ratio of the size
    // of point tombstones to the size of the data block in order to consider
    // the block as tombstone-dense.
    DefaultDeletionSizeRatioThreshold = 0.5
)
const NoSyntheticSeqNum = block.NoSyntheticSeqNum
NoSyntheticSeqNum is the default zero value for SyntheticSeqNum, which disables overriding the sequence number.
const (
TableFilter = base.TableFilter
)
Exported TableFilter constants.
Variables ¶
var DefaultComparer = base.DefaultComparer
DefaultComparer exports the base.DefaultComparer variable.
var ErrEmptySpan = errors.New("cannot copy empty span")
ErrEmptySpan is returned by CopySpan if the input sstable has no keys in the requested span.
Note that CopySpan's determination of block overlap is best effort - we may copy a block that doesn't actually contain any keys in the span, in which case we won't generate this error. We currently only generate this error when the span start is beyond all keys in the physical sstable.
var JemallocSizeClasses = []int{
16 * 1024,
20 * 1024, 24 * 1024, 28 * 1024, 32 * 1024,
40 * 1024, 48 * 1024, 56 * 1024, 64 * 1024,
80 * 1024, 96 * 1024, 112 * 1024, 128 * 1024,
160 * 1024, 192 * 1024, 224 * 1024, 256 * 1024,
320 * 1024,
}
JemallocSizeClasses are a subset of available size classes in jemalloc[1], suitable for the AllocatorSizeClasses option.
The size classes are used when writing sstables for determining target block sizes for flushes, with the goal of reducing internal memory fragmentation when the blocks are later loaded into the block cache. We only use the size classes between 16KiB - 256KiB as block limits fall in that range.
[1] https://jemalloc.net/jemalloc.3.html#size_classes
var NoFragmentTransforms = block.NoFragmentTransforms
NoFragmentTransforms is the default value for FragmentIterTransforms.
var NoTransforms = block.NoTransforms
NoTransforms is the default value for IterTransforms.
var TestFixtures = []TestFixtureInfo{
    {
        Filename:           "h.sst",
        Compression:        block.SnappyCompression,
        FullKeyFilter:      false,
        PrefixFilter:       false,
        IndexBlockSize:     fixtureDefaultIndexBlockSize,
        UseFixtureComparer: false,
    },
    {
        Filename:           "h.no-compression.sst",
        Compression:        block.NoCompression,
        FullKeyFilter:      false,
        PrefixFilter:       false,
        IndexBlockSize:     fixtureDefaultIndexBlockSize,
        UseFixtureComparer: false,
    },
    {
        Filename:           "h.table-bloom.sst",
        Compression:        block.SnappyCompression,
        FullKeyFilter:      true,
        PrefixFilter:       false,
        IndexBlockSize:     fixtureDefaultIndexBlockSize,
        UseFixtureComparer: false,
    },
    {
        Filename:           "h.table-bloom.no-compression.sst",
        Compression:        block.NoCompression,
        FullKeyFilter:      true,
        PrefixFilter:       false,
        IndexBlockSize:     fixtureDefaultIndexBlockSize,
        UseFixtureComparer: false,
    },
    {
        Filename:           "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
        Compression:        block.NoCompression,
        FullKeyFilter:      false,
        PrefixFilter:       true,
        IndexBlockSize:     fixtureDefaultIndexBlockSize,
        UseFixtureComparer: true,
    },
    {
        Filename:           "h.no-compression.two_level_index.sst",
        Compression:        block.NoCompression,
        FullKeyFilter:      false,
        PrefixFilter:       false,
        IndexBlockSize:     fixtureSmallIndexBlockSize,
        UseFixtureComparer: false,
    },
    {
        Filename:           "h.zstd-compression.sst",
        Compression:        block.ZstdCompression,
        FullKeyFilter:      false,
        PrefixFilter:       false,
        IndexBlockSize:     fixtureDefaultIndexBlockSize,
        UseFixtureComparer: false,
    },
}
TestFixtures contains all metadata necessary to generate the test SSTs.
Functions ¶
func CopySpan ¶
func CopySpan(
    ctx context.Context,
    input objstorage.Readable,
    r *Reader,
    rOpts ReaderOptions,
    output objstorage.Writable,
    o WriterOptions,
    start, end InternalKey,
) (size uint64, _ error)
CopySpan produces a copy of an approximate subset of an input sstable.
The produced sstable contains all keys from the input sstable in the span [start, end), as well as potentially some additional keys from the original file that were adjacent to but outside that span.
CopySpan differs from simply seeking a reader to start, iterating until end, and passing the results to a writer, in that it does not write the new sstable from scratch, key-by-key, recompressing each key into new blocks and computing new filters and properties. Instead, it finds the data blocks that intersect the requested span and copies those, whole, to the new file, avoiding all decompression and recompression work. It then copies the original bloom filter; this filter remains valid for the subset of data, just with a potentially higher false positive rate than one computed from only the copied keys.
The resulting sstable will have no block properties.
The function might return ErrEmptySpan if there are no blocks that could include keys in the given range. See ErrEmptySpan for more details.
Closes input and finishes or aborts output in all cases, including on errors.
Note that CopySpan is not aware of any suffix or prefix replacement; the caller must account for those when specifying the bounds.
func DeterministicReadBlockDurationForTesting ¶
func DeterministicReadBlockDurationForTesting() func()
DeterministicReadBlockDurationForTesting is for tests that want a deterministic value of the time to read a block (that is not in the cache). The return value is a function that must be called before the test exits.
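For example, a test might use it like this (a sketch; the test name and body are hypothetical):

    func TestBlockReads(t *testing.T) {
        // Make block-read timings deterministic for this test. The returned
        // function restores normal behavior and must run before the test exits.
        restore := sstable.DeterministicReadBlockDurationForTesting()
        defer restore()
        // ... exercise code paths that read blocks not present in the cache ...
    }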
func NewSimpleReadable ¶
func NewSimpleReadable(r ReadableFile) (objstorage.Readable, error)
NewSimpleReadable wraps a ReadableFile in an objstorage.Readable implementation (which does not support read-ahead).
func ReadAll ¶
func ReadAll(
    r objstorage.Readable, ro ReaderOptions,
) (points []base.InternalKV, rangeDels, rangeKeys []keyspan.Span)
ReadAll returns all point keys, range del spans, and range key spans from an sstable. Closes the Readable. Panics on errors.
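A usage sketch for a debugging tool, assuming f is a ReadableFile (for example, an open vfs.File):

    readable, err := sstable.NewSimpleReadable(f)
    if err != nil {
        return err
    }
    // ReadAll closes readable, and panics rather than returning an error.
    points, rangeDels, rangeKeys := sstable.ReadAll(readable, sstable.ReaderOptions{})
    fmt.Printf("%d point keys, %d range-del spans, %d range-key spans\n",
        len(points), len(rangeDels), len(rangeKeys))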
func RewriteKeySuffixesAndReturnFormat ¶
func RewriteKeySuffixesAndReturnFormat(
    sst []byte,
    rOpts ReaderOptions,
    out objstorage.Writable,
    o WriterOptions,
    from, to []byte,
    concurrency int,
) (*WriterMetadata, TableFormat, error)
RewriteKeySuffixesAndReturnFormat copies the content of the passed SSTable bytes to a new sstable, written to `out`, in which the suffix `from` is replaced with `to` in every key. The input sstable must consist of only Sets or RangeKeySets and every key must have `from` as its suffix as determined by the Split function of the Comparer in the passed WriterOptions. Range deletes must not exist in this sstable, as they will be ignored.
Data blocks are rewritten in parallel by `concurrency` workers and then assembled into a final SST. Filters are copied from the original SST without modification as they are not affected by the suffix, while block and table properties are only minimally recomputed.
TODO(sumeer): document limitations, if any, due to this limited re-computation of properties (is there any loss of fidelity?).
Any block property collectors configured in the WriterOptions must implement AddCollectedWithSuffixReplacement.
The WriterOptions.TableFormat is ignored, and the output sstable has the same TableFormat as the input, which is returned in case the caller wants to do some error checking. Suffix rewriting is meant to be efficient, and allowing changes in the TableFormat detracts from that efficiency.
Any obsolete bits that key-value pairs may be annotated with are ignored and lost during the rewrite. Additionally, the output sstable has the pebble.obsolete.is_strict property set to false. These limitations could be removed if needed. The current use case for RewriteKeySuffixesAndReturnFormat in CockroachDB is for MVCC-compliant file ingestion, where these files do not contain RANGEDELs and have one key-value pair per userkey -- so they trivially satisfy the strict criteria, and we don't need the obsolete bit as a performance optimization. For disaggregated storage, strict obsolete sstables are needed for L5 and L6, but at the time of writing, we expect such MVCC-compliant file ingestion to only ingest into levels L4 and higher. If this changes, we can do one of two things to get rid of this limitation:
- Validate that there are no duplicate userkeys and no RANGEDELs/MERGEs in the sstable to be rewritten. Validating no duplicate userkeys is non-trivial when rewriting blocks in parallel, so we could encode the pre-existing condition in the (existing) SnapshotPinnedKeys property -- we need to update the external sst writer to calculate and encode this property.
- Preserve the obsolete bit (with changes to the blockIter).
Types ¶
type AbbreviatedKey ¶
type AbbreviatedKey = base.AbbreviatedKey
AbbreviatedKey exports the base.AbbreviatedKey type.
type BlockInterval ¶
BlockInterval represents the [Lower, Upper) interval of 64-bit values corresponding to a set of keys. The meaning of the values themselves is opaque to the BlockIntervalCollector.
If Lower >= Upper, the interval is the empty set.
func (BlockInterval) Intersects ¶
func (i BlockInterval) Intersects(other BlockInterval) bool
Intersects returns true if the two intervals intersect.
func (BlockInterval) IsEmpty ¶
func (i BlockInterval) IsEmpty() bool
IsEmpty returns true if the interval is empty.
func (*BlockInterval) UnionWith ¶
func (i *BlockInterval) UnionWith(other BlockInterval)
UnionWith extends the receiver to include another interval.
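A small sketch of the interval operations:

    a := sstable.BlockInterval{Lower: 10, Upper: 20}
    b := sstable.BlockInterval{Lower: 15, Upper: 30}
    fmt.Println(a.Intersects(b)) // true: [10,20) and [15,30) overlap
    fmt.Println(a.IsEmpty())     // false
    a.UnionWith(b)               // a is now [10,30)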
type BlockIntervalCollector ¶
type BlockIntervalCollector struct {
// contains filtered or unexported fields
}
BlockIntervalCollector is a helper implementation of BlockPropertyCollector for users who want to represent a set of the form [lower,upper) where both lower and upper are uint64, and lower <= upper.
The set is encoded as:
- Two varint integers, (lower, upper-lower), when upper-lower > 0
- Nil, when upper-lower = 0
Users must not expect this to preserve differences between empty sets -- they will all get turned into the semantically equivalent [0,0).
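A sketch of the encoding described above, using encoding/binary's varint helpers (encodeInterval is a hypothetical helper, not part of this package):

    func encodeInterval(i sstable.BlockInterval) []byte {
        if i.Upper <= i.Lower {
            return nil // every empty set encodes as nil, i.e. as [0,0)
        }
        buf := binary.AppendUvarint(nil, i.Lower)
        return binary.AppendUvarint(buf, i.Upper-i.Lower)
    }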
A BlockIntervalCollector that collects over point and range keys needs to have both the point and range DataBlockIntervalCollector specified, since point and range keys are fed to the BlockIntervalCollector in an interleaved fashion, independently of one another. This also implies that the DataBlockIntervalCollectors for point and range keys should be references to independent instances, rather than references to the same collector, as point and range keys are tracked independently.
func (*BlockIntervalCollector) AddCollectedWithSuffixReplacement ¶
func (b *BlockIntervalCollector) AddCollectedWithSuffixReplacement(
    oldProp []byte, oldSuffix, newSuffix []byte,
) error
AddCollectedWithSuffixReplacement is part of the BlockPropertyCollector interface.
func (*BlockIntervalCollector) AddPointKey ¶
func (b *BlockIntervalCollector) AddPointKey(key InternalKey, value []byte) error
AddPointKey is part of the BlockPropertyCollector interface.
func (*BlockIntervalCollector) AddPrevDataBlockToIndexBlock ¶
func (b *BlockIntervalCollector) AddPrevDataBlockToIndexBlock()
AddPrevDataBlockToIndexBlock implements the BlockPropertyCollector interface.
func (*BlockIntervalCollector) AddRangeKeys ¶
func (b *BlockIntervalCollector) AddRangeKeys(span Span) error
AddRangeKeys is part of the BlockPropertyCollector interface.
func (*BlockIntervalCollector) FinishDataBlock ¶
func (b *BlockIntervalCollector) FinishDataBlock(buf []byte) ([]byte, error)
FinishDataBlock is part of the BlockPropertyCollector interface.
func (*BlockIntervalCollector) FinishIndexBlock ¶
func (b *BlockIntervalCollector) FinishIndexBlock(buf []byte) ([]byte, error)
FinishIndexBlock implements the BlockPropertyCollector interface.
func (*BlockIntervalCollector) FinishTable ¶
func (b *BlockIntervalCollector) FinishTable(buf []byte) ([]byte, error)
FinishTable implements the BlockPropertyCollector interface.
func (*BlockIntervalCollector) Name ¶
func (b *BlockIntervalCollector) Name() string
Name is part of the BlockPropertyCollector interface.
func (*BlockIntervalCollector) SupportsSuffixReplacement ¶
func (b *BlockIntervalCollector) SupportsSuffixReplacement() bool
SupportsSuffixReplacement is part of the BlockPropertyCollector interface.
type BlockIntervalFilter ¶
type BlockIntervalFilter struct {
// contains filtered or unexported fields
}
BlockIntervalFilter is an implementation of BlockPropertyFilter when the corresponding collector is a BlockIntervalCollector. That is, the set is of the form [lower, upper).
func NewBlockIntervalFilter ¶
func NewBlockIntervalFilter(
    name string, lower uint64, upper uint64, suffixReplacer BlockIntervalSuffixReplacer,
) *BlockIntervalFilter
NewBlockIntervalFilter constructs a BlockPropertyFilter that filters blocks based on an interval property collected by BlockIntervalCollector and the given [lower, upper) bounds. The given name specifies the BlockIntervalCollector's properties to read.
func NewTestKeysBlockPropertyFilter ¶
func NewTestKeysBlockPropertyFilter(filterMin, filterMax uint64) *BlockIntervalFilter
NewTestKeysBlockPropertyFilter constructs a new block-property filter that excludes blocks containing exclusively suffixed keys where all the suffixes fall outside of the range [filterMin, filterMax).
The filter only filters based on data derived from the key. The iteration results of this block property filter are deterministic for unsuffixed keys and keys with suffixes within the range [filterMin, filterMax). For keys with suffixes outside the range, iteration is nondeterministic.
func (*BlockIntervalFilter) Init ¶
func (b *BlockIntervalFilter) Init(
    name string, lower, upper uint64, suffixReplacer BlockIntervalSuffixReplacer,
)
Init initializes (or re-initializes, clearing previous state) an existing BlockIntervalFilter to filter blocks based on an interval property collected by BlockIntervalCollector and the given [lower, upper) bounds. The given name specifies the BlockIntervalCollector's properties to read.
func (*BlockIntervalFilter) Intersects ¶
func (b *BlockIntervalFilter) Intersects(prop []byte) (bool, error)
Intersects implements the BlockPropertyFilter interface.
func (*BlockIntervalFilter) Name ¶
func (b *BlockIntervalFilter) Name() string
Name implements the BlockPropertyFilter interface.
func (*BlockIntervalFilter) SetInterval ¶
func (b *BlockIntervalFilter) SetInterval(lower, upper uint64)
SetInterval adjusts the [lower, upper) bounds used by the filter. It is not generally safe to alter the filter while it's in use, except as part of the implementation of BlockPropertyFilterMask.SetSuffix used for range-key masking.
func (*BlockIntervalFilter) SyntheticSuffixIntersects ¶
func (b *BlockIntervalFilter) SyntheticSuffixIntersects(prop []byte, suffix []byte) (bool, error)
SyntheticSuffixIntersects implements the BlockPropertyFilter interface.
type BlockIntervalSuffixReplacer ¶
type BlockIntervalSuffixReplacer interface {
    // ApplySuffixReplacement recalculates a previously calculated interval
    // (which corresponds to an arbitrary collection of keys) under the
    // assumption that those keys are rewritten with a new suffix.
    //
    // Such a transformation is possible when the intervals depend only on the
    // suffixes.
    ApplySuffixReplacement(interval BlockInterval, newSuffix []byte) (BlockInterval, error)
}
BlockIntervalSuffixReplacer provides a method to conduct just-in-time adjustment of a given block property interval before filtering.
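A sketch of an implementation for keys whose suffix is an 8-byte big-endian integer (the suffix encoding is an assumption for illustration; it requires encoding/binary and a package such as github.com/cockroachdb/errors):

    type uintSuffixReplacer struct{}

    func (uintSuffixReplacer) ApplySuffixReplacement(
        interval sstable.BlockInterval, newSuffix []byte,
    ) (sstable.BlockInterval, error) {
        if len(newSuffix) != 8 {
            return sstable.BlockInterval{}, errors.New("unexpected suffix length")
        }
        // After replacement every key carries the same suffix, so the
        // interval collapses to a single value.
        v := binary.BigEndian.Uint64(newSuffix)
        return sstable.BlockInterval{Lower: v, Upper: v + 1}, nil
    }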
type BlockPropertiesFilterer ¶
type BlockPropertiesFilterer struct {
// contains filtered or unexported fields
}
BlockPropertiesFilterer provides filtering support when reading an sstable in the context of an iterator that has a slice of BlockPropertyFilters. After the call to NewBlockPropertiesFilterer, the caller must call IntersectsUserPropsAndFinishInit to check if the sstable intersects with the filters. If it does intersect, this function also finishes initializing the BlockPropertiesFilterer using the shortIDs for the relevant filters. Subsequent checks for relevance of a block should use the intersects method.
func IntersectsTable ¶
func IntersectsTable(
    filters []BlockPropertyFilter,
    limited BoundLimitedBlockPropertyFilter,
    userProperties map[string]string,
    syntheticSuffix SyntheticSuffix,
) (*BlockPropertiesFilterer, error)
IntersectsTable evaluates the provided block-property filter against the provided set of table-level properties. If there is no intersection between the filters and the table or an error is encountered, IntersectsTable returns a nil filterer (and possibly an error). If there is an intersection, IntersectsTable returns a non-nil filterer that may be used by an iterator reading the table.
type BlockPropertyCollector ¶
type BlockPropertyCollector interface {
    // Name returns the name of the block property collector.
    Name() string

    // AddPointKey is called with each new key added to a data block in the
    // sstable. The callee can assume that these are in sorted order.
    AddPointKey(key InternalKey, value []byte) error

    // AddRangeKeys is called for each range span added to the sstable. The
    // range key properties are stored separately and don't contribute to data
    // block properties. They are only used when FinishTable is called.
    // TODO(radu): clean up this subtle semantic.
    AddRangeKeys(span keyspan.Span) error

    // AddCollectedWithSuffixReplacement adds previously collected property
    // data and updates it to reflect a change of suffix on all keys: the old
    // property data is assumed to be constructed from keys that all have the
    // same oldSuffix and is recalculated to reflect the same keys but with
    // newSuffix.
    //
    // A collector which supports this method must be able to derive its
    // updated value from its old value and the change being made to the
    // suffix, without needing to be passed each updated K/V.
    //
    // For example, a collector that only inspects values can simply copy its
    // previously computed property as-is, since key-suffix replacement does
    // not change values, while a collector that depends only on key suffixes,
    // like one which collected mvcc-timestamp bounds from timestamp-suffixed
    // keys, can just set its new bounds from the new suffix, as it is common
    // to all keys, without needing to recompute it from every key.
    //
    // This method is optional (if it is not implemented, it always returns an
    // error). SupportsSuffixReplacement() can be used to check if this method
    // is implemented.
    AddCollectedWithSuffixReplacement(oldProp []byte, oldSuffix, newSuffix []byte) error

    // SupportsSuffixReplacement returns whether the collector supports the
    // AddCollectedWithSuffixReplacement method.
    SupportsSuffixReplacement() bool

    // FinishDataBlock is called when all the entries have been added to a
    // data block. Subsequent Add calls will be for the next data block. It
    // returns the property value for the finished block.
    FinishDataBlock(buf []byte) ([]byte, error)

    // AddPrevDataBlockToIndexBlock adds the entry corresponding to the
    // previous FinishDataBlock to the current index block.
    AddPrevDataBlockToIndexBlock()

    // FinishIndexBlock is called when an index block, containing all the
    // key-value pairs since the last FinishIndexBlock, will no longer see new
    // entries. It returns the property value for the index block.
    FinishIndexBlock(buf []byte) ([]byte, error)

    // FinishTable is called when the sstable is finished, and returns the
    // property value for the sstable.
    FinishTable(buf []byte) ([]byte, error)
}
BlockPropertyCollector is used when writing a sstable.
All calls to Add are included in the next FinishDataBlock, after which the next data block is expected to start.
The index entry generated for the data block, which contains the return value from FinishDataBlock, is not immediately included in the current index block. It is included when AddPrevDataBlockToIndexBlock is called. An alternative would be to return an opaque handle from FinishDataBlock and pass it to a new AddToIndexBlock method, which requires more plumbing, and passing of an interface{} results in an undesirable heap allocation. AddPrevDataBlockToIndexBlock must be called before keys are added to the new data block.
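From the writer's perspective, the calls look roughly like this (a sketch; c is any BlockPropertyCollector, and dataBlocks, kv.Key, kv.Value, and writeIndexEntry are hypothetical):

    for _, blockKVs := range dataBlocks {
        for _, kv := range blockKVs {
            if err := c.AddPointKey(kv.Key, kv.Value); err != nil {
                return err
            }
        }
        prop, err := c.FinishDataBlock(nil)
        if err != nil {
            return err
        }
        writeIndexEntry(prop) // store prop in the block's index entry
        // Must happen before keys are added to the next data block.
        c.AddPrevDataBlockToIndexBlock()
    }
    tableProp, err := c.FinishTable(nil)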
func NewBlockIntervalCollector ¶
func NewBlockIntervalCollector(
    name string, mapper IntervalMapper, suffixReplacer BlockIntervalSuffixReplacer,
) BlockPropertyCollector
NewBlockIntervalCollector constructs a BlockIntervalCollector with the given name. The collector applies the given IntervalMapper to each point key and range-key span it encounters and unions the resulting intervals per data block, index block, and table.

The given BlockIntervalSuffixReplacer, if any, is used to support suffix replacement (see SupportsSuffixReplacement).
func NewTestKeysBlockPropertyCollector ¶
func NewTestKeysBlockPropertyCollector() BlockPropertyCollector
NewTestKeysBlockPropertyCollector constructs a sstable property collector over testkey suffixes.
type BlockPropertyFilter ¶
type BlockPropertyFilter = base.BlockPropertyFilter
BlockPropertyFilter is used in an Iterator to filter sstables and blocks within the sstable. It should not maintain any per-sstable state, and must be thread-safe.
type BoundLimitedBlockPropertyFilter ¶
type BoundLimitedBlockPropertyFilter interface {
    BlockPropertyFilter

    // KeyIsWithinLowerBound tests whether the provided internal key falls
    // within the current lower bound of the filter. A true return value
    // indicates that the filter may be used to filter blocks that exclusively
    // contain keys ≥ `key`, so long as the blocks' keys also satisfy the
    // upper bound.
    KeyIsWithinLowerBound(key []byte) bool
    // KeyIsWithinUpperBound tests whether the provided internal key falls
    // within the current upper bound of the filter. A true return value
    // indicates that the filter may be used to filter blocks that exclusively
    // contain keys ≤ `key`, so long as the blocks' keys also satisfy the
    // lower bound.
    KeyIsWithinUpperBound(key []byte) bool
}
BoundLimitedBlockPropertyFilter implements the block-property filter but imposes an additional constraint on its usage, requiring that only blocks containing exclusively keys between its lower and upper bounds may be filtered. The bounds may change during iteration, so the filter doesn't expose the bounds, instead implementing KeyIsWithin[Lower,Upper]Bound methods for performing bound comparisons.
To be used, a BoundLimitedBlockPropertyFilter must be supplied directly through NewBlockPropertiesFilterer's dedicated parameter. If supplied through the ordinary slice of block property filters, this filter's bounds will be ignored.
The current [lower,upper) bounds of the filter are unknown, because they may be changing. During forward iteration the lower bound is externally guaranteed, meaning Intersects only returns false if the sstable iterator is already known to be positioned at a key ≥ lower. The sstable iterator is then only responsible for ensuring filtered blocks also meet the upper bound, and should only allow a block to be filtered if all its keys are < upper. The sstable iterator may invoke KeyIsWithinUpperBound(key) to perform this check, where key is an inclusive upper bound on the block's keys.
During backward iteration the upper bound is externally guaranteed, and Intersects only returns false if the sstable iterator is already known to be positioned at a key < upper. The sstable iterator is responsible for ensuring filtered blocks also meet the lower bound, enforcing that a block is only filtered if all its keys are ≥ lower. This check is made through passing the block's inclusive lower bound to KeyIsWithinLowerBound.
Implementations may become active or inactive by implementing Intersects to return true whenever the filter is disabled.
Usage of BoundLimitedBlockPropertyFilter is subtle, and Pebble consumers should not implement this interface directly. This interface is an internal detail in the implementation of block-property range-key masking.
type Category ¶
type Category string
Category is a user-understandable string, where stats are aggregated for each category. The cardinality of this should be low, say < 20. The prefix "pebble-" is reserved for internal Pebble categories.
Examples of categories that can be useful in the CockroachDB context are: sql-user, sql-stats, raft, rangefeed, mvcc-gc, range-snapshot.
type CategoryAndQoS ¶
CategoryAndQoS specifies both the Category and the QoSLevel.
type CategoryStats ¶
type CategoryStats struct {
    // BlockBytes is the bytes in the loaded blocks. If the block was
    // compressed, this is the compressed bytes. Currently, only the index
    // blocks, data blocks containing points, and filter blocks are included.
    // Additionally, value blocks read after the corresponding iterator is
    // closed are not included.
    BlockBytes uint64
    // BlockBytesInCache is the subset of BlockBytes that were in the block
    // cache.
    BlockBytesInCache uint64
    // BlockReadDuration is the total duration to read the bytes not in the
    // cache, i.e., BlockBytes-BlockBytesInCache.
    BlockReadDuration time.Duration
}
CategoryStats provides stats about a category of reads.
type CategoryStatsAggregate ¶
type CategoryStatsAggregate struct {
    Category      Category
    QoSLevel      QoSLevel
    CategoryStats CategoryStats
}
CategoryStatsAggregate is the aggregate for the given category.
type CategoryStatsCollector ¶
type CategoryStatsCollector struct {
// contains filtered or unexported fields
}
CategoryStatsCollector collects and aggregates the stats per category.
func (*CategoryStatsCollector) Accumulator ¶
func (c *CategoryStatsCollector) Accumulator(p uint64, caq CategoryAndQoS) IterStatsAccumulator
Accumulator returns a stats accumulator for the given category. The provided p is used to determine which shard to write stats to.
func (*CategoryStatsCollector) GetStats ¶
func (c *CategoryStatsCollector) GetStats() []CategoryStatsAggregate
GetStats returns the aggregated stats.
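A usage sketch (the shard argument and category name are arbitrary; the CategoryAndQoS field names are assumed from that type's description):

    var c sstable.CategoryStatsCollector
    acc := c.Accumulator(42, sstable.CategoryAndQoS{Category: "sql-user"})
    acc.Accumulate(sstable.CategoryStats{BlockBytes: 4096, BlockBytesInCache: 1024})
    for _, agg := range c.GetStats() {
        fmt.Printf("%s: %d block bytes\n", agg.Category, agg.CategoryStats.BlockBytes)
    }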
type CommonProperties ¶
type CommonProperties struct {
    // The number of entries in this table.
    NumEntries uint64 `prop:"rocksdb.num.entries"`
    // Total raw key size.
    RawKeySize uint64 `prop:"rocksdb.raw.key.size"`
    // Total raw value size.
    RawValueSize uint64 `prop:"rocksdb.raw.value.size"`
    // Total raw key size of point deletion tombstones. This value is
    // comparable to RawKeySize.
    RawPointTombstoneKeySize uint64 `prop:"pebble.raw.point-tombstone.key.size"`
    // Sum of the raw value sizes carried by point deletion tombstones
    // containing size estimates. See the DeleteSized key kind. This value is
    // comparable to Raw{Key,Value}Size.
    RawPointTombstoneValueSize uint64 `prop:"pebble.raw.point-tombstone.value.size"`
    // The number of point deletion entries ("tombstones") in this table that
    // carry a size hint indicating the size of the value the tombstone
    // deletes.
    NumSizedDeletions uint64 `prop:"pebble.num.deletions.sized"`
    // The number of deletion entries in this table, including both point and
    // range deletions.
    NumDeletions uint64 `prop:"rocksdb.deleted.keys"`
    // The number of range deletions in this table.
    NumRangeDeletions uint64 `prop:"rocksdb.num.range-deletions"`
    // The number of RANGEKEYDELs in this table.
    NumRangeKeyDels uint64 `prop:"pebble.num.range-key-dels"`
    // The number of RANGEKEYSETs in this table.
    NumRangeKeySets uint64 `prop:"pebble.num.range-key-sets"`
    // Total size of value blocks and value index block. Only serialized if
    // > 0.
    ValueBlocksSize uint64 `prop:"pebble.value-blocks.size"`
    // NumDataBlocks is the number of data blocks in this table.
    NumDataBlocks uint64 `prop:"rocksdb.num.data.blocks"`
    // NumTombstoneDenseBlocks is the number of data blocks in this table that
    // are considered tombstone-dense. See the TombstoneDenseBlocksRatio field
    // in manifest.TableStats for the criteria used to determine if a data
    // block is tombstone-dense.
    NumTombstoneDenseBlocks uint64 `prop:"pebble.num.tombstone-dense-blocks"`
    // The compression algorithm used to compress blocks.
    CompressionName string `prop:"rocksdb.compression"`
    // The compression options used to compress blocks.
    CompressionOptions string `prop:"rocksdb.compression_options"`
}
CommonProperties holds properties for either a virtual or a physical sstable. This can be used by code which doesn't care to make the distinction between physical and virtual sstables properties.
For virtual sstables, fields are constructed through extrapolation upon virtual reader construction. See MakeVirtualReader for implementation details.
NB: The values of these properties can affect correctness. For example, if NumRangeKeySets == 0, but the sstable actually contains range keys, then the iterators will behave incorrectly.
func (*CommonProperties) NumPointDeletions ¶
func (c *CommonProperties) NumPointDeletions() uint64
NumPointDeletions is the number of point deletions in the sstable. For virtual sstables, this is an estimate.
func (*CommonProperties) String ¶
func (c *CommonProperties) String() string
String is only used for testing purposes.
type CommonReader ¶
type CommonReader interface {
    NewRawRangeKeyIter(
        ctx context.Context, transforms FragmentIterTransforms,
    ) (keyspan.FragmentIterator, error)
    NewRawRangeDelIter(
        ctx context.Context, transforms FragmentIterTransforms,
    ) (keyspan.FragmentIterator, error)
    NewPointIter(
        ctx context.Context,
        transforms IterTransforms,
        lower, upper []byte,
        filterer *BlockPropertiesFilterer,
        filterBlockSizeLimit FilterBlockSizeLimit,
        stats *base.InternalIteratorStats,
        statsAccum IterStatsAccumulator,
        rp ReaderProvider,
    ) (Iterator, error)
    NewCompactionIter(
        transforms IterTransforms,
        statsAccum IterStatsAccumulator,
        rp ReaderProvider,
        bufferPool *block.BufferPool,
    ) (Iterator, error)
    EstimateDiskUsage(start, end []byte) (uint64, error)
    CommonProperties() *CommonProperties
}
CommonReader abstracts functionality over a Reader or a VirtualReader. This can be used by code which doesn't care to distinguish between a reader and a virtual reader.
type Comparers ¶
Comparers is a map from comparer name to comparer. It is used for debugging tools which may be used on multiple databases configured with different comparers.
type FilterBlockSizeLimit ¶
type FilterBlockSizeLimit uint32
FilterBlockSizeLimit is a size limit for bloom filter blocks - if a bloom filter is present, it is used only when it is at most this size.
const (
    // NeverUseFilterBlock indicates that bloom filter blocks should never be
    // used.
    NeverUseFilterBlock FilterBlockSizeLimit = 0
    // AlwaysUseFilterBlock indicates that bloom filter blocks should always
    // be used, regardless of size.
    AlwaysUseFilterBlock FilterBlockSizeLimit = math.MaxUint32
)
type FilterMetrics ¶
type FilterMetrics struct {
    // The number of hits for the filter policy. This is the number of times
    // the filter policy was successfully used to avoid access of a data
    // block.
    Hits int64
    // The number of misses for the filter policy. This is the number of times
    // the filter policy was checked but was unable to filter an access of a
    // data block.
    Misses int64
}
FilterMetrics holds metrics for the filter policy.
type FilterMetricsTracker ¶
type FilterMetricsTracker struct {
// contains filtered or unexported fields
}
FilterMetricsTracker is used to keep track of filter metrics. It contains the same metrics as FilterMetrics, but they can be updated atomically. An instance of FilterMetricsTracker can be passed to a Reader as a ReaderOption.
func (*FilterMetricsTracker) Load ¶
func (m *FilterMetricsTracker) Load() FilterMetrics
Load returns the current values as FilterMetrics.
type FilterPolicy ¶
type FilterPolicy = base.FilterPolicy
FilterPolicy exports the base.FilterPolicy type.
type FilterWriter ¶
type FilterWriter = base.FilterWriter
FilterWriter exports the base.FilterWriter type.
type FragmentIterTransforms ¶
type FragmentIterTransforms = block.FragmentIterTransforms
FragmentIterTransforms re-exports block.FragmentIterTransforms.
type InternalKey ¶
type InternalKey = base.InternalKey
InternalKey exports the base.InternalKey type.
type IntervalMapper ¶
type IntervalMapper interface {
    // MapPointKey maps a point key to an interval. The interval can be empty,
    // which means that this key will effectively be ignored.
    MapPointKey(key InternalKey, value []byte) (BlockInterval, error)
    // MapRangeKeys maps a range key span to an interval. The interval can be
    // empty, which means that this span will effectively be ignored.
    MapRangeKeys(span Span) (BlockInterval, error)
}
IntervalMapper is an interface through which a user can define the mapping between keys and intervals. The interval for any collection of keys (e.g. a data block, a table) is the union of intervals for all keys.
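A sketch of an implementation that maps each key to the interval covering its integer suffix (the 8-byte big-endian suffix layout is an assumption for illustration; real suffix encodings are Comparer-specific):

    type suffixMapper struct {
        split sstable.Split // separates a user key into prefix and suffix
    }

    func (m suffixMapper) MapPointKey(key sstable.InternalKey, value []byte) (sstable.BlockInterval, error) {
        suffix := key.UserKey[m.split(key.UserKey):]
        if len(suffix) < 8 {
            return sstable.BlockInterval{}, nil // empty interval: ignore the key
        }
        v := binary.BigEndian.Uint64(suffix[:8])
        return sstable.BlockInterval{Lower: v, Upper: v + 1}, nil
    }

    func (m suffixMapper) MapRangeKeys(span sstable.Span) (sstable.BlockInterval, error) {
        return sstable.BlockInterval{}, nil // this sketch ignores range keys
    }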
type IterStatsAccumulator ¶
type IterStatsAccumulator interface {
    // Accumulate accumulates the provided stats.
    Accumulate(cas CategoryStats)
}
type IterTransforms ¶
type IterTransforms = block.IterTransforms
IterTransforms re-exports block.IterTransforms.
type Iterator ¶
type Iterator interface {
    base.InternalIterator

    // NextPrefix implements (base.InternalIterator).NextPrefix.
    NextPrefix(succKey []byte) *base.InternalKV

    SetCloseHook(fn func(i Iterator) error)
}
Iterator iterates over an entire table of data.
type KeySchemas ¶
KeySchemas is a map from key schema name to key schema. A single database may contain sstables with multiple key schemas.
func MakeKeySchemas ¶
func MakeKeySchemas(keySchemas ...*colblk.KeySchema) KeySchemas
MakeKeySchemas constructs a KeySchemas from a slice of key schemas.
type Layout ¶
type Layout struct {
    Data       []block.HandleWithProperties
    Index      []block.Handle
    TopIndex   block.Handle
    Filter     []NamedBlockHandle
    RangeDel   block.Handle
    RangeKey   block.Handle
    ValueBlock []block.Handle
    ValueIndex block.Handle
    Properties block.Handle
    MetaIndex  block.Handle
    Format     TableFormat
}
Layout describes the block organization of an sstable.
func (*Layout) Describe ¶
func (l *Layout) Describe(
    verbose bool, r *Reader, fmtKV func(key *base.InternalKey, value []byte) string,
) string
Describe returns a description of the layout. If the verbose parameter is true, details of the structure of each block are returned as well. If verbose is true and fmtKV is non-nil, the output includes the KVs (as formatted by this function).
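For example (a sketch, with a trivial key/value formatter):

    layout, err := r.Layout()
    if err != nil {
        return err
    }
    fmt.Println(layout.Describe(true /* verbose */, r,
        func(key *base.InternalKey, value []byte) string {
            return fmt.Sprintf("%s = %q", key, value)
        }))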
type Mergers ¶
Mergers is a map from merger name to merger. It is used for debugging tools which may be used on multiple databases configured with different mergers.
type NamedBlockHandle ¶
NamedBlockHandle holds a block.Handle and corresponding name.
type Properties ¶
type Properties struct {
    // CommonProperties needs to be at the top of the Properties struct so
    // that the offsets of the fields in CommonProperties match the offsets of
    // the embedded fields of CommonProperties in Properties.
    CommonProperties `prop:"pebble.embbeded_common_properties"`

    // The name of the comparer used in this table.
    ComparerName string `prop:"rocksdb.comparator"`
    // The total size of all data blocks.
    DataSize uint64 `prop:"rocksdb.data.size"`
    // The name of the filter policy used in this table. Empty if no filter
    // policy is used.
    FilterPolicyName string `prop:"rocksdb.filter.policy"`
    // The size of filter block.
    FilterSize uint64 `prop:"rocksdb.filter.size"`
    // Total number of index partitions if kTwoLevelIndexSearch is used.
    IndexPartitions uint64 `prop:"rocksdb.index.partitions"`
    // The size of index block.
    IndexSize uint64 `prop:"rocksdb.index.size"`
    // The index type. TODO(peter): add a more detailed description.
    IndexType uint32 `prop:"rocksdb.block.based.table.index.type"`
    // For formats >= TableFormatPebblev4, this is set to true if the obsolete
    // bit is strict for all the point keys.
    IsStrictObsolete bool `prop:"pebble.obsolete.is_strict"`
    // The name of the key schema used in this table. Empty for formats <=
    // TableFormatPebblev4.
    KeySchemaName string `prop:"pebble.colblk.schema"`
    // The name of the merger used in this table. Empty if no merger is used.
    MergerName string `prop:"rocksdb.merge.operator"`
    // The number of merge operands in the table.
    NumMergeOperands uint64 `prop:"rocksdb.merge.operands"`
    // The number of RANGEKEYUNSETs in this table.
    NumRangeKeyUnsets uint64 `prop:"pebble.num.range-key-unsets"`
    // The number of value blocks in this table. Only serialized if > 0.
    NumValueBlocks uint64 `prop:"pebble.num.value-blocks"`
    // The number of values stored in value blocks. Only serialized if > 0.
    NumValuesInValueBlocks uint64 `prop:"pebble.num.values.in.value-blocks"`
    // A comma separated list of names of the property collectors used in this
    // table.
    PropertyCollectorNames string `prop:"rocksdb.property.collectors"`
    // Total raw rangekey key size.
    RawRangeKeyKeySize uint64 `prop:"pebble.raw.range-key.key.size"`
    // Total raw rangekey value size.
    RawRangeKeyValueSize uint64 `prop:"pebble.raw.range-key.value.size"`
    // The total number of keys in this table that were pinned by open
    // snapshots.
    SnapshotPinnedKeys uint64 `prop:"pebble.num.snapshot-pinned-keys"`
    // The cumulative bytes of keys in this table that were pinned by open
    // snapshots. This value is comparable to RawKeySize.
    SnapshotPinnedKeySize uint64 `prop:"pebble.raw.snapshot-pinned-keys.size"`
    // The cumulative bytes of values in this table that were pinned by open
    // snapshots. This value is comparable to RawValueSize.
    SnapshotPinnedValueSize uint64 `prop:"pebble.raw.snapshot-pinned-values.size"`
    // Size of the top-level index if kTwoLevelIndexSearch is used.
    TopLevelIndexSize uint64 `prop:"rocksdb.top-level.index.size"`
    // User collected properties. Currently, we only use them to store block
    // properties aggregated at the table level.
    UserProperties map[string]string

    // Loaded set indicating which fields have been loaded from disk. Indexed
    // by the field's byte offset within the struct
    // (reflect.StructField.Offset). Only set if the properties have been
    // loaded from a file. Only exported for testing purposes.
    Loaded map[uintptr]struct{}
}
Properties holds the sstable property values. The properties are automatically populated during sstable creation and load from the properties meta block when an sstable is opened.
func (*Properties) NumPointDeletions ¶
func (p *Properties) NumPointDeletions() uint64
NumPointDeletions returns the number of point deletions in this table.
func (*Properties) NumRangeKeys ¶
func (p *Properties) NumRangeKeys() uint64
NumRangeKeys returns a count of the number of range keys in this table.
func (*Properties) String ¶
func (p *Properties) String() string
type QoSLevel ¶
type QoSLevel int
QoSLevel describes whether the read is latency-sensitive or not. Each category must map to a single QoSLevel. While category strings are opaque to Pebble, the QoSLevel may be internally utilized in Pebble to better optimize future reads.
func StringToQoSForTesting ¶
StringToQoSForTesting returns the QoSLevel for the string, or panics if the string is not known.
func (QoSLevel) SafeFormat ¶
func (q QoSLevel) SafeFormat(p redact.SafePrinter, verb rune)
SafeFormat implements the redact.SafeFormatter interface.
type RawColumnWriter ¶
type RawColumnWriter struct {
// contains filtered or unexported fields
}
RawColumnWriter is a sstable RawWriter that writes sstables with column-oriented blocks. All table formats TableFormatPebblev5 and later write column-oriented blocks and use RawColumnWriter.
func (*RawColumnWriter) AddWithForceObsolete ¶
func (w *RawColumnWriter) AddWithForceObsolete(
    key InternalKey, value []byte, forceObsolete bool,
) error
AddWithForceObsolete adds a point key/value pair when writing a strict-obsolete sstable. For a given Writer, the keys passed to Add must be in increasing order. Span keys (range deletions, range keys) must be added through EncodeSpan.
forceObsolete indicates whether the caller has determined that this key is obsolete even though it may be the latest point key for this userkey. This should be set to true for keys obsoleted by RANGEDELs, and is required for strict-obsolete sstables.
Note that there are two properties, S1 and S2 (see comment in format.go) that strict-obsolete ssts must satisfy. S2, due to RANGEDELs, is solely the responsibility of the caller. S1 is solely the responsibility of the callee.
func (*RawColumnWriter) Close ¶
func (w *RawColumnWriter) Close() (err error)
func (*RawColumnWriter) ComparePrev ¶
func (w *RawColumnWriter) ComparePrev(k []byte) int
ComparePrev compares the provided user key to the last point key written to the writer. The returned value is equivalent to Compare(key, prevKey) where prevKey is the last point key written to the writer.
If no key has been written yet, ComparePrev returns +1.
Must not be called after Writer is closed.
func (*RawColumnWriter) EncodeSpan ¶
func (w *RawColumnWriter) EncodeSpan(span keyspan.Span) error
EncodeSpan encodes the keys in the given span. The span can contain either only RANGEDEL keys or only range keys.
func (*RawColumnWriter) Error ¶
func (w *RawColumnWriter) Error() error
Error returns the current accumulated error if any.
func (*RawColumnWriter) EstimatedSize ¶
func (w *RawColumnWriter) EstimatedSize() uint64
EstimatedSize returns the estimated size of the sstable being written if a call to Close() was made without adding additional keys.
func (*RawColumnWriter) Metadata ¶
func (w *RawColumnWriter) Metadata() (*WriterMetadata, error)
Metadata returns the metadata for the finished sstable. Only valid to call after the sstable has been finished.
func (*RawColumnWriter) SetSnapshotPinnedProperties ¶
func (w *RawColumnWriter) SetSnapshotPinnedProperties(
    pinnedKeyCount, pinnedKeySize, pinnedValueSize uint64,
)
SetSnapshotPinnedProperties sets the properties for pinned keys. Should only be used internally by Pebble.
type RawRowWriter ¶
type RawRowWriter struct {
// contains filtered or unexported fields
}
RawRowWriter is a sstable RawWriter that writes sstables with row-oriented blocks. All table formats TableFormatPebblev4 and earlier write row-oriented blocks and use RawRowWriter.
func (*RawRowWriter) AddWithForceObsolete ¶
func (w *RawRowWriter) AddWithForceObsolete(
    key InternalKey, value []byte, forceObsolete bool,
) error
AddWithForceObsolete must be used when writing a strict-obsolete sstable.
forceObsolete indicates whether the caller has determined that this key is obsolete even though it may be the latest point key for this userkey. This should be set to true for keys obsoleted by RANGEDELs, and is required for strict-obsolete sstables.
Note that there are two properties, S1 and S2 (see comment in format.go) that strict-obsolete ssts must satisfy. S2, due to RANGEDELs, is solely the responsibility of the caller. S1 is solely the responsibility of the callee.
func (*RawRowWriter) Close ¶
func (w *RawRowWriter) Close() (err error)
Close finishes writing the table and closes the underlying file that the table was written to.
func (*RawRowWriter) ComparePrev ¶
func (w *RawRowWriter) ComparePrev(k []byte) int
ComparePrev compares the provided user key to the last point key written to the writer. The returned value is equivalent to Compare(key, prevKey) where prevKey is the last point key written to the writer.
If no key has been written yet, ComparePrev returns +1.
Must not be called after Writer is closed.
func (*RawRowWriter) EncodeSpan ¶
func (w *RawRowWriter) EncodeSpan(span keyspan.Span) error
EncodeSpan encodes the keys in the given span. The span can contain either only RANGEDEL keys or only range keys.
This is a low-level API that bypasses the fragmenter. The spans passed to this function must be fragmented and ordered.
func (*RawRowWriter) Error ¶
func (w *RawRowWriter) Error() error
Error returns the current accumulated error, if any.
func (*RawRowWriter) EstimatedSize ¶
func (w *RawRowWriter) EstimatedSize() uint64
EstimatedSize returns the estimated size of the sstable being written if a call to Close() was made without adding additional keys.
func (*RawRowWriter) Metadata ¶
func (w *RawRowWriter) Metadata() (*WriterMetadata, error)
Metadata returns the metadata for the finished sstable. Only valid to call after the sstable has been finished.
func (*RawRowWriter) SetSnapshotPinnedProperties ¶
func (w *RawRowWriter) SetSnapshotPinnedProperties(
    pinnedKeyCount, pinnedKeySize, pinnedValueSize uint64,
)
SetSnapshotPinnedProperties sets the properties for pinned keys. Should only be used internally by Pebble.
type RawWriter ¶
type RawWriter interface {
    // Error returns the current accumulated error if any.
    Error() error

    // AddWithForceObsolete must be used when writing a strict-obsolete
    // sstable.
    //
    // forceObsolete indicates whether the caller has determined that this key
    // is obsolete even though it may be the latest point key for this
    // userkey. This should be set to true for keys obsoleted by RANGEDELs,
    // and is required for strict-obsolete sstables. It's optional for
    // non-strict-obsolete sstables.
    //
    // Note that there are two properties, S1 and S2 (see comment in
    // format.go) that strict-obsolete ssts must satisfy. S2, due to
    // RANGEDELs, is solely the responsibility of the caller. S1 is solely the
    // responsibility of the callee.
    AddWithForceObsolete(key InternalKey, value []byte, forceObsolete bool) error

    // EncodeSpan encodes the keys in the given span. The span can contain
    // either only RANGEDEL keys or only range keys.
    //
    // This is a low-level API that bypasses the fragmenter. The spans passed
    // to this function must be fragmented and ordered.
    EncodeSpan(span keyspan.Span) error

    // EstimatedSize returns the estimated size of the sstable being written
    // if a call to Close() was made without adding additional keys.
    EstimatedSize() uint64

    // ComparePrev compares the provided user key to the last point key
    // written to the writer. The returned value is equivalent to
    // Compare(key, prevKey) where prevKey is the last point key written to
    // the writer.
    //
    // If no key has been written yet, ComparePrev returns +1.
    //
    // Must not be called after Writer is closed.
    ComparePrev(k []byte) int

    // SetSnapshotPinnedProperties sets the properties for pinned keys. Should
    // only be used internally by Pebble.
    SetSnapshotPinnedProperties(keyCount, keySize, valueSize uint64)

    // Close finishes writing the table and closes the underlying file that
    // the table was written to.
    Close() error

    // Metadata returns the metadata for the finished sstable. Only valid to
    // call after the sstable has been finished.
    Metadata() (*WriterMetadata, error)

    // contains filtered or unexported methods
}
RawWriter defines an interface for sstable writers. Implementations may vary depending on the TableFormat being written.
func NewRawWriter ¶
func NewRawWriter(writable objstorage.Writable, o WriterOptions) RawWriter
NewRawWriter returns a new table writer for the file. Closing the writer will close the file.
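A usage sketch, assuming writable is an open objstorage.Writable and using base.MakeInternalKey to construct the internal key:

    w := sstable.NewRawWriter(writable, sstable.WriterOptions{})
    k := base.MakeInternalKey([]byte("a"), 1, sstable.InternalKeyKindSet)
    if err := w.AddWithForceObsolete(k, []byte("value"), false); err != nil {
        w.Close()
        return err
    }
    if err := w.Close(); err != nil {
        return err
    }
    meta, err := w.Metadata() // valid only after the sstable is finished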
type ReadableFile ¶
ReadableFile describes the smallest subset of vfs.File that is required for reading SSTs.
type Reader ¶
type Reader struct {
    Comparer *base.Comparer

    Compare Compare
    Equal   Equal
    Split   Split

    Properties Properties
    // contains filtered or unexported fields
}
Reader is a table reader.
func NewMemReader ¶
func NewMemReader(sst []byte, o ReaderOptions) (*Reader, error)
NewMemReader opens a reader over the SST stored in the passed []byte.
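A minimal sketch, assuming `sst` holds a complete serialized table:

r, err := sstable.NewMemReader(sst, sstable.ReaderOptions{})
if err != nil {
	return err
}
defer r.Close()
// Properties is exported on Reader; NumEntries is one of its fields.
fmt.Println(r.Properties.NumEntries)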
func NewReader ¶
func NewReader(ctx context.Context, f objstorage.Readable, o ReaderOptions) (*Reader, error)
NewReader returns a new table reader for the file. Closing the reader will close the file.
The context is used for tracing any operations performed by NewReader; it is NOT stored for future use.
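A sketch of the typical open path, assuming `f` is a ReadableFile (e.g. a vfs.File) wrapped via NewSimpleReadable:

readable, err := sstable.NewSimpleReadable(f)
if err != nil {
	return err
}
// The context is only consulted while NewReader runs (e.g. for tracing).
r, err := sstable.NewReader(context.Background(), readable, sstable.ReaderOptions{})
if err != nil {
	return err
}
defer r.Close()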
func (*Reader) CommonProperties ¶
func (r *Reader) CommonProperties() *CommonProperties
CommonProperties implements the CommonReader interface.
func (*Reader) EstimateDiskUsage ¶
func (r *Reader) EstimateDiskUsage(start, end []byte) (uint64, error)
EstimateDiskUsage returns the total size of data blocks overlapping the range `[start, end]`. Even if a data block partially overlaps, or we cannot determine overlap due to abbreviated index keys, the full data block size is included in the estimation.
This function does not account for any metablock space usage. Assumes there is at least partial overlap, i.e., `[start, end]` falls neither completely before nor completely after the file's range.
Only blocks containing point keys are considered. Range deletion and range key blocks are not considered.
TODO(ajkr): account for metablock space usage. Perhaps look at the fraction of data blocks overlapped and add that same fraction of the metadata blocks to the estimate.
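A sketch, assuming Reader's signature mirrors VirtualReader.EstimateDiskUsage(start, end []byte) shown later on this page:

// Estimate the bytes of data blocks overlapping ["b", "f"]; the bounds
// must at least partially overlap the file's range.
size, err := r.EstimateDiskUsage([]byte("b"), []byte("f"))
if err != nil {
	return err
}
// size includes the full size of any partially overlapping block.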
func (*Reader) NewCompactionIter ¶
func (r *Reader) NewCompactionIter(transforms IterTransforms, statsAccum IterStatsAccumulator, rp ReaderProvider, bufferPool *block.BufferPool) (Iterator, error)
NewCompactionIter returns an iterator similar to NewIter but it also increments the number of bytes iterated. If an error occurs, NewCompactionIter cleans up after itself and returns a nil iterator.
func (*Reader) NewIter ¶
func (r *Reader) NewIter(transforms IterTransforms, lower, upper []byte) (Iterator, error)
NewIter returns an iterator for the point keys in the table. It is a simplified version of NewPointIter and should only be used for tests and tooling.
NewIter must only be used when the Reader is guaranteed to outlive any LazyValues returned from the iter.
func (*Reader) NewPointIter ¶
func (r *Reader) NewPointIter(ctx context.Context, transforms IterTransforms, lower, upper []byte, filterer *BlockPropertiesFilterer, filterBlockSizeLimit FilterBlockSizeLimit, stats *base.InternalIteratorStats, statsAccum IterStatsAccumulator, rp ReaderProvider) (Iterator, error)
NewPointIter returns an iterator for the point keys in the table.
If transform.HideObsoletePoints is set, the callee assumes that filterer already includes obsoleteKeyBlockPropertyFilter. The caller can satisfy this contract by first calling TryAddBlockPropertyFilterForHideObsoletePoints.
func (*Reader) NewRawRangeDelIter ¶
func (r *Reader) NewRawRangeDelIter(ctx context.Context, transforms FragmentIterTransforms) (iter keyspan.FragmentIterator, err error)
NewRawRangeDelIter returns an internal iterator for the contents of the range-del block for the table. Returns nil if the table does not contain any range deletions.
func (*Reader) NewRawRangeKeyIter ¶
func (r *Reader) NewRawRangeKeyIter(ctx context.Context, transforms FragmentIterTransforms) (iter keyspan.FragmentIterator, err error)
NewRawRangeKeyIter returns an internal iterator for the contents of the range-key block for the table. Returns nil if the table does not contain any range keys.
func (*Reader) TableFormat ¶
func (r *Reader) TableFormat() (TableFormat, error)
TableFormat returns the format version for the table.
func (*Reader) TryAddBlockPropertyFilterForHideObsoletePoints ¶
func (r *Reader) TryAddBlockPropertyFilterForHideObsoletePoints(snapshotForHideObsoletePoints base.SeqNum, fileLargestSeqNum base.SeqNum, pointKeyFilters []BlockPropertyFilter) (hideObsoletePoints bool, filters []BlockPropertyFilter)
TryAddBlockPropertyFilterForHideObsoletePoints is expected to be called before the call to NewPointIter, to get the value of hideObsoletePoints and potentially add a block property filter.
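A sketch of the documented call order. The filterer construction is elided behind a hypothetical newFilterer helper, and AlwaysUseFilterBlock and the IterTransforms.HideObsoletePoints field are assumed from recent Pebble versions:

hide, filters := r.TryAddBlockPropertyFilterForHideObsoletePoints(
	snapshotSeqNum, fileLargestSeqNum, pointKeyFilters)
// hide == true implies filters now includes obsoleteKeyBlockPropertyFilter,
// satisfying NewPointIter's contract for HideObsoletePoints.
filterer := newFilterer(filters) // hypothetical; construction elided
transforms := sstable.IterTransforms{HideObsoletePoints: hide}
iter, err := r.NewPointIter(ctx, transforms, lower, upper, filterer,
	sstable.AlwaysUseFilterBlock, stats, statsAccum,
	sstable.MakeTrivialReaderProvider(r))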
func (*Reader) ValidateBlockChecksums ¶
func (r *Reader) ValidateBlockChecksums() error
ValidateBlockChecksums validates the checksums for each block in the SSTable.
type ReaderOptions ¶
type ReaderOptions struct {
	// LoadBlockSema, if set, is used to limit the number of blocks that can be
	// loaded (i.e. read from the filesystem) in parallel. Each load acquires one
	// unit from the semaphore for the duration of the read.
	LoadBlockSema *fifo.Semaphore
	// User properties specified in this map will not be added to
	// sst.Properties.UserProperties.
	DeniedUserProperties map[string]struct{}
	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer
	// Merger defines the Merge function in use for this keyspace.
	Merger *Merger

	Comparers Comparers
	Mergers   Mergers
	// KeySchemas contains the set of known key schemas to use when interpreting
	// columnar data blocks. Only used for sstables encoded in format
	// TableFormatPebblev5 or higher.
	KeySchemas KeySchemas
	// Filters is a map from filter policy name to filter policy. Filters with
	// policies that are not in this map will be ignored.
	Filters map[string]FilterPolicy
	// LoggerAndTracer is an optional logger and tracer.
	LoggerAndTracer base.LoggerAndTracer
	// FilterMetricsTracker is optionally used to track filter metrics.
	FilterMetricsTracker *FilterMetricsTracker
	// contains filtered or unexported fields
}
ReaderOptions holds the parameters needed for reading an sstable.
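For instance, a minimal configuration; the comparer must match the one the table was written with:

o := sstable.ReaderOptions{
	Comparer: myComparer, // assumption: the comparer used at write time
}
r, err := sstable.NewReader(ctx, readable, o)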
func (*ReaderOptions) SetInternal ¶
func (o *ReaderOptions) SetInternal(internalOpts sstableinternal.ReaderOptions)
SetInternal sets the internal reader options. Note that even though this method is public, a caller outside the pebble package can't construct a value to pass to it.
func (*ReaderOptions) SetInternalCacheOpts ¶
func (o *ReaderOptions) SetInternalCacheOpts(cacheOpts sstableinternal.CacheOptions)
SetInternalCacheOpts sets the internal cache options. Note that even though this method is public, a caller outside the pebble package can't construct a value to pass to it.
type ReaderProvider ¶
ReaderProvider supports the implementation of blockProviderWhenClosed. GetReader and Close can be called multiple times in pairs.
func MakeTrivialReaderProvider ¶
func MakeTrivialReaderProvider(r *Reader) ReaderProvider
MakeTrivialReaderProvider creates a ReaderProvider which always returns the given reader. It should be used when the Reader will outlive the iterator tree.
type SyntheticPrefix ¶
type SyntheticPrefix = block.SyntheticPrefix
SyntheticPrefix re-exports block.SyntheticPrefix.
type SyntheticPrefixAndSuffix ¶
type SyntheticPrefixAndSuffix = block.SyntheticPrefixAndSuffix
SyntheticPrefixAndSuffix re-exports block.SyntheticPrefixAndSuffix.
func MakeSyntheticPrefixAndSuffix ¶
func MakeSyntheticPrefixAndSuffix(prefix SyntheticPrefix, suffix SyntheticSuffix) SyntheticPrefixAndSuffix
MakeSyntheticPrefixAndSuffix returns a SyntheticPrefixAndSuffix with the given prefix and suffix.
type SyntheticSeqNum ¶
type SyntheticSeqNum = block.SyntheticSeqNum
SyntheticSeqNum re-exports block.SyntheticSeqNum.
type SyntheticSuffix ¶
type SyntheticSuffix = block.SyntheticSuffix
SyntheticSuffix re-exports block.SyntheticSuffix.
type TableFormat ¶
type TableFormat uint32
TableFormat specifies the format version for sstables. The legacy LevelDB format is format version 1.
const (
	TableFormatUnspecified TableFormat = iota
	TableFormatLevelDB
	TableFormatRocksDBv2
	TableFormatPebblev1 // Block properties.
	TableFormatPebblev2 // Range keys.
	TableFormatPebblev3 // Value blocks.
	TableFormatPebblev4 // DELSIZED tombstones.
	TableFormatPebblev5 // Columnar blocks.
	NumTableFormats

	TableFormatMax = NumTableFormats - 1

	// TableFormatMinSupported is the minimum format supported by Pebble. This
	// package still supports older formats for uses outside of Pebble
	// (CockroachDB uses it to read data from backups that could be old).
	TableFormatMinSupported = TableFormatPebblev1
)
The available table formats, representing the tuple (magic number, version number). Note that these values are not (and should not be) serialized to disk. The ordering should follow the order the versions were introduced to Pebble (i.e. the history is linear).
func ParseTableFormatString ¶
func ParseTableFormatString(s string) (TableFormat, error)
ParseTableFormatString parses a TableFormat from its human-readable string representation.
func (TableFormat) AsTuple ¶
func (f TableFormat) AsTuple() (string, uint32)
AsTuple returns the TableFormat's (Magic String, Version) tuple.
func (TableFormat) BlockColumnar ¶
func (f TableFormat) BlockColumnar() bool
BlockColumnar returns true iff the table format uses the columnar format for data, index and keyspan blocks.
func (TableFormat) String ¶
func (f TableFormat) String() string
String returns the TableFormat (Magic String, Version) tuple.
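ParseTableFormatString and String are inverses, so a round trip is expected to recover the original value:

s := sstable.TableFormatPebblev4.String()
f, err := sstable.ParseTableFormatString(s)
if err != nil {
	return err
}
// f == sstable.TableFormatPebblev4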
type TestFixtureInfo ¶
type TestFixtureInfo struct {
	Filename           string
	Compression        block.Compression
	FullKeyFilter      bool
	PrefixFilter       bool
	IndexBlockSize     int
	UseFixtureComparer bool
}
TestFixtureInfo contains all metadata necessary to generate a test sstable.
type TestKeysMaskingFilter ¶
type TestKeysMaskingFilter struct {
*BlockIntervalFilter
}
TestKeysMaskingFilter implements BlockPropertyFilterMask and may be used to mask point keys with the testkeys-style suffixes (eg, @4) that are masked by range keys with testkeys-style suffixes.
func NewTestKeysMaskingFilter ¶
func NewTestKeysMaskingFilter() TestKeysMaskingFilter
NewTestKeysMaskingFilter constructs a TestKeysMaskingFilter that implements pebble.BlockPropertyFilterMask for efficient range-key masking using the testkeys block property filter. The masking filter wraps a block interval filter, and modifies the configured interval when Pebble requests it.
func (TestKeysMaskingFilter) Intersects ¶
func (f TestKeysMaskingFilter) Intersects(prop []byte) (bool, error)
Intersects implements the BlockPropertyFilter interface.
func (TestKeysMaskingFilter) SetSuffix ¶
func (f TestKeysMaskingFilter) SetSuffix(suffix []byte) error
SetSuffix implements pebble.BlockPropertyFilterMask.
func (TestKeysMaskingFilter) SyntheticSuffixIntersects ¶
func (f TestKeysMaskingFilter) SyntheticSuffixIntersects(prop []byte, suffix []byte) (bool, error)
SyntheticSuffixIntersects implements the BlockPropertyFilter interface.
type UserKeyPrefixBound ¶
type UserKeyPrefixBound struct {
	// Lower is a lower bound user key prefix.
	Lower []byte
	// Upper is an upper bound user key prefix.
	Upper []byte
}
UserKeyPrefixBound represents a [Lower,Upper) bound of user key prefixes. If both are nil, there is no bound specified. Else, Compare(Lower,Upper) must be < 0.
func (*UserKeyPrefixBound) IsEmpty ¶
func (ukb *UserKeyPrefixBound) IsEmpty() bool
IsEmpty returns true iff the bound is empty.
type VirtualReader ¶
type VirtualReader struct {
	Properties CommonProperties
	// contains filtered or unexported fields
}
VirtualReader wraps Reader. Its purpose is to restrict functionality of the Reader which should be inaccessible to virtual sstables, and enforce bounds invariants associated with virtual sstables. All reads on virtual sstables should go through a VirtualReader.
INVARIANT: Any iterators created through a virtual reader will guarantee that they don't expose keys outside the virtual sstable bounds.
func MakeVirtualReader ¶
func MakeVirtualReader(reader *Reader, p VirtualReaderParams) VirtualReader
MakeVirtualReader is used to construct a reader which can read from virtual sstables.
func (*VirtualReader) CommonProperties ¶
func (v *VirtualReader) CommonProperties() *CommonProperties
CommonProperties implements the CommonReader interface.
func (*VirtualReader) EstimateDiskUsage ¶
func (v *VirtualReader) EstimateDiskUsage(start, end []byte) (uint64, error)
EstimateDiskUsage just calls VirtualReader.reader.EstimateDiskUsage after enforcing the virtual sstable bounds.
func (*VirtualReader) NewCompactionIter ¶
func (v *VirtualReader) NewCompactionIter(transforms IterTransforms, statsAccum IterStatsAccumulator, rp ReaderProvider, bufferPool *block.BufferPool) (Iterator, error)
NewCompactionIter is the compaction iterator function for virtual readers.
func (*VirtualReader) NewPointIter ¶
func (v *VirtualReader) NewPointIter(ctx context.Context, transforms IterTransforms, lower, upper []byte, filterer *BlockPropertiesFilterer, filterBlockSizeLimit FilterBlockSizeLimit, stats *base.InternalIteratorStats, statsAccum IterStatsAccumulator, rp ReaderProvider) (Iterator, error)
NewPointIter returns an iterator for the point keys in the table.
If transform.HideObsoletePoints is set, the callee assumes that filterer already includes obsoleteKeyBlockPropertyFilter. The caller can satisfy this contract by first calling TryAddBlockPropertyFilterForHideObsoletePoints.
We assume that the [lower, upper) bounds (if specified) will have at least some overlap with the virtual sstable bounds. No overlap is not currently supported in the iterator.
func (*VirtualReader) NewRawRangeDelIter ¶
func (v *VirtualReader) NewRawRangeDelIter(ctx context.Context, transforms FragmentIterTransforms) (keyspan.FragmentIterator, error)
NewRawRangeDelIter wraps Reader.NewRawRangeDelIter.
func (*VirtualReader) NewRawRangeKeyIter ¶
func (v *VirtualReader) NewRawRangeKeyIter(ctx context.Context, transforms FragmentIterTransforms) (keyspan.FragmentIterator, error)
NewRawRangeKeyIter wraps Reader.NewRawRangeKeyIter.
func (*VirtualReader) UnsafeReader ¶
func (v *VirtualReader) UnsafeReader() *Reader
UnsafeReader returns the underlying *sstable.Reader behind a VirtualReader.
func (*VirtualReader) ValidateBlockChecksumsOnBacking ¶
func (v *VirtualReader) ValidateBlockChecksumsOnBacking() error
ValidateBlockChecksumsOnBacking will call ValidateBlockChecksums on the underlying reader. Note that block checksum validation is NOT restricted to virtual sstable bounds.
type VirtualReaderParams ¶
type VirtualReaderParams struct {
	Lower   InternalKey
	Upper   InternalKey
	FileNum base.FileNum
	// Size is an estimate of the size of the [Lower, Upper) section of the table.
	Size uint64
	// BackingSize is the total size of the backing table. The ratio between Size
	// and BackingSize is used to estimate statistics.
	BackingSize uint64
}
VirtualReaderParams are the parameters necessary to create a VirtualReader.
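A sketch of constructing and using a virtual reader; the bounds, file number, and sizes are illustrative, and base.MakeInternalKey is Pebble-internal:

lower := base.MakeInternalKey([]byte("c"), 0, base.InternalKeyKindSet)
upper := base.MakeInternalKey([]byte("p"), 0, base.InternalKeyKindSet)
v := sstable.MakeVirtualReader(r, sstable.VirtualReaderParams{
	Lower:       lower,
	Upper:       upper,
	FileNum:     7,       // illustrative
	Size:        1 << 20, // estimate of the [Lower, Upper) section
	BackingSize: 8 << 20, // total size of the backing table
})
props := v.CommonProperties() // stats are scaled by Size/BackingSize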
type Writer ¶
type Writer struct {
// contains filtered or unexported fields
}
Writer is a table writer.
func NewWriter ¶
func NewWriter(writable objstorage.Writable, o WriterOptions) *Writer
NewWriter returns a new table writer intended for building external sstables (eg, for ingestion or storage outside the LSM) for the file. Closing the writer will close the file.
Internal clients should generally prefer NewRawWriter.
func (*Writer) Close ¶
func (w *Writer) Close() error
Close finishes writing the table and closes the underlying file that the table was written to.
func (*Writer) Delete ¶
func (w *Writer) Delete(key []byte) error
Delete deletes the value for the given key. The sequence number is set to 0. Intended for use to externally construct an sstable before ingestion into a DB.
TODO(peter): untested
func (*Writer) DeleteRange ¶
func (w *Writer) DeleteRange(start, end []byte) error
DeleteRange deletes all of the keys (and values) in the range [start,end) (inclusive on start, exclusive on end). The sequence number is set to 0. Intended for use to externally construct an sstable before ingestion into a DB.
Calls to DeleteRange must be made using already-fragmented (non-overlapping) spans and in sorted order.
TODO(peter): untested
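For example, two abutting but non-overlapping tombstones added in sorted order:

if err := w.DeleteRange([]byte("a"), []byte("c")); err != nil {
	w.Close()
	return err
}
if err := w.DeleteRange([]byte("c"), []byte("f")); err != nil {
	w.Close()
	return err
}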
func (*Writer) Merge ¶
func (w *Writer) Merge(key, value []byte) error
Merge adds an action to the DB that merges the value at key with the new value. The details of the merge are dependent upon the configured merge operator. The sequence number is set to 0. Intended for use to externally construct an sstable before ingestion into a DB.
TODO(peter): untested
func (*Writer) Metadata ¶
func (w *Writer) Metadata() (*WriterMetadata, error)
Metadata returns the metadata for the finished sstable. Only valid to call after the sstable has been finished.
func (*Writer) RangeKeyDelete ¶
func (w *Writer) RangeKeyDelete(start, end []byte) error
RangeKeyDelete deletes a range between start (inclusive) and end (exclusive).
Keys must be added to the table in increasing order of start key. Spans are not required to be fragmented.
func (*Writer) RangeKeySet ¶
func (w *Writer) RangeKeySet(start, end, suffix, value []byte) error
RangeKeySet sets a range between start (inclusive) and end (exclusive) with the given suffix to the given value. The resulting range key is given the sequence number zero, with the expectation that the resulting sstable will be ingested.
Keys must be added to the table in increasing order of start key. Spans are not required to be fragmented. The same suffix may not be set or unset twice over the same keyspan, because it would result in inconsistent state. Both the Set and Unset would share the zero sequence number, and a key cannot be both simultaneously set and unset.
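For example, distinct suffixes over the same span are allowed, while repeating a suffix is not:

// Set [a, m)@1=v1 and [a, m)@2=v2. Setting or unsetting @1 again over
// [a, m) in this table would create inconsistent state.
if err := w.RangeKeySet([]byte("a"), []byte("m"), []byte("@1"), []byte("v1")); err != nil {
	w.Close()
	return err
}
if err := w.RangeKeySet([]byte("a"), []byte("m"), []byte("@2"), []byte("v2")); err != nil {
	w.Close()
	return err
}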
func (*Writer) RangeKeyUnset ¶
func (w *Writer) RangeKeyUnset(start, end, suffix []byte) error
RangeKeyUnset un-sets a range between start (inclusive) and end (exclusive) with the given suffix. The resulting range key is given the sequence number zero, with the expectation that the resulting sstable will be ingested.
Keys must be added to the table in increasing order of start key. Spans are not required to be fragmented. The same suffix may not be set or unset twice over the same keyspan, because it would result in inconsistent state. Both the Set and Unset would share the zero sequence number, and a key cannot be both simultaneously set and unset.
type WriterMetadata ¶
type WriterMetadata struct {
	Size          uint64
	SmallestPoint InternalKey
	// LargestPoint, LargestRangeKey, LargestRangeDel should not be accessed
	// before Writer.Close is called, because they may only be set on
	// Writer.Close.
	LargestPoint     InternalKey
	SmallestRangeDel InternalKey
	LargestRangeDel  InternalKey
	SmallestRangeKey InternalKey
	LargestRangeKey  InternalKey
	HasPointKeys     bool
	HasRangeDelKeys  bool
	HasRangeKeys     bool
	SmallestSeqNum   base.SeqNum
	LargestSeqNum    base.SeqNum
	Properties       Properties
}
WriterMetadata holds info about a finished sstable.
func RewriteKeySuffixesViaWriter ¶
func RewriteKeySuffixesViaWriter(r *Reader, out objstorage.Writable, o WriterOptions, from, to []byte) (*WriterMetadata, error)
RewriteKeySuffixesViaWriter is similar to RewriteKeySuffixes but uses just a single loop over the Reader that writes each key to the Writer with the new suffix. This is significantly slower than the parallelized rewriter, and does more work to rederive filters, props, etc.
Any obsolete bits that key-value pairs may be annotated with are ignored and lost during the rewrite. Some of the obsolete bits may be recreated -- specifically when there are multiple keys with the same user key. Additionally, the output sstable has the pebble.obsolete.is_strict property set to false. See the longer comment at RewriteKeySuffixesAndReturnFormat.
func (*WriterMetadata) SetLargestPointKey ¶
func (m *WriterMetadata) SetLargestPointKey(k InternalKey)
SetLargestPointKey sets the largest point key to the given key. NB: this method sets the "absolute" largest point key. Any existing key is overridden.
func (*WriterMetadata) SetLargestRangeDelKey ¶
func (m *WriterMetadata) SetLargestRangeDelKey(k InternalKey)
SetLargestRangeDelKey sets the largest rangedel key to the given key. NB: this method sets the "absolute" largest rangedel key. Any existing key is overridden.
func (*WriterMetadata) SetLargestRangeKey ¶
func (m *WriterMetadata) SetLargestRangeKey(k InternalKey)
SetLargestRangeKey sets the largest range key to the given key. NB: this method sets the "absolute" largest range key. Any existing key is overridden.
func (*WriterMetadata) SetSmallestPointKey ¶
func (m *WriterMetadata) SetSmallestPointKey(k InternalKey)
SetSmallestPointKey sets the smallest point key to the given key. NB: this method sets the "absolute" smallest point key. Any existing key is overridden.
func (*WriterMetadata) SetSmallestRangeDelKey ¶
func (m *WriterMetadata) SetSmallestRangeDelKey(k InternalKey)
SetSmallestRangeDelKey sets the smallest rangedel key to the given key. NB: this method sets the "absolute" smallest rangedel key. Any existing key is overridden.
func (*WriterMetadata) SetSmallestRangeKey ¶
func (m *WriterMetadata) SetSmallestRangeKey(k InternalKey)
SetSmallestRangeKey sets the smallest range key to the given key. NB: this method sets the "absolute" smallest range key. Any existing key is overridden.
type WriterOptions ¶
type WriterOptions struct {
	// BlockRestartInterval is the number of keys between restart points
	// for delta encoding of keys.
	//
	// The default value is 16.
	BlockRestartInterval int
	// BlockSize is the target uncompressed size in bytes of each table block.
	//
	// The default value is 4096.
	BlockSize int
	// BlockSizeThreshold finishes a block if the block size is larger than the
	// specified percentage of the target block size and adding the next entry
	// would cause the block to be larger than the target block size.
	//
	// The default value is 90.
	BlockSizeThreshold int
	// SizeClassAwareThreshold imposes a minimum block size restriction for blocks
	// to be flushed, that is computed as the percentage of the target block size.
	// Note that this threshold takes precedence over BlockSizeThreshold when
	// valid AllocatorSizeClasses are specified.
	//
	// The default value is 60.
	SizeClassAwareThreshold int
	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer
	// Compression defines the per-block compression to use.
	//
	// The default value (DefaultCompression) uses snappy compression.
	Compression block.Compression
	// FilterPolicy defines a filter algorithm (such as a Bloom filter) that can
	// reduce disk reads for Get calls.
	//
	// One such implementation is bloom.FilterPolicy(10) from the pebble/bloom
	// package.
	//
	// The default value means to use no filter.
	FilterPolicy FilterPolicy
	// FilterType defines whether an existing filter policy is applied at a
	// block-level or table-level. Block-level filters use less memory to create,
	// but are slower to access as a check for the key in the index must first be
	// performed to locate the filter block. A table-level filter will require
	// memory proportional to the number of keys in an sstable to create, but
	// avoids the index lookup when determining if a key is present. Table-level
	// filters should be preferred except under constrained memory situations.
	FilterType FilterType
	// IndexBlockSize is the target uncompressed size in bytes of each index
	// block. When the index block size is larger than this target, two-level
	// indexes are automatically enabled. Setting this option to a large value
	// (such as math.MaxInt32) disables the automatic creation of two-level
	// indexes.
	//
	// The default value is the value of BlockSize.
	IndexBlockSize int
	// KeySchema describes the schema to use for sstable formats that make use
	// of columnar blocks, decomposing keys into their constituent components.
	// Ignored if TableFormat <= TableFormatPebblev4.
	KeySchema *colblk.KeySchema
	// Merger defines the associative merge operation to use for merging values
	// written with {Batch,DB}.Merge. The MergerName is checked for consistency
	// with the value stored in the sstable when it was written.
	MergerName string
	// TableFormat specifies the format version for writing sstables. The default
	// is TableFormatMinSupported.
	TableFormat TableFormat
	// IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment
	// in format.go. Must be false if format < TableFormatPebblev4.
	//
	// TODO(bilal): set this when writing shared ssts.
	IsStrictObsolete bool
	// WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is
	// used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the
	// youngest for a userkey.
	WritingToLowestLevel bool
	// BlockPropertyCollectors is a list of BlockPropertyCollector creation
	// functions. A new BlockPropertyCollector is created for each sstable
	// built and lives for the lifetime of writing that table.
	BlockPropertyCollectors []func() BlockPropertyCollector
	// Checksum specifies which checksum to use.
	Checksum block.ChecksumType
	// Parallelism is used to indicate that the sstable Writer is allowed to
	// compress data blocks and write datablocks to disk in parallel with the
	// Writer client goroutine.
	Parallelism bool
	// ShortAttributeExtractor mirrors
	// Options.Experimental.ShortAttributeExtractor.
	ShortAttributeExtractor base.ShortAttributeExtractor
	// RequiredInPlaceValueBound mirrors
	// Options.Experimental.RequiredInPlaceValueBound.
	RequiredInPlaceValueBound UserKeyPrefixBound
	// DisableValueBlocks is only used for TableFormat >= TableFormatPebblev3,
	// and if set to true, does not write any values to value blocks. This is
	// only intended for cases where the in-memory buffering of all value blocks
	// while writing a sstable is too expensive and likely to cause an OOM. It
	// is never set to true by a Pebble DB, and can be set to true when some
	// external code is directly generating huge sstables using Pebble's
	// sstable.Writer (for example, CockroachDB backups can sometimes write
	// 750MB sstables -- see
	// https://github.com/cockroachdb/cockroach/issues/117113).
	DisableValueBlocks bool
	// AllocatorSizeClasses provides a sorted list containing the supported size
	// classes of the underlying memory allocator. This provides hints to the
	// writer's flushing policy to select block sizes that preemptively reduce
	// internal fragmentation when loaded into the block cache.
	AllocatorSizeClasses []int
	// NumDeletionsThreshold mirrors Options.Experimental.NumDeletionsThreshold.
	NumDeletionsThreshold int
	// DeletionSizeRatioThreshold mirrors
	// Options.Experimental.DeletionSizeRatioThreshold.
	DeletionSizeRatioThreshold float32
	// contains filtered or unexported fields
}
WriterOptions holds the parameters used to control building an sstable.
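A representative configuration sketch; block.SnappyCompression and bloom.FilterPolicy(10) (from pebble/bloom, as the FilterPolicy comment suggests) are assumptions, and the sizes are illustrative:

o := sstable.WriterOptions{
	BlockSize:            32 << 10, // 32 KiB data blocks
	BlockRestartInterval: 16,
	Compression:          block.SnappyCompression,
	FilterPolicy:         bloom.FilterPolicy(10),
	TableFormat:          sstable.TableFormatPebblev4,
}
w := sstable.NewWriter(writable, o)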
func (*WriterOptions) SetInternal ¶
func (o *WriterOptions) SetInternal(internalOpts sstableinternal.WriterOptions)
SetInternal sets the internal writer options. Note that even though this method is public, a caller outside the pebble package can't construct a value to pass to it.
Source Files
¶
- block_property.go
- block_property_obsolete.go
- block_property_test_utils.go
- category_stats.go
- colblk_writer.go
- comparer.go
- copier.go
- filter.go
- format.go
- internal.go
- layout.go
- options.go
- properties.go
- reader.go
- reader_common.go
- reader_iter.go
- reader_iter_single_lvl.go
- reader_iter_two_lvl.go
- reader_virtual.go
- rowblk_writer.go
- suffix_rewriter.go
- table.go
- test_fixtures.go
- test_utils.go
- unsafe.go
- value_block.go
- write_queue.go
- writer.go