Documentation ¶
Index ¶
- Constants
- Variables
- func BytesMax(a, b []byte) []byte
- func BytesMin(a, b []byte) []byte
- func CleanUpFiles(ctx context.Context, store storage.ExternalStorage, subDir string) error
- func GetAllFileNames(ctx context.Context, store storage.ExternalStorage, subDir string) ([]string, []string, error)
- func GetMaxOverlapping(points []Endpoint) int64
- func GetMaxOverlappingTotal(stats []MultipleFilesStat) int64
- func MergeOverlappingFiles(ctx context.Context, paths []string, store storage.ExternalStorage, ...) error
- func MockExternalEngine(storage storage.ExternalStorage, keys [][]byte, values [][]byte) (dataFiles []string, statsFiles []string, err error)
- func MockExternalEngineWithWriter(storage storage.ExternalStorage, writer *Writer, subDir string, keys [][]byte, ...) (dataFiles []string, statsFiles []string, err error)
- func NewExternalEngine(storage storage.ExternalStorage, dataFiles []string, statsFiles []string, ...) common.Engine
- type Endpoint
- type EndpointTp
- type Engine
- func (e *Engine) Close() error
- func (e *Engine) GetKeyRange() (startKey []byte, endKey []byte, err error)
- func (e *Engine) ID() string
- func (e *Engine) ImportedStatistics() (importedSize int64, importedKVCount int64)
- func (e *Engine) KVStatistics() (totalKVSize int64, totalKVCount int64)
- func (e *Engine) LoadIngestData(ctx context.Context, regionRanges []common.Range, ...) error
- func (e *Engine) SplitRanges(startKey, endKey []byte, _, _ int64, _ log.Logger) ([]common.Range, error)
- type EngineWriter
- type KeyValueStore
- type MemoryIngestData
- func (m *MemoryIngestData) DecRef()
- func (m *MemoryIngestData) Finish(totalBytes, totalCount int64)
- func (m *MemoryIngestData) GetFirstAndLastKey(lowerBound, upperBound []byte) ([]byte, []byte, error)
- func (m *MemoryIngestData) GetTS() uint64
- func (m *MemoryIngestData) IncRef()
- func (m *MemoryIngestData) NewIter(ctx context.Context, lowerBound, upperBound []byte) common.ForwardIter
- type MergeKVIter
- type MergePropIter
- type MultipleFilesStat
- type OnCloseFunc
- type RangeSplitter
- type SortedKVMeta
- type Writer
- type WriterBuilder
- func (b *WriterBuilder) Build(store storage.ExternalStorage, prefix string, writerID string) *Writer
- func (b *WriterBuilder) SetBlockSize(blockSize int) *WriterBuilder
- func (b *WriterBuilder) SetKeyDuplicationEncoding(val bool) *WriterBuilder
- func (b *WriterBuilder) SetMemorySizeLimit(size uint64) *WriterBuilder
- func (b *WriterBuilder) SetOnCloseFunc(onClose OnCloseFunc) *WriterBuilder
- func (b *WriterBuilder) SetPropKeysDistance(dist uint64) *WriterBuilder
- func (b *WriterBuilder) SetPropSizeDistance(dist uint64) *WriterBuilder
- func (b *WriterBuilder) SetWriterBatchCount(count uint64) *WriterBuilder
- type WriterSummary
Constants ¶
const DefaultBlockSize = 16 * units.MiB
DefaultBlockSize is the default block size for preAllocKVBuf.
const ( // DefaultMemSizeLimit is the default memory size limit for writer. DefaultMemSizeLimit = 256 * size.MB )
Variables ¶
var ( // ConcurrentReaderBufferSizePerConc is the buffer size for concurrent reader per // concurrency. ConcurrentReaderBufferSizePerConc = 4 * 1024 * 1024 // ConcurrentReaderConcurrency is the concurrency for concurrent reader. ConcurrentReaderConcurrency = 8 )
var ( // MergeSortOverlapThreshold is the threshold of overlap between sorted kv files. // if the overlap ratio is greater than this threshold, we will merge the files. MergeSortOverlapThreshold int64 = 1000 // MergeSortFileCountStep is the step of file count when we split the sorted kv files. MergeSortFileCountStep = 1000 )
var LargeRegionSplitDataThreshold = int(config.SplitRegionSize)
LargeRegionSplitDataThreshold is exposed for test.
Functions ¶
func CleanUpFiles ¶
CleanUpFiles delete all data and stat files under one subDir.
func GetAllFileNames ¶
func GetAllFileNames( ctx context.Context, store storage.ExternalStorage, subDir string, ) ([]string, []string, error)
GetAllFileNames returns data file paths and stat file paths. Both paths are sorted.
func GetMaxOverlapping ¶
GetMaxOverlapping returns the maximum overlapping weight treating given `points` as endpoints of intervals. `points` are not required to be sorted, and will be sorted in-place in this function.
func GetMaxOverlappingTotal ¶
func GetMaxOverlappingTotal(stats []MultipleFilesStat) int64
GetMaxOverlappingTotal assume the most overlapping case from given stats and returns the overlapping level.
func MergeOverlappingFiles ¶
func MergeOverlappingFiles(ctx context.Context, paths []string, store storage.ExternalStorage, readBufferSize int, newFilePrefix string, blockSize int, writeBatchCount uint64, propSizeDist uint64, propKeysDist uint64, onClose OnCloseFunc, concurrency int, checkHotspot bool) error
MergeOverlappingFiles reads from given files whose key range may overlap and writes to new sorted, nonoverlapping files.
func MockExternalEngine ¶
func MockExternalEngine( storage storage.ExternalStorage, keys [][]byte, values [][]byte, ) (dataFiles []string, statsFiles []string, err error)
MockExternalEngine generates an external engine with the given keys and values.
func MockExternalEngineWithWriter ¶
func MockExternalEngineWithWriter( storage storage.ExternalStorage, writer *Writer, subDir string, keys [][]byte, values [][]byte, ) (dataFiles []string, statsFiles []string, err error)
MockExternalEngineWithWriter generates an external engine with the given writer, keys and values.
func NewExternalEngine ¶
func NewExternalEngine( storage storage.ExternalStorage, dataFiles []string, statsFiles []string, startKey []byte, endKey []byte, splitKeys [][]byte, regionSplitSize int64, keyAdapter common.KeyAdapter, duplicateDetection bool, duplicateDB *pebble.DB, dupDetectOpt common.DupDetectOpt, workerConcurrency int, ts uint64, totalKVSize int64, totalKVCount int64, checkHotspot bool, ) common.Engine
NewExternalEngine creates an (external) engine.
Types ¶
type Endpoint ¶
type Endpoint struct { Key []byte Tp EndpointTp Weight int64 // all EndpointTp use positive weight }
Endpoint represents an endpoint of an interval which can be used by GetMaxOverlapping.
type EndpointTp ¶
type EndpointTp int
EndpointTp is the type of Endpoint.Key.
const ( // ExclusiveEnd represents "..., Endpoint.Key)". ExclusiveEnd EndpointTp = iota // InclusiveStart represents "[Endpoint.Key, ...". InclusiveStart // InclusiveEnd represents "..., Endpoint.Key]". InclusiveEnd )
type Engine ¶
type Engine struct {
// contains filtered or unexported fields
}
Engine stored sorted key/value pairs in an external storage.
func (*Engine) GetKeyRange ¶
GetKeyRange implements common.Engine.
func (*Engine) ImportedStatistics ¶
ImportedStatistics returns the imported kv size and imported kv count.
func (*Engine) KVStatistics ¶
KVStatistics returns the total kv size and total kv count.
func (*Engine) LoadIngestData ¶
func (e *Engine) LoadIngestData( ctx context.Context, regionRanges []common.Range, outCh chan<- common.DataAndRange, ) error
LoadIngestData loads the data from the external storage to memory in [start, end) range, so local backend can ingest it. The used byte slice of ingest data are allocated from Engine.bufPool and must be released by MemoryIngestData.DecRef().
type EngineWriter ¶
type EngineWriter struct {
// contains filtered or unexported fields
}
EngineWriter implements backend.EngineWriter interface.
func NewEngineWriter ¶
func NewEngineWriter(w *Writer) *EngineWriter
NewEngineWriter creates a new EngineWriter.
func (*EngineWriter) AppendRows ¶
AppendRows implements backend.EngineWriter interface.
func (*EngineWriter) Close ¶
func (e *EngineWriter) Close(ctx context.Context) (backend.ChunkFlushStatus, error)
Close implements backend.EngineWriter interface.
func (*EngineWriter) IsSynced ¶
func (e *EngineWriter) IsSynced() bool
IsSynced implements backend.EngineWriter interface.
type KeyValueStore ¶
type KeyValueStore struct {
// contains filtered or unexported fields
}
KeyValueStore stores key-value pairs and maintains the range properties.
func NewKeyValueStore ¶
func NewKeyValueStore( ctx context.Context, dataWriter storage.ExternalFileWriter, rangePropertiesCollector *rangePropertiesCollector, ) (*KeyValueStore, error)
NewKeyValueStore creates a new KeyValueStore. The data will be written to the given dataWriter and range properties will be maintained in the given rangePropertiesCollector.
func (*KeyValueStore) Close ¶
func (s *KeyValueStore) Close()
Close closes the KeyValueStore and append the last range property.
type MemoryIngestData ¶
type MemoryIngestData struct {
// contains filtered or unexported fields
}
MemoryIngestData is the in-memory implementation of IngestData.
func (*MemoryIngestData) DecRef ¶
func (m *MemoryIngestData) DecRef()
DecRef implements IngestData.DecRef.
func (*MemoryIngestData) Finish ¶
func (m *MemoryIngestData) Finish(totalBytes, totalCount int64)
Finish implements IngestData.Finish.
func (*MemoryIngestData) GetFirstAndLastKey ¶
func (m *MemoryIngestData) GetFirstAndLastKey(lowerBound, upperBound []byte) ([]byte, []byte, error)
GetFirstAndLastKey implements IngestData.GetFirstAndLastKey.
func (*MemoryIngestData) GetTS ¶
func (m *MemoryIngestData) GetTS() uint64
GetTS implements IngestData.GetTS.
func (*MemoryIngestData) IncRef ¶
func (m *MemoryIngestData) IncRef()
IncRef implements IngestData.IncRef.
func (*MemoryIngestData) NewIter ¶
func (m *MemoryIngestData) NewIter(ctx context.Context, lowerBound, upperBound []byte) common.ForwardIter
NewIter implements IngestData.NewIter.
type MergeKVIter ¶
type MergeKVIter struct {
// contains filtered or unexported fields
}
MergeKVIter is an iterator that merges multiple sorted KV pairs from different files.
func NewMergeKVIter ¶
func NewMergeKVIter( ctx context.Context, paths []string, pathsStartOffset []uint64, exStorage storage.ExternalStorage, readBufferSize int, checkHotspot bool, ) (*MergeKVIter, error)
NewMergeKVIter creates a new MergeKVIter. The KV can be accessed by calling Next() then Key() or Values(). readBufferSize is the buffer size for each file reader, which means the total memory usage is readBufferSize * len(paths).
func (*MergeKVIter) Error ¶
func (i *MergeKVIter) Error() error
Error returns the error of the iterator.
func (*MergeKVIter) Next ¶
func (i *MergeKVIter) Next() bool
Next moves the iterator to the next position. When it returns false, the iterator is not usable.
type MergePropIter ¶
type MergePropIter struct {
// contains filtered or unexported fields
}
MergePropIter is an iterator that merges multiple range properties from different files.
func NewMergePropIter ¶
func NewMergePropIter( ctx context.Context, paths []string, exStorage storage.ExternalStorage, checkHotSpot bool, ) (*MergePropIter, error)
NewMergePropIter creates a new MergePropIter.
func (*MergePropIter) Error ¶
func (i *MergePropIter) Error() error
Error returns the error of the iterator.
func (*MergePropIter) Next ¶
func (i *MergePropIter) Next() bool
Next moves the iterator to the next position.
type MultipleFilesStat ¶
type MultipleFilesStat struct { MinKey tidbkv.Key `json:"min-key"` MaxKey tidbkv.Key `json:"max-key"` Filenames [][2]string `json:"filenames"` // [dataFile, statFile] MaxOverlappingNum int64 `json:"max-overlapping-num"` }
MultipleFilesStat is the statistic information of multiple files (currently every 500 files). It is used to estimate the data overlapping, and per-file statistic information maybe too big to loaded into memory.
type OnCloseFunc ¶
type OnCloseFunc func(summary *WriterSummary)
OnCloseFunc is the callback function when a writer is closed.
type RangeSplitter ¶
type RangeSplitter struct {
// contains filtered or unexported fields
}
RangeSplitter is used to split key ranges of an external engine. It will return one group of ranges by invoking `SplitOneRangesGroup` once.
func NewRangeSplitter ¶
func NewRangeSplitter( ctx context.Context, dataFiles, statFiles []string, externalStorage storage.ExternalStorage, rangesGroupSize, rangesGroupKeys int64, maxRangeSize, maxRangeKeys int64, checkHotSpot bool, ) (*RangeSplitter, error)
NewRangeSplitter creates a new RangeSplitter. `dataFiles` and `statFiles` must be corresponding to each other. `rangesGroupSize` and `rangesGroupKeys` controls the total range group size of one `SplitOneRangesGroup` invocation, while `rangeSize` and `rangeKeys` controls the size of one range.
func (*RangeSplitter) Close ¶
func (r *RangeSplitter) Close() error
Close release the resources of RangeSplitter.
func (*RangeSplitter) GetRangeSplitSize ¶
func (r *RangeSplitter) GetRangeSplitSize() int64
GetRangeSplitSize returns the expected size of one range.
func (*RangeSplitter) SplitOneRangesGroup ¶
func (r *RangeSplitter) SplitOneRangesGroup() ( endKeyOfGroup []byte, dataFiles []string, statFiles []string, rangeSplitKeys [][]byte, err error, )
SplitOneRangesGroup splits one group of ranges. `endKeyOfGroup` represents the end key of the group, but it will be nil when the group is the last one. `dataFiles` and `statFiles` are all the files that have overlapping key ranges in this group. `rangeSplitKeys` are the internal split keys of the ranges in this group.
type SortedKVMeta ¶
type SortedKVMeta struct { StartKey []byte `json:"start-key"` EndKey []byte `json:"end-key"` // exclusive TotalKVSize uint64 `json:"total-kv-size"` MultipleFilesStats []MultipleFilesStat `json:"multiple-files-stats"` }
SortedKVMeta is the meta of sorted kv.
func NewSortedKVMeta ¶
func NewSortedKVMeta(summary *WriterSummary) *SortedKVMeta
NewSortedKVMeta creates a SortedKVMeta from a WriterSummary. If the summary is empty, it will return a pointer to zero SortedKVMeta.
func (*SortedKVMeta) GetDataFiles ¶
func (m *SortedKVMeta) GetDataFiles() []string
GetDataFiles returns all data files in the meta.
func (*SortedKVMeta) GetStatFiles ¶
func (m *SortedKVMeta) GetStatFiles() []string
GetStatFiles returns all stat files in the meta.
func (*SortedKVMeta) Merge ¶
func (m *SortedKVMeta) Merge(other *SortedKVMeta)
Merge merges the other SortedKVMeta into this one.
func (*SortedKVMeta) MergeSummary ¶
func (m *SortedKVMeta) MergeSummary(summary *WriterSummary)
MergeSummary merges the WriterSummary into this SortedKVMeta.
type Writer ¶
type Writer struct {
// contains filtered or unexported fields
}
Writer is used to write data into external storage.
func (*Writer) LockForWrite ¶
func (w *Writer) LockForWrite() func()
LockForWrite implements ingest.Writer. Since flushKVs is thread-safe in external storage writer, this is implemented as noop.
type WriterBuilder ¶
type WriterBuilder struct {
// contains filtered or unexported fields
}
WriterBuilder builds a new Writer.
func NewWriterBuilder ¶
func NewWriterBuilder() *WriterBuilder
NewWriterBuilder creates a WriterBuilder.
func (*WriterBuilder) Build ¶
func (b *WriterBuilder) Build( store storage.ExternalStorage, prefix string, writerID string, ) *Writer
Build builds a new Writer. The files writer will create are under the prefix of "{prefix}/{writerID}".
func (*WriterBuilder) SetBlockSize ¶
func (b *WriterBuilder) SetBlockSize(blockSize int) *WriterBuilder
SetBlockSize sets the block size of pre-allocated buf in the writer.
func (*WriterBuilder) SetKeyDuplicationEncoding ¶
func (b *WriterBuilder) SetKeyDuplicationEncoding(val bool) *WriterBuilder
SetKeyDuplicationEncoding sets if the writer can distinguish duplicate key.
func (*WriterBuilder) SetMemorySizeLimit ¶
func (b *WriterBuilder) SetMemorySizeLimit(size uint64) *WriterBuilder
SetMemorySizeLimit sets the memory size limit of the writer. When accumulated data size exceeds this limit, the writer will flush data as a file to external storage.
func (*WriterBuilder) SetOnCloseFunc ¶
func (b *WriterBuilder) SetOnCloseFunc(onClose OnCloseFunc) *WriterBuilder
SetOnCloseFunc sets the callback function when a writer is closed.
func (*WriterBuilder) SetPropKeysDistance ¶
func (b *WriterBuilder) SetPropKeysDistance(dist uint64) *WriterBuilder
SetPropKeysDistance sets the distance of range keys for each property.
func (*WriterBuilder) SetPropSizeDistance ¶
func (b *WriterBuilder) SetPropSizeDistance(dist uint64) *WriterBuilder
SetPropSizeDistance sets the distance of range size for each property.
func (*WriterBuilder) SetWriterBatchCount ¶
func (b *WriterBuilder) SetWriterBatchCount(count uint64) *WriterBuilder
SetWriterBatchCount sets the batch count of the writer.
type WriterSummary ¶
type WriterSummary struct { WriterID string Seq int // Min and Max are the min and max key written by this writer, both are // inclusive, i.e. [Min, Max]. // will be empty if no key is written. Min tidbkv.Key Max tidbkv.Key TotalSize uint64 MultipleFilesStats []MultipleFilesStat }
WriterSummary is the summary of a writer.