Documentation ¶
Index ¶
- Constants
- Variables
- func AddNewAnalyzeJob(job *AnalyzeJob)
- func CMSketchToProto(c *CMSketch) *tipb.CMSketch
- func ClearHistoryJobs()
- func ConvertDatumsType(vals []types.Datum, ft *types.FieldType, loc *time.Location) error
- func DecodeFeedback(val []byte, q *QueryFeedback, c *CMSketch, ft *types.FieldType) error
- func EncodeCMSketchWithoutTopN(c *CMSketch) ([]byte, error)
- func EncodeFeedback(q *QueryFeedback) ([]byte, error)
- func FMSketchToProto(s *FMSketch) *tipb.FMSketch
- func GetOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int
- func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, ...) (float64, error)
- func HistogramEqual(a, b *Histogram, ignoreID bool) bool
- func HistogramToProto(hg *Histogram) *tipb.Histogram
- func IsAnalyzed(flag int64) bool
- func MoveToHistory(job *AnalyzeJob)
- func ResetAnalyzeFlag(flag int64) int64
- func RowToDatums(row chunk.Row, fields []*ast.ResultField) []types.Datum
- func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector
- func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) error
- func SplitFeedbackByQueryType(feedbacks []Feedback) ([]Feedback, []Feedback)
- func SupportColumnType(ft *types.FieldType) bool
- func ValueToString(value *types.Datum, idxCols int) (string, error)
- type AnalyzeJob
- type Bucket
- type BucketFeedback
- type CMSketch
- func CMSketchFromProto(protoSketch *tipb.CMSketch) *CMSketch
- func DecodeCMSketch(data []byte, topNRows []chunk.Row) (*CMSketch, error)
- func NewCMSketch(d, w int32) *CMSketch
- func NewCMSketchWithTopN(d, w int32, sample [][]byte, numTop uint32, rowCount uint64) (*CMSketch, uint64, uint64)
- func UpdateCMSketch(c *CMSketch, eqFeedbacks []Feedback) *CMSketch
- func (c *CMSketch) AppendTopN(data []byte, count uint64)
- func (c *CMSketch) Copy() *CMSketch
- func (c *CMSketch) Equal(rc *CMSketch) bool
- func (c *CMSketch) GetWidthAndDepth() (int32, int32)
- func (c *CMSketch) InsertBytes(bytes []byte)
- func (c *CMSketch) MergeCMSketch(rc *CMSketch, numTopN uint32) error
- func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch, numTopN uint32) error
- func (c *CMSketch) QueryBytes(d []byte) uint64
- func (c *CMSketch) TopN() []*TopNMeta
- func (c *CMSketch) TotalCount() uint64
- type Column
- func (c *Column) AvgColSize(count int64, isKey bool) float64
- func (c *Column) AvgColSizeChunkFormat(count int64) float64
- func (c *Column) AvgColSizeListInDisk(count int64) float64
- func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, ...) (float64, error)
- func (c *Column) IsInvalid(sc *stmtctx.StatementContext, collPseudo bool) bool
- func (c *Column) String() string
- type ErrorRate
- type FMSketch
- type Feedback
- type HistColl
- func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl
- func (coll *HistColl) GetAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isEncodedKey bool, ...) (size float64)
- func (coll *HistColl) GetAvgRowSizeListInDisk(cols []*expression.Column) (size float64)
- func (coll *HistColl) GetIndexAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isUnique bool) (size float64)
- func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetTableAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, storeType kv.StoreType, ...) (size float64)
- func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl
- func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, statsNodes []*StatsNode) *HistColl
- func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, ...) (float64, []*StatsNode, error)
- type Histogram
- func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, ...) (*Histogram, error)
- func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, ...) (*Histogram, error)
- func HistogramFromProto(protoHg *tipb.Histogram) *Histogram
- func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)
- func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, ...) *Histogram
- func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram
- func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)
- func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64
- func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64
- func (hg *Histogram) BucketToString(bktID, idxCols int) string
- func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)
- func (hg *Histogram) Copy() *Histogram
- func (hg *Histogram) DecodeTo(tp *types.FieldType, timeZone *time.Location) error
- func (hg *Histogram) ExtractTopN(cms *CMSketch, numCols int, numTopN uint32) error
- func (hg *Histogram) GetIncreaseFactor(totalCount int64) float64
- func (hg *Histogram) GetLower(idx int) *types.Datum
- func (hg *Histogram) GetUpper(idx int) *types.Datum
- func (hg *Histogram) IsIndexHist() bool
- func (hg *Histogram) Len() int
- func (hg *Histogram) LessRowCountWithBktIdx(value types.Datum) (float64, int)
- func (hg *Histogram) PreCalculateScalar()
- func (hg *Histogram) RemoveUpperBound() *Histogram
- func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool)
- func (hg *Histogram) ToString(idxCols int) string
- func (hg *Histogram) TotalRowCount() float64
- func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram
- type Index
- type QueryFeedback
- func (q *QueryFeedback) Actual() int64
- func (q *QueryFeedback) CalcErrorRate() float64
- func (q *QueryFeedback) CollectFeedback(numOfRanges int) bool
- func (q *QueryFeedback) DecodeIntValues() *QueryFeedback
- func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error)
- func (q *QueryFeedback) Invalidate()
- func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range)
- func (q *QueryFeedback) Update(startKey kv.Key, counts []int64)
- type SampleBuilder
- type SampleCollector
- type SampleItem
- type SortedBuilder
- type StatsNode
- type Table
- func (t *Table) ColumnBetweenRowCount(sc *stmtctx.StatementContext, a, b types.Datum, colID int64) float64
- func (t *Table) ColumnByName(colName string) *Column
- func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) (float64, error)
- func (t *Table) ColumnGreaterRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
- func (t *Table) ColumnLessRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
- func (t *Table) Copy() *Table
- func (t *Table) IndexStartWithColumn(colName string) *Index
- func (t *Table) IsOutdated() bool
- func (t *Table) PseudoAvgCountPerValue() float64
- func (t *Table) String() string
- type TopNMeta
Constants ¶
const ( CurStatsVersion = Version1 Version1 = 1 )
Constants for stats version. These constants can be used to solve compatibility issues.
const ( IndexType = iota PkType ColType )
The type of the StatsNode.
const ( // PseudoVersion means the pseudo statistics version is 0. PseudoVersion uint64 = 0 // PseudoRowCount export for other pkg to use. // When we haven't analyzed a table, we use pseudo statistics to estimate costs. // It has row count 10000, equal condition selects 1/1000 of total rows, less condition selects 1/3 of total rows, // between condition selects 1/40 of total rows. PseudoRowCount = 10000 )
const AnalyzeFlag = 1
AnalyzeFlag is set when the statistics comes from analyze and has not been modified by feedback.
const MaxErrorRate = 0.25
MaxErrorRate is the max error rate of estimate row count of a not pseudo column. If the table is pseudo, but the average error rate is less than MaxErrorRate, then the column is not pseudo.
Variables ¶
var ( // MaxNumberOfRanges is the max number of ranges before split to collect feedback. MaxNumberOfRanges = 20 // FeedbackProbability is the probability to collect the feedback. FeedbackProbability = atomic.NewFloat64(0) )
var HistogramNeededColumns = neededColumnMap{/* contains filtered or unexported fields */}
HistogramNeededColumns stores the columns whose Histograms need to be loaded from physical kv layer. Currently, we only load index/pk's Histogram from kv automatically. Columns' Histograms are loaded on demand.
var RatioOfPseudoEstimate = atomic.NewFloat64(0.7)
RatioOfPseudoEstimate means if modifyCount / statsTblCount is greater than this ratio, we think the stats is invalid and use pseudo estimation.
Functions ¶
func AddNewAnalyzeJob ¶
func AddNewAnalyzeJob(job *AnalyzeJob)
AddNewAnalyzeJob adds new analyze job.
func CMSketchToProto ¶
func CMSketchToProto(c *CMSketch) *tipb.CMSketch
CMSketchToProto converts CMSketch to its protobuf representation.
func ConvertDatumsType ¶
ConvertDatumsType converts the datums type to `ft`.
func DecodeFeedback ¶
DecodeFeedback decodes a byte slice to feedback.
func EncodeCMSketchWithoutTopN ¶
EncodeCMSketchWithoutTopN encodes the given CMSketch to byte slice. Note that it does not include the topN.
func EncodeFeedback ¶
func EncodeFeedback(q *QueryFeedback) ([]byte, error)
EncodeFeedback encodes the given feedback to byte slice.
func FMSketchToProto ¶
func FMSketchToProto(s *FMSketch) *tipb.FMSketch
FMSketchToProto converts FMSketch to its protobuf representation.
func GetOrdinalOfRangeCond ¶
func GetOrdinalOfRangeCond(sc *stmtctx.StatementContext, ran *ranger.Range) int
GetOrdinalOfRangeCond gets the ordinal of the position range condition; if it does not exist, it returns the end position.
func GetPseudoRowCountByColumnRanges ¶
func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, columnRanges []*ranger.Range, colIdx int) (float64, error)
GetPseudoRowCountByColumnRanges calculates the row count by the ranges if there's no statistics information for this column.
func HistogramEqual ¶
HistogramEqual tests if two histograms are equal.
func HistogramToProto ¶
func HistogramToProto(hg *Histogram) *tipb.Histogram
HistogramToProto converts Histogram to its protobuf representation. Note that when this is used, the lower/upper bound in the bucket must be BytesDatum.
func IsAnalyzed ¶
IsAnalyzed checks whether this flag contains AnalyzeFlag.
func MoveToHistory ¶
func MoveToHistory(job *AnalyzeJob)
MoveToHistory moves the analyze job to history.
func ResetAnalyzeFlag ¶
ResetAnalyzeFlag resets the AnalyzeFlag because it has been modified by feedback.
func RowToDatums ¶
RowToDatums converts row to datum slice.
func SampleCollectorToProto ¶
func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector
SampleCollectorToProto converts SampleCollector to its protobuf representation.
func SortSampleItems ¶
func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) error
SortSampleItems sorts a slice of SampleItem.
func SplitFeedbackByQueryType ¶
SplitFeedbackByQueryType splits the feedbacks into equality feedbacks and range feedbacks.
func SupportColumnType ¶
SupportColumnType checks if the type of the column can be updated by feedback.
Types ¶
type AnalyzeJob ¶
type AnalyzeJob struct { sync.Mutex DBName string TableName string PartitionName string JobInfo string RowCount int64 StartTime time.Time State string // contains filtered or unexported fields }
AnalyzeJob is used to represent the status of one analyze job.
func GetAllAnalyzeJobs ¶
func GetAllAnalyzeJobs() []*AnalyzeJob
GetAllAnalyzeJobs gets all analyze jobs.
func (*AnalyzeJob) Finish ¶
func (job *AnalyzeJob) Finish(meetError bool)
Finish updates the status of the analyze job to finished or failed according to `meetError`.
func (*AnalyzeJob) Start ¶
func (job *AnalyzeJob) Start()
Start marks the status of the analyze job as running and updates the start time.
func (*AnalyzeJob) Update ¶
func (job *AnalyzeJob) Update(rowCount int64)
Update updates the row count of analyze job.
type BucketFeedback ¶
type BucketFeedback struct {
// contains filtered or unexported fields
}
BucketFeedback stands for all the feedback for a bucket.
type CMSketch ¶
type CMSketch struct {
// contains filtered or unexported fields
}
CMSketch is used to estimate point queries. Refer: https://en.wikipedia.org/wiki/Count-min_sketch
func CMSketchFromProto ¶
func CMSketchFromProto(protoSketch *tipb.CMSketch) *CMSketch
CMSketchFromProto converts CMSketch from its protobuf representation.
func DecodeCMSketch ¶
DecodeCMSketch decodes a CMSketch from the given byte slice.
func NewCMSketchWithTopN ¶
func NewCMSketchWithTopN(d, w int32, sample [][]byte, numTop uint32, rowCount uint64) (*CMSketch, uint64, uint64)
NewCMSketchWithTopN returns a new CM sketch with TopN elements, the estimate NDV and the scale ratio.
func UpdateCMSketch ¶
UpdateCMSketch updates the CMSketch by feedback.
func (*CMSketch) AppendTopN ¶
AppendTopN appends a topn into the cm sketch.
func (*CMSketch) GetWidthAndDepth ¶
GetWidthAndDepth returns the width and depth of CM Sketch.
func (*CMSketch) InsertBytes ¶
InsertBytes inserts the bytes value into the CM Sketch.
func (*CMSketch) MergeCMSketch ¶
MergeCMSketch merges two CM Sketch.
func (*CMSketch) MergeCMSketch4IncrementalAnalyze ¶
MergeCMSketch4IncrementalAnalyze merges two CM Sketch for incremental analyze. Since there is no value that appears partially in `c` and `rc` for incremental analyze, it uses `max` to merge them. Here is a simple proof: when we query from the CM sketch, we use the `min` to get the answer:
(1): For values that only appear in `c`, using `max` to merge them affects the `min` query result less than using `sum`; (2): For values that only appear in `rc`, it is the same as condition (1); (3): For values that appear both in `c` and `rc`, if they do not appear partially in `c` and `rc`, for example, if `v` appears 5 times in the table, it can appear 5 times in `c` and 3 times in `rc`, then `max` also gives the correct answer.
So in fact, if we can know the number of appearances of each value in the first place, it is better to use `max` to construct the CM sketch rather than `sum`.
func (*CMSketch) QueryBytes ¶
QueryBytes is used to query the count of specified bytes.
func (*CMSketch) TotalCount ¶
TotalCount returns the total count in the sketch, it is only used for test.
type Column ¶
type Column struct { Histogram *CMSketch PhysicalID int64 Count int64 Info *model.ColumnInfo IsHandle bool ErrorRate Flag int64 LastAnalyzePos types.Datum }
Column represents a column histogram.
func (*Column) AvgColSize ¶
AvgColSize is the average column size of the histogram. These sizes are derived from function `encode` and `Datum::ConvertTo`, so we need to update them if those 2 functions are changed.
func (*Column) AvgColSizeChunkFormat ¶
AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode` and `DecodeToChunk`, so we need to update them if those 2 functions are changed.
func (*Column) AvgColSizeListInDisk ¶
AvgColSizeListInDisk is the average column size of the histogram. These sizes are derived from `chunk.ListInDisk`, so we need to update them if that implementation is changed.
func (*Column) GetColumnRowCount ¶
func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, pkIsHandle bool) (float64, error)
GetColumnRowCount estimates the row count by a slice of Range.
type ErrorRate ¶
ErrorRate is the error rate of estimate row count by bucket and cm sketch.
func (*ErrorRate) NotAccurate ¶
NotAccurate is true when the total of query is zero or the average error rate is greater than MaxErrorRate.
type FMSketch ¶
type FMSketch struct {
// contains filtered or unexported fields
}
FMSketch is used to count the number of distinct elements in a set.
func FMSketchFromProto ¶
func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch
FMSketchFromProto converts FMSketch from its protobuf representation.
func (*FMSketch) InsertValue ¶
InsertValue inserts a value into the FM sketch.
type HistColl ¶
type HistColl struct { PhysicalID int64 Columns map[int64]*Column Indices map[int64]*Index // Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner. Idx2ColumnIDs map[int64][]int64 // ColID2IdxID maps the column id to index id whose first column is it. It's used to calculate the selectivity in planner. ColID2IdxID map[int64]int64 Count int64 ModifyCount int64 // Total modify count in a table. // HavePhysicalID is true means this HistColl is from single table and have its ID's information. // The physical id is used when try to load column stats from storage. HavePhysicalID bool Pseudo bool }
HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity.
func (*HistColl) GenerateHistCollFromColumnInfo ¶
func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl
GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and Idx2ColumnIDs are built from the given parameters.
func (*HistColl) GetAvgRowSize ¶
func (coll *HistColl) GetAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isEncodedKey bool, isForScan bool) (size float64)
GetAvgRowSize computes average row size for given columns.
func (*HistColl) GetAvgRowSizeListInDisk ¶
func (coll *HistColl) GetAvgRowSizeListInDisk(cols []*expression.Column) (size float64)
GetAvgRowSizeListInDisk computes average row size for given columns.
func (*HistColl) GetIndexAvgRowSize ¶
func (coll *HistColl) GetIndexAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, isUnique bool) (size float64)
GetIndexAvgRowSize computes average row size for an index scan.
func (*HistColl) GetRowCountByColumnRanges ¶
func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error)
GetRowCountByColumnRanges estimates the row count by a slice of Range.
func (*HistColl) GetRowCountByIndexRanges ¶
func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error)
GetRowCountByIndexRanges estimates the row count by a slice of Range.
func (*HistColl) GetRowCountByIntColumnRanges ¶
func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error)
GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
func (*HistColl) GetTableAvgRowSize ¶
func (coll *HistColl) GetTableAvgRowSize(ctx sessionctx.Context, cols []*expression.Column, storeType kv.StoreType, handleInCols bool) (size float64)
GetTableAvgRowSize computes average row size for a table scan, excluding the index key-value pairs.
func (*HistColl) ID2UniqueID ¶
func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl
ID2UniqueID generates a new HistColl whose `Columns` is built from UniqueID of given columns.
func (*HistColl) NewHistCollBySelectivity ¶
func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, statsNodes []*StatsNode) *HistColl
NewHistCollBySelectivity creates new HistColl by the given statsNodes.
func (*HistColl) Selectivity ¶
func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, filledPaths []*planutil.AccessPath) (float64, []*StatsNode, error)
Selectivity calculates the selectivity of the expressions. The definition of selectivity is (row count after filter / row count before filter). And exprs must be CNF now, in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should be held when you call this. Currently the time complexity is O(n^2).
type Histogram ¶
type Histogram struct { ID int64 // Column ID. NDV int64 // Number of distinct values. NullCount int64 // Number of null values. // LastUpdateVersion is the version that this histogram updated last time. LastUpdateVersion uint64 Tp *types.FieldType // Histogram elements. // // A bucket bound is the smallest and greatest values stored in the bucket. The lower and upper bound // are stored in one column. // // A bucket count is the number of items stored in all previous buckets and the current bucket. // Bucket counts are always in increasing order. // // A bucket repeat is the number of repeats of the bucket value, it can be used to find popular values. Bounds *chunk.Chunk Buckets []Bucket // TotColSize is the total column size for the histogram. // For unfixed-len types, it includes LEN and BYTE. TotColSize int64 // Correlation is the statistical correlation between physical row ordering and logical ordering of // the column values. This ranges from -1 to +1, and it is only valid for Column histogram, not for // Index histogram. Correlation float64 // contains filtered or unexported fields }
Histogram represents statistics for a column or index.
func BuildColumn ¶
func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType) (*Histogram, error)
BuildColumn builds histogram from samples for column.
func BuildColumnHist ¶
func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType, count int64, ndv int64, nullCount int64) (*Histogram, error)
BuildColumnHist builds a histogram for a column. numBuckets: number of buckets for the histogram. id: the id of the table. collector: the collector of samples. tp: the FieldType for the column. count: represents the row count for the column. ndv: represents the number of distinct values for the column. nullCount: represents the number of null values for the column.
func HistogramFromProto ¶
func HistogramFromProto(protoHg *tipb.Histogram) *Histogram
HistogramFromProto converts Histogram from its protobuf representation. Note that we will set BytesDatum for the lower/upper bound in the bucket, the decode will be after all histograms merged.
func MergeHistograms ¶
func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)
MergeHistograms merges two histograms.
func NewHistogram ¶
func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, totColSize int64) *Histogram
NewHistogram creates a new histogram.
func UpdateHistogram ¶
func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram
UpdateHistogram updates the histogram according to buckets.
func (*Histogram) AppendBucket ¶
AppendBucket appends a bucket into `hg`.
func (*Histogram) AvgCountPerNotNullValue ¶
AvgCountPerNotNullValue gets the average row count per value by the data of histogram.
func (*Histogram) BetweenRowCount ¶
BetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.
func (*Histogram) BucketToString ¶
BucketToString changes the given bucket to string format.
func (*Histogram) ConvertTo ¶
func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)
ConvertTo converts the histogram bucket values into `Tp`.
func (*Histogram) ExtractTopN ¶
ExtractTopN extracts topn from histogram.
func (*Histogram) GetIncreaseFactor ¶
GetIncreaseFactor will return a factor of data increasing after the last analysis.
func (*Histogram) IsIndexHist ¶
IsIndexHist checks whether current histogram is one for index.
func (*Histogram) LessRowCountWithBktIdx ¶
LessRowCountWithBktIdx estimates the row count where the column is less than value.
func (*Histogram) PreCalculateScalar ¶
func (hg *Histogram) PreCalculateScalar()
PreCalculateScalar converts the lower and upper to scalar. When the datum type is KindString or KindBytes, we also calculate their common prefix length, because when a value falls between lower and upper, the common prefix of lower and upper equals to the common prefix of the lower, upper and the value. For some simple types like `Int64`, we do not convert it because we can directly infer the scalar value.
func (*Histogram) RemoveUpperBound ¶
RemoveUpperBound removes the upper bound from histogram. It is used when merging stats for incremental analyze.
func (*Histogram) SplitRange ¶
func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool)
SplitRange splits the range according to the histogram lower bound. Note that we treat first bucket's lower bound as -inf and last bucket's upper bound as +inf, so all the split ranges will totally fall in one of the (-inf, l(1)), [l(1), l(2)),...[l(n-2), l(n-1)), [l(n-1), +inf), where n is the number of buckets, l(i) is the i-th bucket's lower bound.
func (*Histogram) TotalRowCount ¶
TotalRowCount returns the total count of this histogram.
func (*Histogram) TruncateHistogram ¶
TruncateHistogram truncates the histogram to `numBkt` buckets.
type Index ¶
type Index struct { Histogram *CMSketch ErrorRate StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility Info *model.IndexInfo Flag int64 LastAnalyzePos types.Datum }
Index represents an index histogram.
func (*Index) GetRowCount ¶
func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*ranger.Range, modifyCount int64) (float64, error)
GetRowCount returns the row count of the given ranges. It uses the modifyCount to adjust the influence of modifications on the table.
type QueryFeedback ¶
type QueryFeedback struct { PhysicalID int64 Hist *Histogram Tp int Feedback []Feedback Expected int64 // Expected is the Expected scan count of corresponding query. Valid bool // Valid represents the whether this query feedback is still Valid. // contains filtered or unexported fields }
QueryFeedback is used to represent the query feedback info. It contains the query's scan ranges and number of rows in each range.
func NewQueryFeedback ¶
func NewQueryFeedback(physicalID int64, hist *Histogram, expected int64, desc bool) *QueryFeedback
NewQueryFeedback returns a new query feedback.
func (*QueryFeedback) Actual ¶
func (q *QueryFeedback) Actual() int64
Actual gets the actual row count.
func (*QueryFeedback) CalcErrorRate ¶
func (q *QueryFeedback) CalcErrorRate() float64
CalcErrorRate calculates the error rate of the current QueryFeedback.
func (*QueryFeedback) CollectFeedback ¶
func (q *QueryFeedback) CollectFeedback(numOfRanges int) bool
CollectFeedback decides whether to collect the feedback. It returns false when: 1: the histogram is nil or has no buckets; 2: the number of scan ranges exceeds the limit because it may affect the performance; 3: it does not pass the probabilistic sampler.
func (*QueryFeedback) DecodeIntValues ¶
func (q *QueryFeedback) DecodeIntValues() *QueryFeedback
DecodeIntValues is called when the current Feedback stores encoded int values.
func (*QueryFeedback) DecodeToRanges ¶
func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error)
DecodeToRanges decodes the feedback to ranges.
func (*QueryFeedback) Invalidate ¶
func (q *QueryFeedback) Invalidate()
Invalidate is used to invalidate the query feedback.
func (*QueryFeedback) StoreRanges ¶
func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range)
StoreRanges stores the ranges for update.
func (*QueryFeedback) Update ¶
func (q *QueryFeedback) Update(startKey kv.Key, counts []int64)
Update updates the query feedback. `startKey` is the start scan key of the partial result, used to find the range for update. `counts` is the scan counts of each range, used to update the feedback count info.
type SampleBuilder ¶
type SampleBuilder struct { Sc *stmtctx.StatementContext RecordSet sqlexec.RecordSet ColLen int // ColLen is the number of columns need to be sampled. PkBuilder *SortedBuilder MaxBucketSize int64 MaxSampleSize int64 MaxFMSketchSize int64 CMSketchDepth int32 CMSketchWidth int32 }
SampleBuilder is used to build samples for columns. Also, if primary key is handle, it will directly build histogram for it.
func (SampleBuilder) CollectColumnStats ¶
func (s SampleBuilder) CollectColumnStats() ([]*SampleCollector, *SortedBuilder, error)
CollectColumnStats collects sample from the result set using Reservoir Sampling algorithm, and estimates NDVs using FM Sketch during the collecting process. It returns the sample collectors which contain total count, null count, distinct values count and CM Sketch. It also returns the statistic builder for PK which contains the histogram. See https://en.wikipedia.org/wiki/Reservoir_sampling
type SampleCollector ¶
type SampleCollector struct { Samples []*SampleItem IsMerger bool NullCount int64 Count int64 // Count is the number of non-null rows. MaxSampleSize int64 FMSketch *FMSketch CMSketch *CMSketch TotalSize int64 // TotalSize is the total size of column. // contains filtered or unexported fields }
SampleCollector will collect Samples and calculate the count and ndv of an attribute.
func SampleCollectorFromProto ¶
func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector
SampleCollectorFromProto converts SampleCollector from its protobuf representation.
func (*SampleCollector) CalcTotalSize ¶
func (c *SampleCollector) CalcTotalSize()
CalcTotalSize is to calculate total size based on samples.
func (*SampleCollector) ExtractTopN ¶
func (c *SampleCollector) ExtractTopN(numTop uint32)
ExtractTopN extracts the topn from the CM Sketch.
func (*SampleCollector) MergeSampleCollector ¶
func (c *SampleCollector) MergeSampleCollector(sc *stmtctx.StatementContext, rc *SampleCollector)
MergeSampleCollector merges two sample collectors.
type SampleItem ¶
type SampleItem struct { // Value is the sampled column value. Value types.Datum // Ordinal is original position of this item in SampleCollector before sorting. This // is used for computing correlation. Ordinal int // RowID is the row id of the sample in its key. // This property is used to calculate Ordinal in fast analyze. RowID int64 }
SampleItem is an item of sampled column value.
type SortedBuilder ¶
type SortedBuilder struct { Count int64 // contains filtered or unexported fields }
SortedBuilder is used to build histograms for PK and index.
func NewSortedBuilder ¶
func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType) *SortedBuilder
NewSortedBuilder creates a new SortedBuilder.
func (*SortedBuilder) Hist ¶
func (b *SortedBuilder) Hist() *Histogram
Hist returns the histogram built by SortedBuilder.
type StatsNode ¶
type StatsNode struct { Tp int ID int64 // Ranges contains all the Ranges we got. Ranges []*ranger.Range // Selectivity indicates the Selectivity of this column/index. Selectivity float64 // contains filtered or unexported fields }
StatsNode is used for calculating selectivity.
func GetUsableSetsByGreedy ¶
GetUsableSetsByGreedy selects the indices and pk used for calculating selectivity with a greedy algorithm.
type Table ¶
Table represents statistics for a table.
func PseudoTable ¶
PseudoTable creates a pseudo table statistics.
func (*Table) ColumnBetweenRowCount ¶
func (t *Table) ColumnBetweenRowCount(sc *stmtctx.StatementContext, a, b types.Datum, colID int64) float64
ColumnBetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.
func (*Table) ColumnByName ¶
ColumnByName finds the statistics.Column for the given column.
func (*Table) ColumnEqualRowCount ¶
func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) (float64, error)
ColumnEqualRowCount estimates the row count where the column equals value.
func (*Table) ColumnGreaterRowCount ¶
func (t *Table) ColumnGreaterRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
ColumnGreaterRowCount estimates the row count where the column is greater than value.
func (*Table) ColumnLessRowCount ¶
func (t *Table) ColumnLessRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
ColumnLessRowCount estimates the row count where the column is less than value. Note that null values are not counted.
func (*Table) IndexStartWithColumn ¶
IndexStartWithColumn finds the first index whose first column is the given column.
func (*Table) IsOutdated ¶
IsOutdated returns true if the table stats is outdated.
func (*Table) PseudoAvgCountPerValue ¶
PseudoAvgCountPerValue gets a pseudo average count if the histogram does not exist.