Documentation ¶
Index ¶
- Constants
- Variables
- func CMSketchToProto(c *CMSketch) *tipb.CMSketch
- func DurationToTS(d time.Duration) uint64
- func FMSketchToProto(s *FMSketch) *tipb.FMSketch
- func HistogramEqual(a, b *Histogram, ignoreID bool) bool
- func HistogramToProto(hg *Histogram) *tipb.Histogram
- func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector
- func SaveStatsToStorage(sctx sessionctx.Context, tableID int64, count int64, isIndex int, ...) error
- func TableAnalyzed(tbl *Table) bool
- func ValueToString(value *types.Datum, idxCols int) (string, error)
- type AnalyzeResult
- type Bucket
- type BucketFeedback
- type CMSketch
- type Column
- type FMSketch
- type Handle
- func (h *Handle) AnalyzeResultCh() chan *AnalyzeResult
- func (h *Handle) Clear()
- func (h *Handle) DDLEventCh() chan *util.Event
- func (h *Handle) DeleteTableStatsFromKV(id int64) error
- func (h *Handle) DumpStatsDeltaToKV() error
- func (h *Handle) DumpStatsFeedbackToKV() error
- func (h *Handle) DumpStatsToJSON(dbName string, tableInfo *model.TableInfo) (*JSONTable, error)
- func (h *Handle) GCStats(is infoschema.InfoSchema, ddlLease time.Duration) error
- func (h *Handle) GetQueryFeedback() []*QueryFeedback
- func (h *Handle) GetTableStats(tblInfo *model.TableInfo) *Table
- func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error
- func (h *Handle) HandleDDLEvent(t *util.Event) error
- func (h *Handle) HandleUpdateStats(is infoschema.InfoSchema) error
- func (h *Handle) InitStats(is infoschema.InfoSchema) error
- func (h *Handle) LoadNeededHistograms() error
- func (h *Handle) LoadStatsFromJSON(tableInfo *model.TableInfo, jsonTbl *JSONTable) (*Table, error)
- func (h *Handle) NewSessionStatsCollector() *SessionStatsCollector
- func (h *Handle) Update(is infoschema.InfoSchema) error
- func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64)
- type Histogram
- func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, ...) (*Histogram, error)
- func HistogramFromProto(protoHg *tipb.Histogram) *Histogram
- func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)
- func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, ...) *Histogram
- func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram
- func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)
- func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)
- func (hg *Histogram) DecodeTo(tp *types.FieldType, timeZone *time.Location) error
- func (hg *Histogram) GetLower(idx int) *types.Datum
- func (hg *Histogram) GetUpper(idx int) *types.Datum
- func (hg *Histogram) Len() int
- func (hg *Histogram) PreCalculateScalar()
- func (hg *Histogram) SplitRange(ranges []*ranger.NewRange) []*ranger.NewRange
- func (hg *Histogram) ToString(idxCols int) string
- type Index
- type JSONTable
- type QueryFeedback
- func (q *QueryFeedback) Actual() int64
- func (q *QueryFeedback) CollectFeedback(numOfRanges int) bool
- func (q *QueryFeedback) Equal(rq *QueryFeedback) bool
- func (q *QueryFeedback) Hist() *Histogram
- func (q *QueryFeedback) Invalidate()
- func (q *QueryFeedback) StoreRanges(ranges []*ranger.NewRange)
- func (q *QueryFeedback) Update(startKey kv.Key, counts []int64)
- type SampleBuilder
- type SampleCollector
- type SessionStatsCollector
- type SortedBuilder
- type Table
- func (t *Table) ColumnBetweenRowCount(sc *stmtctx.StatementContext, a, b types.Datum, colID int64) float64
- func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) (float64, error)
- func (t *Table) ColumnGreaterRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
- func (t *Table) ColumnIsInvalid(sc *stmtctx.StatementContext, colID int64) bool
- func (t *Table) ColumnLessRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
- func (t *Table) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.NewRange) (float64, error)
- func (t *Table) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.NewRange) (float64, error)
- func (t *Table) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.NewRange) (float64, error)
- func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error)
- func (t *Table) String() string
Constants ¶
const ( // StatsOwnerKey is the stats owner path that is saved to etcd. StatsOwnerKey = "/tidb/stats/owner" // StatsPrompt is the prompt for stats owner manager. StatsPrompt = "stats" )
Variables ¶
var ( // MaxNumberOfRanges is the max number of ranges before split to collect feedback. MaxNumberOfRanges = 20 // FeedbackProbability is the probability to collect the feedback. FeedbackProbability = 0.0 )
var AutoAnalyzeMinCnt int64 = 1000
AutoAnalyzeMinCnt means that if the count of a table is less than this value, we do not need to auto analyze it.
var MaxQueryFeedbackCount = 1 << 10
MaxQueryFeedbackCount is the max number of feedbacks that are cached in memory.
Functions ¶
func CMSketchToProto ¶
func CMSketchToProto(c *CMSketch) *tipb.CMSketch
CMSketchToProto converts CMSketch to its protobuf representation.
func DurationToTS ¶
DurationToTS converts duration to timestamp.
func FMSketchToProto ¶
FMSketchToProto converts FMSketch to its protobuf representation.
func HistogramEqual ¶
HistogramEqual tests if two histograms are equal.
func HistogramToProto ¶
HistogramToProto converts Histogram to its protobuf representation. Note that when this is used, the lower/upper bound in the bucket must be BytesDatum.
func SampleCollectorToProto ¶
func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector
SampleCollectorToProto converts SampleCollector to its protobuf representation.
func SaveStatsToStorage ¶
func SaveStatsToStorage(sctx sessionctx.Context, tableID int64, count int64, isIndex int, hg *Histogram, cms *CMSketch) error
SaveStatsToStorage saves the stats to storage.
func TableAnalyzed ¶
TableAnalyzed checks if the table is analyzed.
Types ¶
type AnalyzeResult ¶
type AnalyzeResult struct { TableID int64 Hist []*Histogram Cms []*CMSketch Count int64 IsIndex int Err error }
AnalyzeResult is used to represent analyze result.
type BucketFeedback ¶
type BucketFeedback struct {
// contains filtered or unexported fields
}
BucketFeedback stands for all the feedback for a bucket.
type CMSketch ¶
type CMSketch struct {
// contains filtered or unexported fields
}
CMSketch is used to estimate point queries. Refer: https://en.wikipedia.org/wiki/Count-min_sketch
func CMSketchFromProto ¶
func CMSketchFromProto(protoSketch *tipb.CMSketch) *CMSketch
CMSketchFromProto converts CMSketch from its protobuf representation.
func (*CMSketch) InsertBytes ¶
InsertBytes inserts the bytes value into the CM Sketch.
func (*CMSketch) MergeCMSketch ¶
MergeCMSketch merges two CM Sketch.
func (*CMSketch) TotalCount ¶
TotalCount returns the count. It is only used for tests.
type Column ¶
type Column struct { Histogram *CMSketch Count int64 Info *model.ColumnInfo // contains filtered or unexported fields }
Column represents a column histogram.
func (*Column) AvgColSize ¶
AvgColSize is the average column size of the histogram.
type FMSketch ¶
type FMSketch struct {
// contains filtered or unexported fields
}
FMSketch is used to count the number of distinct elements in a set.
func FMSketchFromProto ¶
FMSketchFromProto converts FMSketch from its protobuf representation.
func (*FMSketch) InsertValue ¶
InsertValue inserts a value into the FM sketch.
type Handle ¶
type Handle struct { // LastVersion is the latest update version before last lease. Exported for test. LastVersion uint64 // PrevLastVersion is the latest update version before two lease. Exported for test. // We need this because for two tables, the smaller version may write later than the one with larger version. // We can read the version with lastTwoVersion if the diff between commit time and version is less than one lease. // PrevLastVersion will be assigned by LastVersion every time Update is called. PrevLastVersion uint64 Lease time.Duration // contains filtered or unexported fields }
Handle can update stats info periodically.
func NewHandle ¶
func NewHandle(ctx sessionctx.Context, lease time.Duration) *Handle
NewHandle creates a Handle for update stats.
func (*Handle) AnalyzeResultCh ¶
func (h *Handle) AnalyzeResultCh() chan *AnalyzeResult
AnalyzeResultCh returns analyze result channel in handle.
func (*Handle) DDLEventCh ¶
DDLEventCh returns ddl events channel in handle.
func (*Handle) DeleteTableStatsFromKV ¶
DeleteTableStatsFromKV deletes table statistics from kv.
func (*Handle) DumpStatsDeltaToKV ¶
DumpStatsDeltaToKV sweeps the whole list and updates the global map. Then it dumps every table that is held in the map to KV.
func (*Handle) DumpStatsFeedbackToKV ¶
DumpStatsFeedbackToKV dumps the stats feedback to KV.
func (*Handle) DumpStatsToJSON ¶
DumpStatsToJSON dumps statistics to JSON.
func (*Handle) GCStats ¶
func (h *Handle) GCStats(is infoschema.InfoSchema, ddlLease time.Duration) error
GCStats will garbage collect the useless stats info. For dropped tables, we will first update their version so that other TiDB instances know that the table is deleted.
func (*Handle) GetQueryFeedback ¶
func (h *Handle) GetQueryFeedback() []*QueryFeedback
GetQueryFeedback gets the query feedback. It is only used in tests.
func (*Handle) GetTableStats ¶
GetTableStats retrieves the statistics table from cache, and the cache will be updated by a goroutine.
func (*Handle) HandleAutoAnalyze ¶
func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error
HandleAutoAnalyze analyzes the newly created table or index.
func (*Handle) HandleDDLEvent ¶
HandleDDLEvent begins to process a ddl task.
func (*Handle) HandleUpdateStats ¶
func (h *Handle) HandleUpdateStats(is infoschema.InfoSchema) error
HandleUpdateStats updates the stats using feedback.
func (*Handle) InitStats ¶ added in v1.0.5
func (h *Handle) InitStats(is infoschema.InfoSchema) error
InitStats will init the stats cache using full load strategy.
func (*Handle) LoadNeededHistograms ¶
LoadNeededHistograms will load histograms for those needed columns.
func (*Handle) LoadStatsFromJSON ¶
LoadStatsFromJSON loads statistics from JSON.
func (*Handle) NewSessionStatsCollector ¶
func (h *Handle) NewSessionStatsCollector() *SessionStatsCollector
NewSessionStatsCollector allocates a stats collector for a session.
func (*Handle) Update ¶
func (h *Handle) Update(is infoschema.InfoSchema) error
Update reads stats meta from store and updates the stats map.
func (*Handle) UpdateTableStats ¶
UpdateTableStats updates the statistics table cache using copy on write.
type Histogram ¶
type Histogram struct { ID int64 // Column ID. NDV int64 // Number of distinct values. NullCount int64 // Number of null values. // LastUpdateVersion is the version that this histogram updated last time. LastUpdateVersion uint64 // Histogram elements. // // A bucket bound is the smallest and greatest values stored in the bucket. The lower and upper bound // are stored in one column. // // A bucket count is the number of items stored in all previous buckets and the current bucket. // Bucket counts are always in increasing order. // // A bucket repeat is the number of repeats of the bucket value, it can be used to find popular values. Bounds *chunk.Chunk Buckets []Bucket // TotColSize is the total column size for the histogram. TotColSize int64 // contains filtered or unexported fields }
Histogram represents statistics for a column or index.
func BuildColumn ¶
func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType) (*Histogram, error)
BuildColumn builds histogram from samples for column.
func HistogramFromProto ¶
HistogramFromProto converts Histogram from its protobuf representation. Note that we will set BytesDatum for the lower/upper bound in the bucket, the decode will be after all histograms merged.
func MergeHistograms ¶
func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)
MergeHistograms merges two histograms.
func NewHistogram ¶
func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, totColSize int64) *Histogram
NewHistogram creates a new histogram.
func UpdateHistogram ¶
func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram
UpdateHistogram updates the histogram according to the buckets.
func (*Histogram) AppendBucket ¶
AppendBucket appends a bucket into `hg`.
func (*Histogram) ConvertTo ¶
func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)
ConvertTo converts the histogram bucket values into `tp`.
func (*Histogram) PreCalculateScalar ¶
func (hg *Histogram) PreCalculateScalar()
PreCalculateScalar converts the lower and upper to scalar. When the datum type is KindString or KindBytes, we also calculate their common prefix length, because when a value falls between lower and upper, the common prefix of lower and upper equals the common prefix of the lower, upper and the value. For some simple types like `Int64`, we do not convert it because we can directly infer the scalar value.
func (*Histogram) SplitRange ¶
SplitRange splits the range according to the histogram upper bound. Note that we treat last bucket's upper bound as inf, so all the split ranges will totally fall in one of the (-inf, u(0)], (u(0), u(1)],...(u(n-3), u(n-2)], (u(n-2), +inf), where n is the number of buckets, u(i) is the i-th bucket's upper bound.
type JSONTable ¶
type JSONTable struct { DatabaseName string `json:"database_name"` TableName string `json:"table_name"` Columns map[string]*jsonColumn `json:"columns"` Indices map[string]*jsonColumn `json:"indices"` Count int64 `json:"count"` ModifyCount int64 `json:"modify_count"` Version uint64 `json:"version"` }
JSONTable is used for dumping statistics.
type QueryFeedback ¶
type QueryFeedback struct {
// contains filtered or unexported fields
}
QueryFeedback is used to represent the query feedback info. It contains the query's scan ranges and number of rows in each range.
func NewQueryFeedback ¶
func NewQueryFeedback(tableID int64, hist *Histogram, expected int64, desc bool) *QueryFeedback
NewQueryFeedback returns a new query feedback.
func (*QueryFeedback) Actual ¶
func (q *QueryFeedback) Actual() int64
Actual gets the actual row count.
func (*QueryFeedback) CollectFeedback ¶
func (q *QueryFeedback) CollectFeedback(numOfRanges int) bool
CollectFeedback decides whether to collect the feedback. It returns false when: 1: the histogram is nil or has no buckets; 2: the number of scan ranges exceeds the limit because it may affect the performance; 3: it does not pass the probabilistic sampler.
func (*QueryFeedback) Equal ¶
func (q *QueryFeedback) Equal(rq *QueryFeedback) bool
Equal tests if two query feedbacks are equal. It is only used in tests.
func (*QueryFeedback) Invalidate ¶
func (q *QueryFeedback) Invalidate()
Invalidate is used to invalidate the query feedback.
func (*QueryFeedback) StoreRanges ¶
func (q *QueryFeedback) StoreRanges(ranges []*ranger.NewRange)
StoreRanges stores the ranges for update.
func (*QueryFeedback) Update ¶
func (q *QueryFeedback) Update(startKey kv.Key, counts []int64)
Update updates the query feedback. `startKey` is the start scan key of the partial result, used to find the range for update. `counts` is the scan counts of each range, used to update the feedback count info.
type SampleBuilder ¶
type SampleBuilder struct { Sc *stmtctx.StatementContext RecordSet ast.RecordSet ColLen int // ColLen is the number of columns need to be sampled. PkBuilder *SortedBuilder MaxBucketSize int64 MaxSampleSize int64 MaxFMSketchSize int64 CMSketchDepth int32 CMSketchWidth int32 }
SampleBuilder is used to build samples for columns. Also, if primary key is handle, it will directly build histogram for it.
func (SampleBuilder) CollectColumnStats ¶
func (s SampleBuilder) CollectColumnStats() ([]*SampleCollector, *SortedBuilder, error)
CollectColumnStats collects sample from the result set using Reservoir Sampling algorithm, and estimates NDVs using FM Sketch during the collecting process. It returns the sample collectors which contain total count, null count, distinct values count and CM Sketch. It also returns the statistic builder for PK which contains the histogram. See https://en.wikipedia.org/wiki/Reservoir_sampling
type SampleCollector ¶
type SampleCollector struct { Samples []types.Datum IsMerger bool NullCount int64 Count int64 // Count is the number of non-null rows. MaxSampleSize int64 FMSketch *FMSketch CMSketch *CMSketch TotalSize int64 // TotalSize is the total size of column. // contains filtered or unexported fields }
SampleCollector will collect Samples and calculate the count and ndv of an attribute.
func SampleCollectorFromProto ¶
func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector
SampleCollectorFromProto converts SampleCollector from its protobuf representation.
func (*SampleCollector) MergeSampleCollector ¶
func (c *SampleCollector) MergeSampleCollector(sc *stmtctx.StatementContext, rc *SampleCollector)
MergeSampleCollector merges two sample collectors.
type SessionStatsCollector ¶
SessionStatsCollector is a list item that holds the delta mapper. If you want to write or read mapper, you must lock it.
func (*SessionStatsCollector) Delete ¶
func (s *SessionStatsCollector) Delete()
Delete only sets the deleted flag true, it will be deleted from list when DumpStatsDeltaToKV is called.
func (*SessionStatsCollector) StoreQueryFeedback ¶
func (s *SessionStatsCollector) StoreQueryFeedback(feedback interface{})
StoreQueryFeedback merges the feedback into the stats collector.
type SortedBuilder ¶
type SortedBuilder struct { Count int64 // contains filtered or unexported fields }
SortedBuilder is used to build histograms for PK and index.
func NewSortedBuilder ¶
func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType) *SortedBuilder
NewSortedBuilder creates a new SortedBuilder.
func (*SortedBuilder) Hist ¶
func (b *SortedBuilder) Hist() *Histogram
Hist returns the histogram built by SortedBuilder.
type Table ¶
type Table struct { TableID int64 Columns map[int64]*Column Indices map[int64]*Index Count int64 // Total row count in a table. ModifyCount int64 // Total modify count in a table. Version uint64 Pseudo bool PKIsHandle bool }
Table represents statistics for a table.
func PseudoTable ¶
PseudoTable creates a pseudo table statistics.
func (*Table) ColumnBetweenRowCount ¶
func (t *Table) ColumnBetweenRowCount(sc *stmtctx.StatementContext, a, b types.Datum, colID int64) float64
ColumnBetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.
func (*Table) ColumnEqualRowCount ¶
func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) (float64, error)
ColumnEqualRowCount estimates the row count where the column equals value.
func (*Table) ColumnGreaterRowCount ¶
func (t *Table) ColumnGreaterRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
ColumnGreaterRowCount estimates the row count where the column is greater than value.
func (*Table) ColumnIsInvalid ¶
func (t *Table) ColumnIsInvalid(sc *stmtctx.StatementContext, colID int64) bool
ColumnIsInvalid checks if this column is invalid. If this column has a histogram that is not loaded yet, then we mark it as needing its histogram.
func (*Table) ColumnLessRowCount ¶
func (t *Table) ColumnLessRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
ColumnLessRowCount estimates the row count where the column is less than value.
func (*Table) GetRowCountByColumnRanges ¶
func (t *Table) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.NewRange) (float64, error)
GetRowCountByColumnRanges estimates the row count by a slice of NewRange.
func (*Table) GetRowCountByIndexRanges ¶
func (t *Table) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.NewRange) (float64, error)
GetRowCountByIndexRanges estimates the row count by a slice of NewRange.
func (*Table) GetRowCountByIntColumnRanges ¶
func (t *Table) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.NewRange) (float64, error)
GetRowCountByIntColumnRanges estimates the row count by a slice of NewRange on an integer column.
func (*Table) Selectivity ¶
func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error)
Selectivity calculates the selectivity of the expressions. The definition of selectivity is (row count after filter / row count before filter). The exprs must currently be in CNF; in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should hold when you call this. TODO: support expressions whose top layer is a DNF. Currently the time complexity is O(n^2).