statistics

package

v0.6.2 Latest Latest Go to latest Published: Apr 18, 2019 License: GPL-3.0 Imports: 40 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/hanchuanchuan/goInception

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func CMSketchToProto(c *CMSketch) *tipb.CMSketch
func FMSketchToProto(s *FMSketch) *tipb.FMSketch
func HistogramEqual(a, b *Histogram, ignoreID bool) bool
func HistogramToProto(hg *Histogram) *tipb.Histogram
func NeedAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64, ...) bool
func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector
func TableAnalyzed(tbl *Table) bool
func ValueToString(value *types.Datum, idxCols int) (string, error)
type AnalyzeResult
type Bucket
type BucketFeedback
type CMSketch
- func CMSketchFromProto(protoSketch *tipb.CMSketch) *CMSketch
- func NewCMSketch(d, w int32) *CMSketch
- func UpdateCMSketch(c *CMSketch, eqFeedbacks []feedback) *CMSketch
- func (c *CMSketch) Equal(rc *CMSketch) bool
- func (c *CMSketch) InsertBytes(bytes []byte)
- func (c *CMSketch) MergeCMSketch(rc *CMSketch) error
- func (c *CMSketch) QueryBytes(bytes []byte) uint32
- func (c *CMSketch) TotalCount() uint64
type Column
- func (c *Column) AvgColSize(count int64) float64
- func (c *Column) String() string
type ErrorRate
- func (e *ErrorRate) NotAccurate() bool
type FMSketch
- func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch
- func NewFMSketch(maxSize int) *FMSketch
- func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error
- func (s *FMSketch) NDV() int64
type Handle
- func NewHandle(ctx sessionctx.Context, lease time.Duration) *Handle
- func (h *Handle) Clear()
- func (h *Handle) DDLEventCh() chan *util.Event
- func (h *Handle) DeleteTableStatsFromKV(id int64) (err error)
- func (h *Handle) DumpStatsDeltaToKV(dumpMode bool) error
- func (h *Handle) DumpStatsFeedbackToKV() error
- func (h *Handle) DumpStatsToJSON(dbName string, tableInfo *model.TableInfo) (*JSONTable, error)
- func (h *Handle) FlushStats()
- func (h *Handle) GCStats(is infoschema.InfoSchema, ddlLease time.Duration) error
- func (h *Handle) GetPartitionStats(tblInfo *model.TableInfo, pid int64) *Table
- func (h *Handle) GetQueryFeedback() []*QueryFeedback
- func (h *Handle) GetTableStats(tblInfo *model.TableInfo) *Table
- func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error
- func (h *Handle) HandleDDLEvent(t *util.Event) error
- func (h *Handle) HandleUpdateStats(is infoschema.InfoSchema) error
- func (h *Handle) InitStats(is infoschema.InfoSchema) error
- func (h *Handle) LastUpdateVersion() uint64
- func (h *Handle) LoadNeededHistograms() error
- func (h *Handle) LoadStatsFromJSON(is infoschema.InfoSchema, jsonTbl *JSONTable) error
- func (h *Handle) NewSessionStatsCollector() *SessionStatsCollector
- func (h *Handle) SaveMetaToStorage(tableID, count, modifyCount int64) (err error)
- func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *Histogram, cms *CMSketch, ...) (err error)
- func (h *Handle) SetLastUpdateVersion(version uint64)
- func (h *Handle) Update(is infoschema.InfoSchema) error
- func (h *Handle) UpdateErrorRate(is infoschema.InfoSchema)
- func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema)
- func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64)
type HistColl
- func (coll *HistColl) ColumnIsInvalid(sc *stmtctx.StatementContext, colID int64) bool
- func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) HistColl
- func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error)
- func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error)
type Histogram
- func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, ...) (*Histogram, error)
- func HistogramFromProto(protoHg *tipb.Histogram) *Histogram
- func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)
- func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, ...) *Histogram
- func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram
- func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)
- func (hg *Histogram) AvgCountPerValue(totalCount int64) float64
- func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)
- func (hg *Histogram) DecodeTo(tp *types.FieldType, timeZone *time.Location) error
- func (hg *Histogram) GetLower(idx int) *types.Datum
- func (hg *Histogram) GetUpper(idx int) *types.Datum
- func (hg *Histogram) Len() int
- func (hg *Histogram) PreCalculateScalar()
- func (hg *Histogram) SplitRange(ranges []*ranger.Range) []*ranger.Range
- func (hg *Histogram) ToString(idxCols int) string
type Index
- func (idx *Index) String() string
type JSONTable
type QueryFeedback
- func NewQueryFeedback(tableID int64, hist *Histogram, expected int64, desc bool) *QueryFeedback
- func (q *QueryFeedback) Actual() int64
- func (q *QueryFeedback) CollectFeedback(numOfRanges int) bool
- func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error)
- func (q *QueryFeedback) Equal(rq *QueryFeedback) bool
- func (q *QueryFeedback) Hist() *Histogram
- func (q *QueryFeedback) Invalidate()
- func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range)
- func (q *QueryFeedback) Update(startKey kv.Key, counts []int64)
type SampleBuilder
- func (s SampleBuilder) CollectColumnStats() ([]*SampleCollector, *SortedBuilder, error)
type SampleCollector
- func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector
- func (c *SampleCollector) MergeSampleCollector(sc *stmtctx.StatementContext, rc *SampleCollector)
type SessionStatsCollector
- func (s *SessionStatsCollector) Delete()
- func (s *SessionStatsCollector) StoreQueryFeedback(feedback interface{}, h *Handle) error
- func (s *SessionStatsCollector) Update(id int64, delta int64, count int64, colSize *map[int64]int64)
type SortedBuilder
- func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType) *SortedBuilder
- func (b *SortedBuilder) Hist() *Histogram
- func (b *SortedBuilder) Iterate(data types.Datum) error
type Table
- func PseudoTable(tblInfo *model.TableInfo) *Table
- func TableStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, jsonTbl *JSONTable) (*Table, error)
- func (t *Table) ColumnBetweenRowCount(sc *stmtctx.StatementContext, a, b types.Datum, colID int64) float64
- func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) (float64, error)
- func (t *Table) ColumnGreaterRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
- func (t *Table) ColumnLessRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64
- func (t *Table) IsOutdated() bool
- func (t *Table) PseudoAvgCountPerValue() float64
- func (t *Table) String() string

Constants ¶

View Source

const (
	// DumpAll indicates dump all the delta info in to kv
	DumpAll = true
	// DumpDelta indicates dump part of the delta info in to kv.
	DumpDelta = false
)

View Source

const (
	// StatsOwnerKey is the stats owner path that is saved to etcd.
	StatsOwnerKey = "/tidb/stats/owner"
	// StatsPrompt is the prompt for stats owner manager.
	StatsPrompt = "stats"
)

View Source

const MaxErrorRate = 0.25

MaxErrorRate is the maximum error rate of the estimated row count for a non-pseudo column. If the table is pseudo but the average error rate is less than MaxErrorRate, then the column is not pseudo.

Variables ¶

View Source

var (
	// MaxNumberOfRanges is the max number of ranges before split to collect feedback.
	MaxNumberOfRanges = 20
	// FeedbackProbability is the probability to collect the feedback.
	FeedbackProbability = 0.0
)

View Source

var (
	// MinLogScanCount is the minimum scan count for a feedback to be logged.
	MinLogScanCount = int64(1000)
	// MinLogErrorRate is the minimum error rate for a feedback to be logged.
	MinLogErrorRate = 0.5
)

View Source

var AutoAnalyzeMinCnt int64 = 1000

AutoAnalyzeMinCnt means that if the count of a table is less than this value, auto analyze is not needed.

View Source

var (
	// DumpStatsDeltaRatio is the lower bound of `Modify Count / Table Count` for stats delta to be dumped.
	DumpStatsDeltaRatio = 1 / 10000.0
)

View Source

var MaxQueryFeedbackCount = 1 << 10

MaxQueryFeedbackCount is the max number of feedbacks that are cached in memory.

View Source

var RatioOfPseudoEstimate = 0.7

RatioOfPseudoEstimate means that if modifyCount / statsTblCount is greater than this ratio, the stats are considered invalid and pseudo estimation is used.

Functions ¶

func CMSketchToProto ¶

func CMSketchToProto(c *CMSketch) *tipb.CMSketch

CMSketchToProto converts CMSketch to its protobuf representation.

func FMSketchToProto ¶

func FMSketchToProto(s *FMSketch) *tipb.FMSketch

FMSketchToProto converts FMSketch to its protobuf representation.

func HistogramEqual ¶

func HistogramEqual(a, b *Histogram, ignoreID bool) bool

HistogramEqual tests if two histograms are equal.

func HistogramToProto ¶

func HistogramToProto(hg *Histogram) *tipb.Histogram

HistogramToProto converts Histogram to its protobuf representation. Note that when this is used, the lower/upper bound in the bucket must be BytesDatum.

func NeedAnalyzeTable ¶

func NeedAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) bool

NeedAnalyzeTable checks if we need to analyze the table:

1. If the table has never been analyzed, we need to analyze it when it has not been modified for a while.
2. If the table had been analyzed before, we need to analyze it when "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio".
3. The current time is between `start` and `end`.

func SampleCollectorToProto ¶

func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector

SampleCollectorToProto converts SampleCollector to its protobuf representation.

func TableAnalyzed ¶

func TableAnalyzed(tbl *Table) bool

TableAnalyzed checks if the table is analyzed.

func ValueToString ¶

func ValueToString(value *types.Datum, idxCols int) (string, error)

ValueToString converts a possibly encoded value to a formatted string. If the value is encoded, then idxCols equals the number of original values; otherwise idxCols is 0.

Types ¶

type AnalyzeResult ¶

type AnalyzeResult struct {
	// PhysicalTableID is the id of a partition or a table.
	PhysicalTableID int64
	Hist            []*Histogram
	Cms             []*CMSketch
	Count           int64
	IsIndex         int
	Err             error
}

AnalyzeResult is used to represent analyze result.

type Bucket ¶

type Bucket struct {
	Count  int64
	Repeat int64
}

Bucket stores the bucket count and repeat.

type BucketFeedback ¶

type BucketFeedback struct {
	// contains filtered or unexported fields
}

BucketFeedback stands for all the feedback for a bucket.

type CMSketch ¶

type CMSketch struct {
	// contains filtered or unexported fields
}

CMSketch is used to estimate point queries. Refer: https://en.wikipedia.org/wiki/Count-min_sketch

func CMSketchFromProto ¶

func CMSketchFromProto(protoSketch *tipb.CMSketch) *CMSketch

CMSketchFromProto converts CMSketch from its protobuf representation.

func UpdateCMSketch ¶

func UpdateCMSketch(c *CMSketch, eqFeedbacks []feedback) *CMSketch

UpdateCMSketch updates the CMSketch by feedback.

func (*CMSketch) Equal ¶

func (c *CMSketch) Equal(rc *CMSketch) bool

Equal tests if two CM Sketches are equal; it is only used for tests.

func (*CMSketch) InsertBytes ¶

func (c *CMSketch) InsertBytes(bytes []byte)

InsertBytes inserts the bytes value into the CM Sketch.

func (*CMSketch) MergeCMSketch ¶

func (c *CMSketch) MergeCMSketch(rc *CMSketch) error

MergeCMSketch merges two CM Sketches.

func (*CMSketch) QueryBytes ¶

func (c *CMSketch) QueryBytes(bytes []byte) uint32

QueryBytes is used to query the count of specified bytes.

func (*CMSketch) TotalCount ¶

func (c *CMSketch) TotalCount() uint64

TotalCount returns the count, it is only used for test.

func (*Column) AvgColSize ¶

func (c *Column) AvgColSize(count int64) float64

AvgColSize is the average column size of the histogram.

func (*Column) String ¶

func (c *Column) String() string

type ErrorRate ¶

type ErrorRate struct {
	ErrorTotal float64
	QueryTotal int64
}

ErrorRate is the error rate of estimate row count by bucket and cm sketch.

func (*ErrorRate) NotAccurate ¶

func (e *ErrorRate) NotAccurate() bool

NotAccurate is true when the total of query is zero or the average error rate is greater than MaxErrorRate.

type FMSketch ¶

type FMSketch struct {
	// contains filtered or unexported fields
}

FMSketch is used to count the number of distinct elements in a set.

func FMSketchFromProto ¶

func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch

FMSketchFromProto converts FMSketch from its protobuf representation.

func (*FMSketch) InsertValue ¶

func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error

InsertValue inserts a value into the FM sketch.

func (*FMSketch) NDV ¶

func (s *FMSketch) NDV() int64

NDV returns the ndv of the sketch.

type Handle ¶

type Handle struct {
	Lease time.Duration
	// contains filtered or unexported fields
}

Handle can update stats info periodically.

func NewHandle ¶

func NewHandle(ctx sessionctx.Context, lease time.Duration) *Handle

NewHandle creates a Handle for update stats.

func (*Handle) Clear ¶

func (h *Handle) Clear()

Clear the statsCache, only for test.

func (*Handle) DDLEventCh ¶

func (h *Handle) DDLEventCh() chan *util.Event

DDLEventCh returns ddl events channel in handle.

func (*Handle) DeleteTableStatsFromKV ¶

func (h *Handle) DeleteTableStatsFromKV(id int64) (err error)

DeleteTableStatsFromKV deletes table statistics from kv.

func (*Handle) DumpStatsDeltaToKV ¶

func (h *Handle) DumpStatsDeltaToKV(dumpMode bool) error

DumpStatsDeltaToKV sweeps the whole list and updates the global map, then dumps every table held in the map to KV. If `dumpMode` is false (DumpDelta), it only dumps the delta info whose `Modify Count / Table Count` is greater than a ratio.

func (*Handle) DumpStatsFeedbackToKV ¶

func (h *Handle) DumpStatsFeedbackToKV() error

DumpStatsFeedbackToKV dumps the stats feedback to KV.

func (*Handle) DumpStatsToJSON ¶

func (h *Handle) DumpStatsToJSON(dbName string, tableInfo *model.TableInfo) (*JSONTable, error)

DumpStatsToJSON dumps statistic to json.

func (*Handle) FlushStats ¶

func (h *Handle) FlushStats()

FlushStats flushes the cached stats update into store.

func (*Handle) GCStats ¶

func (h *Handle) GCStats(is infoschema.InfoSchema, ddlLease time.Duration) error

GCStats will garbage collect the useless stats info. For dropped tables, we will first update their version so that other tidb could know that table is deleted.

func (*Handle) GetPartitionStats ¶

func (h *Handle) GetPartitionStats(tblInfo *model.TableInfo, pid int64) *Table

GetPartitionStats retrieves the partition stats from cache.

func (*Handle) GetQueryFeedback ¶

func (h *Handle) GetQueryFeedback() []*QueryFeedback

GetQueryFeedback gets the query feedback. It is only used in tests.

func (*Handle) GetTableStats ¶

func (h *Handle) GetTableStats(tblInfo *model.TableInfo) *Table

GetTableStats retrieves the statistics table from cache, and the cache will be updated by a goroutine.

func (*Handle) HandleAutoAnalyze ¶

func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error

HandleAutoAnalyze analyzes the newly created table or index.

func (*Handle) HandleDDLEvent ¶

func (h *Handle) HandleDDLEvent(t *util.Event) error

HandleDDLEvent begins to process a ddl task.

func (*Handle) HandleUpdateStats ¶

func (h *Handle) HandleUpdateStats(is infoschema.InfoSchema) error

HandleUpdateStats updates the stats using feedback.

func (*Handle) InitStats ¶

func (h *Handle) InitStats(is infoschema.InfoSchema) error

InitStats will init the stats cache using full load strategy.

func (*Handle) LastUpdateVersion ¶

func (h *Handle) LastUpdateVersion() uint64

LastUpdateVersion gets the last update version.

func (*Handle) LoadNeededHistograms ¶

func (h *Handle) LoadNeededHistograms() error

LoadNeededHistograms will load histograms for those needed columns.

func (*Handle) LoadStatsFromJSON ¶

func (h *Handle) LoadStatsFromJSON(is infoschema.InfoSchema, jsonTbl *JSONTable) error

LoadStatsFromJSON will load statistic from JSONTable, and save it to the storage.

func (*Handle) NewSessionStatsCollector ¶

func (h *Handle) NewSessionStatsCollector() *SessionStatsCollector

NewSessionStatsCollector allocates a stats collector for a session.

func (*Handle) SaveMetaToStorage ¶

func (h *Handle) SaveMetaToStorage(tableID, count, modifyCount int64) (err error)

SaveMetaToStorage will save stats_meta to storage.

func (*Handle) SaveStatsToStorage ¶

func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *Histogram, cms *CMSketch, isAnalyzed int64) (err error)

SaveStatsToStorage saves the stats to storage.

func (*Handle) SetLastUpdateVersion ¶

func (h *Handle) SetLastUpdateVersion(version uint64)

SetLastUpdateVersion sets the last update version.

func (*Handle) Update ¶

func (h *Handle) Update(is infoschema.InfoSchema) error

Update reads stats meta from store and updates the stats map.

func (*Handle) UpdateErrorRate ¶

func (h *Handle) UpdateErrorRate(is infoschema.InfoSchema)

UpdateErrorRate updates the error rate of columns from h.rateMap to cache.

func (*Handle) UpdateStatsByLocalFeedback ¶

func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema)

UpdateStatsByLocalFeedback will update statistics by the local feedback. Currently, we dump the feedback with the period of 10 minutes, which means it takes 10 minutes for a feedback to take effect. However, we can use the feedback locally on this tidb-server, so it could be used more timely.

func (*Handle) UpdateTableStats ¶

func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64)

UpdateTableStats updates the statistics table cache using copy on write.

type HistColl ¶

type HistColl struct {
	PhysicalID int64
	// HavePhysicalID is true means this HistColl is from single table and have its ID's information.
	// The physical id is used when try to load column stats from storage.
	HavePhysicalID bool
	Columns        map[int64]*Column
	Indices        map[int64]*Index
	// Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner.
	Idx2ColumnIDs map[int64][]int64
	// ColID2IdxID maps the column id to index id whose first column is it. It's used to calculate the selectivity in planner.
	ColID2IdxID map[int64]int64
	Pseudo      bool
	Count       int64
	ModifyCount int64 // Total modify count in a table.
}

HistColl is a collection of histograms. It collects enough information for the planner to calculate the selectivity.

func (*HistColl) ColumnIsInvalid ¶

func (coll *HistColl) ColumnIsInvalid(sc *stmtctx.StatementContext, colID int64) bool

ColumnIsInvalid checks if this column is invalid. If this column has histogram but not loaded yet, then we mark it as need histogram.

func (*HistColl) GenerateHistCollFromColumnInfo ¶

func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) HistColl

GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and Idx2ColumnIDs are built from the given parameters.

func (*HistColl) GetRowCountByColumnRanges ¶

func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, colID int64, colRanges []*ranger.Range) (float64, error)

GetRowCountByColumnRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIndexRanges ¶

func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int64, indexRanges []*ranger.Range) (float64, error)

GetRowCountByIndexRanges estimates the row count by a slice of Range.

func (*HistColl) GetRowCountByIntColumnRanges ¶

func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, colID int64, intRanges []*ranger.Range) (float64, error)

GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.

func (*HistColl) Selectivity ¶

func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error)

Selectivity calculates the selectivity of the expressions. The definition of selectivity is (row count after filter / row count before filter). And exprs must be CNF now, in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should be held when you call this. TODO: support expressions that the top layer is a DNF. Currently the time complexity is O(n^2).

type Histogram ¶

type Histogram struct {
	ID        int64 // Column ID.
	NDV       int64 // Number of distinct values.
	NullCount int64 // Number of null values.
	// LastUpdateVersion is the version that this histogram updated last time.
	LastUpdateVersion uint64

	// Histogram elements.
	//
	// A bucket bound is the smallest and greatest values stored in the bucket. The lower and upper bound
	// are stored in one column.
	//
	// A bucket count is the number of items stored in all previous buckets and the current bucket.
	// Bucket counts are always in increasing order.
	//
	// A bucket repeat is the number of repeats of the bucket value, it can be used to find popular values.
	Bounds  *chunk.Chunk
	Buckets []Bucket

	// TotColSize is the total column size for the histogram.
	TotColSize int64
	// contains filtered or unexported fields
}

Histogram represents statistics for a column or index.

func BuildColumn ¶

func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType) (*Histogram, error)

BuildColumn builds histogram from samples for column.

func HistogramFromProto ¶

func HistogramFromProto(protoHg *tipb.Histogram) *Histogram

HistogramFromProto converts Histogram from its protobuf representation. Note that we will set BytesDatum for the lower/upper bound in the bucket, the decode will be after all histograms merged.

func MergeHistograms ¶

func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)

MergeHistograms merges two histograms.

func NewHistogram ¶

func NewHistogram(id, ndv, nullCount int64, version uint64, tp *types.FieldType, bucketSize int, totColSize int64) *Histogram

NewHistogram creates a new histogram.

func UpdateHistogram ¶

func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram

UpdateHistogram updates the histogram according to the buckets.

func (*Histogram) AppendBucket ¶

func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64)

AppendBucket appends a bucket into `hg`.

func (*Histogram) AvgCountPerValue ¶

func (hg *Histogram) AvgCountPerValue(totalCount int64) float64

AvgCountPerValue gets the average row count per value by the data of histogram.

func (*Histogram) ConvertTo ¶

func (hg *Histogram) ConvertTo(sc *stmtctx.StatementContext, tp *types.FieldType) (*Histogram, error)

ConvertTo converts the histogram bucket values into `tp`.

func (*Histogram) DecodeTo ¶

func (hg *Histogram) DecodeTo(tp *types.FieldType, timeZone *time.Location) error

DecodeTo decodes the histogram bucket values into `tp`.

func (*Histogram) GetLower ¶

func (hg *Histogram) GetLower(idx int) *types.Datum

GetLower gets the lower bound of bucket `idx`.

func (*Histogram) GetUpper ¶

func (hg *Histogram) GetUpper(idx int) *types.Datum

GetUpper gets the upper bound of bucket `idx`.

func (*Histogram) Len ¶

func (hg *Histogram) Len() int

Len is the number of buckets in the histogram.

func (*Histogram) PreCalculateScalar ¶

func (hg *Histogram) PreCalculateScalar()

PreCalculateScalar converts the lower and upper to scalar. When the datum type is KindString or KindBytes, we also calculate their common prefix length, because when a value falls between lower and upper, the common prefix of lower and upper equals to the common prefix of the lower, upper and the value. For some simple types like `Int64`, we do not convert it because we can directly infer the scalar value.

func (*Histogram) SplitRange ¶

func (hg *Histogram) SplitRange(ranges []*ranger.Range) []*ranger.Range

SplitRange splits the range according to the histogram upper bound. Note that we treat last bucket's upper bound as inf, so all the split ranges will totally fall in one of the (-inf, u(0)], (u(0), u(1)],...(u(n-3), u(n-2)], (u(n-2), +inf), where n is the number of buckets, u(i) is the i-th bucket's upper bound.

func (*Histogram) ToString ¶

func (hg *Histogram) ToString(idxCols int) string

ToString gets the string representation for the histogram.

type Index ¶

type Index struct {
	Histogram
	*CMSketch
	ErrorRate

	Info *model.IndexInfo
	// contains filtered or unexported fields
}

Index represents an index histogram.

func (*Index) String ¶

func (idx *Index) String() string

type JSONTable ¶

type JSONTable struct {
	DatabaseName string                 `json:"database_name"`
	TableName    string                 `json:"table_name"`
	Columns      map[string]*jsonColumn `json:"columns"`
	Indices      map[string]*jsonColumn `json:"indices"`
	Count        int64                  `json:"count"`
	ModifyCount  int64                  `json:"modify_count"`
	Partitions   map[string]*JSONTable  `json:"partitions"`
}

type QueryFeedback ¶

type QueryFeedback struct {
	// contains filtered or unexported fields
}

QueryFeedback is used to represent the query feedback info. It contains the query's scan ranges and number of rows in each range.

func NewQueryFeedback ¶

func NewQueryFeedback(tableID int64, hist *Histogram, expected int64, desc bool) *QueryFeedback

NewQueryFeedback returns a new query feedback.

func (*QueryFeedback) Actual ¶

func (q *QueryFeedback) Actual() int64

Actual gets the actual row count.

func (*QueryFeedback) CollectFeedback ¶

func (q *QueryFeedback) CollectFeedback(numOfRanges int) bool

CollectFeedback decides whether to collect the feedback. It returns false when: 1: the histogram is nil or has no buckets; 2: the number of scan ranges exceeds the limit because it may affect the performance; 3: it does not pass the probabilistic sampler.

func (*QueryFeedback) DecodeToRanges ¶

func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error)

DecodeToRanges decodes the feedback to ranges.

func (*QueryFeedback) Equal ¶

func (q *QueryFeedback) Equal(rq *QueryFeedback) bool

Equal tests if two query feedbacks are equal; it is only used in tests.

func (*QueryFeedback) Hist ¶

func (q *QueryFeedback) Hist() *Histogram

Hist gets the histogram.

func (*QueryFeedback) Invalidate ¶

func (q *QueryFeedback) Invalidate()

Invalidate is used to invalidate the query feedback.

func (*QueryFeedback) StoreRanges ¶

func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range)

StoreRanges stores the ranges for update.

func (*QueryFeedback) Update ¶

func (q *QueryFeedback) Update(startKey kv.Key, counts []int64)

Update updates the query feedback. `startKey` is the start scan key of the partial result, used to find the range for update. `counts` is the scan counts of each range, used to update the feedback count info.

type SampleBuilder ¶

type SampleBuilder struct {
	Sc              *stmtctx.StatementContext
	RecordSet       ast.RecordSet
	ColLen          int // ColLen is the number of columns need to be sampled.
	PkBuilder       *SortedBuilder
	MaxBucketSize   int64
	MaxSampleSize   int64
	MaxFMSketchSize int64
	CMSketchDepth   int32
	CMSketchWidth   int32
}

SampleBuilder is used to build samples for columns. Also, if primary key is handle, it will directly build histogram for it.

func (SampleBuilder) CollectColumnStats ¶

func (s SampleBuilder) CollectColumnStats() ([]*SampleCollector, *SortedBuilder, error)

CollectColumnStats collects sample from the result set using Reservoir Sampling algorithm, and estimates NDVs using FM Sketch during the collecting process. It returns the sample collectors which contain total count, null count, distinct values count and CM Sketch. It also returns the statistic builder for PK which contains the histogram. See https://en.wikipedia.org/wiki/Reservoir_sampling

type SampleCollector ¶

type SampleCollector struct {
	Samples []types.Datum

	IsMerger      bool
	NullCount     int64
	Count         int64 // Count is the number of non-null rows.
	MaxSampleSize int64
	FMSketch      *FMSketch
	CMSketch      *CMSketch
	TotalSize     int64 // TotalSize is the total size of column.
	// contains filtered or unexported fields
}

SampleCollector will collect Samples and calculate the count and ndv of an attribute.

func SampleCollectorFromProto ¶

func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector

SampleCollectorFromProto converts SampleCollector from its protobuf representation.

func (*SampleCollector) MergeSampleCollector ¶

func (c *SampleCollector) MergeSampleCollector(sc *stmtctx.StatementContext, rc *SampleCollector)

MergeSampleCollector merges two sample collectors.

type SessionStatsCollector ¶

type SessionStatsCollector struct {
	sync.Mutex
	// contains filtered or unexported fields
}

SessionStatsCollector is a list item that holds the delta mapper. If you want to write or read mapper, you must lock it.

func (*SessionStatsCollector) Delete ¶

func (s *SessionStatsCollector) Delete()

Delete only sets the deleted flag true, it will be deleted from list when DumpStatsDeltaToKV is called.

func (*SessionStatsCollector) StoreQueryFeedback ¶

func (s *SessionStatsCollector) StoreQueryFeedback(feedback interface{}, h *Handle) error

StoreQueryFeedback merges the feedback into the stats collector.

func (*SessionStatsCollector) Update ¶

func (s *SessionStatsCollector) Update(id int64, delta int64, count int64, colSize *map[int64]int64)

Update updates the delta and count for one table id.

type SortedBuilder ¶

type SortedBuilder struct {
	Count int64
	// contains filtered or unexported fields
}

SortedBuilder is used to build histograms for PK and index.

func NewSortedBuilder ¶

func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType) *SortedBuilder

NewSortedBuilder creates a new SortedBuilder.

func (*SortedBuilder) Hist ¶

func (b *SortedBuilder) Hist() *Histogram

Hist returns the histogram built by SortedBuilder.

func (*SortedBuilder) Iterate ¶

func (b *SortedBuilder) Iterate(data types.Datum) error

Iterate updates the histogram incrementally.

type Table ¶

type Table struct {
	HistColl
	Version uint64
	// contains filtered or unexported fields
}

Table represents statistics for a table.

func PseudoTable ¶

func PseudoTable(tblInfo *model.TableInfo) *Table

PseudoTable creates a pseudo table statistics.

func TableStatsFromJSON ¶

func TableStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, jsonTbl *JSONTable) (*Table, error)

TableStatsFromJSON loads statistics from a JSONTable and returns the Table of statistics.

func (*Table) ColumnBetweenRowCount ¶

func (t *Table) ColumnBetweenRowCount(sc *stmtctx.StatementContext, a, b types.Datum, colID int64) float64

ColumnBetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.

func (*Table) ColumnEqualRowCount ¶

func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) (float64, error)

ColumnEqualRowCount estimates the row count where the column equals value.

func (*Table) ColumnGreaterRowCount ¶

func (t *Table) ColumnGreaterRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64

ColumnGreaterRowCount estimates the row count where the column is greater than value.

func (*Table) ColumnLessRowCount ¶

func (t *Table) ColumnLessRowCount(sc *stmtctx.StatementContext, value types.Datum, colID int64) float64

ColumnLessRowCount estimates the row count where the column is less than value.

func (*Table) IsOutdated ¶

func (t *Table) IsOutdated() bool

IsOutdated returns true if the table stats is outdated.

func (*Table) PseudoAvgCountPerValue ¶

func (t *Table) PseudoAvgCountPerValue() float64

PseudoAvgCountPerValue gets a pseudo average count if the histogram does not exist.

func (*Table) String ¶

func (t *Table) String() string

String implements Stringer interface.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func CMSketchToProto ¶

func FMSketchToProto ¶

func HistogramEqual ¶

func HistogramToProto ¶

func NeedAnalyzeTable ¶

func SampleCollectorToProto ¶

func TableAnalyzed ¶

func ValueToString ¶

Types ¶

type AnalyzeResult ¶

type Bucket ¶

type BucketFeedback ¶

type CMSketch ¶

func CMSketchFromProto ¶

func NewCMSketch ¶

func UpdateCMSketch ¶

func (*CMSketch) Equal ¶

func (*CMSketch) InsertBytes ¶

func (*CMSketch) MergeCMSketch ¶

func (*CMSketch) QueryBytes ¶

func (*CMSketch) TotalCount ¶

type Column ¶

func (*Column) AvgColSize ¶

func (*Column) String ¶

type ErrorRate ¶

func (*ErrorRate) NotAccurate ¶

type FMSketch ¶

func FMSketchFromProto ¶

func NewFMSketch ¶

func (*FMSketch) InsertValue ¶

func (*FMSketch) NDV ¶

type Handle ¶

func NewHandle ¶

func (*Handle) Clear ¶

func (*Handle) DDLEventCh ¶

func (*Handle) DeleteTableStatsFromKV ¶

func (*Handle) DumpStatsDeltaToKV ¶

func (*Handle) DumpStatsFeedbackToKV ¶

func (*Handle) DumpStatsToJSON ¶

func (*Handle) FlushStats ¶

func (*Handle) GCStats ¶

func (*Handle) GetPartitionStats ¶

func (*Handle) GetQueryFeedback ¶

func (*Handle) GetTableStats ¶

func (*Handle) HandleAutoAnalyze ¶

func (*Handle) HandleDDLEvent ¶

func (*Handle) HandleUpdateStats ¶

func (*Handle) InitStats ¶

func (*Handle) LastUpdateVersion ¶

func (*Handle) LoadNeededHistograms ¶

func (*Handle) LoadStatsFromJSON ¶

func (*Handle) NewSessionStatsCollector ¶

func (*Handle) SaveMetaToStorage ¶

func (*Handle) SaveStatsToStorage ¶

func (*Handle) SetLastUpdateVersion ¶

func (*Handle) Update ¶

func (*Handle) UpdateErrorRate ¶

func (*Handle) UpdateStatsByLocalFeedback ¶

func (*Handle) UpdateTableStats ¶

type HistColl ¶

func (*HistColl) ColumnIsInvalid ¶

func (*HistColl) GenerateHistCollFromColumnInfo ¶

func (*HistColl) GetRowCountByColumnRanges ¶

func (*HistColl) GetRowCountByIndexRanges ¶

func (*HistColl) GetRowCountByIntColumnRanges ¶

func (*HistColl) Selectivity ¶

type Histogram ¶

func BuildColumn ¶

func HistogramFromProto ¶

func MergeHistograms ¶

func NewHistogram ¶

func UpdateHistogram ¶

func (*Histogram) AppendBucket ¶

func (*Histogram) AvgCountPerValue ¶

func (*Histogram) ConvertTo ¶