statistics

package

v1.0.7 Latest Latest Go to latest Published: Jan 19, 2018 License: Apache-2.0 Imports: 29 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/zhaox1n/tidb

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func FMSketchToProto(s *FMSketch) *tipb.FMSketch
func HistogramToProto(hg *Histogram) *tipb.Histogram
func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector
type AnalyzeResult
type Bucket
type Column
- func (c *Column) String() string
type FMSketch
- func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch
- func NewFMSketch(maxSize int) *FMSketch
- func (s *FMSketch) InsertValue(value types.Datum) error
- func (s *FMSketch) NDV() int64
type Handle
- func NewHandle(ctx context.Context, lease time.Duration) *Handle
- func (h *Handle) AnalyzeResultCh() chan *AnalyzeResult
- func (h *Handle) Clear()
- func (h *Handle) DDLEventCh() chan *ddl.Event
- func (h *Handle) DeleteTableStatsFromKV(id int64) error
- func (h *Handle) DumpStatsDeltaToKV()
- func (h *Handle) GetTableStats(tblID int64) *Table
- func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error
- func (h *Handle) HandleDDLEvent(t *ddl.Event) error
- func (h *Handle) InitStats(is infoschema.InfoSchema) error
- func (h *Handle) NewSessionStatsCollector() *SessionStatsCollector
- func (h *Handle) Update(is infoschema.InfoSchema) error
- func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64)
type Histogram
- func BuildColumn(ctx context.Context, numBuckets, id int64, collector *SampleCollector) (*Histogram, error)
- func BuildIndex(ctx context.Context, numBuckets, id int64, records ast.RecordSet) (int64, *Histogram, error)
- func HistogramFromProto(protoHg *tipb.Histogram) *Histogram
- func MergeHistograms(sc *variable.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error)
- func (hg *Histogram) SaveToStorage(ctx context.Context, tableID int64, count int64, isIndex int) error
type Index
- func (idx *Index) String() string
type SampleBuilder
- func (s SampleBuilder) CollectSamplesAndEstimateNDVs() ([]*SampleCollector, *SortedBuilder, error)
type SampleCollector
- func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector
- func (c *SampleCollector) MergeSampleCollector(rc *SampleCollector)
type SessionStatsCollector
- func (s *SessionStatsCollector) Delete()
- func (s *SessionStatsCollector) Update(id int64, delta int64, count int64)
type SortedBuilder
- func NewSortedBuilder(sc *variable.StatementContext, numBuckets, id int64) *SortedBuilder
- func (b *SortedBuilder) Hist() *Histogram
- func (b *SortedBuilder) Iterate(data types.Datum) error
type Table
- func PseudoTable(tableID int64) *Table
- func (t *Table) ColumnBetweenRowCount(sc *variable.StatementContext, a, b types.Datum, colInfo *model.ColumnInfo) (float64, error)
- func (t *Table) ColumnEqualRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error)
- func (t *Table) ColumnGreaterRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error)
- func (t *Table) ColumnIsInvalid(colInfo *model.ColumnInfo) bool
- func (t *Table) ColumnLessRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error)
- func (t *Table) GetRowCountByColumnRanges(sc *variable.StatementContext, colID int64, colRanges []*types.ColumnRange) (float64, error)
- func (t *Table) GetRowCountByIndexRanges(sc *variable.StatementContext, idxID int64, indexRanges []*types.IndexRange) (float64, error)
- func (t *Table) GetRowCountByIntColumnRanges(sc *variable.StatementContext, colID int64, intRanges []types.IntColumnRange) (float64, error)
- func (t *Table) Selectivity(ctx context.Context, exprs []expression.Expression) (float64, error)
- func (t *Table) String() string

Constants ¶

View Source

const (
	// StatsOwnerKey is the stats owner path that is saved to etcd.
	StatsOwnerKey = "/tidb/stats/owner"
	// StatsPrompt is the prompt for stats owner manager.
	StatsPrompt = "stats"
)

Variables ¶

This section is empty.

Functions ¶

func FMSketchToProto ¶

func FMSketchToProto(s *FMSketch) *tipb.FMSketch

FMSketchToProto converts FMSketch to its protobuf representation.

func HistogramToProto ¶

func HistogramToProto(hg *Histogram) *tipb.Histogram

HistogramToProto converts Histogram to its protobuf representation. Note that when this is used, the lower/upper bound in the bucket must be BytesDatum.

func SampleCollectorToProto ¶

func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector

SampleCollectorToProto converts SampleCollector to its protobuf representation.

Types ¶

type AnalyzeResult ¶

type AnalyzeResult struct {
	TableID int64
	Hist    []*Histogram
	Count   int64
	IsIndex int
	Err     error
}

AnalyzeResult is used to represent analyze result.

type Bucket ¶

type Bucket struct {
	Count      int64
	UpperBound types.Datum
	LowerBound types.Datum
	Repeats    int64
	// contains filtered or unexported fields
}

Bucket is an element of histogram.

A bucket count is the number of items stored in all previous buckets and the current bucket. Bucket numbers are always in increasing order.

A bucket value is the greatest item value stored in the bucket.

Repeats is the number of repeats of the bucket value; it can be used to find popular values.

func (*Column) String ¶

func (c *Column) String() string

type FMSketch ¶

type FMSketch struct {
	// contains filtered or unexported fields
}

FMSketch is used to count the number of distinct elements in a set.

func FMSketchFromProto ¶

func FMSketchFromProto(protoSketch *tipb.FMSketch) *FMSketch

FMSketchFromProto converts FMSketch from its protobuf representation.

func (*FMSketch) InsertValue ¶

func (s *FMSketch) InsertValue(value types.Datum) error

InsertValue inserts a value into the FM sketch.

func (*FMSketch) NDV ¶

func (s *FMSketch) NDV() int64

NDV returns the ndv of the sketch.

type Handle ¶

type Handle struct {

	// LastVersion is the latest update version before last lease. Exported for test.
	LastVersion uint64
	// PrevLastVersion is the latest update version before two lease. Exported for test.
	// We need this because for two tables, the smaller version may write later than the one with larger version.
	// We can read the version with lastTwoVersion if the diff between commit time and version is less than one lease.
	// PrevLastVersion will be assigned by LastVersion every time Update is called.
	PrevLastVersion uint64

	Lease time.Duration
	// contains filtered or unexported fields
}

Handle can update stats info periodically.

func NewHandle ¶

func NewHandle(ctx context.Context, lease time.Duration) *Handle

NewHandle creates a Handle for updating stats.

func (*Handle) AnalyzeResultCh ¶

func (h *Handle) AnalyzeResultCh() chan *AnalyzeResult

AnalyzeResultCh returns analyze result channel in handle.

func (*Handle) Clear ¶

func (h *Handle) Clear()

Clear the statsCache, only for test.

func (*Handle) DDLEventCh ¶

func (h *Handle) DDLEventCh() chan *ddl.Event

DDLEventCh returns ddl events channel in handle.

func (*Handle) DeleteTableStatsFromKV ¶

func (h *Handle) DeleteTableStatsFromKV(id int64) error

DeleteTableStatsFromKV deletes table statistics from kv.

func (*Handle) DumpStatsDeltaToKV ¶

func (h *Handle) DumpStatsDeltaToKV()

DumpStatsDeltaToKV sweeps the whole list and updates the global map. Then it dumps every table that is held in the map to KV.

func (*Handle) GetTableStats ¶

func (h *Handle) GetTableStats(tblID int64) *Table

GetTableStats retrieves the statistics table from cache, and the cache will be updated by a goroutine.

func (*Handle) HandleAutoAnalyze ¶

func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error

HandleAutoAnalyze analyzes the newly created table or index.

func (*Handle) HandleDDLEvent ¶

func (h *Handle) HandleDDLEvent(t *ddl.Event) error

HandleDDLEvent begins to process a ddl task.

func (*Handle) InitStats ¶ added in v1.0.5

func (h *Handle) InitStats(is infoschema.InfoSchema) error

InitStats will init the stats cache using full load strategy.

func (*Handle) NewSessionStatsCollector ¶

func (h *Handle) NewSessionStatsCollector() *SessionStatsCollector

NewSessionStatsCollector allocates a stats collector for a session.

func (*Handle) Update ¶

func (h *Handle) Update(is infoschema.InfoSchema) error

Update reads stats meta from store and updates the stats map.

func (*Handle) UpdateTableStats ¶

func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64)

UpdateTableStats updates the statistics table cache using copy on write.

type Histogram ¶

type Histogram struct {
	ID        int64 // Column ID.
	NDV       int64 // Number of distinct values.
	NullCount int64 // Number of null values.
	// LastUpdateVersion is the version that this histogram updated last time.
	LastUpdateVersion uint64

	Buckets []Bucket
}

Histogram represents statistics for a column or index.

func BuildColumn ¶

func BuildColumn(ctx context.Context, numBuckets, id int64, collector *SampleCollector) (*Histogram, error)

BuildColumn builds histogram from samples for column.

func HistogramFromProto ¶

func HistogramFromProto(protoHg *tipb.Histogram) *Histogram

HistogramFromProto converts Histogram from its protobuf representation. Note that we will set BytesDatum for the lower/upper bound in the bucket; the decoding will be done after all histograms are merged.

func (*Histogram) SaveToStorage ¶

func (hg *Histogram) SaveToStorage(ctx context.Context, tableID int64, count int64, isIndex int) error

SaveToStorage saves the histogram to storage.

func (*Index) String ¶

func (idx *Index) String() string

type SampleBuilder ¶

type SampleBuilder struct {
	Sc            *variable.StatementContext
	RecordSet     ast.RecordSet
	ColLen        int   // ColLen is the number of columns need to be sampled.
	PkID          int64 // If primary key is handle, the PkID is the id of the primary key. If not exists, it is -1.
	MaxBucketSize int64
	MaxSampleSize int64
	MaxSketchSize int64
}

SampleBuilder is used to build samples for columns. Also, if primary key is handle, it will directly build histogram for it.

func (SampleBuilder) CollectSamplesAndEstimateNDVs ¶

func (s SampleBuilder) CollectSamplesAndEstimateNDVs() ([]*SampleCollector, *SortedBuilder, error)

CollectSamplesAndEstimateNDVs collects samples from the result set using the Reservoir Sampling algorithm, and estimates NDVs using FM Sketch during the collecting process. It returns the sample collectors, which contain the total count, null count and distinct values count. It also returns the statistic builder for PK, which contains the histogram. See https://en.wikipedia.org/wiki/Reservoir_sampling

type SampleCollector ¶

type SampleCollector struct {
	Samples []types.Datum

	IsMerger      bool
	NullCount     int64
	Count         int64 // Count is the number of non-null rows.
	MaxSampleSize int64
	Sketch        *FMSketch
	// contains filtered or unexported fields
}

SampleCollector will collect Samples and calculate the count and ndv of an attribute.

func SampleCollectorFromProto ¶

func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector

SampleCollectorFromProto converts SampleCollector from its protobuf representation.

func (*SampleCollector) MergeSampleCollector ¶

func (c *SampleCollector) MergeSampleCollector(rc *SampleCollector)

MergeSampleCollector merges two sample collectors.

type SessionStatsCollector ¶

type SessionStatsCollector struct {
	sync.Mutex
	// contains filtered or unexported fields
}

SessionStatsCollector is a list item that holds the delta mapper. If you want to write or read mapper, you must lock it.

func (*SessionStatsCollector) Delete ¶

func (s *SessionStatsCollector) Delete()

Delete only sets the deleted flag to true; it will be deleted from the list when DumpStatsDeltaToKV is called.

func (*SessionStatsCollector) Update ¶

func (s *SessionStatsCollector) Update(id int64, delta int64, count int64)

Update updates the delta and count for one table ID.

type SortedBuilder ¶

type SortedBuilder struct {
	Count int64
	// contains filtered or unexported fields
}

SortedBuilder is used to build histograms for PK and index.

func NewSortedBuilder ¶

func NewSortedBuilder(sc *variable.StatementContext, numBuckets, id int64) *SortedBuilder

NewSortedBuilder creates a new SortedBuilder.

func (*SortedBuilder) Hist ¶

func (b *SortedBuilder) Hist() *Histogram

Hist returns the histogram built by SortedBuilder.

func (*SortedBuilder) Iterate ¶

func (b *SortedBuilder) Iterate(data types.Datum) error

Iterate updates the histogram incrementally.

type Table ¶

type Table struct {
	TableID     int64
	Columns     map[int64]*Column
	Indices     map[int64]*Index
	Count       int64 // Total row count in a table.
	ModifyCount int64 // Total modify count in a table.
	Version     uint64
	Pseudo      bool
}

Table represents statistics for a table.

func PseudoTable ¶

func PseudoTable(tableID int64) *Table

PseudoTable creates pseudo table statistics when the statistics cannot be found in the KV store.

func (*Table) ColumnBetweenRowCount ¶

func (t *Table) ColumnBetweenRowCount(sc *variable.StatementContext, a, b types.Datum, colInfo *model.ColumnInfo) (float64, error)

ColumnBetweenRowCount estimates the row count where the column is greater than or equal to a and less than b.

func (*Table) ColumnEqualRowCount ¶

func (t *Table) ColumnEqualRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error)

ColumnEqualRowCount estimates the row count where the column is equal to value.

func (*Table) ColumnGreaterRowCount ¶

func (t *Table) ColumnGreaterRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error)

ColumnGreaterRowCount estimates the row count where the column is greater than value.

func (*Table) ColumnIsInvalid ¶

func (t *Table) ColumnIsInvalid(colInfo *model.ColumnInfo) bool

ColumnIsInvalid checks if this column is invalid.

func (*Table) ColumnLessRowCount ¶

func (t *Table) ColumnLessRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error)

ColumnLessRowCount estimates the row count where the column is less than value.

func (*Table) GetRowCountByColumnRanges ¶

func (t *Table) GetRowCountByColumnRanges(sc *variable.StatementContext, colID int64, colRanges []*types.ColumnRange) (float64, error)

GetRowCountByColumnRanges estimates the row count by a slice of ColumnRange.

func (*Table) GetRowCountByIndexRanges ¶

func (t *Table) GetRowCountByIndexRanges(sc *variable.StatementContext, idxID int64, indexRanges []*types.IndexRange) (float64, error)

GetRowCountByIndexRanges estimates the row count by a slice of IndexRange.

func (*Table) GetRowCountByIntColumnRanges ¶

func (t *Table) GetRowCountByIntColumnRanges(sc *variable.StatementContext, colID int64, intRanges []types.IntColumnRange) (float64, error)

GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.

func (*Table) Selectivity ¶

func (t *Table) Selectivity(ctx context.Context, exprs []expression.Expression) (float64, error)

Selectivity calculates the selectivity of the expressions. The definition of selectivity is (row count after filter / row count before filter). The exprs must currently be in CNF; in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should hold when you call this. TODO: support expressions whose top layer is a DNF. Currently the time complexity is O(n^2).

func (*Table) String ¶

func (t *Table) String() string

String implements Stringer interface.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL