Documentation ¶
Overview ¶
Package discovery provides a way to discover all tablets, e.g. within a specific shard, and monitor their current health.
Use the HealthCheck object to query for tablets and their health.
For an example of how to use the HealthCheck object, see worker/topo_utils.go.
Tablets have to be manually added to the HealthCheck using AddTablet(). Alternatively, use a Watcher implementation which will constantly watch a source (e.g. the topology) and add and remove tablets as they are added or removed from the source. For a Watcher example have a look at NewShardReplicationWatcher().
Note that the getter functions GetTabletStatsFrom* will always return an unfiltered list of all known tablets. Use the helper functions in utils.go to filter them, e.g. RemoveUnhealthyTablets() or GetCurrentMaster(). replicationlag.go contains a more advanced health filter which is used by vtgate.
Internally, the HealthCheck module is connected to each tablet and has a streaming RPC (StreamHealth) open to receive periodic health updates.
Index ¶
- Constants
- Variables
- func TabletToMapKey(tablet *topodatapb.Tablet) string
- func WaitForAllServingTablets(ctx context.Context, hc HealthCheck, ts topo.SrvTopoServer, cell string, ...) error
- func WaitForTablets(ctx context.Context, hc HealthCheck, cell, keyspace, shard string, ...) error
- type FakeHealthCheck
- func (fhc *FakeHealthCheck) AddTablet(cell, name string, tablet *topodatapb.Tablet)
- func (fhc *FakeHealthCheck) AddTestTablet(cell, host string, port int32, keyspace, shard string, ...) *sandboxconn.SandboxConn
- func (fhc *FakeHealthCheck) CacheStatus() TabletsCacheStatusList
- func (fhc *FakeHealthCheck) Close() error
- func (fhc *FakeHealthCheck) GetAllTablets() map[string]*topodatapb.Tablet
- func (fhc *FakeHealthCheck) GetConnection(tablet *topodatapb.Tablet) tabletconn.TabletConn
- func (fhc *FakeHealthCheck) GetTabletStatsFromKeyspaceShard(keyspace, shard string) []*TabletStats
- func (fhc *FakeHealthCheck) GetTabletStatsFromTarget(keyspace, shard string, tabletType topodatapb.TabletType) []*TabletStats
- func (fhc *FakeHealthCheck) RegisterStats()
- func (fhc *FakeHealthCheck) RemoveTablet(tablet *topodatapb.Tablet)
- func (fhc *FakeHealthCheck) Reset()
- func (fhc *FakeHealthCheck) SetListener(listener HealthCheckStatsListener)
- type HealthCheck
- type HealthCheckImpl
- func (hc *HealthCheckImpl) AddTablet(cell, name string, tablet *topodatapb.Tablet)
- func (hc *HealthCheckImpl) CacheStatus() TabletsCacheStatusList
- func (hc *HealthCheckImpl) Close() error
- func (hc *HealthCheckImpl) GetConnection(tablet *topodatapb.Tablet) tabletconn.TabletConn
- func (hc *HealthCheckImpl) GetTabletStatsFromKeyspaceShard(keyspace, shard string) []*TabletStats
- func (hc *HealthCheckImpl) GetTabletStatsFromTarget(keyspace, shard string, tabletType topodatapb.TabletType) []*TabletStats
- func (hc *HealthCheckImpl) RegisterStats()
- func (hc *HealthCheckImpl) RemoveTablet(tablet *topodatapb.Tablet)
- func (hc *HealthCheckImpl) SetListener(listener HealthCheckStatsListener)
- type HealthCheckStatsListener
- type TabletStats
- type TabletStatsList
- type TabletsCacheStatus
- type TabletsCacheStatusList
- type TopologyWatcher
- func NewCellTabletsWatcher(topoServer topo.Server, hc HealthCheck, cell string, ...) *TopologyWatcher
- func NewShardReplicationWatcher(topoServer topo.Server, hc HealthCheck, cell, keyspace, shard string, ...) *TopologyWatcher
- func NewTopologyWatcher(topoServer topo.Server, hc HealthCheck, cell string, ...) *TopologyWatcher
Constants ¶
const ( // DefaultTopoReadConcurrency can be used as default value for the topoReadConcurrency parameter of a TopologyWatcher. DefaultTopoReadConcurrency int = 5 )
Variables ¶
var ( // ErrWaitForTabletsTimeout is returned if we cannot get the tablets in time ErrWaitForTabletsTimeout = errors.New("timeout waiting for tablets") )
var ( // LowReplicationLag defines the duration that replication lag is low enough that the VTTablet is considered healthy. LowReplicationLag = flag.Duration("discovery_low_replication_lag", 30*time.Second, "the replication lag that is considered low enough to be healthy") )
Functions ¶
func TabletToMapKey ¶
func TabletToMapKey(tablet *topodatapb.Tablet) string
TabletToMapKey creates a key to the map from the tablet's host and ports. It should only be used in discovery and related modules.
func WaitForAllServingTablets ¶
func WaitForAllServingTablets(ctx context.Context, hc HealthCheck, ts topo.SrvTopoServer, cell string, types []topodatapb.TabletType) error
WaitForAllServingTablets waits for at least one serving tablet in the given cell for all keyspaces / shards before returning.
func WaitForTablets ¶
func WaitForTablets(ctx context.Context, hc HealthCheck, cell, keyspace, shard string, types []topodatapb.TabletType) error
WaitForTablets waits for at least one tablet in the given cell / keyspace / shard before returning.
Types ¶
type FakeHealthCheck ¶
type FakeHealthCheck struct { // GetStatsFromTargetCounter counts GetTabletStatsFromTarget() being called. // (it can be accessed concurrently by 'multiGo', so using atomic) GetStatsFromTargetCounter sync2.AtomicInt32 // GetStatsFromKeyspaceShardCounter counts GetTabletStatsFromKeyspaceShard() being called. GetStatsFromKeyspaceShardCounter int // contains filtered or unexported fields }
FakeHealthCheck implements discovery.HealthCheck.
func NewFakeHealthCheck ¶
func NewFakeHealthCheck() *FakeHealthCheck
NewFakeHealthCheck returns the fake healthcheck object.
func (*FakeHealthCheck) AddTablet ¶
func (fhc *FakeHealthCheck) AddTablet(cell, name string, tablet *topodatapb.Tablet)
AddTablet adds the tablet.
func (*FakeHealthCheck) AddTestTablet ¶
func (fhc *FakeHealthCheck) AddTestTablet(cell, host string, port int32, keyspace, shard string, tabletType topodatapb.TabletType, serving bool, reparentTS int64, err error) *sandboxconn.SandboxConn
AddTestTablet inserts a fake entry into FakeHealthCheck. The Tablet can be talked to using the provided connection.
func (*FakeHealthCheck) CacheStatus ¶
func (fhc *FakeHealthCheck) CacheStatus() TabletsCacheStatusList
CacheStatus is not implemented.
func (*FakeHealthCheck) GetAllTablets ¶
func (fhc *FakeHealthCheck) GetAllTablets() map[string]*topodatapb.Tablet
GetAllTablets returns all the tablets we have.
func (*FakeHealthCheck) GetConnection ¶
func (fhc *FakeHealthCheck) GetConnection(tablet *topodatapb.Tablet) tabletconn.TabletConn
GetConnection returns the TabletConn of the given tablet.
func (*FakeHealthCheck) GetTabletStatsFromKeyspaceShard ¶
func (fhc *FakeHealthCheck) GetTabletStatsFromKeyspaceShard(keyspace, shard string) []*TabletStats
GetTabletStatsFromKeyspaceShard returns all TabletStats for the given keyspace/shard.
func (*FakeHealthCheck) GetTabletStatsFromTarget ¶
func (fhc *FakeHealthCheck) GetTabletStatsFromTarget(keyspace, shard string, tabletType topodatapb.TabletType) []*TabletStats
GetTabletStatsFromTarget returns all TabletStats for the given target.
func (*FakeHealthCheck) RegisterStats ¶
func (fhc *FakeHealthCheck) RegisterStats()
RegisterStats is not implemented.
func (*FakeHealthCheck) RemoveTablet ¶
func (fhc *FakeHealthCheck) RemoveTablet(tablet *topodatapb.Tablet)
RemoveTablet removes the tablet.
func (*FakeHealthCheck) Reset ¶
func (fhc *FakeHealthCheck) Reset()
Reset cleans up the internal state.
func (*FakeHealthCheck) SetListener ¶
func (fhc *FakeHealthCheck) SetListener(listener HealthCheckStatsListener)
SetListener is not implemented.
type HealthCheck ¶
type HealthCheck interface { // RegisterStats registers the connection counts stats. // It can only be called on one Healthcheck object per process. RegisterStats() // SetListener sets the listener for healthcheck updates. It should not block. // Note that the default implementation requires to set the listener before // any tablets are added to the healthcheck. SetListener(listener HealthCheckStatsListener) // AddTablet adds the tablet, and starts health check. AddTablet(cell, name string, tablet *topodatapb.Tablet) // RemoveTablet removes the tablet, and stops the health check. RemoveTablet(tablet *topodatapb.Tablet) // GetTabletStatsFromKeyspaceShard returns all TabletStats for the given keyspace/shard. GetTabletStatsFromKeyspaceShard(keyspace, shard string) []*TabletStats // GetTabletStatsFromTarget returns all TabletStats for the given target. // You can exclude unhealthy entries using the helper in utils.go. GetTabletStatsFromTarget(keyspace, shard string, tabletType topodatapb.TabletType) []*TabletStats // GetConnection returns the TabletConn of the given tablet. GetConnection(tablet *topodatapb.Tablet) tabletconn.TabletConn // CacheStatus returns a displayable version of the cache. CacheStatus() TabletsCacheStatusList // Close stops the healthcheck. Close() error }
HealthCheck defines the interface of health checking module.
func NewHealthCheck ¶
func NewHealthCheck(connTimeout time.Duration, retryDelay time.Duration, healthCheckTimeout time.Duration) HealthCheck
NewHealthCheck creates a new HealthCheck object.
type HealthCheckImpl ¶
type HealthCheckImpl struct {
// contains filtered or unexported fields
}
HealthCheckImpl performs health checking and notifies downstream components about any changes.
func (*HealthCheckImpl) AddTablet ¶
func (hc *HealthCheckImpl) AddTablet(cell, name string, tablet *topodatapb.Tablet)
AddTablet adds the tablet, and starts health check. It does not block on making connection. name is an optional tag for the tablet, e.g. an alternative address.
func (*HealthCheckImpl) CacheStatus ¶
func (hc *HealthCheckImpl) CacheStatus() TabletsCacheStatusList
CacheStatus returns a displayable version of the cache.
func (*HealthCheckImpl) Close ¶
func (hc *HealthCheckImpl) Close() error
Close stops the healthcheck. After Close() has returned, it's guaranteed that the listener won't be called anymore.
func (*HealthCheckImpl) GetConnection ¶
func (hc *HealthCheckImpl) GetConnection(tablet *topodatapb.Tablet) tabletconn.TabletConn
GetConnection returns the TabletConn of the given tablet.
func (*HealthCheckImpl) GetTabletStatsFromKeyspaceShard ¶
func (hc *HealthCheckImpl) GetTabletStatsFromKeyspaceShard(keyspace, shard string) []*TabletStats
GetTabletStatsFromKeyspaceShard returns all TabletStats for the given keyspace/shard.
func (*HealthCheckImpl) GetTabletStatsFromTarget ¶
func (hc *HealthCheckImpl) GetTabletStatsFromTarget(keyspace, shard string, tabletType topodatapb.TabletType) []*TabletStats
GetTabletStatsFromTarget returns all TabletStats for the given target.
func (*HealthCheckImpl) RegisterStats ¶
func (hc *HealthCheckImpl) RegisterStats()
RegisterStats registers the connection counts stats.
func (*HealthCheckImpl) RemoveTablet ¶
func (hc *HealthCheckImpl) RemoveTablet(tablet *topodatapb.Tablet)
RemoveTablet removes the tablet, and stops the health check. It does not block.
func (*HealthCheckImpl) SetListener ¶
func (hc *HealthCheckImpl) SetListener(listener HealthCheckStatsListener)
SetListener sets the listener for healthcheck updates. It should not block. It must be called after NewHealthCheck and before any tablets are added (either through AddTablet or through a Watcher).
type HealthCheckStatsListener ¶
type HealthCheckStatsListener interface {
StatsUpdate(*TabletStats)
}
HealthCheckStatsListener is the listener to receive health check stats updates.
type TabletStats ¶
type TabletStats struct { Tablet *topodatapb.Tablet Name string // name is an optional tag (e.g. alternative address) Target *querypb.Target Up bool // whether the tablet is added Serving bool // whether the server is serving TabletExternallyReparentedTimestamp int64 Stats *querypb.RealtimeStats LastError error }
TabletStats is returned when getting the set of tablets.
func FilterByReplicationLag ¶
func FilterByReplicationLag(tabletStatsList []*TabletStats) []*TabletStats
FilterByReplicationLag filters the list of TabletStats by TabletStats.Stats.SecondsBehindMaster. The algorithm (TabletStats entries that are non-serving or have an error are ignored):
- Return the list if there is 0 or 1 tablet.
- Return the list if all tablets have <=30s lag.
- Filter by replication lag: for each tablet, if the mean value without it is more than 0.7 of the mean value across all tablets, it is valid.
- Make sure we return at least two tablets (if there is one with <2h replication lag).
- If one tablet is removed, run the above steps again in case there are two tablets with high replication lag. (It should cover most cases.)
For example, lags of (5s, 10s, 15s, 120s) return the first three; lags of (30m, 35m, 40m, 45m) return all.
func GetCurrentMaster ¶
func GetCurrentMaster(tabletStatsList []*TabletStats) []*TabletStats
GetCurrentMaster returns the MASTER tablet with the highest TabletExternallyReparentedTimestamp value.
func RemoveUnhealthyTablets ¶
func RemoveUnhealthyTablets(tabletStatsList []*TabletStats) []*TabletStats
RemoveUnhealthyTablets filters all unhealthy tablets out. NOTE: Non-serving tablets are considered healthy.
func (*TabletStats) String ¶
func (e *TabletStats) String() string
String is defined because we want to print a []*TabletStats array nicely.
type TabletStatsList ¶
type TabletStatsList []*TabletStats
TabletStatsList is used for sorting.
func (TabletStatsList) Less ¶
func (tsl TabletStatsList) Less(i, j int) bool
Less is part of sort.Interface.
func (TabletStatsList) Swap ¶
func (tsl TabletStatsList) Swap(i, j int)
Swap is part of sort.Interface.
type TabletsCacheStatus ¶
type TabletsCacheStatus struct { Cell string Target *querypb.Target TabletsStats TabletStatsList }
TabletsCacheStatus holds the current tablets for a cell/target. TODO: change this to reflect the e2e information about the tablets.
func (*TabletsCacheStatus) StatusAsHTML ¶
func (tcs *TabletsCacheStatus) StatusAsHTML() template.HTML
StatusAsHTML returns an HTML version of the status.
type TabletsCacheStatusList ¶
type TabletsCacheStatusList []*TabletsCacheStatus
TabletsCacheStatusList is used for sorting.
func (TabletsCacheStatusList) Len ¶
func (tcsl TabletsCacheStatusList) Len() int
Len is part of sort.Interface.
func (TabletsCacheStatusList) Less ¶
func (tcsl TabletsCacheStatusList) Less(i, j int) bool
Less is part of sort.Interface.
func (TabletsCacheStatusList) Swap ¶
func (tcsl TabletsCacheStatusList) Swap(i, j int)
Swap is part of sort.Interface.
type TopologyWatcher ¶
type TopologyWatcher struct {
// contains filtered or unexported fields
}
TopologyWatcher periodically polls tablets from a configurable set of tablets.
func NewCellTabletsWatcher ¶
func NewCellTabletsWatcher(topoServer topo.Server, hc HealthCheck, cell string, refreshInterval time.Duration, topoReadConcurrency int) *TopologyWatcher
NewCellTabletsWatcher returns a TopologyWatcher that monitors all the tablets in a cell, and starts refreshing.
func NewShardReplicationWatcher ¶
func NewShardReplicationWatcher(topoServer topo.Server, hc HealthCheck, cell, keyspace, shard string, refreshInterval time.Duration, topoReadConcurrency int) *TopologyWatcher
NewShardReplicationWatcher returns a TopologyWatcher that monitors the tablets in a cell/keyspace/shard, and starts refreshing.
func NewTopologyWatcher ¶
func NewTopologyWatcher(topoServer topo.Server, hc HealthCheck, cell string, refreshInterval time.Duration, topoReadConcurrency int, getTablets func(tw *TopologyWatcher) ([]*topodatapb.TabletAlias, error)) *TopologyWatcher
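NewTopologyWatcher returns a TopologyWatcher that monitors tablets in a cell, and starts refreshing. The set of tablets to monitor is presumably determined by the supplied getTablets function (confirm against the implementation).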
func (*TopologyWatcher) Stop ¶
func (tw *TopologyWatcher) Stop()
Stop stops the watcher. It does not clean up the tablets added to HealthCheck.