metrics

package

Version: v0.0.0-...-5ae08a9 (latest) | Published: Nov 22, 2024 | License: Apache-2.0 | Imports: 11 | Imported by: 61

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/kubernetes/autoscaler

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func ObserveNodeTaintsCount(taintType string, count float64)
func ObservePendingNodeDeletions(value int)
func RegisterAll(emitPerNodeGroupMetrics bool)
func RegisterError(err errors.AutoscalerError)
func RegisterEvictions(podsCount int, result PodEvictionResult)
func RegisterFailedScaleUp(reason FailedScaleUpReason, gpuResourceName, gpuType string)
func RegisterNodeGroupCreation()
func RegisterNodeGroupCreationWithLabelValues(groupType string)
func RegisterNodeGroupDeletion()
func RegisterNodeGroupDeletionWithLabelValues(groupType string)
func RegisterOldUnregisteredNodesRemoved(nodesCount int)
func RegisterScaleDown(nodesCount int, gpuResourceName, gpuType string, reason NodeScaleDownReason)
func RegisterScaleUp(nodesCount int, gpuResourceName, gpuType string)
func RegisterSkippedScaleDownCPU()
func RegisterSkippedScaleDownMemory()
func RegisterSkippedScaleUpCPU()
func RegisterSkippedScaleUpMemory()
func UpdateCPULimitsCores(minCoresCount int64, maxCoresCount int64)
func UpdateClusterCPUCurrentCores(coresCount int64)
func UpdateClusterMemoryCurrentBytes(memoryCount int64)
func UpdateClusterSafeToAutoscale(safe bool)
func UpdateDuration(label FunctionLabel, duration time.Duration)
func UpdateDurationFromStart(label FunctionLabel, start time.Time)
func UpdateInconsistentInstancesMigsCount(migCount int)
func UpdateLastTime(label FunctionLabel, now time.Time)
func UpdateMaxNodesCount(nodesCount int)
func UpdateMemoryLimitsBytes(minMemoryCount int64, maxMemoryCount int64)
func UpdateNapEnabled(enabled bool)
func UpdateNodeGroupBackOffStatus(nodeGroup string, backoffReasonStatus map[string]bool)
func UpdateNodeGroupHealthStatus(nodeGroup string, healthy bool)
func UpdateNodeGroupMax(nodeGroup string, maxNodes int)
func UpdateNodeGroupMin(nodeGroup string, minNodes int)
func UpdateNodeGroupTargetSize(targetSizes map[string]int)
func UpdateNodeGroupsCount(autoscaled, autoprovisioned int)
func UpdateNodesCount(ready, unready, starting, longUnregistered, unregistered int)
func UpdateOverflowingControllers(count int)
func UpdateScaleDownInCooldown(inCooldown bool)
func UpdateUnneededNodesCount(nodesCount int)
func UpdateUnremovableNodesCount(unremovableReasonCounts map[simulator.UnremovableReason]int)
func UpdateUnschedulablePodsCount(uschedulablePodsCount, schedulerUnprocessedCount int)
func UpdateUnschedulablePodsCountWithLabel(uschedulablePodsCount int, label string)
type FailedScaleUpReason
type FunctionLabel
type HealthCheck
- func NewHealthCheck(activityTimeout, successTimeout time.Duration) *HealthCheck
- func (hc *HealthCheck) ServeHTTP(w http.ResponseWriter, r *http.Request)
- func (hc *HealthCheck) StartMonitoring()
- func (hc *HealthCheck) UpdateLastActivity(timestamp time.Time)
- func (hc *HealthCheck) UpdateLastSuccessfulRun(timestamp time.Time)
type NodeGroupType
type NodeScaleDownReason
type PodEvictionResult

Constants ¶

View Source

const (

	// Underutilized means the node was removed because of low utilization.
	Underutilized NodeScaleDownReason = "underutilized"
	// Empty means the node was removed because it was empty.
	Empty NodeScaleDownReason = "empty"
	// Unready means the node was removed because it was unready.
	Unready NodeScaleDownReason = "unready"

	// CloudProviderError means a cloud provider error caused the scale-up to fail.
	CloudProviderError FailedScaleUpReason = "cloudProviderError"
	// APIError means an API call error caused the scale-up to fail.
	APIError FailedScaleUpReason = "apiCallError"
	// Timeout means a timeout was encountered when trying to scale up.
	Timeout FailedScaleUpReason = "timeout"

	// DirectionScaleDown is the direction of a skipped scaling event when scaling in (shrinking).
	DirectionScaleDown string = "down"
	// DirectionScaleUp is the direction of a skipped scaling event when scaling out (growing).
	DirectionScaleUp string = "up"

	// CpuResourceLimit means the CPU resource minimum or maximum was reached;
	// check the direction label to determine whether it was the min or the max.
	CpuResourceLimit string = "CpuResourceLimit"
	// MemoryResourceLimit means the memory resource minimum or maximum was reached;
	// check the direction label to determine whether it was the min or the max.
	MemoryResourceLimit string = "MemoryResourceLimit"

	// LogLongDurationThreshold defines the duration after which a long function
	// duration will be logged (in addition to being counted in the metric).
	// This is meant to help find unexpectedly long function execution times for
	// debugging purposes.
	LogLongDurationThreshold = 5 * time.Second
	// PodEvictionSucceed means creation of the pod eviction object succeeded.
	PodEvictionSucceed PodEvictionResult = "succeeded"
	// PodEvictionFailed means creation of the pod eviction object failed.
	PodEvictionFailed PodEvictionResult = "failed"
)

Variables ¶

This section is empty.

Functions ¶

func ObserveNodeTaintsCount ¶

func ObserveNodeTaintsCount(taintType string, count float64)

ObserveNodeTaintsCount records the node taints count of given type.

func ObservePendingNodeDeletions ¶

func ObservePendingNodeDeletions(value int)

ObservePendingNodeDeletions records the current value of nodes_pending_deletion metric

func RegisterError ¶

func RegisterError(err errors.AutoscalerError)

RegisterError records any errors preventing Cluster Autoscaler from working. No more than one error should be recorded per loop.

func RegisterEvictions ¶

func RegisterEvictions(podsCount int, result PodEvictionResult)

RegisterEvictions records the number of pod evictions that succeeded or failed, as indicated by result.

func RegisterFailedScaleUp ¶

func RegisterFailedScaleUp(reason FailedScaleUpReason, gpuResourceName, gpuType string)

RegisterFailedScaleUp records a failed scale-up operation

func RegisterNodeGroupCreation ¶

func RegisterNodeGroupCreation()

RegisterNodeGroupCreation registers node group creation

func RegisterNodeGroupCreationWithLabelValues ¶

func RegisterNodeGroupCreationWithLabelValues(groupType string)

RegisterNodeGroupCreationWithLabelValues registers node group creation with the provided labels

func RegisterNodeGroupDeletion ¶

func RegisterNodeGroupDeletion()

RegisterNodeGroupDeletion registers node group deletion

func RegisterNodeGroupDeletionWithLabelValues ¶

func RegisterNodeGroupDeletionWithLabelValues(groupType string)

RegisterNodeGroupDeletionWithLabelValues registers node group deletion with the provided labels

func RegisterOldUnregisteredNodesRemoved ¶

func RegisterOldUnregisteredNodesRemoved(nodesCount int)

RegisterOldUnregisteredNodesRemoved records number of old unregistered nodes that have been removed by the cluster autoscaler

func RegisterScaleDown ¶

func RegisterScaleDown(nodesCount int, gpuResourceName, gpuType string, reason NodeScaleDownReason)

RegisterScaleDown records number of nodes removed by scale down

func RegisterScaleUp ¶

func RegisterScaleUp(nodesCount int, gpuResourceName, gpuType string)

RegisterScaleUp records number of nodes added by scale up

func RegisterSkippedScaleDownCPU ¶

func RegisterSkippedScaleDownCPU()

RegisterSkippedScaleDownCPU increases the count of skipped scale-downs (scale ins) because of CPU resource limits

func RegisterSkippedScaleDownMemory ¶

func RegisterSkippedScaleDownMemory()

RegisterSkippedScaleDownMemory increases the count of skipped scale-downs (scale ins) because of Memory resource limits

func RegisterSkippedScaleUpCPU ¶

func RegisterSkippedScaleUpCPU()

RegisterSkippedScaleUpCPU increases the count of skipped scale outs because of CPU resource limits

func RegisterSkippedScaleUpMemory ¶

func RegisterSkippedScaleUpMemory()

RegisterSkippedScaleUpMemory increases the count of skipped scale outs because of Memory resource limits

func UpdateCPULimitsCores ¶

func UpdateCPULimitsCores(minCoresCount int64, maxCoresCount int64)

UpdateCPULimitsCores records the minimum and maximum number of cores in the cluster

func UpdateClusterCPUCurrentCores ¶

func UpdateClusterCPUCurrentCores(coresCount int64)

UpdateClusterCPUCurrentCores records the number of cores in the cluster, excluding nodes that are being deleted

func UpdateClusterMemoryCurrentBytes ¶

func UpdateClusterMemoryCurrentBytes(memoryCount int64)

UpdateClusterMemoryCurrentBytes records the number of bytes of memory in the cluster, excluding nodes that are being deleted

func UpdateClusterSafeToAutoscale ¶

func UpdateClusterSafeToAutoscale(safe bool)

UpdateClusterSafeToAutoscale records if cluster is safe to autoscale

func UpdateDuration ¶

func UpdateDuration(label FunctionLabel, duration time.Duration)

UpdateDuration records the duration of the step identified by the label

func UpdateDurationFromStart ¶

func UpdateDurationFromStart(label FunctionLabel, start time.Time)

UpdateDurationFromStart records the duration of the step identified by the label using start time

func UpdateInconsistentInstancesMigsCount ¶

func UpdateInconsistentInstancesMigsCount(migCount int)

UpdateInconsistentInstancesMigsCount records the observed number of migs where instance count according to InstanceGroupManagers.List() differs from the results of Instances.List(). This can happen when some instances are abandoned or a user edits instance 'created-by' metadata.

func UpdateLastTime ¶

func UpdateLastTime(label FunctionLabel, now time.Time)

UpdateLastTime records the time the step identified by the label was started

func UpdateMaxNodesCount ¶

func UpdateMaxNodesCount(nodesCount int)

UpdateMaxNodesCount records the current maximum number of nodes being set for all node groups

func UpdateMemoryLimitsBytes ¶

func UpdateMemoryLimitsBytes(minMemoryCount int64, maxMemoryCount int64)

UpdateMemoryLimitsBytes records the minimum and maximum bytes of memory in the cluster

func UpdateNapEnabled ¶

func UpdateNapEnabled(enabled bool)

UpdateNapEnabled records if NodeAutoprovisioning is enabled

func UpdateNodeGroupBackOffStatus ¶

func UpdateNodeGroupBackOffStatus(nodeGroup string, backoffReasonStatus map[string]bool)

UpdateNodeGroupBackOffStatus records whether the node group is backed off from autoscaling

func UpdateNodeGroupHealthStatus ¶

func UpdateNodeGroupHealthStatus(nodeGroup string, healthy bool)

UpdateNodeGroupHealthStatus records whether the node group is healthy for autoscaling

func UpdateNodeGroupMax ¶

func UpdateNodeGroupMax(nodeGroup string, maxNodes int)

UpdateNodeGroupMax records the node group maximum allowed number of nodes

func UpdateNodeGroupMin ¶

func UpdateNodeGroupMin(nodeGroup string, minNodes int)

UpdateNodeGroupMin records the node group minimum allowed number of nodes

func UpdateNodeGroupTargetSize ¶

func UpdateNodeGroupTargetSize(targetSizes map[string]int)

UpdateNodeGroupTargetSize records the node group target size

func UpdateNodeGroupsCount ¶

func UpdateNodeGroupsCount(autoscaled, autoprovisioned int)

UpdateNodeGroupsCount records the number of node groups managed by CA

func UpdateNodesCount ¶

func UpdateNodesCount(ready, unready, starting, longUnregistered, unregistered int)

UpdateNodesCount records the number of nodes in cluster

func UpdateOverflowingControllers ¶

func UpdateOverflowingControllers(count int)

UpdateOverflowingControllers sets the number of controllers that could not have their pods cached.

func UpdateScaleDownInCooldown ¶

func UpdateScaleDownInCooldown(inCooldown bool)

UpdateScaleDownInCooldown registers if the cluster autoscaler scaledown is in cooldown

func UpdateUnneededNodesCount ¶

func UpdateUnneededNodesCount(nodesCount int)

UpdateUnneededNodesCount records number of currently unneeded nodes

func UpdateUnremovableNodesCount ¶

func UpdateUnremovableNodesCount(unremovableReasonCounts map[simulator.UnremovableReason]int)

UpdateUnremovableNodesCount records number of currently unremovable nodes

func UpdateUnschedulablePodsCount ¶

func UpdateUnschedulablePodsCount(uschedulablePodsCount, schedulerUnprocessedCount int)

UpdateUnschedulablePodsCount records number of currently unschedulable pods

func UpdateUnschedulablePodsCountWithLabel ¶

func UpdateUnschedulablePodsCountWithLabel(uschedulablePodsCount int, label string)

UpdateUnschedulablePodsCountWithLabel records the number of currently unschedulable pods with the label "type" set to the value of label

Types ¶

type FailedScaleUpReason ¶

type FailedScaleUpReason string

FailedScaleUpReason describes reason of failed scale-up

type FunctionLabel ¶

type FunctionLabel string

FunctionLabel is a name of Cluster Autoscaler operation for which we measure duration

// Names of Cluster Autoscaler operations for which duration is measured.
// Values containing a colon ("scaleDown:...", "scaleUp:...",
// "bulkListInstances:...") carry the name of a broader operation as a prefix.
const (
	ScaleDown                  FunctionLabel = "scaleDown"
	ScaleDownNodeDeletion      FunctionLabel = "scaleDown:nodeDeletion"
	ScaleDownFindNodesToRemove FunctionLabel = "scaleDown:findNodesToRemove"
	ScaleDownMiscOperations    FunctionLabel = "scaleDown:miscOperations"
	ScaleDownSoftTaintUnneeded FunctionLabel = "scaleDown:softTaintUnneeded"
	ScaleUp                    FunctionLabel = "scaleUp"
	BuildPodEquivalenceGroups  FunctionLabel = "scaleUp:buildPodEquivalenceGroups"
	Estimate                   FunctionLabel = "scaleUp:estimate"
	FindUnneeded               FunctionLabel = "findUnneeded"
	UpdateState                FunctionLabel = "updateClusterState"
	FilterOutSchedulable       FunctionLabel = "filterOutSchedulable"
	CloudProviderRefresh       FunctionLabel = "cloudProviderRefresh"
	Main                       FunctionLabel = "main"
	Poll                       FunctionLabel = "poll"
	Reconfigure                FunctionLabel = "reconfigure"
	Autoscaling                FunctionLabel = "autoscaling"
	LoopWait                   FunctionLabel = "loopWait"
	BulkListAllGceInstances    FunctionLabel = "bulkListInstances:listAllInstances"
	BulkListMigInstances       FunctionLabel = "bulkListInstances:listMigInstances"
)

Names of Cluster Autoscaler operations

type HealthCheck ¶

type HealthCheck struct {
	// contains filtered or unexported fields
}

HealthCheck contains information about last time of autoscaler activity and timeout

func NewHealthCheck ¶

func NewHealthCheck(activityTimeout, successTimeout time.Duration) *HealthCheck

NewHealthCheck builds a new HealthCheck object with the given activity and success timeouts

func (*HealthCheck) ServeHTTP ¶

func (hc *HealthCheck) ServeHTTP(w http.ResponseWriter, r *http.Request)

ServeHTTP implements http.Handler interface to provide a health-check endpoint

func (*HealthCheck) StartMonitoring ¶

func (hc *HealthCheck) StartMonitoring()

StartMonitoring activates checks for autoscaler inactivity

func (*HealthCheck) UpdateLastActivity ¶

func (hc *HealthCheck) UpdateLastActivity(timestamp time.Time)

UpdateLastActivity updates last time of activity

func (*HealthCheck) UpdateLastSuccessfulRun ¶

func (hc *HealthCheck) UpdateLastSuccessfulRun(timestamp time.Time)

UpdateLastSuccessfulRun updates last time of successful (i.e. not ending in error) activity

type NodeGroupType ¶

type NodeGroupType string

NodeGroupType describes node group relation to CA

type NodeScaleDownReason ¶

type NodeScaleDownReason string

NodeScaleDownReason describes reason for removing node

type PodEvictionResult ¶

type PodEvictionResult string

PodEvictionResult describes result of the pod eviction attempt

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL