Documentation ¶
Index ¶
- Constants
- func InitMetrics()
- func ObserveNodeTaintsCount(taintType string, count float64)
- func ObservePendingNodeDeletions(value int)
- func RegisterAll(emitPerNodeGroupMetrics bool)
- func RegisterError(err errors.AutoscalerError)
- func RegisterEvictions(podsCount int, result PodEvictionResult)
- func RegisterFailedScaleUp(reason FailedScaleUpReason, gpuResourceName, gpuType string)
- func RegisterNodeGroupCreation()
- func RegisterNodeGroupCreationWithLabelValues(groupType string)
- func RegisterNodeGroupDeletion()
- func RegisterNodeGroupDeletionWithLabelValues(groupType string)
- func RegisterOldUnregisteredNodesRemoved(nodesCount int)
- func RegisterScaleDown(nodesCount int, gpuResourceName, gpuType string, reason NodeScaleDownReason)
- func RegisterScaleUp(nodesCount int, gpuResourceName, gpuType string)
- func RegisterSkippedScaleDownCPU()
- func RegisterSkippedScaleDownMemory()
- func RegisterSkippedScaleUpCPU()
- func RegisterSkippedScaleUpMemory()
- func UpdateCPULimitsCores(minCoresCount int64, maxCoresCount int64)
- func UpdateClusterCPUCurrentCores(coresCount int64)
- func UpdateClusterMemoryCurrentBytes(memoryCount int64)
- func UpdateClusterSafeToAutoscale(safe bool)
- func UpdateDuration(label FunctionLabel, duration time.Duration)
- func UpdateDurationFromStart(label FunctionLabel, start time.Time)
- func UpdateInconsistentInstancesMigsCount(migCount int)
- func UpdateLastTime(label FunctionLabel, now time.Time)
- func UpdateMaxNodesCount(nodesCount int)
- func UpdateMemoryLimitsBytes(minMemoryCount int64, maxMemoryCount int64)
- func UpdateNapEnabled(enabled bool)
- func UpdateNodeGroupBackOffStatus(nodeGroup string, backoffReasonStatus map[string]bool)
- func UpdateNodeGroupHealthStatus(nodeGroup string, healthy bool)
- func UpdateNodeGroupMax(nodeGroup string, maxNodes int)
- func UpdateNodeGroupMin(nodeGroup string, minNodes int)
- func UpdateNodeGroupTargetSize(targetSizes map[string]int)
- func UpdateNodeGroupsCount(autoscaled, autoprovisioned int)
- func UpdateNodesCount(ready, unready, starting, longUnregistered, unregistered int)
- func UpdateOverflowingControllers(count int)
- func UpdateScaleDownInCooldown(inCooldown bool)
- func UpdateUnneededNodesCount(nodesCount int)
- func UpdateUnremovableNodesCount(unremovableReasonCounts map[simulator.UnremovableReason]int)
- func UpdateUnschedulablePodsCount(uschedulablePodsCount, schedulerUnprocessedCount int)
- func UpdateUnschedulablePodsCountWithLabel(uschedulablePodsCount int, label string)
- type FailedScaleUpReason
- type FunctionLabel
- type HealthCheck
- type NodeGroupType
- type NodeScaleDownReason
- type PodEvictionResult
Constants ¶
const ( // Underutilized node was removed because of low utilization Underutilized NodeScaleDownReason = "underutilized" // Empty node was removed Empty NodeScaleDownReason = "empty" // Unready node was removed Unready NodeScaleDownReason = "unready" // CloudProviderError caused scale-up to fail CloudProviderError FailedScaleUpReason = "cloudProviderError" // APIError caused scale-up to fail APIError FailedScaleUpReason = "apiCallError" // Timeout was encountered when trying to scale-up Timeout FailedScaleUpReason = "timeout" // DirectionScaleDown is the direction of skipped scaling event when scaling in (shrinking) DirectionScaleDown string = "down" // DirectionScaleUp is the direction of skipped scaling event when scaling out (growing) DirectionScaleUp string = "up" // CpuResourceLimit minimum or maximum reached, check the direction label to determine min or max CpuResourceLimit string = "CpuResourceLimit" // MemoryResourceLimit minimum or maximum reached, check the direction label to determine min or max MemoryResourceLimit string = "MemoryResourceLimit" // LogLongDurationThreshold defines the duration after which long function // duration will be logged (in addition to being counted in metric). // This is meant to help find unexpectedly long function execution times for // debugging purposes. LogLongDurationThreshold = 5 * time.Second // PodEvictionSucceed means creation of the pod eviction object succeeded PodEvictionSucceed PodEvictionResult = "succeeded" // PodEvictionFailed means creation of the pod eviction object failed PodEvictionFailed PodEvictionResult = "failed" )
Variables ¶
This section is empty.
Functions ¶
func ObserveNodeTaintsCount ¶
ObserveNodeTaintsCount records the node taints count of given type.
func ObservePendingNodeDeletions ¶
func ObservePendingNodeDeletions(value int)
ObservePendingNodeDeletions records the current value of nodes_pending_deletion metric
func RegisterAll ¶
func RegisterAll(emitPerNodeGroupMetrics bool)
RegisterAll registers all metrics.
func RegisterError ¶
func RegisterError(err errors.AutoscalerError)
RegisterError records any errors preventing Cluster Autoscaler from working. No more than one error should be recorded per loop.
func RegisterEvictions ¶
func RegisterEvictions(podsCount int, result PodEvictionResult)
RegisterEvictions records the number of pods whose eviction succeeded or failed
func RegisterFailedScaleUp ¶
func RegisterFailedScaleUp(reason FailedScaleUpReason, gpuResourceName, gpuType string)
RegisterFailedScaleUp records a failed scale-up operation
func RegisterNodeGroupCreation ¶
func RegisterNodeGroupCreation()
RegisterNodeGroupCreation registers node group creation
func RegisterNodeGroupCreationWithLabelValues ¶
func RegisterNodeGroupCreationWithLabelValues(groupType string)
RegisterNodeGroupCreationWithLabelValues registers node group creation with the provided labels
func RegisterNodeGroupDeletion ¶
func RegisterNodeGroupDeletion()
RegisterNodeGroupDeletion registers node group deletion
func RegisterNodeGroupDeletionWithLabelValues ¶
func RegisterNodeGroupDeletionWithLabelValues(groupType string)
RegisterNodeGroupDeletionWithLabelValues registers node group deletion with the provided labels
func RegisterOldUnregisteredNodesRemoved ¶
func RegisterOldUnregisteredNodesRemoved(nodesCount int)
RegisterOldUnregisteredNodesRemoved records number of old unregistered nodes that have been removed by the cluster autoscaler
func RegisterScaleDown ¶
func RegisterScaleDown(nodesCount int, gpuResourceName, gpuType string, reason NodeScaleDownReason)
RegisterScaleDown records number of nodes removed by scale down
func RegisterScaleUp ¶
RegisterScaleUp records number of nodes added by scale up
func RegisterSkippedScaleDownCPU ¶
func RegisterSkippedScaleDownCPU()
RegisterSkippedScaleDownCPU increases the count of skipped scale ins because of CPU resource limits
func RegisterSkippedScaleDownMemory ¶
func RegisterSkippedScaleDownMemory()
RegisterSkippedScaleDownMemory increases the count of skipped scale ins because of Memory resource limits
func RegisterSkippedScaleUpCPU ¶
func RegisterSkippedScaleUpCPU()
RegisterSkippedScaleUpCPU increases the count of skipped scale outs because of CPU resource limits
func RegisterSkippedScaleUpMemory ¶
func RegisterSkippedScaleUpMemory()
RegisterSkippedScaleUpMemory increases the count of skipped scale outs because of Memory resource limits
func UpdateCPULimitsCores ¶
UpdateCPULimitsCores records the minimum and maximum number of cores in the cluster
func UpdateClusterCPUCurrentCores ¶
func UpdateClusterCPUCurrentCores(coresCount int64)
UpdateClusterCPUCurrentCores records the number of cores in the cluster, minus deleting nodes
func UpdateClusterMemoryCurrentBytes ¶
func UpdateClusterMemoryCurrentBytes(memoryCount int64)
UpdateClusterMemoryCurrentBytes records the number of bytes of memory in the cluster, minus deleting nodes
func UpdateClusterSafeToAutoscale ¶
func UpdateClusterSafeToAutoscale(safe bool)
UpdateClusterSafeToAutoscale records if cluster is safe to autoscale
func UpdateDuration ¶
func UpdateDuration(label FunctionLabel, duration time.Duration)
UpdateDuration records the duration of the step identified by the label
func UpdateDurationFromStart ¶
func UpdateDurationFromStart(label FunctionLabel, start time.Time)
UpdateDurationFromStart records the duration of the step identified by the label using start time
func UpdateInconsistentInstancesMigsCount ¶
func UpdateInconsistentInstancesMigsCount(migCount int)
UpdateInconsistentInstancesMigsCount records the observed number of migs where instance count according to InstanceGroupManagers.List() differs from the results of Instances.List(). This can happen when some instances are abandoned or a user edits instance 'created-by' metadata.
func UpdateLastTime ¶
func UpdateLastTime(label FunctionLabel, now time.Time)
UpdateLastTime records the time the step identified by the label was started
func UpdateMaxNodesCount ¶
func UpdateMaxNodesCount(nodesCount int)
UpdateMaxNodesCount records the current maximum number of nodes being set for all node groups
func UpdateMemoryLimitsBytes ¶
UpdateMemoryLimitsBytes records the minimum and maximum bytes of memory in the cluster
func UpdateNapEnabled ¶
func UpdateNapEnabled(enabled bool)
UpdateNapEnabled records if NodeAutoprovisioning is enabled
func UpdateNodeGroupBackOffStatus ¶
UpdateNodeGroupBackOffStatus records whether the node group is backed off from autoscaling
func UpdateNodeGroupHealthStatus ¶
UpdateNodeGroupHealthStatus records whether the node group is healthy for autoscaling
func UpdateNodeGroupMax ¶
UpdateNodeGroupMax records the node group maximum allowed number of nodes
func UpdateNodeGroupMin ¶
UpdateNodeGroupMin records the node group minimum allowed number of nodes
func UpdateNodeGroupTargetSize ¶
UpdateNodeGroupTargetSize records the node group target size
func UpdateNodeGroupsCount ¶
func UpdateNodeGroupsCount(autoscaled, autoprovisioned int)
UpdateNodeGroupsCount records the number of node groups managed by CA
func UpdateNodesCount ¶
func UpdateNodesCount(ready, unready, starting, longUnregistered, unregistered int)
UpdateNodesCount records the number of nodes in cluster
func UpdateOverflowingControllers ¶
func UpdateOverflowingControllers(count int)
UpdateOverflowingControllers sets the number of controllers that could not have their pods cached.
func UpdateScaleDownInCooldown ¶
func UpdateScaleDownInCooldown(inCooldown bool)
UpdateScaleDownInCooldown registers if the cluster autoscaler scaledown is in cooldown
func UpdateUnneededNodesCount ¶
func UpdateUnneededNodesCount(nodesCount int)
UpdateUnneededNodesCount records number of currently unneeded nodes
func UpdateUnremovableNodesCount ¶
func UpdateUnremovableNodesCount(unremovableReasonCounts map[simulator.UnremovableReason]int)
UpdateUnremovableNodesCount records number of currently unremovable nodes
func UpdateUnschedulablePodsCount ¶
func UpdateUnschedulablePodsCount(uschedulablePodsCount, schedulerUnprocessedCount int)
UpdateUnschedulablePodsCount records number of currently unschedulable pods
func UpdateUnschedulablePodsCountWithLabel ¶
UpdateUnschedulablePodsCountWithLabel records number of currently unschedulable pods with label "type" value "label"
Types ¶
type FailedScaleUpReason ¶
type FailedScaleUpReason string
FailedScaleUpReason describes the reason for a failed scale-up
type FunctionLabel ¶
type FunctionLabel string
FunctionLabel is a name of Cluster Autoscaler operation for which we measure duration
const ( ScaleDown FunctionLabel = "scaleDown" ScaleDownNodeDeletion FunctionLabel = "scaleDown:nodeDeletion" ScaleDownFindNodesToRemove FunctionLabel = "scaleDown:findNodesToRemove" ScaleDownMiscOperations FunctionLabel = "scaleDown:miscOperations" ScaleDownSoftTaintUnneeded FunctionLabel = "scaleDown:softTaintUnneeded" ScaleUp FunctionLabel = "scaleUp" BuildPodEquivalenceGroups FunctionLabel = "scaleUp:buildPodEquivalenceGroups" Estimate FunctionLabel = "scaleUp:estimate" FindUnneeded FunctionLabel = "findUnneeded" UpdateState FunctionLabel = "updateClusterState" FilterOutSchedulable FunctionLabel = "filterOutSchedulable" CloudProviderRefresh FunctionLabel = "cloudProviderRefresh" Main FunctionLabel = "main" Poll FunctionLabel = "poll" Reconfigure FunctionLabel = "reconfigure" Autoscaling FunctionLabel = "autoscaling" LoopWait FunctionLabel = "loopWait" BulkListAllGceInstances FunctionLabel = "bulkListInstances:listAllInstances" BulkListMigInstances FunctionLabel = "bulkListInstances:listMigInstances" )
Names of Cluster Autoscaler operations
type HealthCheck ¶
type HealthCheck struct {
// contains filtered or unexported fields
}
HealthCheck contains information about last time of autoscaler activity and timeout
func NewHealthCheck ¶
func NewHealthCheck(activityTimeout, successTimeout time.Duration) *HealthCheck
NewHealthCheck builds a new HealthCheck object with the given activity and success timeouts
func (*HealthCheck) ServeHTTP ¶
func (hc *HealthCheck) ServeHTTP(w http.ResponseWriter, r *http.Request)
ServeHTTP implements http.Handler interface to provide a health-check endpoint
func (*HealthCheck) StartMonitoring ¶
func (hc *HealthCheck) StartMonitoring()
StartMonitoring activates checks for autoscaler inactivity
func (*HealthCheck) UpdateLastActivity ¶
func (hc *HealthCheck) UpdateLastActivity(timestamp time.Time)
UpdateLastActivity updates last time of activity
func (*HealthCheck) UpdateLastSuccessfulRun ¶
func (hc *HealthCheck) UpdateLastSuccessfulRun(timestamp time.Time)
UpdateLastSuccessfulRun updates last time of successful (i.e. not ending in error) activity
type NodeScaleDownReason ¶
type NodeScaleDownReason string
NodeScaleDownReason describes reason for removing node
type PodEvictionResult ¶
type PodEvictionResult string
PodEvictionResult describes result of the pod eviction attempt