Documentation ¶
Index ¶
- Constants
- Variables
- func BuildDisruptionBudgets(ctx context.Context, cluster *state.Cluster, clk clock.Clock, ...) (map[string]int, error)
- func BuildNodePoolMap(ctx context.Context, kubeClient client.Client, ...) (map[string]*v1beta1.NodePool, ...)
- func GetPodEvictionCost(ctx context.Context, p *v1.Pod) float64
- func IsValidationError(err error) bool
- func MakeConsolidation(clock clock.Clock, cluster *state.Cluster, kubeClient client.Client, ...) consolidation
- func SimulateScheduling(ctx context.Context, kubeClient client.Client, cluster *state.Cluster, ...) (pscheduling.Results, error)
- type Action
- type Candidate
- type CandidateFilter
- type Command
- type Controller
- type Drift
- type Emptiness
- type EmptyNodeConsolidation
- func (c *EmptyNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, ...) (Command, scheduling.Results, error)
- func (c *EmptyNodeConsolidation) ConsolidationType() string
- func (c *EmptyNodeConsolidation) IsConsolidated() bool
- func (c *EmptyNodeConsolidation) ShouldDisrupt(_ context.Context, cn *Candidate) bool
- func (c *EmptyNodeConsolidation) Type() string
- type Expiration
- type Method
- type MultiNodeConsolidation
- func (m *MultiNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, ...) (Command, scheduling.Results, error)
- func (m *MultiNodeConsolidation) ConsolidationType() string
- func (c *MultiNodeConsolidation) IsConsolidated() bool
- func (c *MultiNodeConsolidation) ShouldDisrupt(_ context.Context, cn *Candidate) bool
- func (m *MultiNodeConsolidation) Type() string
- type PDBLimits
- type SingleNodeConsolidation
- func (s *SingleNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, ...) (Command, scheduling.Results, error)
- func (s *SingleNodeConsolidation) ConsolidationType() string
- func (c *SingleNodeConsolidation) IsConsolidated() bool
- func (c *SingleNodeConsolidation) ShouldDisrupt(_ context.Context, cn *Candidate) bool
- func (s *SingleNodeConsolidation) Type() string
- type UninitializedNodeError
- type Validation
- func (v *Validation) IsValid(ctx context.Context, cmd Command, validationPeriod time.Duration) error
- func (v *Validation) ShouldDisrupt(_ context.Context, c *Candidate) bool
- func (v *Validation) ValidateCandidates(ctx context.Context, candidates ...*Candidate) ([]*Candidate, error)
- func (v *Validation) ValidateCommand(ctx context.Context, cmd Command, candidates []*Candidate) error
- type ValidationError
Constants ¶
const MinInstanceTypesForSpotToSpotConsolidation = 15
MinInstanceTypesForSpotToSpotConsolidation is the minimum number of instanceTypes in a NodeClaim needed to trigger spot-to-spot single-node consolidation
const MultiNodeConsolidationTimeoutDuration = 1 * time.Minute
const SingleNodeConsolidationTimeoutDuration = 3 * time.Minute
Variables ¶
var ( EvaluationDurationHistogram = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: metrics.Namespace, Subsystem: disruptionSubsystem, Name: "evaluation_duration_seconds", Help: "Duration of the disruption evaluation process in seconds. Labeled by method and consolidation type.", Buckets: metrics.DurationBuckets(), }, []string{methodLabel, consolidationTypeLabel}, ) ActionsPerformedCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: metrics.Namespace, Subsystem: disruptionSubsystem, Name: "actions_performed_total", Help: "Number of disruption actions performed. Labeled by disruption action, method, and consolidation type.", }, []string{actionLabel, methodLabel, consolidationTypeLabel}, ) NodesDisruptedCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: metrics.Namespace, Subsystem: disruptionSubsystem, Name: "nodes_disrupted_total", Help: "Total number of nodes disrupted. Labeled by NodePool, disruption action, method, and consolidation type.", }, []string{metrics.NodePoolLabel, actionLabel, methodLabel, consolidationTypeLabel}, ) PodsDisruptedCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: metrics.Namespace, Subsystem: disruptionSubsystem, Name: "pods_disrupted_total", Help: "Total number of reschedulable pods disrupted on nodes. Labeled by NodePool, disruption action, method, and consolidation type.", }, []string{metrics.NodePoolLabel, actionLabel, methodLabel, consolidationTypeLabel}, ) EligibleNodesGauge = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metrics.Namespace, Subsystem: disruptionSubsystem, Name: "eligible_nodes", Help: "Number of nodes eligible for disruption by Karpenter. Labeled by disruption method and consolidation type.", }, []string{methodLabel, consolidationTypeLabel}, ) ConsolidationTimeoutTotalCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: metrics.Namespace, Subsystem: disruptionSubsystem, Name: "consolidation_timeouts_total", Help: "Number of times the Consolidation algorithm has reached a timeout. Labeled by consolidation type.", }, []string{consolidationTypeLabel}, ) BudgetsAllowedDisruptionsGauge = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metrics.Namespace, Subsystem: disruptionSubsystem, Name: "budgets_allowed_disruptions", Help: "The number of nodes for a given NodePool that can be disrupted at a point in time. Labeled by NodePool. Note that allowed disruptions can change very rapidly, as new nodes may be created and others may be deleted at any point.", }, []string{metrics.NodePoolLabel}, ) )
Functions ¶
func BuildDisruptionBudgets ¶ added in v0.34.0
func BuildDisruptionBudgets(ctx context.Context, cluster *state.Cluster, clk clock.Clock, kubeClient client.Client, recorder events.Recorder) (map[string]int, error)
BuildDisruptionBudgets will return a map for nodePoolName -> numAllowedDisruptions and an error
func BuildNodePoolMap ¶ added in v0.34.0
func BuildNodePoolMap(ctx context.Context, kubeClient client.Client, cloudProvider cloudprovider.CloudProvider) (map[string]*v1beta1.NodePool, map[string]map[string]*cloudprovider.InstanceType, error)
BuildNodePoolMap builds a nodePoolName -> nodePool map and a nodePoolName -> instanceTypeName -> instance type map
func GetPodEvictionCost ¶
GetPodEvictionCost returns the disruption cost computed for evicting the given pod.
func IsValidationError ¶ added in v0.36.1
func MakeConsolidation ¶ added in v0.34.0
func MakeConsolidation(clock clock.Clock, cluster *state.Cluster, kubeClient client.Client, provisioner *provisioning.Provisioner, cloudProvider cloudprovider.CloudProvider, recorder events.Recorder, queue *orchestration.Queue) consolidation
func SimulateScheduling ¶ added in v0.34.0
func SimulateScheduling(ctx context.Context, kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, candidates ...*Candidate, ) (pscheduling.Results, error)
Types ¶
type Candidate ¶
Candidate is a state.StateNode that we are considering for disruption along with extra information to be used in making that determination
func GetCandidates ¶
func GetCandidates(ctx context.Context, cluster *state.Cluster, kubeClient client.Client, recorder events.Recorder, clk clock.Clock, cloudProvider cloudprovider.CloudProvider, shouldDeprovision CandidateFilter, queue *orchestration.Queue, ) ([]*Candidate, error)
GetCandidates returns nodes that appear to be currently disruptable based on their NodePool
func NewCandidate ¶
func NewCandidate(ctx context.Context, kubeClient client.Client, recorder events.Recorder, clk clock.Clock, node *state.StateNode, pdbs *PDBLimits, nodePoolMap map[string]*v1beta1.NodePool, nodePoolToInstanceTypesMap map[string]map[string]*cloudprovider.InstanceType, queue *orchestration.Queue) (*Candidate, error)
type Controller ¶
type Controller struct {
// contains filtered or unexported fields
}
func NewController ¶
func NewController(clk clock.Clock, kubeClient client.Client, provisioner *provisioning.Provisioner, cp cloudprovider.CloudProvider, recorder events.Recorder, cluster *state.Cluster, queue *orchestration.Queue, ) *Controller
type Drift ¶
type Drift struct {
// contains filtered or unexported fields
}
Drift is a subreconciler that deletes drifted candidates.
func NewDrift ¶
func NewDrift(kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, recorder events.Recorder) *Drift
func (*Drift) ComputeCommand ¶
func (d *Drift) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates
func (*Drift) ConsolidationType ¶
func (*Drift) ShouldDisrupt ¶
ShouldDisrupt is a predicate used to filter candidates
type Emptiness ¶
type Emptiness struct {
// contains filtered or unexported fields
}
Emptiness is a subreconciler that deletes empty candidates. Emptiness will respect TTLSecondsAfterEmpty
func (*Emptiness) ComputeCommand ¶
func (e *Emptiness) ComputeCommand(_ context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates
func (*Emptiness) ConsolidationType ¶
func (*Emptiness) ShouldDisrupt ¶
ShouldDisrupt is a predicate used to filter candidates
type EmptyNodeConsolidation ¶
type EmptyNodeConsolidation struct {
// contains filtered or unexported fields
}
EmptyNodeConsolidation is the consolidation controller that performs multi-nodeclaim consolidation of entirely empty nodes
func NewEmptyNodeConsolidation ¶
func NewEmptyNodeConsolidation(consolidation consolidation) *EmptyNodeConsolidation
func (*EmptyNodeConsolidation) ComputeCommand ¶
func (c *EmptyNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates
func (*EmptyNodeConsolidation) ConsolidationType ¶
func (c *EmptyNodeConsolidation) ConsolidationType() string
func (*EmptyNodeConsolidation) IsConsolidated ¶ added in v0.34.0
func (c *EmptyNodeConsolidation) IsConsolidated() bool
IsConsolidated returns true if nothing has changed since markConsolidated was called.
func (*EmptyNodeConsolidation) ShouldDisrupt ¶
ShouldDisrupt is a predicate used to filter candidates
func (*EmptyNodeConsolidation) Type ¶
func (c *EmptyNodeConsolidation) Type() string
type Expiration ¶
type Expiration struct {
// contains filtered or unexported fields
}
Expiration is a subreconciler that deletes expired candidates. Expiration will respect the NodePool's expireAfter setting
func NewExpiration ¶
func NewExpiration(clk clock.Clock, kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, recorder events.Recorder) *Expiration
func (*Expiration) ComputeCommand ¶
func (e *Expiration) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates
func (*Expiration) ConsolidationType ¶
func (e *Expiration) ConsolidationType() string
func (*Expiration) ShouldDisrupt ¶
func (e *Expiration) ShouldDisrupt(_ context.Context, c *Candidate) bool
ShouldDisrupt is a predicate used to filter candidates
func (*Expiration) Type ¶
func (e *Expiration) Type() string
type MultiNodeConsolidation ¶
type MultiNodeConsolidation struct {
// contains filtered or unexported fields
}
func NewMultiNodeConsolidation ¶
func NewMultiNodeConsolidation(consolidation consolidation) *MultiNodeConsolidation
func (*MultiNodeConsolidation) ComputeCommand ¶
func (m *MultiNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error)
func (*MultiNodeConsolidation) ConsolidationType ¶
func (m *MultiNodeConsolidation) ConsolidationType() string
func (*MultiNodeConsolidation) IsConsolidated ¶ added in v0.34.0
func (c *MultiNodeConsolidation) IsConsolidated() bool
IsConsolidated returns true if nothing has changed since markConsolidated was called.
func (*MultiNodeConsolidation) ShouldDisrupt ¶
ShouldDisrupt is a predicate used to filter candidates
func (*MultiNodeConsolidation) Type ¶
func (m *MultiNodeConsolidation) Type() string
type PDBLimits ¶
type PDBLimits struct {
// contains filtered or unexported fields
}
PDBLimits is used to evaluate if evicting a list of pods is possible.
func NewPDBLimits ¶
func (*PDBLimits) CanEvictPods ¶
CanEvictPods returns true if every pod in the list is evictable. They may not all be evictable simultaneously, but for every PDB that controls the pods at least one pod can be evicted.
type SingleNodeConsolidation ¶
type SingleNodeConsolidation struct {
// contains filtered or unexported fields
}
SingleNodeConsolidation is the consolidation controller that performs single-node consolidation.
func NewSingleNodeConsolidation ¶
func NewSingleNodeConsolidation(consolidation consolidation) *SingleNodeConsolidation
func (*SingleNodeConsolidation) ComputeCommand ¶
func (s *SingleNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates.
func (*SingleNodeConsolidation) ConsolidationType ¶
func (s *SingleNodeConsolidation) ConsolidationType() string
func (*SingleNodeConsolidation) IsConsolidated ¶ added in v0.34.0
func (c *SingleNodeConsolidation) IsConsolidated() bool
IsConsolidated returns true if nothing has changed since markConsolidated was called.
func (*SingleNodeConsolidation) ShouldDisrupt ¶
ShouldDisrupt is a predicate used to filter candidates
func (*SingleNodeConsolidation) Type ¶
func (s *SingleNodeConsolidation) Type() string
type UninitializedNodeError ¶ added in v0.37.0
type UninitializedNodeError struct {
*pscheduling.ExistingNode
}
UninitializedNodeError tracks a special pod error for disruption where pods schedule to a node that hasn't been initialized yet, meaning that we can't confidently make a disruption decision based on it
func NewUninitializedNodeError ¶ added in v0.37.0
func NewUninitializedNodeError(node *pscheduling.ExistingNode) *UninitializedNodeError
func (*UninitializedNodeError) Error ¶ added in v0.37.0
func (u *UninitializedNodeError) Error() string
type Validation ¶
type Validation struct {
// contains filtered or unexported fields
}
Validation is used to perform validation on a consolidation command. It makes an assumption that when re-used, all of the commands passed to IsValid were constructed based off of the same consolidation state. This allows it to skip the validation TTL for all but the first command.
func NewValidation ¶
func NewValidation(clk clock.Clock, cluster *state.Cluster, kubeClient client.Client, provisioner *provisioning.Provisioner, cp cloudprovider.CloudProvider, recorder events.Recorder, queue *orchestration.Queue) *Validation
func (*Validation) ShouldDisrupt ¶
func (v *Validation) ShouldDisrupt(_ context.Context, c *Candidate) bool
ShouldDisrupt is a predicate used to filter candidates
func (*Validation) ValidateCandidates ¶ added in v0.36.1
func (v *Validation) ValidateCandidates(ctx context.Context, candidates ...*Candidate) ([]*Candidate, error)
ValidateCandidates gets the current representation of the provided candidates and ensures that they are all still valid. For a candidate to still be valid, the following conditions must be met:
a. It must pass the global candidate filtering logic (no blocking PDBs, no do-not-disrupt annotation, etc.)
b. It must not have any pods nominated for it
c. It must still be disruptable without violating node disruption budgets
If these conditions are met for all candidates, ValidateCandidates returns a slice with the updated representations.
func (*Validation) ValidateCommand ¶
func (v *Validation) ValidateCommand(ctx context.Context, cmd Command, candidates []*Candidate) error
ValidateCommand validates a command for a Method
type ValidationError ¶ added in v0.36.1
type ValidationError struct {
// contains filtered or unexported fields
}
func NewValidationError ¶ added in v0.36.1
func NewValidationError(err error) *ValidationError