Documentation ¶
Index ¶
- Constants
- Variables
- func BuildDisruptionBudgets(ctx context.Context, cluster *state.Cluster, clk clock.Clock, ...) (map[string]map[v1.DisruptionReason]int, error)
- func BuildNodePoolMap(ctx context.Context, kubeClient client.Client, ...) (map[string]*v1.NodePool, map[string]map[string]*cloudprovider.InstanceType, ...)
- func IsValidationError(err error) bool
- func MakeConsolidation(clock clock.Clock, cluster *state.Cluster, kubeClient client.Client, ...) consolidation
- func SimulateScheduling(ctx context.Context, kubeClient client.Client, cluster *state.Cluster, ...) (pscheduling.Results, error)
- type Candidate
- type CandidateFilter
- type Command
- type Controller
- type Decision
- type Drift
- type Emptiness
- func (e *Emptiness) Class() string
- func (e *Emptiness) ComputeCommand(ctx context.Context, ...) (Command, scheduling.Results, error)
- func (e *Emptiness) ConsolidationType() string
- func (c *Emptiness) IsConsolidated() bool
- func (e *Emptiness) Reason() v1.DisruptionReason
- func (e *Emptiness) ShouldDisrupt(_ context.Context, c *Candidate) bool
- type Method
- type MultiNodeConsolidation
- func (m *MultiNodeConsolidation) Class() string
- func (m *MultiNodeConsolidation) ComputeCommand(ctx context.Context, ...) (Command, scheduling.Results, error)
- func (m *MultiNodeConsolidation) ConsolidationType() string
- func (c *MultiNodeConsolidation) IsConsolidated() bool
- func (m *MultiNodeConsolidation) Reason() v1.DisruptionReason
- func (c *MultiNodeConsolidation) ShouldDisrupt(_ context.Context, cn *Candidate) bool
- type SingleNodeConsolidation
- func (s *SingleNodeConsolidation) Class() string
- func (s *SingleNodeConsolidation) ComputeCommand(ctx context.Context, ...) (Command, scheduling.Results, error)
- func (s *SingleNodeConsolidation) ConsolidationType() string
- func (c *SingleNodeConsolidation) IsConsolidated() bool
- func (s *SingleNodeConsolidation) Reason() v1.DisruptionReason
- func (c *SingleNodeConsolidation) ShouldDisrupt(_ context.Context, cn *Candidate) bool
- type UninitializedNodeError
- type Validation
- func (v *Validation) IsValid(ctx context.Context, cmd Command, validationPeriod time.Duration) error
- func (v *Validation) ShouldDisrupt(_ context.Context, c *Candidate) bool
- func (v *Validation) ValidateCandidates(ctx context.Context, candidates ...*Candidate) ([]*Candidate, error)
- func (v *Validation) ValidateCommand(ctx context.Context, cmd Command, candidates []*Candidate) error
- type ValidationError
Constants ¶
const (
	GracefulDisruptionClass = "graceful" // graceful disruption always respects blocking pod PDBs and the do-not-disrupt annotation
	EventualDisruptionClass = "eventual" // eventual disruption is bounded by a NodePool's TerminationGracePeriod, regardless of blocking pod PDBs and the do-not-disrupt annotation
)
const MinInstanceTypesForSpotToSpotConsolidation = 15
MinInstanceTypesForSpotToSpotConsolidation is the minimum number of instanceTypes in a NodeClaim needed to trigger spot-to-spot single-node consolidation
const MultiNodeConsolidationTimeoutDuration = 1 * time.Minute
const SingleNodeConsolidationTimeoutDuration = 3 * time.Minute
Variables ¶
var (
	EvaluationDurationSeconds = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: metrics.Namespace,
			Subsystem: voluntaryDisruptionSubsystem,
			Name:      "decision_evaluation_duration_seconds",
			Help:      "Duration of the disruption decision evaluation process in seconds. Labeled by method and consolidation type.",
			Buckets:   metrics.DurationBuckets(),
		},
		[]string{metrics.ReasonLabel, consolidationTypeLabel},
	)
	DecisionsPerformedTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metrics.Namespace,
			Subsystem: voluntaryDisruptionSubsystem,
			Name:      "decisions_total",
			Help:      "Number of disruption decisions performed. Labeled by disruption decision, reason, and consolidation type.",
		},
		[]string{decisionLabel, metrics.ReasonLabel, consolidationTypeLabel},
	)
	EligibleNodes = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: metrics.Namespace,
			Subsystem: voluntaryDisruptionSubsystem,
			Name:      "eligible_nodes",
			Help:      "Number of nodes eligible for disruption by Karpenter. Labeled by disruption reason.",
		},
		[]string{metrics.ReasonLabel},
	)
	ConsolidationTimeoutsTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metrics.Namespace,
			Subsystem: voluntaryDisruptionSubsystem,
			Name:      "consolidation_timeouts_total",
			Help:      "Number of times the Consolidation algorithm has reached a timeout. Labeled by consolidation type.",
		},
		[]string{consolidationTypeLabel},
	)
	NodePoolAllowedDisruptions = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: metrics.Namespace,
			Subsystem: metrics.NodePoolSubsystem,
			Name:      "allowed_disruptions",
			Help:      "The number of nodes for a given NodePool that can be concurrently disrupting at a point in time. Labeled by NodePool. Note that allowed disruptions can change very rapidly, as new nodes may be created and others may be deleted at any point.",
		},
		[]string{metrics.NodePoolLabel, metrics.ReasonLabel},
	)
)
Functions ¶
func BuildDisruptionBudgets ¶ added in v0.34.0
func BuildDisruptionBudgets(ctx context.Context, cluster *state.Cluster, clk clock.Clock, kubeClient client.Client, recorder events.Recorder) (map[string]map[v1.DisruptionReason]int, error)
BuildDisruptionBudgets prepares our disruption budget mapping. The disruption budget maps each disruption reason to the number of allowed disruptions. We calculate allowed disruptions by taking the max disruptions allowed by disruption reason and subtracting the number of nodes that are NotReady and already being deleted by that disruption reason.
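A minimal sketch of how the returned mapping might be consulted: the outer key is the NodePool name and the inner key is the disruption reason. hasBudget is a hypothetical helper, not part of this package:

// hasBudget reports whether a NodePool still has budget for a disruption
// reason; missing keys yield the zero value, i.e. no allowed disruptions.
func hasBudget(budgets map[string]map[v1.DisruptionReason]int, nodePool string, reason v1.DisruptionReason) bool {
	return budgets[nodePool][reason] > 0
}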
func BuildNodePoolMap ¶ added in v0.34.0
func BuildNodePoolMap(ctx context.Context, kubeClient client.Client, cloudProvider cloudprovider.CloudProvider) (map[string]*v1.NodePool, map[string]map[string]*cloudprovider.InstanceType, error)
BuildNodePoolMap builds a provName -> nodePool map and a provName -> instanceName -> instance type map
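A hypothetical lookup over the two returned maps (NodePool name -> NodePool, and NodePool name -> instance type name -> instance type); instanceTypeFor is illustrative only:

// instanceTypeFor resolves an instance type for a NodePool using the maps
// returned by BuildNodePoolMap.
func instanceTypeFor(
	nodePools map[string]*v1.NodePool,
	instanceTypes map[string]map[string]*cloudprovider.InstanceType,
	nodePoolName, instanceTypeName string,
) (*cloudprovider.InstanceType, bool) {
	// Only resolve instance types for NodePools that were actually returned.
	if _, ok := nodePools[nodePoolName]; !ok {
		return nil, false
	}
	it, ok := instanceTypes[nodePoolName][instanceTypeName]
	return it, ok
}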
func IsValidationError ¶ added in v0.36.1
func IsValidationError(err error) bool
func MakeConsolidation ¶ added in v0.34.0
func MakeConsolidation(clock clock.Clock, cluster *state.Cluster, kubeClient client.Client, provisioner *provisioning.Provisioner, cloudProvider cloudprovider.CloudProvider, recorder events.Recorder, queue *orchestration.Queue) consolidation
func SimulateScheduling ¶ added in v0.34.0
func SimulateScheduling(ctx context.Context, kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, candidates ...*Candidate) (pscheduling.Results, error)
Types ¶
type Candidate ¶
Candidate is a state.StateNode that we are considering for disruption along with extra information to be used in making that determination
func GetCandidates ¶
func GetCandidates(ctx context.Context, cluster *state.Cluster, kubeClient client.Client, recorder events.Recorder, clk clock.Clock, cloudProvider cloudprovider.CloudProvider, shouldDeprovision CandidateFilter, disruptionClass string, queue *orchestration.Queue) ([]*Candidate, error)
GetCandidates returns nodes that appear to be currently deprovisionable based on their NodePool
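A sketch of a caller-supplied filter, assuming CandidateFilter has the same shape as the ShouldDisrupt methods listed above (a func(context.Context, *Candidate) bool); in practice a Method's ShouldDisrupt is passed as the shouldDeprovision argument:

// acceptAll is a hypothetical CandidateFilter that admits every candidate.
func acceptAll(_ context.Context, _ *Candidate) bool { return true }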
func NewCandidate ¶
func NewCandidate(ctx context.Context, kubeClient client.Client, recorder events.Recorder, clk clock.Clock, node *state.StateNode, pdbs pdb.Limits, nodePoolMap map[string]*v1.NodePool, nodePoolToInstanceTypesMap map[string]map[string]*cloudprovider.InstanceType, queue *orchestration.Queue, disruptionClass string) (*Candidate, error)
type Controller ¶
type Controller struct {
// contains filtered or unexported fields
}
func NewController ¶
func NewController(clk clock.Clock, kubeClient client.Client, provisioner *provisioning.Provisioner, cp cloudprovider.CloudProvider, recorder events.Recorder, cluster *state.Cluster, queue *orchestration.Queue) *Controller
type Drift ¶
type Drift struct {
// contains filtered or unexported fields
}
Drift is a subreconciler that deletes drifted candidates.
func NewDrift ¶
func NewDrift(kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, recorder events.Recorder) *Drift
func (*Drift) ComputeCommand ¶
func (d *Drift) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]map[v1.DisruptionReason]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates
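The disruptionBudgetMapping parameter has the same shape as the map returned by BuildDisruptionBudgets, so the two can be wired together directly. A hedged sketch; driftCommand is a hypothetical helper whose parameters mirror the signatures above:

// driftCommand builds the per-NodePool, per-reason budget map and hands it to
// Drift's ComputeCommand for the given candidates.
func driftCommand(ctx context.Context, d *Drift, cluster *state.Cluster, clk clock.Clock,
	kubeClient client.Client, recorder events.Recorder, candidates ...*Candidate) (Command, scheduling.Results, error) {
	budgets, err := BuildDisruptionBudgets(ctx, cluster, clk, kubeClient, recorder)
	if err != nil {
		return Command{}, scheduling.Results{}, err
	}
	return d.ComputeCommand(ctx, budgets, candidates...)
}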
func (*Drift) ConsolidationType ¶
func (*Drift) Reason ¶ added in v1.0.0
func (d *Drift) Reason() v1.DisruptionReason
type Emptiness ¶
type Emptiness struct {
// contains filtered or unexported fields
}
Emptiness is a subreconciler that deletes empty candidates.
func NewEmptiness ¶
func NewEmptiness(c consolidation) *Emptiness
func (*Emptiness) ComputeCommand ¶
func (e *Emptiness) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]map[v1.DisruptionReason]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates
func (*Emptiness) ConsolidationType ¶
func (*Emptiness) IsConsolidated ¶ added in v1.0.0
func (c *Emptiness) IsConsolidated() bool
IsConsolidated returns true if nothing has changed since markConsolidated was called.
func (*Emptiness) Reason ¶ added in v1.0.0
func (e *Emptiness) Reason() v1.DisruptionReason
type MultiNodeConsolidation ¶
type MultiNodeConsolidation struct {
// contains filtered or unexported fields
}
func NewMultiNodeConsolidation ¶
func NewMultiNodeConsolidation(consolidation consolidation) *MultiNodeConsolidation
func (*MultiNodeConsolidation) Class ¶ added in v1.0.0
func (m *MultiNodeConsolidation) Class() string
func (*MultiNodeConsolidation) ComputeCommand ¶
func (m *MultiNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]map[v1.DisruptionReason]int, candidates ...*Candidate) (Command, scheduling.Results, error)
func (*MultiNodeConsolidation) ConsolidationType ¶
func (m *MultiNodeConsolidation) ConsolidationType() string
func (*MultiNodeConsolidation) IsConsolidated ¶ added in v0.34.0
func (c *MultiNodeConsolidation) IsConsolidated() bool
IsConsolidated returns true if nothing has changed since markConsolidated was called.
func (*MultiNodeConsolidation) Reason ¶ added in v1.0.0
func (m *MultiNodeConsolidation) Reason() v1.DisruptionReason
type SingleNodeConsolidation ¶
type SingleNodeConsolidation struct {
// contains filtered or unexported fields
}
SingleNodeConsolidation is the consolidation controller that performs single-node consolidation.
func NewSingleNodeConsolidation ¶
func NewSingleNodeConsolidation(consolidation consolidation) *SingleNodeConsolidation
func (*SingleNodeConsolidation) Class ¶ added in v1.0.0
func (s *SingleNodeConsolidation) Class() string
func (*SingleNodeConsolidation) ComputeCommand ¶
func (s *SingleNodeConsolidation) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]map[v1.DisruptionReason]int, candidates ...*Candidate) (Command, scheduling.Results, error)
ComputeCommand generates a disruption command given candidates
func (*SingleNodeConsolidation) ConsolidationType ¶
func (s *SingleNodeConsolidation) ConsolidationType() string
func (*SingleNodeConsolidation) IsConsolidated ¶ added in v0.34.0
func (c *SingleNodeConsolidation) IsConsolidated() bool
IsConsolidated returns true if nothing has changed since markConsolidated was called.
func (*SingleNodeConsolidation) Reason ¶ added in v1.0.0
func (s *SingleNodeConsolidation) Reason() v1.DisruptionReason
type UninitializedNodeError ¶ added in v0.37.0
type UninitializedNodeError struct {
*pscheduling.ExistingNode
}
UninitializedNodeError tracks a special pod error for disruption where pods schedule to a node that hasn't been initialized yet, meaning that we can't confidently make a disruption decision based on it
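Because the type implements error, a caller can detect it with the standard errors package; a minimal sketch:

var uninitialized *UninitializedNodeError
if errors.As(err, &uninitialized) {
	// A pod was simulated onto a node that isn't initialized yet, so this
	// result isn't a safe basis for a disruption decision.
}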
func NewUninitializedNodeError ¶ added in v0.37.0
func NewUninitializedNodeError(node *pscheduling.ExistingNode) *UninitializedNodeError
func (*UninitializedNodeError) Error ¶ added in v0.37.0
func (u *UninitializedNodeError) Error() string
type Validation ¶
type Validation struct {
// contains filtered or unexported fields
}
Validation is used to perform validation on a consolidation command. It makes an assumption that when re-used, all of the commands passed to IsValid were constructed based off of the same consolidation state. This allows it to skip the validation TTL for all but the first command.
func NewValidation ¶
func NewValidation(clk clock.Clock, cluster *state.Cluster, kubeClient client.Client, provisioner *provisioning.Provisioner, cp cloudprovider.CloudProvider, recorder events.Recorder, queue *orchestration.Queue, reason v1.DisruptionReason) *Validation
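As described above, a single Validation can be re-used for several commands built from the same consolidation state, with only the first IsValid call waiting out the validation TTL. A hedged sketch; the commands slice, the 30-second validation period, and the DisruptionReasonUnderutilized constant are illustrative assumptions:

v := NewValidation(clk, cluster, kubeClient, provisioner, cp, recorder, queue, v1.DisruptionReasonUnderutilized)
for _, cmd := range commands {
	if err := v.IsValid(ctx, cmd, 30*time.Second); err != nil {
		// The command could not be validated against the current cluster state.
		continue
	}
	// The command is still valid and can be executed.
}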
func (*Validation) ShouldDisrupt ¶
func (v *Validation) ShouldDisrupt(_ context.Context, c *Candidate) bool
ShouldDisrupt is a predicate used to filter candidates
func (*Validation) ValidateCandidates ¶ added in v0.36.1
func (v *Validation) ValidateCandidates(ctx context.Context, candidates ...*Candidate) ([]*Candidate, error)
ValidateCandidates gets the current representation of the provided candidates and ensures that they are all still valid. For a candidate to still be valid, the following conditions must be met:
a. It must pass the global candidate filtering logic (no blocking PDBs, no do-not-disrupt annotation, etc.)
b. It must not have any pods nominated for it
c. It must still be disruptable without violating node disruption budgets
If these conditions are met for all candidates, ValidateCandidates returns a slice with the updated representations.
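A hedged sketch of consuming the result, assuming a failed validation surfaces as a ValidationError that IsValidationError recognizes:

validated, err := v.ValidateCandidates(ctx, candidates...)
if err != nil {
	if IsValidationError(err) {
		// At least one candidate no longer satisfies the conditions above;
		// abandon this round and recompute candidates.
	}
	// Otherwise, a non-validation error occurred while refreshing candidates.
}
// validated holds the refreshed representations of the candidates.
_ = validated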
func (*Validation) ValidateCommand ¶
func (v *Validation) ValidateCommand(ctx context.Context, cmd Command, candidates []*Candidate) error
ValidateCommand validates a command for a Method
type ValidationError ¶ added in v0.36.1
type ValidationError struct {
// contains filtered or unexported fields
}
func NewValidationError ¶ added in v0.36.1
func NewValidationError(err error) *ValidationError