Documentation ¶
Index ¶
- Constants
- Variables
- func GetEventReason() string
- func GetUpgradeDriverWaitForSafeLoadAnnotationKey() string
- func GetUpgradeInitialStateAnnotationKey() string
- func GetUpgradeRequestedAnnotationKey() string
- func GetUpgradeSkipNodeLabelKey() string
- func GetUpgradeStateLabelKey() string
- func GetValidationStartTimeAnnotationKey() string
- func GetWaitForPodCompletionStartTimeAnnotationKey() string
- func SetDriverName(driver string)
- type ClusterUpgradeState
- type ClusterUpgradeStateManager
- type ClusterUpgradeStateManagerImpl
- func (m *ClusterUpgradeStateManagerImpl) ApplyState(ctx context.Context, currentState *ClusterUpgradeState, ...) (err error)
- func (m *ClusterUpgradeStateManagerImpl) BuildState(ctx context.Context, namespace string, driverLabels map[string]string) (*ClusterUpgradeState, error)
- func (m *ClusterUpgradeStateManagerImpl) GetCurrentUnavailableNodes(ctx context.Context, currentState *ClusterUpgradeState) int
- func (m *ClusterUpgradeStateManagerImpl) GetTotalManagedNodes(ctx context.Context, currentState *ClusterUpgradeState) int
- func (m *ClusterUpgradeStateManagerImpl) GetUpgradesAvailable(ctx context.Context, currentState *ClusterUpgradeState, ...) int
- func (m *ClusterUpgradeStateManagerImpl) GetUpgradesDone(ctx context.Context, currentState *ClusterUpgradeState) int
- func (m *ClusterUpgradeStateManagerImpl) GetUpgradesFailed(ctx context.Context, currentState *ClusterUpgradeState) int
- func (m *ClusterUpgradeStateManagerImpl) GetUpgradesInProgress(ctx context.Context, currentState *ClusterUpgradeState) int
- func (m *ClusterUpgradeStateManagerImpl) GetUpgradesPending(ctx context.Context, currentState *ClusterUpgradeState) int
- func (m *ClusterUpgradeStateManagerImpl) IsPodDeletionEnabled() bool
- func (m *ClusterUpgradeStateManagerImpl) IsValidationEnabled() bool
- func (m *ClusterUpgradeStateManagerImpl) ProcessCordonRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessDoneOrUnknownNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessDrainNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessPodDeletionRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessPodRestartNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessUncordonRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessUpgradeFailedNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessUpgradeRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessValidationRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManagerImpl) ProcessWaitForJobsRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManagerImpl) WithPodDeletionEnabled(filter PodDeletionFilter) ClusterUpgradeStateManager
- func (m *ClusterUpgradeStateManagerImpl) WithValidationEnabled(podSelector string) ClusterUpgradeStateManager
- type CordonManager
- type CordonManagerImpl
- type DrainConfiguration
- type DrainManager
- type DrainManagerImpl
- type KeyedMutex
- type NodeUpgradeState
- type NodeUpgradeStateProvider
- type NodeUpgradeStateProviderImpl
- func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeAnnotation(ctx context.Context, node *corev1.Node, key string, value string) error
- func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState(ctx context.Context, node *corev1.Node, newNodeState string) error
- func (p *NodeUpgradeStateProviderImpl) GetNode(ctx context.Context, nodeName string) (*corev1.Node, error)
- type PodDeletionFilter
- type PodManager
- type PodManagerConfig
- type PodManagerImpl
- func (m *PodManagerImpl) GetDaemonsetControllerRevisionHash(ctx context.Context, daemonset *appsv1.DaemonSet) (string, error)
- func (m *PodManagerImpl) GetPodControllerRevisionHash(ctx context.Context, pod *corev1.Pod) (string, error)
- func (m *PodManagerImpl) GetPodDeletionFilter() PodDeletionFilter
- func (m *PodManagerImpl) HandleTimeoutOnPodCompletions(ctx context.Context, node *corev1.Node, timeoutSeconds int64) error
- func (m *PodManagerImpl) IsPodRunningOrPending(pod corev1.Pod) bool
- func (m *PodManagerImpl) ListPods(ctx context.Context, selector string, nodeName string) (*corev1.PodList, error)
- func (m *PodManagerImpl) ScheduleCheckOnPodCompletion(ctx context.Context, config *PodManagerConfig) error
- func (m *PodManagerImpl) SchedulePodEviction(ctx context.Context, config *PodManagerConfig) error
- func (m *PodManagerImpl) SchedulePodsRestart(ctx context.Context, pods []*corev1.Pod) error
- type SafeDriverLoadManager
- type SafeDriverLoadManagerImpl
- type StringSet
- type UnlockFunc
- type ValidationManager
- type ValidationManagerImpl
Constants ¶
const ( // UpgradeStateLabelKeyFmt is the format of the node label key indicating driver upgrade states UpgradeStateLabelKeyFmt = "nvidia.com/%s-driver-upgrade-state" // UpgradeSkipNodeLabelKeyFmt is the format of the node label boolean key indicating to skip driver upgrade UpgradeSkipNodeLabelKeyFmt = "nvidia.com/%s-driver-upgrade.skip" // UpgradeWaitForSafeDriverLoadAnnotationKeyFmt is the format of the node annotation key indicating that // the driver is waiting for safe load. Meaning node should be cordoned and workloads should be removed from the // node before the driver can continue to load. UpgradeWaitForSafeDriverLoadAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade.driver-wait-for-safe-load" // UpgradeInitialStateAnnotationKeyFmt is the format of the node annotation indicating node was unschedulable at // beginning of upgrade process UpgradeInitialStateAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade.node-initial-state.unschedulable" // UpgradeWaitForPodCompletionStartTimeAnnotationKeyFmt is the format of the node annotation indicating start time // for waiting on pod completions //nolint: lll UpgradeWaitForPodCompletionStartTimeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-wait-for-pod-completion-start-time" // UpgradeValidationStartTimeAnnotationKeyFmt is the format of the node annotation indicating start time for // validation-required state UpgradeValidationStartTimeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-validation-start-time" // UpgradeRequestedAnnotationKeyFmt is the format of the node label key indicating driver upgrade was requested // (used for orphaned pods) // Setting this label will trigger setting upgrade state to upgrade-required UpgradeRequestedAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-requested" // UpgradeStateUnknown Node has this state when the upgrade flow is disabled or the node hasn't been processed yet UpgradeStateUnknown = "" // UpgradeStateUpgradeRequired is set when the driver pod on the node is not up-to-date 
and required upgrade // No actions are performed at this stage UpgradeStateUpgradeRequired = "upgrade-required" // UpgradeStateCordonRequired is set when the node needs to be made unschedulable in preparation for driver upgrade UpgradeStateCordonRequired = "cordon-required" // UpgradeStateWaitForJobsRequired is set on the node when we need to wait on jobs to complete until given timeout. UpgradeStateWaitForJobsRequired = "wait-for-jobs-required" // UpgradeStatePodDeletionRequired is set when deletion of pods is required for the driver upgrade to proceed. UpgradeStatePodDeletionRequired = "pod-deletion-required" // UpgradeStateDrainRequired is set when the node is required to be scheduled for drain. After the drain the state // is changed either to UpgradeStatePodRestartRequired or UpgradeStateFailed UpgradeStateDrainRequired = "drain-required" // UpgradeStatePodRestartRequired is set when the driver pod on the node is scheduled for restart // or when unblock of the driver loading is required (safe driver load) UpgradeStatePodRestartRequired = "pod-restart-required" // UpgradeStateValidationRequired is set when validation of the new driver deployed on the node is // required before moving to UpgradeStateUncordonRequired. UpgradeStateValidationRequired = "validation-required" // UpgradeStateUncordonRequired is set when driver pod on the node is up-to-date and has "Ready" status UpgradeStateUncordonRequired = "uncordon-required" // UpgradeStateDone is set when driver pod is up to date and running on the node, the node is schedulable UpgradeStateDone = "upgrade-done" // UpgradeStateFailed is set when there are any failures during the driver upgrade UpgradeStateFailed = "upgrade-failed" )
const (
// PodControllerRevisionHashLabelKey is the label key containing the controller-revision-hash
PodControllerRevisionHashLabelKey = "controller-revision-hash"
)
Variables ¶
var ( // DriverName is the name of the driver to be managed by this package DriverName string )
Functions ¶
func GetEventReason ¶
func GetEventReason() string
GetEventReason returns the reason type based on the driver name
func GetUpgradeDriverWaitForSafeLoadAnnotationKey ¶
func GetUpgradeDriverWaitForSafeLoadAnnotationKey() string
GetUpgradeDriverWaitForSafeLoadAnnotationKey returns the key for annotation used to mark node as waiting for driver safe load
func GetUpgradeInitialStateAnnotationKey ¶
func GetUpgradeInitialStateAnnotationKey() string
GetUpgradeInitialStateAnnotationKey returns the key for annotation used to track initial state of the node
func GetUpgradeRequestedAnnotationKey ¶
func GetUpgradeRequestedAnnotationKey() string
GetUpgradeRequestedAnnotationKey returns the key for the annotation used to mark a node as having a driver upgrade requested externally (orphaned pod)
func GetUpgradeSkipNodeLabelKey ¶
func GetUpgradeSkipNodeLabelKey() string
GetUpgradeSkipNodeLabelKey returns node label used to skip upgrades
func GetUpgradeStateLabelKey ¶
func GetUpgradeStateLabelKey() string
GetUpgradeStateLabelKey returns state label key used for upgrades
func GetValidationStartTimeAnnotationKey ¶
func GetValidationStartTimeAnnotationKey() string
GetValidationStartTimeAnnotationKey returns the key for annotation indicating start time for validation-required state
func GetWaitForPodCompletionStartTimeAnnotationKey ¶
func GetWaitForPodCompletionStartTimeAnnotationKey() string
GetWaitForPodCompletionStartTimeAnnotationKey returns the key for annotation used to track start time for waiting on pod/job completions
func SetDriverName ¶
func SetDriverName(driver string)
SetDriverName sets the name of the driver managed by the upgrade package
Types ¶
type ClusterUpgradeState ¶
type ClusterUpgradeState struct {
NodeStates map[string][]*NodeUpgradeState
}
ClusterUpgradeState contains a snapshot of the driver upgrade state in the cluster. It contains driver upgrade policy and mappings between nodes and their upgrade state. Nodes are grouped together with the driver pod running on them and the daemon set controlling this pod. This state is then used as an input for the ClusterUpgradeStateManager.
func NewClusterUpgradeState ¶
func NewClusterUpgradeState() ClusterUpgradeState
NewClusterUpgradeState creates an empty ClusterUpgradeState object
type ClusterUpgradeStateManager ¶
type ClusterUpgradeStateManager interface { // ApplyState receives a complete cluster upgrade state and, based on upgrade policy, processes each node's state. // Based on the current state of the node, it is calculated if the node can be moved to the next state right now // or whether any actions need to be scheduled for the node to move to the next state. // The function is stateless and idempotent. If the error was returned before all nodes' states were processed, // ApplyState would be called again and complete the processing - all the decisions are based on the input data. ApplyState(ctx context.Context, currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.DriverUpgradePolicySpec) (err error) // BuildState builds a point-in-time snapshot of the driver upgrade state in the cluster. BuildState(ctx context.Context, namespace string, driverLabels map[string]string) (*ClusterUpgradeState, error) // GetTotalManagedNodes returns the total count of nodes managed for driver upgrades GetTotalManagedNodes(ctx context.Context, currentState *ClusterUpgradeState) int // GetUpgradesInProgress returns count of nodes on which upgrade is in progress GetUpgradesInProgress(ctx context.Context, currentState *ClusterUpgradeState) int // GetUpgradesDone returns count of nodes on which upgrade is complete GetUpgradesDone(ctx context.Context, currentState *ClusterUpgradeState) int // GetUpgradesAvailable returns count of nodes on which upgrade can be done GetUpgradesAvailable(ctx context.Context, currentState *ClusterUpgradeState, maxParallelUpgrades int, maxUnavailable int) int // GetUpgradesFailed returns count of nodes on which upgrades have failed GetUpgradesFailed(ctx context.Context, currentState *ClusterUpgradeState) int // GetUpgradesPending returns count of nodes on which are marked for upgrades and upgrade is pending GetUpgradesPending(ctx context.Context, currentState *ClusterUpgradeState) int // WithPodDeletionEnabled provides an option to enable the optional 
'pod-deletion' // state and pass a custom PodDeletionFilter to use WithPodDeletionEnabled(filter PodDeletionFilter) ClusterUpgradeStateManager // WithValidationEnabled provides an option to enable the optional 'validation' state // and pass a podSelector to specify which pods are performing the validation WithValidationEnabled(podSelector string) ClusterUpgradeStateManager // IsPodDeletionEnabled returns true if 'pod-deletion' state is enabled IsPodDeletionEnabled() bool // IsValidationEnabled returns true if 'validation' state is enabled IsValidationEnabled() bool }
ClusterUpgradeStateManager is an interface for performing cluster upgrades of driver containers
func NewClusterUpgradeStateManager ¶
func NewClusterUpgradeStateManager( log logr.Logger, k8sConfig *rest.Config, eventRecorder record.EventRecorder) (ClusterUpgradeStateManager, error)
NewClusterUpgradeStateManager creates a new instance of ClusterUpgradeStateManagerImpl
type ClusterUpgradeStateManagerImpl ¶
type ClusterUpgradeStateManagerImpl struct { Log logr.Logger K8sClient client.Client K8sInterface kubernetes.Interface EventRecorder record.EventRecorder DrainManager DrainManager PodManager PodManager CordonManager CordonManager NodeUpgradeStateProvider NodeUpgradeStateProvider ValidationManager ValidationManager SafeDriverLoadManager SafeDriverLoadManager // contains filtered or unexported fields }
ClusterUpgradeStateManagerImpl serves as a state machine for the ClusterUpgradeState. It processes each node and, based on its state, schedules the required jobs to change its state to the next one.
func (*ClusterUpgradeStateManagerImpl) ApplyState ¶
func (m *ClusterUpgradeStateManagerImpl) ApplyState(ctx context.Context, currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.DriverUpgradePolicySpec) (err error)
ApplyState receives a complete cluster upgrade state and, based on upgrade policy, processes each node's state. Based on the current state of the node, it is calculated if the node can be moved to the next state right now or whether any actions need to be scheduled for the node to move to the next state. The function is stateless and idempotent. If the error was returned before all nodes' states were processed, ApplyState would be called again and complete the processing - all the decisions are based on the input data.
func (*ClusterUpgradeStateManagerImpl) BuildState ¶
func (m *ClusterUpgradeStateManagerImpl) BuildState(ctx context.Context, namespace string, driverLabels map[string]string) (*ClusterUpgradeState, error)
BuildState builds a point-in-time snapshot of the driver upgrade state in the cluster.
func (*ClusterUpgradeStateManagerImpl) GetCurrentUnavailableNodes ¶
func (m *ClusterUpgradeStateManagerImpl) GetCurrentUnavailableNodes(ctx context.Context, currentState *ClusterUpgradeState) int
GetCurrentUnavailableNodes returns the count of nodes that are not in ready state. TODO: Drop ctx as it's not used
func (*ClusterUpgradeStateManagerImpl) GetTotalManagedNodes ¶
func (m *ClusterUpgradeStateManagerImpl) GetTotalManagedNodes(ctx context.Context, currentState *ClusterUpgradeState) int
GetTotalManagedNodes returns the total count of nodes managed for driver upgrades. TODO: Drop ctx as it's not used
func (*ClusterUpgradeStateManagerImpl) GetUpgradesAvailable ¶
func (m *ClusterUpgradeStateManagerImpl) GetUpgradesAvailable(ctx context.Context, currentState *ClusterUpgradeState, maxParallelUpgrades int, maxUnavailable int) int
GetUpgradesAvailable returns count of nodes on which upgrade can be done
func (*ClusterUpgradeStateManagerImpl) GetUpgradesDone ¶
func (m *ClusterUpgradeStateManagerImpl) GetUpgradesDone(ctx context.Context, currentState *ClusterUpgradeState) int
GetUpgradesDone returns count of nodes on which upgrade is complete. TODO: Drop ctx as it's not used
func (*ClusterUpgradeStateManagerImpl) GetUpgradesFailed ¶
func (m *ClusterUpgradeStateManagerImpl) GetUpgradesFailed(ctx context.Context, currentState *ClusterUpgradeState) int
GetUpgradesFailed returns count of nodes on which upgrades have failed. TODO: Drop ctx as it's not used
func (*ClusterUpgradeStateManagerImpl) GetUpgradesInProgress ¶
func (m *ClusterUpgradeStateManagerImpl) GetUpgradesInProgress(ctx context.Context, currentState *ClusterUpgradeState) int
GetUpgradesInProgress returns count of nodes on which upgrade is in progress
func (*ClusterUpgradeStateManagerImpl) GetUpgradesPending ¶
func (m *ClusterUpgradeStateManagerImpl) GetUpgradesPending(ctx context.Context, currentState *ClusterUpgradeState) int
GetUpgradesPending returns count of nodes which are marked for upgrade and on which the upgrade is pending. TODO: Drop ctx as it's not used
func (*ClusterUpgradeStateManagerImpl) IsPodDeletionEnabled ¶
func (m *ClusterUpgradeStateManagerImpl) IsPodDeletionEnabled() bool
IsPodDeletionEnabled returns true if 'pod-deletion' state is enabled
func (*ClusterUpgradeStateManagerImpl) IsValidationEnabled ¶
func (m *ClusterUpgradeStateManagerImpl) IsValidationEnabled() bool
IsValidationEnabled returns true if 'validation' state is enabled
func (*ClusterUpgradeStateManagerImpl) ProcessCordonRequiredNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessCordonRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessCordonRequiredNodes processes UpgradeStateCordonRequired nodes, cordons them and moves them to UpgradeStateWaitForJobsRequired state
func (*ClusterUpgradeStateManagerImpl) ProcessDoneOrUnknownNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessDoneOrUnknownNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, nodeStateName string) error
ProcessDoneOrUnknownNodes iterates over UpgradeStateDone or UpgradeStateUnknown nodes and determines whether each specific node should be in UpgradeStateUpgradeRequired or UpgradeStateDone state.
func (*ClusterUpgradeStateManagerImpl) ProcessDrainNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessDrainNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, drainSpec *v1alpha1.DrainSpec) error
ProcessDrainNodes schedules UpgradeStateDrainRequired nodes for drain. If drain is disabled by upgrade policy, moves the nodes straight to UpgradeStatePodRestartRequired state.
func (*ClusterUpgradeStateManagerImpl) ProcessPodDeletionRequiredNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessPodDeletionRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, podDeletionSpec *v1alpha1.PodDeletionSpec, drainEnabled bool) error
ProcessPodDeletionRequiredNodes processes UpgradeStatePodDeletionRequired nodes, deletes select pods on a node, and moves the nodes to UpgradeStateDrainRequired state. Pods selected for deletion are determined via the PodDeletionFilter returned by PodManager.GetPodDeletionFilter
func (*ClusterUpgradeStateManagerImpl) ProcessPodRestartNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessPodRestartNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessPodRestartNodes processes UpgradeStatePodRestartRequired nodes and schedules driver pod restart for them. If the pod has already been restarted and is in Ready state - moves the node to UpgradeStateUncordonRequired state.
func (*ClusterUpgradeStateManagerImpl) ProcessUncordonRequiredNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessUncordonRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessUncordonRequiredNodes processes UpgradeStateUncordonRequired nodes, uncordons them and moves them to UpgradeStateDone state
func (*ClusterUpgradeStateManagerImpl) ProcessUpgradeFailedNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessUpgradeFailedNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessUpgradeFailedNodes processes UpgradeStateFailed nodes and checks whether the driver pod on the node has been successfully restarted. If the pod is in Ready state - moves the node to UpgradeStateUncordonRequired state.
func (*ClusterUpgradeStateManagerImpl) ProcessUpgradeRequiredNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessUpgradeRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, upgradesAvailable int) error
ProcessUpgradeRequiredNodes processes UpgradeStateUpgradeRequired nodes and moves them to UpgradeStateCordonRequired until the limit on max parallel upgrades is reached.
func (*ClusterUpgradeStateManagerImpl) ProcessValidationRequiredNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessValidationRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessValidationRequiredNodes processes UpgradeStateValidationRequired nodes
func (*ClusterUpgradeStateManagerImpl) ProcessWaitForJobsRequiredNodes ¶
func (m *ClusterUpgradeStateManagerImpl) ProcessWaitForJobsRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, waitForCompletionSpec *v1alpha1.WaitForCompletionSpec) error
ProcessWaitForJobsRequiredNodes processes UpgradeStateWaitForJobsRequired nodes, waits for completion of jobs and moves them to UpgradeStatePodDeletionRequired state.
func (*ClusterUpgradeStateManagerImpl) WithPodDeletionEnabled ¶
func (m *ClusterUpgradeStateManagerImpl) WithPodDeletionEnabled(filter PodDeletionFilter) ClusterUpgradeStateManager
WithPodDeletionEnabled provides an option to enable the optional 'pod-deletion' state and pass a custom PodDeletionFilter to use
func (*ClusterUpgradeStateManagerImpl) WithValidationEnabled ¶
func (m *ClusterUpgradeStateManagerImpl) WithValidationEnabled(podSelector string) ClusterUpgradeStateManager
WithValidationEnabled provides an option to enable the optional 'validation' state and pass a podSelector to specify which pods are performing the validation
type CordonManager ¶
type CordonManager interface { Cordon(ctx context.Context, node *corev1.Node) error Uncordon(ctx context.Context, node *corev1.Node) error }
CordonManager provides methods for cordoning / uncordoning nodes
type CordonManagerImpl ¶
type CordonManagerImpl struct {
// contains filtered or unexported fields
}
CordonManagerImpl implements CordonManager interface and can cordon / uncordon k8s nodes
func NewCordonManager ¶
func NewCordonManager(k8sInterface kubernetes.Interface, log logr.Logger) *CordonManagerImpl
NewCordonManager returns a CordonManagerImpl
type DrainConfiguration ¶
DrainConfiguration contains the drain specification and the list of nodes to schedule drain on
type DrainManager ¶
type DrainManager interface {
ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error
}
DrainManager is an interface that allows scheduling node drains based on DrainSpec
type DrainManagerImpl ¶
type DrainManagerImpl struct {
// contains filtered or unexported fields
}
DrainManagerImpl implements DrainManager interface and can perform nodes drain based on received DrainConfiguration
func NewDrainManager ¶
func NewDrainManager( k8sInterface kubernetes.Interface, nodeUpgradeStateProvider NodeUpgradeStateProvider, log logr.Logger, eventRecorder record.EventRecorder) *DrainManagerImpl
NewDrainManager creates a DrainManager
func (*DrainManagerImpl) ScheduleNodesDrain ¶
func (m *DrainManagerImpl) ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error
ScheduleNodesDrain receives DrainConfiguration and schedules drain for each node in the list. When the node gets scheduled, it's marked as being drained and therefore will not be scheduled for drain twice if the initial drain didn't complete yet. During the drain the node is cordoned first, and then pods on the node are evicted. If the drain is successful, the node moves to UpgradeStatePodRestartRequired state, otherwise it moves to UpgradeStateFailed state.
type KeyedMutex ¶
type KeyedMutex struct {
// contains filtered or unexported fields
}
KeyedMutex is a struct that provides per-key synchronized access
func (*KeyedMutex) Lock ¶
func (m *KeyedMutex) Lock(key string) UnlockFunc
Lock locks a mutex, associated with a given key and returns an unlock function
type NodeUpgradeState ¶
type NodeUpgradeState struct { Node *corev1.Node DriverPod *corev1.Pod DriverDaemonSet *appsv1.DaemonSet }
NodeUpgradeState contains a mapping between a node, the driver pod running on it and the daemon set controlling this pod
func (*NodeUpgradeState) IsOrphanedPod ¶
func (nus *NodeUpgradeState) IsOrphanedPod() bool
IsOrphanedPod returns true if Pod is not associated to a DaemonSet
type NodeUpgradeStateProvider ¶
type NodeUpgradeStateProvider interface { GetNode(ctx context.Context, nodeName string) (*corev1.Node, error) ChangeNodeUpgradeState(ctx context.Context, node *corev1.Node, newNodeState string) error ChangeNodeUpgradeAnnotation(ctx context.Context, node *corev1.Node, key string, value string) error }
NodeUpgradeStateProvider allows for synchronized operations on node objects and ensures that a node obtained from the provider always has the up-to-date upgrade state
func NewNodeUpgradeStateProvider ¶
func NewNodeUpgradeStateProvider(k8sClient client.Client, log logr.Logger, eventRecorder record.EventRecorder) NodeUpgradeStateProvider
NewNodeUpgradeStateProvider creates a NodeUpgradeStateProviderImpl
type NodeUpgradeStateProviderImpl ¶
type NodeUpgradeStateProviderImpl struct { K8sClient client.Client Log logr.Logger // contains filtered or unexported fields }
NodeUpgradeStateProviderImpl implements the NodeUpgradeStateProvider interface
func (*NodeUpgradeStateProviderImpl) ChangeNodeUpgradeAnnotation ¶
func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeAnnotation( ctx context.Context, node *corev1.Node, key string, value string) error
ChangeNodeUpgradeAnnotation patches a given corev1.Node object and updates an annotation with a given value. The function then waits for the operator cache to get updated.
func (*NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState ¶
func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState( ctx context.Context, node *corev1.Node, newNodeState string) error
ChangeNodeUpgradeState patches a given corev1.Node object and updates its UpgradeStateLabel with a given value. The function then waits for the operator cache to get updated.
type PodDeletionFilter ¶
PodDeletionFilter takes a pod and returns a boolean indicating whether the pod should be deleted
type PodManager ¶
type PodManager interface { ScheduleCheckOnPodCompletion(ctx context.Context, config *PodManagerConfig) error SchedulePodsRestart(ctx context.Context, pods []*corev1.Pod) error SchedulePodEviction(ctx context.Context, config *PodManagerConfig) error GetPodDeletionFilter() PodDeletionFilter GetPodControllerRevisionHash(ctx context.Context, pod *corev1.Pod) (string, error) GetDaemonsetControllerRevisionHash(ctx context.Context, daemonset *appsv1.DaemonSet) (string, error) }
PodManager is an interface that allows waiting on certain pod statuses
type PodManagerConfig ¶
type PodManagerConfig struct { Nodes []*corev1.Node DeletionSpec *v1alpha1.PodDeletionSpec WaitForCompletionSpec *v1alpha1.WaitForCompletionSpec DrainEnabled bool }
PodManagerConfig represents the selector for pods and Node names to be considered for managing those pods
type PodManagerImpl ¶
type PodManagerImpl struct {
// contains filtered or unexported fields
}
PodManagerImpl implements PodManager interface and checks for pod states
func NewPodManager ¶
func NewPodManager( k8sInterface kubernetes.Interface, nodeUpgradeStateProvider NodeUpgradeStateProvider, log logr.Logger, podDeletionFilter PodDeletionFilter, eventRecorder record.EventRecorder) *PodManagerImpl
NewPodManager returns an instance of PodManager implementation
func (*PodManagerImpl) GetDaemonsetControllerRevisionHash ¶
func (m *PodManagerImpl) GetDaemonsetControllerRevisionHash(ctx context.Context, daemonset *appsv1.DaemonSet) (string, error)
GetDaemonsetControllerRevisionHash returns the latest DaemonSet Controller Revision Hash
func (*PodManagerImpl) GetPodControllerRevisionHash ¶
func (m *PodManagerImpl) GetPodControllerRevisionHash(ctx context.Context, pod *corev1.Pod) (string, error)
GetPodControllerRevisionHash returns the Pod Controller Revision Hash from its labels. TODO: Drop ctx as it's not used
func (*PodManagerImpl) GetPodDeletionFilter ¶
func (m *PodManagerImpl) GetPodDeletionFilter() PodDeletionFilter
GetPodDeletionFilter returns the PodDeletionFilter
func (*PodManagerImpl) HandleTimeoutOnPodCompletions ¶
func (m *PodManagerImpl) HandleTimeoutOnPodCompletions(ctx context.Context, node *corev1.Node, timeoutSeconds int64) error
HandleTimeoutOnPodCompletions transitions node based on the timeout for job completions on the node
func (*PodManagerImpl) IsPodRunningOrPending ¶
func (m *PodManagerImpl) IsPodRunningOrPending(pod corev1.Pod) bool
IsPodRunningOrPending returns true when the given pod is currently in Running or Pending state
func (*PodManagerImpl) ListPods ¶
func (m *PodManagerImpl) ListPods(ctx context.Context, selector string, nodeName string) (*corev1.PodList, error)
ListPods returns the list of pods in all namespaces with the given selector
func (*PodManagerImpl) ScheduleCheckOnPodCompletion ¶
func (m *PodManagerImpl) ScheduleCheckOnPodCompletion(ctx context.Context, config *PodManagerConfig) error
ScheduleCheckOnPodCompletion receives PodManagerConfig and schedules checks for pod statuses on each node in the list. If the checks are successful, the node moves to UpgradeStatePodDeletionRequired state, otherwise it will stay in the same current state.
func (*PodManagerImpl) SchedulePodEviction ¶
func (m *PodManagerImpl) SchedulePodEviction(ctx context.Context, config *PodManagerConfig) error
SchedulePodEviction receives a config for pod eviction and deletes pods for each node in the list. The set of pods to delete is determined by a filter that is provided to the PodManagerImpl during construction.
func (*PodManagerImpl) SchedulePodsRestart ¶
SchedulePodsRestart receives a list of pods and schedules their deletion. TODO: schedule deletion of pods in parallel on all nodes
type SafeDriverLoadManager ¶
type SafeDriverLoadManager interface { // IsWaitingForSafeDriverLoad checks if driver Pod on the node is waiting for a safe load IsWaitingForSafeDriverLoad(ctx context.Context, node *corev1.Node) (bool, error) // UnblockLoading unblocks driver loading on the node UnblockLoading(ctx context.Context, node *corev1.Node) error }
SafeDriverLoadManager interface defines handlers to interact with drivers that are waiting for a safe load
type SafeDriverLoadManagerImpl ¶
type SafeDriverLoadManagerImpl struct {
// contains filtered or unexported fields
}
SafeDriverLoadManagerImpl is the default implementation of the SafeDriverLoadManager interface. Support for safe driver loading is implemented as a part of the upgrade flow. When UpgradeStateManager detects a node that is waiting for a safe driver load, it will unconditionally transfer it to the UpgradeStateUpgradeRequired state and wait for Cordon and Drain operations to complete according to the upgrade policy. When the Pod is eventually in the UpgradeStatePodRestartRequired state, the UpgradeStateManager will unblock the driver loading (by removing the safe driver load annotation) instead of restarting the Pod. The default implementation of the SafeDriverLoadManager interface assumes that the driver's safe load mechanism is implemented as a two-step procedure. As a first step, the driver pod should load the init container, which will set the "safe driver load annotation" (defined in UpgradeWaitForSafeDriverLoadAnnotationKeyFmt) on the node object, then the container blocks until another entity removes the annotation from the node object. When the init container completes successfully (when the annotation was removed from the Node object), the driver Pod will proceed to the second step and do the driver loading. After that, the UpgradeStateManager will wait for the driver to become ready and then Uncordon the node if required.
func NewSafeDriverLoadManager ¶
func NewSafeDriverLoadManager( nodeUpgradeStateProvider NodeUpgradeStateProvider, log logr.Logger) *SafeDriverLoadManagerImpl
NewSafeDriverLoadManager returns an instance of SafeDriverLoadManager implementation
func (*SafeDriverLoadManagerImpl) IsWaitingForSafeDriverLoad ¶
func (s *SafeDriverLoadManagerImpl) IsWaitingForSafeDriverLoad(_ context.Context, node *corev1.Node) (bool, error)
IsWaitingForSafeDriverLoad checks if driver Pod on the node is waiting for a safe load. The check is implemented by checking that the "safe driver loading annotation" is set on the Node object
func (*SafeDriverLoadManagerImpl) UnblockLoading ¶
UnblockLoading unblocks driver loading on the node by removing the "safe driver loading annotation" from the Node object
type StringSet ¶
type StringSet struct {
// contains filtered or unexported fields
}
StringSet implements a thread safe Set of Strings
type ValidationManager ¶
ValidationManager is an interface for validating driver upgrades
type ValidationManagerImpl ¶
type ValidationManagerImpl struct {
// contains filtered or unexported fields
}
ValidationManagerImpl implements the ValidationManager interface and waits on a validation pod, identified via podSelector, to be Ready.
func NewValidationManager ¶
func NewValidationManager( k8sInterface kubernetes.Interface, log logr.Logger, eventRecorder record.EventRecorder, nodeUpgradeStateProvider NodeUpgradeStateProvider, podSelector string) *ValidationManagerImpl
NewValidationManager returns an instance of ValidationManager implementation