upgrade

package
v0.0.0-...-db12cb1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 17, 2024 License: Apache-2.0 Imports: 25 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// UpgradeStateLabelKeyFmt is the format of the node label key indicating driver upgrade states
	UpgradeStateLabelKeyFmt = "nvidia.com/%s-driver-upgrade-state"
	// UpgradeSkipNodeLabelKeyFmt is the format of the node label boolean key indicating to skip driver upgrade
	UpgradeSkipNodeLabelKeyFmt = "nvidia.com/%s-driver-upgrade.skip"
	// UpgradeWaitForSafeDriverLoadAnnotationKeyFmt is the format of the node annotation key indicating that
	// the driver is waiting for safe load. Meaning node should be cordoned and workloads should be removed from the
	// node before the driver can continue to load.
	UpgradeWaitForSafeDriverLoadAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade.driver-wait-for-safe-load"
	// UpgradeInitialStateAnnotationKeyFmt is the format of the node annotation indicating node was unschedulable at
	// beginning of upgrade process
	UpgradeInitialStateAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade.node-initial-state.unschedulable"
	// UpgradeWaitForPodCompletionStartTimeAnnotationKeyFmt is the format of the node annotation indicating start time
	// for waiting on pod completions
	//nolint: lll
	UpgradeWaitForPodCompletionStartTimeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-wait-for-pod-completion-start-time"
	// UpgradeValidationStartTimeAnnotationKeyFmt is the format of the node annotation indicating start time for
	// validation-required state
	UpgradeValidationStartTimeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-validation-start-time"
	// UpgradeRequestedAnnotationKeyFmt is the format of the node annotation key indicating driver upgrade
	// was requested (used for orphaned pods)
	// Setting this annotation will trigger setting upgrade state to upgrade-required
	UpgradeRequestedAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-requested"
	// UpgradeStateUnknown Node has this state when the upgrade flow is disabled or the node hasn't been processed yet
	UpgradeStateUnknown = ""
	// UpgradeStateUpgradeRequired is set when the driver pod on the node is not up-to-date and requires upgrade
	// No actions are performed at this stage
	UpgradeStateUpgradeRequired = "upgrade-required"
	// UpgradeStateCordonRequired is set when the node needs to be made unschedulable in preparation for driver upgrade
	UpgradeStateCordonRequired = "cordon-required"
	// UpgradeStateWaitForJobsRequired is set on the node when we need to wait on jobs to complete until given timeout.
	UpgradeStateWaitForJobsRequired = "wait-for-jobs-required"
	// UpgradeStatePodDeletionRequired is set when deletion of pods is required for the driver upgrade to proceed.
	UpgradeStatePodDeletionRequired = "pod-deletion-required"
	// UpgradeStateDrainRequired is set when the node is required to be scheduled for drain. After the drain the state
	// is changed either to UpgradeStatePodRestartRequired or UpgradeStateFailed
	UpgradeStateDrainRequired = "drain-required"
	// UpgradeStatePodRestartRequired is set when the driver pod on the node is scheduled for restart
	// or when unblock of the driver loading is required (safe driver load)
	UpgradeStatePodRestartRequired = "pod-restart-required"
	// UpgradeStateValidationRequired is set when validation of the new driver deployed on the node is
	// required before moving to UpgradeStateUncordonRequired.
	UpgradeStateValidationRequired = "validation-required"
	// UpgradeStateUncordonRequired is set when driver pod on the node is up-to-date and has "Ready" status
	UpgradeStateUncordonRequired = "uncordon-required"
	// UpgradeStateDone is set when driver pod is up to date and running on the node, the node is schedulable
	UpgradeStateDone = "upgrade-done"
	// UpgradeStateFailed is set when there are any failures during the driver upgrade
	UpgradeStateFailed = "upgrade-failed"
)
View Source
const (
	// PodControllerRevisionHashLabelKey is the label key containing the controller-revision-hash
	PodControllerRevisionHashLabelKey = "controller-revision-hash"
)

Variables

View Source
var (
	// DriverName is the name of the driver to be managed by this package
	DriverName string
)

Functions

func GetEventReason

func GetEventReason() string

GetEventReason returns the reason type based on the driver name

func GetUpgradeDriverWaitForSafeLoadAnnotationKey

func GetUpgradeDriverWaitForSafeLoadAnnotationKey() string

GetUpgradeDriverWaitForSafeLoadAnnotationKey returns the key for annotation used to mark node as waiting for driver safe load

func GetUpgradeInitialStateAnnotationKey

func GetUpgradeInitialStateAnnotationKey() string

GetUpgradeInitialStateAnnotationKey returns the key for annotation used to track initial state of the node

func GetUpgradeRequestedAnnotationKey

func GetUpgradeRequestedAnnotationKey() string

GetUpgradeRequestedAnnotationKey returns the key for the annotation used to mark that a driver upgrade was requested externally for the node (orphaned pod)

func GetUpgradeSkipNodeLabelKey

func GetUpgradeSkipNodeLabelKey() string

GetUpgradeSkipNodeLabelKey returns node label used to skip upgrades

func GetUpgradeStateLabelKey

func GetUpgradeStateLabelKey() string

GetUpgradeStateLabelKey returns state label key used for upgrades

func GetValidationStartTimeAnnotationKey

func GetValidationStartTimeAnnotationKey() string

GetValidationStartTimeAnnotationKey returns the key for annotation indicating start time for validation-required state

func GetWaitForPodCompletionStartTimeAnnotationKey

func GetWaitForPodCompletionStartTimeAnnotationKey() string

GetWaitForPodCompletionStartTimeAnnotationKey returns the key for annotation used to track start time for waiting on pod/job completions

func SetDriverName

func SetDriverName(driver string)

SetDriverName sets the name of the driver managed by the upgrade package

Types

type ClusterUpgradeState

type ClusterUpgradeState struct {
	NodeStates map[string][]*NodeUpgradeState
}

ClusterUpgradeState contains a snapshot of the driver upgrade state in the cluster. It contains driver upgrade policy and mappings between nodes and their upgrade state. Nodes are grouped together with the driver POD running on them and the daemon set controlling this pod. This state is then used as an input for the ClusterUpgradeStateManager.

func NewClusterUpgradeState

func NewClusterUpgradeState() ClusterUpgradeState

NewClusterUpgradeState creates an empty ClusterUpgradeState object

type ClusterUpgradeStateManager

type ClusterUpgradeStateManager interface {
	// ApplyState receives a complete cluster upgrade state and, based on upgrade policy, processes each node's state.
	// Based on the current state of the node, it is calculated if the node can be moved to the next state right now
	// or whether any actions need to be scheduled for the node to move to the next state.
	// The function is stateless and idempotent. If the error was returned before all nodes' states were processed,
	// ApplyState would be called again and complete the processing - all the decisions are based on the input data.
	ApplyState(ctx context.Context,
		currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.DriverUpgradePolicySpec) (err error)
	// BuildState builds a point-in-time snapshot of the driver upgrade state in the cluster.
	BuildState(ctx context.Context, namespace string, driverLabels map[string]string) (*ClusterUpgradeState, error)
	// GetTotalManagedNodes returns the total count of nodes managed for driver upgrades
	GetTotalManagedNodes(ctx context.Context, currentState *ClusterUpgradeState) int
	// GetUpgradesInProgress returns count of nodes on which upgrade is in progress
	GetUpgradesInProgress(ctx context.Context, currentState *ClusterUpgradeState) int
	// GetUpgradesDone returns count of nodes on which upgrade is complete
	GetUpgradesDone(ctx context.Context, currentState *ClusterUpgradeState) int
	// GetUpgradesAvailable returns count of nodes on which upgrade can be done
	GetUpgradesAvailable(ctx context.Context,
		currentState *ClusterUpgradeState, maxParallelUpgrades int, maxUnavailable int) int
	// GetUpgradesFailed returns count of nodes on which upgrades have failed
	GetUpgradesFailed(ctx context.Context, currentState *ClusterUpgradeState) int
	// GetUpgradesPending returns count of nodes which are marked for upgrades and on which upgrade is pending
	GetUpgradesPending(ctx context.Context, currentState *ClusterUpgradeState) int
	// WithPodDeletionEnabled provides an option to enable the optional 'pod-deletion'
	// state and pass a custom PodDeletionFilter to use
	WithPodDeletionEnabled(filter PodDeletionFilter) ClusterUpgradeStateManager
	// WithValidationEnabled provides an option to enable the optional 'validation' state
	// and pass a podSelector to specify which pods are performing the validation
	WithValidationEnabled(podSelector string) ClusterUpgradeStateManager
	// IsPodDeletionEnabled returns true if 'pod-deletion' state is enabled
	IsPodDeletionEnabled() bool
	// IsValidationEnabled returns true if 'validation' state is enabled
	IsValidationEnabled() bool
}

ClusterUpgradeStateManager is an interface for performing cluster upgrades of driver containers

func NewClusterUpgradeStateManager

func NewClusterUpgradeStateManager(
	log logr.Logger,
	k8sConfig *rest.Config,
	eventRecorder record.EventRecorder) (ClusterUpgradeStateManager, error)

NewClusterUpgradeStateManager creates a new instance of ClusterUpgradeStateManagerImpl

type ClusterUpgradeStateManagerImpl

type ClusterUpgradeStateManagerImpl struct {
	Log           logr.Logger
	K8sClient     client.Client
	K8sInterface  kubernetes.Interface
	EventRecorder record.EventRecorder

	DrainManager             DrainManager
	PodManager               PodManager
	CordonManager            CordonManager
	NodeUpgradeStateProvider NodeUpgradeStateProvider
	ValidationManager        ValidationManager
	SafeDriverLoadManager    SafeDriverLoadManager
	// contains filtered or unexported fields
}

ClusterUpgradeStateManagerImpl serves as a state machine for the ClusterUpgradeState. It processes each node and, based on its state, schedules the required jobs to change its state to the next one.

func (*ClusterUpgradeStateManagerImpl) ApplyState

func (m *ClusterUpgradeStateManagerImpl) ApplyState(ctx context.Context,
	currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.DriverUpgradePolicySpec) (err error)

ApplyState receives a complete cluster upgrade state and, based on upgrade policy, processes each node's state. Based on the current state of the node, it is calculated if the node can be moved to the next state right now or whether any actions need to be scheduled for the node to move to the next state. The function is stateless and idempotent. If the error was returned before all nodes' states were processed, ApplyState would be called again and complete the processing - all the decisions are based on the input data.

func (*ClusterUpgradeStateManagerImpl) BuildState

func (m *ClusterUpgradeStateManagerImpl) BuildState(ctx context.Context, namespace string,
	driverLabels map[string]string) (*ClusterUpgradeState, error)

BuildState builds a point-in-time snapshot of the driver upgrade state in the cluster.

func (*ClusterUpgradeStateManagerImpl) GetCurrentUnavailableNodes

func (m *ClusterUpgradeStateManagerImpl) GetCurrentUnavailableNodes(ctx context.Context,
	currentState *ClusterUpgradeState) int

GetCurrentUnavailableNodes returns the number of nodes that are not in ready state. TODO: Drop ctx as it's not used

func (*ClusterUpgradeStateManagerImpl) GetTotalManagedNodes

func (m *ClusterUpgradeStateManagerImpl) GetTotalManagedNodes(ctx context.Context,
	currentState *ClusterUpgradeState) int

GetTotalManagedNodes returns the total count of nodes managed for driver upgrades. TODO: Drop ctx as it's not used

func (*ClusterUpgradeStateManagerImpl) GetUpgradesAvailable

func (m *ClusterUpgradeStateManagerImpl) GetUpgradesAvailable(ctx context.Context,
	currentState *ClusterUpgradeState, maxParallelUpgrades int, maxUnavailable int) int

GetUpgradesAvailable returns count of nodes on which upgrade can be done

func (*ClusterUpgradeStateManagerImpl) GetUpgradesDone

func (m *ClusterUpgradeStateManagerImpl) GetUpgradesDone(ctx context.Context,
	currentState *ClusterUpgradeState) int

GetUpgradesDone returns count of nodes on which upgrade is complete. TODO: Drop ctx as it's not used

func (*ClusterUpgradeStateManagerImpl) GetUpgradesFailed

func (m *ClusterUpgradeStateManagerImpl) GetUpgradesFailed(ctx context.Context,
	currentState *ClusterUpgradeState) int

GetUpgradesFailed returns count of nodes on which upgrades have failed. TODO: Drop ctx as it's not used

func (*ClusterUpgradeStateManagerImpl) GetUpgradesInProgress

func (m *ClusterUpgradeStateManagerImpl) GetUpgradesInProgress(ctx context.Context,
	currentState *ClusterUpgradeState) int

GetUpgradesInProgress returns count of nodes on which upgrade is in progress

func (*ClusterUpgradeStateManagerImpl) GetUpgradesPending

func (m *ClusterUpgradeStateManagerImpl) GetUpgradesPending(ctx context.Context,
	currentState *ClusterUpgradeState) int

GetUpgradesPending returns count of nodes which are marked for upgrades and on which upgrade is pending. TODO: Drop ctx as it's not used

func (*ClusterUpgradeStateManagerImpl) IsPodDeletionEnabled

func (m *ClusterUpgradeStateManagerImpl) IsPodDeletionEnabled() bool

IsPodDeletionEnabled returns true if 'pod-deletion' state is enabled

func (*ClusterUpgradeStateManagerImpl) IsValidationEnabled

func (m *ClusterUpgradeStateManagerImpl) IsValidationEnabled() bool

IsValidationEnabled returns true if 'validation' state is enabled

func (*ClusterUpgradeStateManagerImpl) ProcessCordonRequiredNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessCordonRequiredNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState) error

ProcessCordonRequiredNodes processes UpgradeStateCordonRequired nodes, cordons them and moves them to UpgradeStateWaitForJobsRequired state

func (*ClusterUpgradeStateManagerImpl) ProcessDoneOrUnknownNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessDoneOrUnknownNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState, nodeStateName string) error

ProcessDoneOrUnknownNodes iterates over UpgradeStateDone or UpgradeStateUnknown nodes and determines whether each specific node should be in UpgradeStateUpgradeRequired or UpgradeStateDone state.

func (*ClusterUpgradeStateManagerImpl) ProcessDrainNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessDrainNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState, drainSpec *v1alpha1.DrainSpec) error

ProcessDrainNodes schedules UpgradeStateDrainRequired nodes for drain. If drain is disabled by upgrade policy, moves the nodes straight to UpgradeStatePodRestartRequired state.

func (*ClusterUpgradeStateManagerImpl) ProcessPodDeletionRequiredNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessPodDeletionRequiredNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState, podDeletionSpec *v1alpha1.PodDeletionSpec,
	drainEnabled bool) error

ProcessPodDeletionRequiredNodes processes UpgradeStatePodDeletionRequired nodes, deletes select pods on a node, and moves the nodes to UpgradeStateDrainRequired state. Pods selected for deletion are determined via PodManager.PodDeletion

func (*ClusterUpgradeStateManagerImpl) ProcessPodRestartNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessPodRestartNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState) error

ProcessPodRestartNodes processes UpgradeStatePodRestartRequired nodes and schedules driver pod restart for them. If the pod has already been restarted and is in Ready state - moves the node to UpgradeStateUncordonRequired state.

func (*ClusterUpgradeStateManagerImpl) ProcessUncordonRequiredNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessUncordonRequiredNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState) error

ProcessUncordonRequiredNodes processes UpgradeStateUncordonRequired nodes, uncordons them and moves them to UpgradeStateDone state

func (*ClusterUpgradeStateManagerImpl) ProcessUpgradeFailedNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessUpgradeFailedNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState) error

ProcessUpgradeFailedNodes processes UpgradeStateFailed nodes and checks whether the driver pod on the node has been successfully restarted. If the pod is in Ready state - moves the node to UpgradeStateUncordonRequired state.

func (*ClusterUpgradeStateManagerImpl) ProcessUpgradeRequiredNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessUpgradeRequiredNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState, upgradesAvailable int) error

ProcessUpgradeRequiredNodes processes UpgradeStateUpgradeRequired nodes and moves them to UpgradeStateCordonRequired until the limit on max parallel upgrades is reached.

func (*ClusterUpgradeStateManagerImpl) ProcessValidationRequiredNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessValidationRequiredNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState) error

ProcessValidationRequiredNodes processes UpgradeStateValidationRequired nodes

func (*ClusterUpgradeStateManagerImpl) ProcessWaitForJobsRequiredNodes

func (m *ClusterUpgradeStateManagerImpl) ProcessWaitForJobsRequiredNodes(
	ctx context.Context, currentClusterState *ClusterUpgradeState,
	waitForCompletionSpec *v1alpha1.WaitForCompletionSpec) error

ProcessWaitForJobsRequiredNodes processes UpgradeStateWaitForJobsRequired nodes, waits for completion of jobs and moves them to UpgradeStatePodDeletionRequired state.

func (*ClusterUpgradeStateManagerImpl) WithPodDeletionEnabled

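func (m *ClusterUpgradeStateManagerImpl) WithPodDeletionEnabled(filter PodDeletionFilter) ClusterUpgradeStateManager

WithPodDeletionEnabled provides an option to enable the optional 'pod-deletion' state and pass a custom PodDeletionFilter to use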

func (*ClusterUpgradeStateManagerImpl) WithValidationEnabled

func (m *ClusterUpgradeStateManagerImpl) WithValidationEnabled(podSelector string) ClusterUpgradeStateManager

WithValidationEnabled provides an option to enable the optional 'validation' state and pass a podSelector to specify which pods are performing the validation

type CordonManager

type CordonManager interface {
	Cordon(ctx context.Context, node *corev1.Node) error
	Uncordon(ctx context.Context, node *corev1.Node) error
}

CordonManager provides methods for cordoning / uncordoning nodes

type CordonManagerImpl

type CordonManagerImpl struct {
	// contains filtered or unexported fields
}

CordonManagerImpl implements CordonManager interface and can cordon / uncordon k8s nodes

func NewCordonManager

func NewCordonManager(k8sInterface kubernetes.Interface, log logr.Logger) *CordonManagerImpl

NewCordonManager returns a CordonManagerImpl

func (*CordonManagerImpl) Cordon

func (m *CordonManagerImpl) Cordon(ctx context.Context, node *corev1.Node) error

Cordon marks a node as unschedulable

func (*CordonManagerImpl) Uncordon

func (m *CordonManagerImpl) Uncordon(ctx context.Context, node *corev1.Node) error

Uncordon marks a node as schedulable

type DrainConfiguration

type DrainConfiguration struct {
	Spec  *v1alpha1.DrainSpec
	Nodes []*corev1.Node
}

DrainConfiguration contains the drain specification and the list of nodes to schedule drain on

type DrainManager

type DrainManager interface {
	ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error
}

DrainManager is an interface that allows scheduling node drains based on DrainSpec

type DrainManagerImpl

type DrainManagerImpl struct {
	// contains filtered or unexported fields
}

DrainManagerImpl implements DrainManager interface and can perform nodes drain based on received DrainConfiguration

func NewDrainManager

func NewDrainManager(
	k8sInterface kubernetes.Interface,
	nodeUpgradeStateProvider NodeUpgradeStateProvider,
	log logr.Logger,
	eventRecorder record.EventRecorder) *DrainManagerImpl

NewDrainManager creates a DrainManager

func (*DrainManagerImpl) ScheduleNodesDrain

func (m *DrainManagerImpl) ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error

ScheduleNodesDrain receives DrainConfiguration and schedules drain for each node in the list. When the node gets scheduled, it's marked as being drained and therefore will not be scheduled for drain twice if the initial drain didn't complete yet. During the drain the node is cordoned first, and then pods on the node are evicted. If the drain is successful, the node moves to UpgradeStatePodRestartRequired state, otherwise it moves to UpgradeStateFailed state.

type KeyedMutex

type KeyedMutex struct {
	// contains filtered or unexported fields
}

KeyedMutex is a struct that provides a per-key synchronized access

func (*KeyedMutex) Lock

func (m *KeyedMutex) Lock(key string) UnlockFunc

Lock locks a mutex, associated with a given key and returns an unlock function

type NodeUpgradeState

type NodeUpgradeState struct {
	Node            *corev1.Node
	DriverPod       *corev1.Pod
	DriverDaemonSet *appsv1.DaemonSet
}

NodeUpgradeState contains a mapping between a node, the driver POD running on it and the daemon set controlling this pod

func (*NodeUpgradeState) IsOrphanedPod

func (nus *NodeUpgradeState) IsOrphanedPod() bool

IsOrphanedPod returns true if Pod is not associated to a DaemonSet

type NodeUpgradeStateProvider

type NodeUpgradeStateProvider interface {
	GetNode(ctx context.Context, nodeName string) (*corev1.Node, error)
	ChangeNodeUpgradeState(ctx context.Context, node *corev1.Node, newNodeState string) error
	ChangeNodeUpgradeAnnotation(ctx context.Context, node *corev1.Node, key string, value string) error
}

NodeUpgradeStateProvider allows for synchronized operations on node objects and ensures that a node obtained from the provider always has the up-to-date upgrade state

func NewNodeUpgradeStateProvider

func NewNodeUpgradeStateProvider(k8sClient client.Client, log logr.Logger,
	eventRecorder record.EventRecorder) NodeUpgradeStateProvider

NewNodeUpgradeStateProvider creates a NodeUpgradeStateProviderImpl

type NodeUpgradeStateProviderImpl

type NodeUpgradeStateProviderImpl struct {
	K8sClient client.Client
	Log       logr.Logger
	// contains filtered or unexported fields
}

NodeUpgradeStateProviderImpl implements the NodeUpgradeStateProvider interface

func (*NodeUpgradeStateProviderImpl) ChangeNodeUpgradeAnnotation

func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeAnnotation(
	ctx context.Context, node *corev1.Node, key string, value string) error

ChangeNodeUpgradeAnnotation patches a given corev1.Node object and updates an annotation with a given value. The function then waits for the operator cache to get updated.

func (*NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState

func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState(
	ctx context.Context, node *corev1.Node, newNodeState string) error

ChangeNodeUpgradeState patches a given corev1.Node object and updates its UpgradeStateLabel with a given value. The function then waits for the operator cache to get updated.

func (*NodeUpgradeStateProviderImpl) GetNode

func (p *NodeUpgradeStateProviderImpl) GetNode(ctx context.Context, nodeName string) (*corev1.Node, error)

GetNode returns a corev1.Node according to name

type PodDeletionFilter

type PodDeletionFilter func(corev1.Pod) bool

PodDeletionFilter takes a pod and returns a boolean indicating whether the pod should be deleted

type PodManager

type PodManager interface {
	ScheduleCheckOnPodCompletion(ctx context.Context, config *PodManagerConfig) error
	SchedulePodsRestart(ctx context.Context, pods []*corev1.Pod) error
	SchedulePodEviction(ctx context.Context, config *PodManagerConfig) error
	GetPodDeletionFilter() PodDeletionFilter
	GetPodControllerRevisionHash(ctx context.Context, pod *corev1.Pod) (string, error)
	GetDaemonsetControllerRevisionHash(ctx context.Context, daemonset *appsv1.DaemonSet) (string, error)
}

PodManager is an interface that allows to wait on certain pod statuses

type PodManagerConfig

type PodManagerConfig struct {
	Nodes                 []*corev1.Node
	DeletionSpec          *v1alpha1.PodDeletionSpec
	WaitForCompletionSpec *v1alpha1.WaitForCompletionSpec
	DrainEnabled          bool
}

PodManagerConfig represents the selector for pods and Node names to be considered for managing those pods

type PodManagerImpl

type PodManagerImpl struct {
	// contains filtered or unexported fields
}

PodManagerImpl implements PodManager interface and checks for pod states

func NewPodManager

func NewPodManager(
	k8sInterface kubernetes.Interface,
	nodeUpgradeStateProvider NodeUpgradeStateProvider,
	log logr.Logger,
	podDeletionFilter PodDeletionFilter,
	eventRecorder record.EventRecorder) *PodManagerImpl

NewPodManager returns an instance of PodManager implementation

func (*PodManagerImpl) GetDaemonsetControllerRevisionHash

func (m *PodManagerImpl) GetDaemonsetControllerRevisionHash(ctx context.Context,
	daemonset *appsv1.DaemonSet) (string, error)

GetDaemonsetControllerRevisionHash returns the latest DaemonSet Controller Revision Hash

func (*PodManagerImpl) GetPodControllerRevisionHash

func (m *PodManagerImpl) GetPodControllerRevisionHash(ctx context.Context, pod *corev1.Pod) (string, error)

GetPodControllerRevisionHash returns the Pod Controller Revision Hash from its labels. TODO: Drop ctx as it's not used

func (*PodManagerImpl) GetPodDeletionFilter

func (m *PodManagerImpl) GetPodDeletionFilter() PodDeletionFilter

GetPodDeletionFilter returns the PodDeletionFilter

func (*PodManagerImpl) HandleTimeoutOnPodCompletions

func (m *PodManagerImpl) HandleTimeoutOnPodCompletions(ctx context.Context, node *corev1.Node,
	timeoutSeconds int64) error

HandleTimeoutOnPodCompletions transitions node based on the timeout for job completions on the node

func (*PodManagerImpl) IsPodRunningOrPending

func (m *PodManagerImpl) IsPodRunningOrPending(pod corev1.Pod) bool

IsPodRunningOrPending returns true when the given pod is currently in Running or Pending state

func (*PodManagerImpl) ListPods

func (m *PodManagerImpl) ListPods(ctx context.Context, selector string, nodeName string) (*corev1.PodList, error)

ListPods returns the list of pods in all namespaces with the given selector

func (*PodManagerImpl) ScheduleCheckOnPodCompletion

func (m *PodManagerImpl) ScheduleCheckOnPodCompletion(ctx context.Context, config *PodManagerConfig) error

ScheduleCheckOnPodCompletion receives PodManagerConfig and schedules checks for pod statuses on each node in the list. If the checks are successful, the node moves to UpgradeStatePodDeletionRequired state, otherwise it will stay in the same current state.

func (*PodManagerImpl) SchedulePodEviction

func (m *PodManagerImpl) SchedulePodEviction(ctx context.Context, config *PodManagerConfig) error

SchedulePodEviction receives a config for pod eviction and deletes pods for each node in the list. The set of pods to delete is determined by a filter that is provided to the PodManagerImpl during construction.

func (*PodManagerImpl) SchedulePodsRestart

func (m *PodManagerImpl) SchedulePodsRestart(ctx context.Context, pods []*corev1.Pod) error

SchedulePodsRestart receives a list of pods and schedules them for deletion. TODO: schedule deletion of pods in parallel on all nodes

type SafeDriverLoadManager

type SafeDriverLoadManager interface {
	// IsWaitingForSafeDriverLoad checks if driver Pod on the node is waiting for a safe load
	IsWaitingForSafeDriverLoad(ctx context.Context, node *corev1.Node) (bool, error)
	// UnblockLoading unblocks driver loading on the node
	UnblockLoading(ctx context.Context, node *corev1.Node) error
}

SafeDriverLoadManager interface defines handlers to interact with drivers that are waiting for a safe load

type SafeDriverLoadManagerImpl

type SafeDriverLoadManagerImpl struct {
	// contains filtered or unexported fields
}

SafeDriverLoadManagerImpl is the default implementation of the SafeDriverLoadManager interface. Support for safe driver loading is implemented as a part of the upgrade flow. When UpgradeStateManager detects a node that is waiting for a safe driver load, it will unconditionally transfer it to the UpgradeStateUpgradeRequired state and wait for Cordon and Drain operations to complete according to the upgrade policy. When the Pod is eventually in the UpgradeStatePodRestartRequired state, the UpgradeStateManager will unblock the driver loading (by removing the safe driver load annotation) instead of restarting the Pod. The default implementation of the SafeDriverLoadManager interface assumes that the driver's safe load mechanism is implemented as a two-step procedure. As a first step, the driver pod should load the init container, which will set "safe driver load annotation" (defined in UpgradeWaitForSafeDriverLoadAnnotationKeyFmt) on the node object, then the container blocks until another entity removes the annotation from the node object. When the init container completes successfully (when the annotation was removed from the Node object), the driver Pod will proceed to the second step and do the driver loading. After that, the UpgradeStateManager will wait for the driver to become ready and then Uncordon the node if required.

func NewSafeDriverLoadManager

func NewSafeDriverLoadManager(
	nodeUpgradeStateProvider NodeUpgradeStateProvider, log logr.Logger) *SafeDriverLoadManagerImpl

NewSafeDriverLoadManager returns an instance of SafeDriverLoadManager implementation

func (*SafeDriverLoadManagerImpl) IsWaitingForSafeDriverLoad

func (s *SafeDriverLoadManagerImpl) IsWaitingForSafeDriverLoad(_ context.Context, node *corev1.Node) (bool, error)

IsWaitingForSafeDriverLoad checks if driver Pod on the node is waiting for a safe load. The check is implemented by checking that the "safe driver loading annotation" is set on the Node object

func (*SafeDriverLoadManagerImpl) UnblockLoading

func (s *SafeDriverLoadManagerImpl) UnblockLoading(ctx context.Context, node *corev1.Node) error

UnblockLoading unblocks driver loading on the node by removing the "safe driver loading annotation" from the Node object

type StringSet

type StringSet struct {
	// contains filtered or unexported fields
}

StringSet implements a thread safe Set of Strings

func NewStringSet

func NewStringSet() *StringSet

NewStringSet creates a StringSet

func (*StringSet) Add

func (s *StringSet) Add(item string)

Add item to set

func (*StringSet) Clear

func (s *StringSet) Clear()

Clear removes all items from the set

func (*StringSet) Has

func (s *StringSet) Has(item string) bool

Has reports whether item exists in the set

func (*StringSet) Remove

func (s *StringSet) Remove(item string)

Remove deletes the specified item from the set

type UnlockFunc

type UnlockFunc = func()

UnlockFunc is a function that releases a lock

type ValidationManager

type ValidationManager interface {
	Validate(ctx context.Context, node *corev1.Node) (bool, error)
}

ValidationManager is an interface for validating driver upgrades

type ValidationManagerImpl

type ValidationManagerImpl struct {
	// contains filtered or unexported fields
}

ValidationManagerImpl implements the ValidationManager interface and waits on a validation pod, identified via podSelector, to be Ready.

func NewValidationManager

func NewValidationManager(
	k8sInterface kubernetes.Interface,
	log logr.Logger,
	eventRecorder record.EventRecorder,
	nodeUpgradeStateProvider NodeUpgradeStateProvider,
	podSelector string) *ValidationManagerImpl

NewValidationManager returns an instance of ValidationManager implementation

func (*ValidationManagerImpl) Validate

func (m *ValidationManagerImpl) Validate(ctx context.Context, node *corev1.Node) (bool, error)

Validate checks if the validation pod(s), identified via podSelector, is Ready

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL