Documentation ¶
Index ¶
- Constants
- type ClusterUpgradeState
- type ClusterUpgradeStateManager
- func (m *ClusterUpgradeStateManager) ApplyState(ctx context.Context, currentState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManager) ProcessDoneOrUnknownNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManager) ProcessDrainFailedNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManager) ProcessDrainNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, ...) error
- func (m *ClusterUpgradeStateManager) ProcessPodRestartNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManager) ProcessUncordonRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error
- func (m *ClusterUpgradeStateManager) ProcessUpgradeRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState, limit int) error
- type DrainConfiguration
- type DrainManager
- type DrainManagerImpl
- type KeyedMutex
- type NodeUpgradeState
- type NodeUpgradeStateProvider
- type NodeUpgradeStateProviderImpl
- type PodDeleteManager
- type PodDeleteManagerImpl
- type StringSet
- type UncordonManager
- type UncordonManagerImpl
- type UnlockFunc
Constants ¶
const ( UpgradeStateAnnotation = "nvidia.com/ofed-upgrade-state" OfedDriverLabel = "nvidia.com/ofed-driver" OfedUpgradeSkipDrainLabel = "nvidia.com/ofed-upgrade.skip-drain" // UpgradeStateUnknown Node has this state when the upgrade flow is disabled or the node hasn't been processed yet UpgradeStateUnknown = "" // UpgradeStateDone is set when OFED POD is up to date and running on the node, the node is schedulable UpgradeStateDone = "upgrade-done" // UpgradeStateUpgradeRequired is set when OFED POD on the node is not up-to-date and requires an upgrade // No actions are performed at this stage UpgradeStateUpgradeRequired = "upgrade-required" // UpgradeStateDrain is set when the node is scheduled for drain. After the drain the state is changed // either to UpgradeStatePodRestart or UpgradeStateDrainFailed UpgradeStateDrain = "drain" // UpgradeStatePodRestart is set when the OFED POD on the node is scheduled for restart. // After the restart state is changed to UpgradeStateDone UpgradeStatePodRestart = "pod-restart" // UpgradeStateDrainFailed is set when drain on the node has failed. Manual interaction is required at this stage. UpgradeStateDrainFailed = "drain-failed" // UpgradeStateUncordonRequired is set when OFED POD on the node is up-to-date and has "Ready" status UpgradeStateUncordonRequired = "uncordon-required" )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ClusterUpgradeState ¶
type ClusterUpgradeState struct {
NodeStates map[string][]*NodeUpgradeState
}
ClusterUpgradeState contains a snapshot of the OFED upgrade state in the cluster. It contains OFED upgrade policy and mappings between nodes and their upgrade state. Nodes are grouped together with the driver POD running on them and the daemon set controlling this pod. This state is then used as an input for the ClusterUpgradeStateManager.
func NewClusterUpgradeState ¶
func NewClusterUpgradeState() ClusterUpgradeState
NewClusterUpgradeState creates an empty ClusterUpgradeState object
type ClusterUpgradeStateManager ¶
type ClusterUpgradeStateManager struct { K8sClient client.Client K8sInterface kubernetes.Interface Log logr.Logger DrainManager DrainManager PodDeleteManager PodDeleteManager UncordonManager UncordonManager NodeUpgradeStateProvider NodeUpgradeStateProvider }
ClusterUpgradeStateManager serves as a state machine for the ClusterUpgradeState. It processes each node and, based on its state, schedules the required jobs to move the node to the next state.
func NewClusterUpdateStateManager ¶
func NewClusterUpdateStateManager( drainManager DrainManager, podDeleteManager PodDeleteManager, uncordonManager UncordonManager, nodeUpgradeStateProvider NodeUpgradeStateProvider, log logr.Logger, k8sClient client.Client, k8sInterface kubernetes.Interface) *ClusterUpgradeStateManager
NewClusterUpdateStateManager creates a new instance of ClusterUpgradeStateManager
func (*ClusterUpgradeStateManager) ApplyState ¶
func (m *ClusterUpgradeStateManager) ApplyState(ctx context.Context, currentState *ClusterUpgradeState, upgradePolicy *v1alpha1.OfedUpgradePolicySpec) error
ApplyState receives a complete cluster upgrade state and, based on upgrade policy, processes each node's state. Based on the current state of the node, it is calculated if the node can be moved to the next state right now or whether any actions need to be scheduled for the node to move to the next state. The function is stateless and idempotent. If the error was returned before all nodes' states were processed, ApplyState would be called again and complete the processing - all the decisions are based on the input data.
func (*ClusterUpgradeStateManager) ProcessDoneOrUnknownNodes ¶
func (m *ClusterUpgradeStateManager) ProcessDoneOrUnknownNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, nodeStateName string) error
ProcessDoneOrUnknownNodes iterates over UpgradeStateDone or UpgradeStateUnknown nodes and determines whether each specific node should be in UpgradeStateUpgradeRequired or UpgradeStateDone state.
func (*ClusterUpgradeStateManager) ProcessDrainFailedNodes ¶
func (m *ClusterUpgradeStateManager) ProcessDrainFailedNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessDrainFailedNodes processes UpgradeStateDrainFailed nodes and checks whether the driver pod on the node has been successfully restarted. If the pod is in Ready state - moves the node to UpgradeStateUncordonRequired state.
func (*ClusterUpgradeStateManager) ProcessDrainNodes ¶
func (m *ClusterUpgradeStateManager) ProcessDrainNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, drainSpec *v1alpha1.DrainSpec) error
ProcessDrainNodes schedules UpgradeStateDrain nodes for drain. If drain is disabled by upgrade policy, moves the nodes straight to UpgradeStatePodRestart state.
func (*ClusterUpgradeStateManager) ProcessPodRestartNodes ¶
func (m *ClusterUpgradeStateManager) ProcessPodRestartNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessPodRestartNodes processes UpgradeStatePodRestart nodes and schedules driver pod restart for them. If the pod has already been restarted and is in Ready state - moves the node to UpgradeStateUncordonRequired state.
func (*ClusterUpgradeStateManager) ProcessUncordonRequiredNodes ¶
func (m *ClusterUpgradeStateManager) ProcessUncordonRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState) error
ProcessUncordonRequiredNodes processes UpgradeStateUncordonRequired nodes, uncordons them, and moves them to UpgradeStateDone state.
func (*ClusterUpgradeStateManager) ProcessUpgradeRequiredNodes ¶
func (m *ClusterUpgradeStateManager) ProcessUpgradeRequiredNodes( ctx context.Context, currentClusterState *ClusterUpgradeState, limit int) error
ProcessUpgradeRequiredNodes processes UpgradeStateUpgradeRequired nodes and moves them to UpgradeStateDrain until the limit on max parallel upgrades is reached.
type DrainConfiguration ¶
DrainConfiguration contains the drain specification and the list of nodes to schedule drain on
type DrainManager ¶
type DrainManager interface {
ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error
}
DrainManager is an interface that allows scheduling node drains based on DrainSpec.
type DrainManagerImpl ¶
type DrainManagerImpl struct {
// contains filtered or unexported fields
}
DrainManagerImpl implements DrainManager interface and can perform nodes drain based on received DrainConfiguration
func NewDrainManager ¶
func NewDrainManager( k8sInterface kubernetes.Interface, nodeUpgradeStateProvider NodeUpgradeStateProvider, log logr.Logger) *DrainManagerImpl
func (*DrainManagerImpl) ScheduleNodesDrain ¶
func (m *DrainManagerImpl) ScheduleNodesDrain(ctx context.Context, drainConfig *DrainConfiguration) error
ScheduleNodesDrain receives DrainConfiguration and schedules drain for each node in the list. When the node gets scheduled, it's marked as being drained and therefore will not be scheduled for drain twice if the initial drain hasn't completed yet. During the drain the node is cordoned first, and then pods on the node are evicted. If the drain is successful, the node moves to UpgradeStatePodRestart state, otherwise it moves to UpgradeStateDrainFailed state.
type KeyedMutex ¶
type KeyedMutex struct {
// contains filtered or unexported fields
}
KeyedMutex is a struct that provides per-key synchronized access.
func (*KeyedMutex) Lock ¶
func (m *KeyedMutex) Lock(key string) UnlockFunc
Lock locks the mutex associated with the given key and returns an unlock function.
type NodeUpgradeState ¶
NodeUpgradeState contains a mapping between a node, the driver POD running on it, and the daemon set controlling this pod.
type NodeUpgradeStateProvider ¶
type NodeUpgradeStateProvider interface { GetNode(ctx context.Context, nodeName string) (*v1.Node, error) ChangeNodeUpgradeState(ctx context.Context, node *v1.Node, newNodeState string) error }
NodeUpgradeStateProvider allows for synchronized operations on node objects and ensures that a node obtained from the provider always has the up-to-date upgrade state.
func NewNodeUpgradeStateProvider ¶
func NewNodeUpgradeStateProvider(k8sClient client.Client, log logr.Logger) NodeUpgradeStateProvider
type NodeUpgradeStateProviderImpl ¶
type NodeUpgradeStateProviderImpl struct { K8sClient client.Client Log logr.Logger // contains filtered or unexported fields }
func (*NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState ¶
func (p *NodeUpgradeStateProviderImpl) ChangeNodeUpgradeState( ctx context.Context, node *v1.Node, newNodeState string) error
ChangeNodeUpgradeState patches a given v1.Node object and updates its UpgradeStateAnnotation with a given value. The function then waits for the operator cache to get updated.
type PodDeleteManager ¶
PodDeleteManager is an interface that allows scheduling driver pod restarts.
type PodDeleteManagerImpl ¶
PodDeleteManagerImpl implements PodDeleteManager interface and can restart pods by deleting them
func NewPodDeleteManager ¶
func NewPodDeleteManager(k8sClient client.Client, log logr.Logger) *PodDeleteManagerImpl
func (*PodDeleteManagerImpl) SchedulePodsRestart ¶
SchedulePodsRestart receives a list of pods and schedules them for deletion.
type StringSet ¶
type StringSet struct {
// contains filtered or unexported fields
}
func NewStringSet ¶
func NewStringSet() *StringSet
type UncordonManager ¶
type UncordonManager interface {
CordonOrUncordonNode(ctx context.Context, node *corev1.Node, desired bool) error
}
UncordonManager is an interface that allows uncordoning nodes.
type UncordonManagerImpl ¶
type UncordonManagerImpl struct {
// contains filtered or unexported fields
}
func NewUncordonManager ¶
func NewUncordonManager(k8sInterface kubernetes.Interface, log logr.Logger) *UncordonManagerImpl
func (*UncordonManagerImpl) CordonOrUncordonNode ¶
type UnlockFunc ¶
type UnlockFunc = func()