Documentation ¶
Index ¶
- Constants
- type BaseGangReconciler
- type GangSchedulingInterface
- type JobInterface
- type JobReconciler
- func (r *JobReconciler) CleanupJob(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error
- func (r *JobReconciler) CleanupResources(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error
- func (r *JobReconciler) DeleteJob(job client.Object) error
- func (r *JobReconciler) ExtractJobStatus(job client.Object) (*commonv1.JobStatus, error)
- func (r *JobReconciler) ExtractReplicasSpec(job client.Object) (map[commonv1.ReplicaType]*commonv1.ReplicaSpec, error)
- func (r *JobReconciler) ExtractRunPolicy(job client.Object) (*commonv1.RunPolicy, error)
- func (r *JobReconciler) GenLabels(jobName string) map[string]string
- func (r *JobReconciler) GetGroupNameLabelValue() string
- func (r *JobReconciler) GetJob(ctx context.Context, req ctrl.Request) (client.Object, error)
- func (r *JobReconciler) IsFlagReplicaTypeForJobStatus(rtype string) bool
- func (r *JobReconciler) IsJobFailed(status commonv1.JobStatus) bool
- func (r *JobReconciler) IsJobSucceeded(status commonv1.JobStatus) bool
- func (r *JobReconciler) IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, ...) bool
- func (r *JobReconciler) OverrideForJobInterface(ui ReconcilerUtilInterface, pi PodInterface, si ServiceInterface, ...)
- func (r *JobReconciler) PastActiveDeadline(runPolicy *commonv1.RunPolicy, jobStatus *commonv1.JobStatus) bool
- func (r *JobReconciler) PastBackoffLimit(jobName string, runPolicy *commonv1.RunPolicy, ...) (bool, error)
- func (r *JobReconciler) ReconcileJob(ctx context.Context, job client.Object, ...) error
- func (r *JobReconciler) RecordAbnormalPods(activePods []*corev1.Pod, object client.Object)
- func (r *JobReconciler) SetStatusForSuccessJob(status *commonv1.JobStatus)
- func (r *JobReconciler) ShouldCleanUp(status commonv1.JobStatus) bool
- func (r *JobReconciler) UpdateJobStatus(job client.Object, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, ...) error
- func (r *JobReconciler) UpdateJobStatusInAPIServer(ctx context.Context, job client.Object) error
- type PodInterface
- type PodReconciler
- func (r *PodReconciler) CreateNewPod(job client.Object, rt string, index string, spec *commonv1.ReplicaSpec, ...) error
- func (r *PodReconciler) DecoratePod(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)
- func (r *PodReconciler) DeletePod(ctx context.Context, ns string, name string) error
- func (r *PodReconciler) FilterPodsForReplicaType(pods []*corev1.Pod, replicaType string) ([]*corev1.Pod, error)
- func (r *PodReconciler) GenPodName(jobName string, rtype string, index string) string
- func (r *PodReconciler) GetDefaultContainerName() string
- func (r *PodReconciler) GetPodSlices(pods []*corev1.Pod, replicas int, logger *log.Entry) [][]*corev1.Pod
- func (r *PodReconciler) GetPodsForJob(ctx context.Context, job client.Object) ([]*corev1.Pod, error)
- func (r *PodReconciler) OverrideForPodInterface(ui ReconcilerUtilInterface, gi GangSchedulingInterface, ji JobInterface)
- func (r *PodReconciler) ReconcilePods(ctx context.Context, job client.Object, jobStatus *commonv1.JobStatus, ...) error
- type ReconcilerUtil
- type ReconcilerUtilInterface
- type SchedulerFrameworkReconciler
- func (r *SchedulerFrameworkReconciler) DecoratePodForGangScheduling(_ string, podTemplate *corev1.PodTemplateSpec, job client.Object)
- func (r *SchedulerFrameworkReconciler) DeletePodGroup(ctx context.Context, job client.Object) error
- func (r *SchedulerFrameworkReconciler) GangSchedulingEnabled() bool
- func (r *SchedulerFrameworkReconciler) GetGangSchedulerName() string
- func (r *SchedulerFrameworkReconciler) GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error)
- func (r *SchedulerFrameworkReconciler) GetPodGroupName(job client.Object) string
- func (r *SchedulerFrameworkReconciler) OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface)
- func (r *SchedulerFrameworkReconciler) ReconcilePodGroup(ctx context.Context, job client.Object, runPolicy *commonv1.RunPolicy, ...) error
- type ServiceInterface
- type ServiceReconciler
- func (r *ServiceReconciler) CreateNewService(job client.Object, rtype commonv1.ReplicaType, spec *commonv1.ReplicaSpec, ...) error
- func (r *ServiceReconciler) DecorateService(rtype string, svc *corev1.Service, job client.Object)
- func (r *ServiceReconciler) DeleteService(ns string, name string, job client.Object) error
- func (r *ServiceReconciler) FilterServicesForReplicaType(services []*corev1.Service, replicaType string) ([]*corev1.Service, error)
- func (r *ServiceReconciler) GetPortsFromJob(spec *commonv1.ReplicaSpec) (map[string]int32, error)
- func (r *ServiceReconciler) GetServiceSlices(services []*corev1.Service, replicas int, logger *log.Entry) [][]*corev1.Service
- func (r *ServiceReconciler) GetServicesForJob(ctx context.Context, job client.Object) ([]*corev1.Service, error)
- func (r *ServiceReconciler) OverrideForServiceInterface(ui ReconcilerUtilInterface, pi PodInterface, ji JobInterface)
- func (r *ServiceReconciler) ReconcileServices(job client.Object, services []*corev1.Service, rtype commonv1.ReplicaType, ...) error
- type VolcanoReconciler
- func (r *VolcanoReconciler) DecoratePodForGangScheduling(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)
- func (r *VolcanoReconciler) DeletePodGroup(ctx context.Context, job client.Object) error
- func (r *VolcanoReconciler) GangSchedulingEnabled() bool
- func (r *VolcanoReconciler) GetGangSchedulerName() string
- func (r *VolcanoReconciler) GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error)
- func (r *VolcanoReconciler) GetPodGroupName(job client.Object) string
- func (r *VolcanoReconciler) OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface)
- func (r *VolcanoReconciler) ReconcilePodGroup(ctx context.Context, job client.Object, runPolicy *commonv1.RunPolicy, ...) error
Constants ¶
const ( GroupName = "kubeflow.org" ReasonKey = "reason" ReasonJobDeleted = "job deleted" MsgReconcileCancelled = "Reconcile Cancelled" MsgReconcileStart = "Reconcile Starts" MsgGetPodsFailed = "Get Pods Failed" MsgGetServicesFailed = "Get Services Failed" MsgBackoffLimitReachedTemplate = "Job %s has failed because it has reached the specified backoff limit" MsgActiveDeadlineReachedTemplate = "Job %s has failed because it was active longer than specified deadline" ErrUpdateJobConditionsFailed = "failed to update job conditions" ErrUpdateJobErrorTemplate = "UpdateJobStatus error %v" ErrAppendJobConditionTemplate = "Append job condition error %v" ErrReconcilePodsTemplate = "ReconcilePods error %v" ErrReconcileServicesTemplate = "ReconcileServices error %v" ErrReconcileGangTemplate = "ReconcilePodGroups error %v" ErrGetReplicasStatusFromStatusFailedTemplate = "failed to get ReplicasStatus for %s from status" WarnDefaultImplementationTemplate = "Warning: executing default implementation for JobReconciler.%s" WarnNotCountedInBackoffLimit = "The restart policy of replica %v of the job %v is not OnFailure or Always. Not counted in backoff limit." )
const DefaultContainerName = "kubeflow"
DefaultContainerName defines the default name for container in Pod
const ReconcilerName = "common-reconciler"
const (
// VolcanoPodGroupAnnotation defines which PodGroup is linked to this Pod in annotation
VolcanoPodGroupAnnotation = "scheduling.k8s.io/group-name"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BaseGangReconciler ¶
type BaseGangReconciler struct {
Enabled bool
}
BaseGangReconciler defines a basic gang reconciler
func (*BaseGangReconciler) GangSchedulingEnabled ¶
func (r *BaseGangReconciler) GangSchedulingEnabled() bool
GangSchedulingEnabled returns if gang-scheduling is enabled for all jobs
func (*BaseGangReconciler) GetPodGroupName ¶
func (r *BaseGangReconciler) GetPodGroupName(job client.Object) string
GetPodGroupName returns the name of PodGroup for this job
type GangSchedulingInterface ¶
type GangSchedulingInterface interface { // OverrideForGangSchedulingInterface MUST NOT be overridden as it resets ReconcilerUtilInterface OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface) // GangSchedulingEnabled CAN be overridden if definition of gang-scheduling enabling changes. GangSchedulingEnabled() bool // GetGangSchedulerName CAN be overridden to customize the name of gang scheduler. This name will be used to check // the value of podTemplateSpec.Spec.SchedulerName. For volcano, it is "volcano". GetGangSchedulerName() string // GetPodGroupName CAN be overridden to customize the name of PodGroup generated for the job. For example: // podGroupName := fmt.Sprintf("%s-podgroup", job.GetName()) or podGroupName := job.GetName() GetPodGroupName(job client.Object) string // GetPodGroupForJob SHOULD be overridden if Group, APIVersion or Kind changes for PodGroup. The PodGroup is // defined in different gang-scheduler as: // Kube-Batch: "scheduling.incubator.k8s.io/v1alpha1/PodGroup", "scheduling.sigs.dev/v1alpha2/PodGroup" // Volcano: "scheduling.volcano.sh/v1beta1/PodGroup" // Scheduler-Framework: "scheduling.sigs.k8s.io/v1alpha1/PodGroup" GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error) // DeletePodGroup SHOULD be overridden if Group, APIVersion and Kind changes for PodGroup. DeletePodGroup(ctx context.Context, job client.Object) error // ReconcilePodGroup CAN be overridden if the logic to reconcile PodGroup changes. ReconcilePodGroup(ctx context.Context, job client.Object, runPolicy *commonv1.RunPolicy, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error // DecoratePodForGangScheduling SHOULD be overridden if gang scheduler demands Pods associated with PodGroup to be // decorated with specific requests. DecoratePodForGangScheduling(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object) }
GangSchedulingInterface defines the abstract interface for gang-scheduling related actions, such as getting, creating or deleting a PodGroup
type JobInterface ¶
type JobInterface interface { // OverrideForJobInterface MUST NOT be overridden as it reset ReconcilerUtilInterface, PodInterface, ServiceInterface, JobInterface OverrideForJobInterface(ui ReconcilerUtilInterface, pi PodInterface, si ServiceInterface, gi GangSchedulingInterface) // GenLabels CAN be overridden to customize generic label generated for Pods and Services GenLabels(jobName string) map[string]string // GetGroupNameLabelValue CAN be overridden to customize value used in labels regarding Group of job processed. GetGroupNameLabelValue() string // GetJob MUST be overridden to get jobs with specified kind GetJob(ctx context.Context, req ctrl.Request) (client.Object, error) // ExtractReplicasSpec MUST be overridden to extract ReplicasSpec from a job ExtractReplicasSpec(job client.Object) (map[commonv1.ReplicaType]*commonv1.ReplicaSpec, error) // ExtractRunPolicy MUST be overridden to extract the pointer of RunPolicy from a job ExtractRunPolicy(job client.Object) (*commonv1.RunPolicy, error) // ExtractJobStatus MUST be overridden to extract the pointer of JobStatus from a job ExtractJobStatus(job client.Object) (*commonv1.JobStatus, error) // IsMasterRole MUST be overridden to determine whether this ReplicaType with index specified is a master role. // MasterRole pod will have "job-role=master" set in its label IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, rtype commonv1.ReplicaType, index int) bool // ReconcileJob CAN be overridden to customize how to reconcile a job. ReconcileJob( ctx context.Context, job client.Object, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, status *commonv1.JobStatus, runPolicy *commonv1.RunPolicy) error // DeleteJob CAN be overridden to customize how to delete a job. DeleteJob(job client.Object) error // UpdateJobStatus CAN be overridden to customize how to update job status without submitting to APIServer. 
UpdateJobStatus( job client.Object, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, jobStatus *commonv1.JobStatus) error // UpdateJobStatusInAPIServer CAN be overridden to customize how to update job status directly to APIServer. UpdateJobStatusInAPIServer(ctx context.Context, job client.Object) error // CleanupResources CAN be overridden to customize how to delete all resources associated with this job. CleanupResources(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error // CleanupJob CAN be overridden to customize how to clean up this job. CleanupJob(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error // RecordAbnormalPods CAN be overridden to customize how to record abnormal pods RecordAbnormalPods(activePods []*corev1.Pod, object client.Object) // SetStatusForSuccessJob CAN be overridden to customize how to set status for success job SetStatusForSuccessJob(status *commonv1.JobStatus) // IsFlagReplicaTypeForJobStatus CAN be overridden to customize how to determine if this ReplicaType is the // flag ReplicaType for the status of this kind of job IsFlagReplicaTypeForJobStatus(rtype string) bool // IsJobSucceeded CAN be overridden to customize how to determine if this job is succeeded. IsJobSucceeded(status commonv1.JobStatus) bool // IsJobFailed CAN be overridden to customize how to determine if this job is failed. IsJobFailed(status commonv1.JobStatus) bool // ShouldCleanUp CAN be overridden to customize how to determine if this job should be cleaned up. ShouldCleanUp(status commonv1.JobStatus) bool // PastBackoffLimit CAN be overridden to customize how to determine if this job has past backoff limit. PastBackoffLimit(jobName string, runPolicy *commonv1.RunPolicy, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, pods []*corev1.Pod) (bool, error) // PastActiveDeadline CAN be overridden to customize how to determine if this job has past activate deadline. 
PastActiveDeadline(runPolicy *commonv1.RunPolicy, jobStatus *commonv1.JobStatus) bool }
JobInterface defines the abstract interface for Job related actions, such as getting, creating or deleting a TFJob, PyTorchJob or KFJob, etc.
type JobReconciler ¶
type JobReconciler struct { client.Client ReconcilerUtilInterface PodInterface ServiceInterface GangSchedulingInterface // contains filtered or unexported fields }
JobReconciler defines a Reconciler dealing with generic training job
func BareJobReconciler ¶
func BareJobReconciler(client client.Client) *JobReconciler
BareJobReconciler returns the pointer of a JobReconciler with minimal implementation
func (*JobReconciler) CleanupJob ¶
func (r *JobReconciler) CleanupJob(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error
CleanupJob cleans up all resources associated with this generic training job as well as the job itself
func (*JobReconciler) CleanupResources ¶
func (r *JobReconciler) CleanupResources(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error
CleanupResources cleans up all resources associated with this generic training job
func (*JobReconciler) DeleteJob ¶
func (r *JobReconciler) DeleteJob(job client.Object) error
DeleteJob deletes this generic training job
func (*JobReconciler) ExtractJobStatus ¶
func (*JobReconciler) ExtractReplicasSpec ¶
func (r *JobReconciler) ExtractReplicasSpec(job client.Object) (map[commonv1.ReplicaType]*commonv1.ReplicaSpec, error)
func (*JobReconciler) ExtractRunPolicy ¶
func (*JobReconciler) GenLabels ¶
func (r *JobReconciler) GenLabels(jobName string) map[string]string
GenLabels returns labels used for this job (based on the name of this generic training job)
func (*JobReconciler) GetGroupNameLabelValue ¶
func (r *JobReconciler) GetGroupNameLabelValue() string
GetGroupNameLabelValue returns the Group Name for the generic training job, which is "kubeflow.org"
func (*JobReconciler) IsFlagReplicaTypeForJobStatus ¶
func (r *JobReconciler) IsFlagReplicaTypeForJobStatus(rtype string) bool
IsFlagReplicaTypeForJobStatus checks if this replicaType is the flag replicaType for the status of generic training job
func (*JobReconciler) IsJobFailed ¶
func (r *JobReconciler) IsJobFailed(status commonv1.JobStatus) bool
IsJobFailed checks if this generic training job failed
func (*JobReconciler) IsJobSucceeded ¶
func (r *JobReconciler) IsJobSucceeded(status commonv1.JobStatus) bool
IsJobSucceeded checks if this generic training job succeeded
func (*JobReconciler) IsMasterRole ¶
func (r *JobReconciler) IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, rtype commonv1.ReplicaType, index int) bool
func (*JobReconciler) OverrideForJobInterface ¶
func (r *JobReconciler) OverrideForJobInterface(ui ReconcilerUtilInterface, pi PodInterface, si ServiceInterface, gi GangSchedulingInterface)
OverrideForJobInterface resets ReconcilerUtilInterface, PodInterface, ServiceInterface, GangSchedulingInterface used in JobReconciler
func (*JobReconciler) PastActiveDeadline ¶
func (r *JobReconciler) PastActiveDeadline(runPolicy *commonv1.RunPolicy, jobStatus *commonv1.JobStatus) bool
PastActiveDeadline checks if this generic training job has ActiveDeadlineSeconds field set and if it is exceeded.
func (*JobReconciler) PastBackoffLimit ¶
func (r *JobReconciler) PastBackoffLimit(jobName string, runPolicy *commonv1.RunPolicy, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, pods []*corev1.Pod) (bool, error)
PastBackoffLimit checks if this generic training job has passed its backoff limit
func (*JobReconciler) ReconcileJob ¶
func (r *JobReconciler) ReconcileJob( ctx context.Context, job client.Object, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, status *commonv1.JobStatus, runPolicy *commonv1.RunPolicy) error
ReconcileJob reconciles generic training job
func (*JobReconciler) RecordAbnormalPods ¶
func (r *JobReconciler) RecordAbnormalPods(activePods []*corev1.Pod, object client.Object)
RecordAbnormalPods records abnormal pods during the reconciliation of jobs
func (*JobReconciler) SetStatusForSuccessJob ¶
func (r *JobReconciler) SetStatusForSuccessJob(status *commonv1.JobStatus)
SetStatusForSuccessJob sets the status for a job that succeeded
func (*JobReconciler) ShouldCleanUp ¶
func (r *JobReconciler) ShouldCleanUp(status commonv1.JobStatus) bool
ShouldCleanUp checks if resources associated with this generic training job should be cleaned up
func (*JobReconciler) UpdateJobStatus ¶
func (r *JobReconciler) UpdateJobStatus( job client.Object, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, jobStatus *commonv1.JobStatus) error
UpdateJobStatus updates the status of this generic training job WITHOUT pushing the updated status to the APIServer
func (*JobReconciler) UpdateJobStatusInAPIServer ¶
UpdateJobStatusInAPIServer updates the status of this generic training job in APIServer
type PodInterface ¶
type PodInterface interface { // OverrideForPodInterface MUST NOT be overridden as it reset ReconcilerUtilInterface, GangSchedulingInterface, JobInterface OverrideForPodInterface(ui ReconcilerUtilInterface, gi GangSchedulingInterface, ji JobInterface) // GetDefaultContainerName CAN be overridden if the default container name is not "kubeflow". GetDefaultContainerName() string // GenPodName CAN be overridden to customize Pod name. GenPodName(jobName string, rtype string, index string) string // GetPodsForJob CAN be overridden to customize how to list all pods with the job. GetPodsForJob(ctx context.Context, job client.Object) ([]*corev1.Pod, error) // FilterPodsForReplicaType CAN be overridden if the linking approach between pods and replicaType changes as this // function filters out pods for specific replica type from all pods associated with the job. FilterPodsForReplicaType(pods []*corev1.Pod, replicaType string) ([]*corev1.Pod, error) // GetPodSlices SHOULD NOT be overridden as it generates pod slices for further pod processing. GetPodSlices(pods []*corev1.Pod, replicas int, logger *logrus.Entry) [][]*corev1.Pod // ReconcilePods CAN be overridden if the logic to reconcile all Pods for the job changes. ReconcilePods( ctx context.Context, job client.Object, jobStatus *commonv1.JobStatus, pods []*corev1.Pod, rtype commonv1.ReplicaType, spec *commonv1.ReplicaSpec, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error // CreateNewPod CAN be overridden to customize how to create a new pod. CreateNewPod(job client.Object, rt string, index string, spec *commonv1.ReplicaSpec, masterRole bool, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error // DeletePod CAN be overridden to customize how to delete a pod of {name} in namespace {ns}. DeletePod(ctx context.Context, ns string, name string) error // DecoratePod CAN be overridden if customization to the pod is needed. The default implementation applies nothing // to the pod. 
DecoratePod(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object) }
PodInterface defines the abstract interface for Pod related actions, such as getting, creating or deleting a Pod
type PodReconciler ¶
type PodReconciler struct { client.Client ReconcilerUtilInterface GangSchedulingInterface JobInterface }
PodReconciler defines a Pod Reconciler for generic training job
func BarePodReconciler ¶
func BarePodReconciler(client client.Client) *PodReconciler
BarePodReconciler returns a pointer of PodReconciler with minimal implementation
func (*PodReconciler) CreateNewPod ¶
func (r *PodReconciler) CreateNewPod(job client.Object, rt string, index string, spec *commonv1.ReplicaSpec, masterRole bool, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error
CreateNewPod generates a Pod for this job and submits the creation request to APIServer
func (*PodReconciler) DecoratePod ¶
func (r *PodReconciler) DecoratePod(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)
DecoratePod decorates podTemplate before a Pod is submitted to the APIServer
func (*PodReconciler) FilterPodsForReplicaType ¶
func (r *PodReconciler) FilterPodsForReplicaType(pods []*corev1.Pod, replicaType string) ([]*corev1.Pod, error)
FilterPodsForReplicaType filters out Pods for this replicaType
func (*PodReconciler) GenPodName ¶
func (r *PodReconciler) GenPodName(jobName string, rtype string, index string) string
GenPodName returns the name of the Pod based on jobName, replicaType and its index
func (*PodReconciler) GetDefaultContainerName ¶
func (r *PodReconciler) GetDefaultContainerName() string
GetDefaultContainerName returns the default name of the container
func (*PodReconciler) GetPodSlices ¶
func (r *PodReconciler) GetPodSlices(pods []*corev1.Pod, replicas int, logger *log.Entry) [][]*corev1.Pod
GetPodSlices generates podSlice from all Pods listed for this job
func (*PodReconciler) GetPodsForJob ¶
func (r *PodReconciler) GetPodsForJob(ctx context.Context, job client.Object) ([]*corev1.Pod, error)
GetPodsForJob returns all Pods associated with this job
func (*PodReconciler) OverrideForPodInterface ¶
func (r *PodReconciler) OverrideForPodInterface(ui ReconcilerUtilInterface, gi GangSchedulingInterface, ji JobInterface)
OverrideForPodInterface resets ReconcilerUtilInterface, GangSchedulingInterface, JobInterface for PodReconciler
func (*PodReconciler) ReconcilePods ¶
func (r *PodReconciler) ReconcilePods( ctx context.Context, job client.Object, jobStatus *commonv1.JobStatus, pods []*corev1.Pod, rType commonv1.ReplicaType, spec *commonv1.ReplicaSpec, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error
ReconcilePods reconciles Pods for this job
type ReconcilerUtil ¶
ReconcilerUtil defines a reconciler with utility features
func BareUtilReconciler ¶
func BareUtilReconciler( recorder record.EventRecorder, log logr.Logger, scheme *runtime.Scheme) *ReconcilerUtil
BareUtilReconciler returns a pointer of ReconcilerUtil with default implementation
func (*ReconcilerUtil) GetLogger ¶
func (r *ReconcilerUtil) GetLogger(job client.Object) logr.Logger
GetLogger returns a logr.Logger
func (*ReconcilerUtil) GetReconcilerName ¶
func (r *ReconcilerUtil) GetReconcilerName() string
GetReconcilerName returns the name of this reconciler, which is "common-reconciler"
func (*ReconcilerUtil) GetRecorder ¶
func (r *ReconcilerUtil) GetRecorder() record.EventRecorder
GetRecorder returns a record.EventRecorder
func (*ReconcilerUtil) GetScheme ¶
func (r *ReconcilerUtil) GetScheme() *runtime.Scheme
GetScheme returns the pointer of the runtime.Scheme that is used in this reconciler
type ReconcilerUtilInterface ¶
type ReconcilerUtilInterface interface { // GetReconcilerName SHOULD be overridden if a new Reconciler is defined. The default implementation returns // "common-reconciler" GetReconcilerName() string // GetRecorder CAN be overridden to customize EventRecorder GetRecorder() record.EventRecorder // GetLogger CAN be overridden to customize logger GetLogger(job client.Object) logr.Logger // GetScheme CAN be overridden to customize runtime scheme GetScheme() *runtime.Scheme }
ReconcilerUtilInterface defines the abstract interface of the reconciler for utility features, such as getting the event recorder or logger
type SchedulerFrameworkReconciler ¶
type SchedulerFrameworkReconciler struct { BaseGangReconciler ReconcilerUtilInterface client.Client SchedulerName string }
SchedulerFrameworkReconciler defines a gang-scheduling reconciler for Kubernetes Scheduler Framework
func BareSchedulerFrameworkReconciler ¶
func BareSchedulerFrameworkReconciler(client client.Client, bgReconciler *BaseGangReconciler, enabled bool) *SchedulerFrameworkReconciler
func (*SchedulerFrameworkReconciler) DecoratePodForGangScheduling ¶
func (r *SchedulerFrameworkReconciler) DecoratePodForGangScheduling( _ string, podTemplate *corev1.PodTemplateSpec, job client.Object, )
DecoratePodForGangScheduling decorates the podTemplate with information for gang-scheduling before it's used to generate a pod
func (*SchedulerFrameworkReconciler) DeletePodGroup ¶
DeletePodGroup deletes the PodGroup associated with this job
func (*SchedulerFrameworkReconciler) GangSchedulingEnabled ¶
func (r *SchedulerFrameworkReconciler) GangSchedulingEnabled() bool
GangSchedulingEnabled returns if gang-scheduling is enabled for all jobs
func (*SchedulerFrameworkReconciler) GetGangSchedulerName ¶
func (r *SchedulerFrameworkReconciler) GetGangSchedulerName() string
GetGangSchedulerName returns the name of the gang scheduler that will be used.
func (*SchedulerFrameworkReconciler) GetPodGroupForJob ¶
func (r *SchedulerFrameworkReconciler) GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error)
GetPodGroupForJob returns the PodGroup associated with this job
func (*SchedulerFrameworkReconciler) GetPodGroupName ¶
func (r *SchedulerFrameworkReconciler) GetPodGroupName(job client.Object) string
GetPodGroupName returns the name of PodGroup for this job
func (*SchedulerFrameworkReconciler) OverrideForGangSchedulingInterface ¶
func (r *SchedulerFrameworkReconciler) OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface)
func (*SchedulerFrameworkReconciler) ReconcilePodGroup ¶
func (r *SchedulerFrameworkReconciler) ReconcilePodGroup( ctx context.Context, job client.Object, runPolicy *commonv1.RunPolicy, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, ) error
ReconcilePodGroup reconciles the PodGroup resource for this job
type ServiceInterface ¶
type ServiceInterface interface { // OverrideForServiceInterface MUST NOT be overridden as it reset ReconcilerUtilInterface, PodInterface, JobInterface OverrideForServiceInterface(ui ReconcilerUtilInterface, pi PodInterface, ji JobInterface) // GetPortsFromJob CAN be overridden to customize how to find ports defined in the ReplicasSpec. GetPortsFromJob(spec *commonv1.ReplicaSpec) (map[string]int32, error) // GetServicesForJob CAN be overridden to customize how to find all services associated with this job. GetServicesForJob(ctx context.Context, job client.Object) ([]*corev1.Service, error) // FilterServicesForReplicaType CAN be overridden to customize how to filter out services for this Replica Type. FilterServicesForReplicaType(services []*corev1.Service, replicaType string) ([]*corev1.Service, error) // GetServiceSlices CAN be overridden to customize how to generate service slices. GetServiceSlices(services []*corev1.Service, replicas int, logger *logrus.Entry) [][]*corev1.Service // ReconcileServices CAN be overridden to customize how to reconcile services for this job. ReconcileServices( job client.Object, services []*corev1.Service, rtype commonv1.ReplicaType, spec *commonv1.ReplicaSpec) error // CreateNewService CAN be overridden to customize how to create a new service. CreateNewService(job client.Object, rtype commonv1.ReplicaType, spec *commonv1.ReplicaSpec, index string) error // DeleteService CAN be overridden to customize how to delete the service of {name} in namespace {ns}. DeleteService(ns string, name string, job client.Object) error // DecorateService CAN be overridden to customize this service right before being created DecorateService(rtype string, svc *corev1.Service, job client.Object) }
ServiceInterface defines the abstract interface for Service related actions, such as getting, creating or deleting a Service
type ServiceReconciler ¶
type ServiceReconciler struct { client.Client ReconcilerUtilInterface PodInterface JobInterface }
ServiceReconciler defines a Service Reconciler for generic training job
func BareServiceReconciler ¶
func BareServiceReconciler(client client.Client) *ServiceReconciler
BareServiceReconciler returns a pointer of ServiceReconciler with minimal implementation
func (*ServiceReconciler) CreateNewService ¶
func (r *ServiceReconciler) CreateNewService(job client.Object, rtype commonv1.ReplicaType, spec *commonv1.ReplicaSpec, index string) error
CreateNewService generates a Service based on the job, replica info and index, and submits it to APIServer
func (*ServiceReconciler) DecorateService ¶
DecorateService decorates the Service before it's submitted to APIServer
func (*ServiceReconciler) DeleteService ¶
DeleteService deletes a Service specified by its name and namespace from APIServer
func (*ServiceReconciler) FilterServicesForReplicaType ¶
func (r *ServiceReconciler) FilterServicesForReplicaType(services []*corev1.Service, replicaType string) ([]*corev1.Service, error)
FilterServicesForReplicaType returns the services belonging to a replicaType.
func (*ServiceReconciler) GetPortsFromJob ¶
func (r *ServiceReconciler) GetPortsFromJob(spec *commonv1.ReplicaSpec) (map[string]int32, error)
GetPortsFromJob gets the ports of the job container. The ports could be nil if the distributed communication strategy doesn't need any and no other ports need to be exposed.
func (*ServiceReconciler) GetServiceSlices ¶
func (r *ServiceReconciler) GetServiceSlices(services []*corev1.Service, replicas int, logger *log.Entry) [][]*corev1.Service
GetServiceSlices returns the serviceSlice based on all Services listed for this job
func (*ServiceReconciler) GetServicesForJob ¶
func (r *ServiceReconciler) GetServicesForJob(ctx context.Context, job client.Object) ([]*corev1.Service, error)
GetServicesForJob returns all services associated with this job
func (*ServiceReconciler) OverrideForServiceInterface ¶
func (r *ServiceReconciler) OverrideForServiceInterface(ui ReconcilerUtilInterface, pi PodInterface, ji JobInterface)
OverrideForServiceInterface resets ReconcilerUtilInterface, PodInterface, JobInterface for ServiceReconciler
func (*ServiceReconciler) ReconcileServices ¶
func (r *ServiceReconciler) ReconcileServices( job client.Object, services []*corev1.Service, rtype commonv1.ReplicaType, spec *commonv1.ReplicaSpec) error
ReconcileServices reconciles the Services for this job
type VolcanoReconciler ¶
type VolcanoReconciler struct { BaseGangReconciler ReconcilerUtilInterface client.Client }
VolcanoReconciler defines a gang-scheduling reconciler for volcano.sh/volcano
func BareVolcanoReconciler ¶
func BareVolcanoReconciler(client client.Client, bgReconciler *BaseGangReconciler, enabled bool) *VolcanoReconciler
BareVolcanoReconciler returns a VolcanoReconciler pointer with minimal components defined
func (*VolcanoReconciler) DecoratePodForGangScheduling ¶
func (r *VolcanoReconciler) DecoratePodForGangScheduling(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)
DecoratePodForGangScheduling decorates the podTemplate with information for gang-scheduling before it's used to generate a pod
func (*VolcanoReconciler) DeletePodGroup ¶
DeletePodGroup deletes the PodGroup associated with this job
func (*VolcanoReconciler) GangSchedulingEnabled ¶
func (r *VolcanoReconciler) GangSchedulingEnabled() bool
GangSchedulingEnabled returns if gang-scheduling is enabled for all jobs
func (*VolcanoReconciler) GetGangSchedulerName ¶
func (r *VolcanoReconciler) GetGangSchedulerName() string
GetGangSchedulerName returns the name of the gang scheduler that will be used, which is "volcano" for VolcanoReconciler
func (*VolcanoReconciler) GetPodGroupForJob ¶
func (r *VolcanoReconciler) GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error)
GetPodGroupForJob returns the PodGroup associated with this job
func (*VolcanoReconciler) GetPodGroupName ¶
func (r *VolcanoReconciler) GetPodGroupName(job client.Object) string
GetPodGroupName returns the name of PodGroup for this job
func (*VolcanoReconciler) OverrideForGangSchedulingInterface ¶
func (r *VolcanoReconciler) OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface)
OverrideForGangSchedulingInterface resets the ReconcilerUtilInterface used in this VolcanoReconciler
func (*VolcanoReconciler) ReconcilePodGroup ¶
func (r *VolcanoReconciler) ReconcilePodGroup( ctx context.Context, job client.Object, runPolicy *commonv1.RunPolicy, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error
ReconcilePodGroup reconciles the PodGroup resource for this job