common

package
v0.4.7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 25, 2023 License: Apache-2.0 Imports: 29 Imported by: 0

README

Reconciler.v1

This package provides most of the functionality of pkg/controller.v1 in the form of a reconciler.

To use the reconciler, the following methods must be overridden according to the APIs the reconciler handles.

// GetJob returns the job that matches the request
func (r *JobReconciler) GetJob(ctx context.Context, req ctrl.Request) (client.Object, error)

// ExtractReplicasSpec extracts the ReplicasSpec map from this job
func (r *JobReconciler) ExtractReplicasSpec(job client.Object) (map[commonv1.ReplicaType]*commonv1.ReplicaSpec, error)

// ExtractRunPolicy extracts the RunPolicy from this job
func (r *JobReconciler) ExtractRunPolicy(job client.Object) (*commonv1.RunPolicy, error)

// ExtractJobStatus extracts the JobStatus from this job
func (r *JobReconciler) ExtractJobStatus(job client.Object) (*commonv1.JobStatus, error)

// IsMasterRole checks if Pod is the master Pod
func (r *JobReconciler) IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, rtype commonv1.ReplicaType, index int) bool

A simple example can be found at test_job/reconciler.v1/test_job/test_job_reconciler.go.

Documentation

Index

Constants

View Source
// Shared constants: the API group name, a reason key/value pair, fixed
// messages, and message templates containing format verbs.
const (
	// GroupName is the group name of the generic training job, "kubeflow.org".
	GroupName = "kubeflow.org"

	// Reason key and one of its values.
	// NOTE(review): presumably used as a structured log field — confirm against callers.
	ReasonKey        = "reason"
	ReasonJobDeleted = "job deleted"

	// Fixed messages describing the reconcile lifecycle (no format verbs).
	MsgReconcileCancelled = "Reconcile Cancelled"
	MsgReconcileStart     = "Reconcile Starts"

	// Fixed messages for failed listing of Pods and Services (no format verbs).
	MsgGetPodsFailed     = "Get Pods Failed"
	MsgGetServicesFailed = "Get Services Failed"

	// Failure message templates taking one %s argument (presumably the job name — confirm).
	MsgBackoffLimitReachedTemplate   = "Job %s has failed because it has reached the specified backoff limit"
	MsgActiveDeadlineReachedTemplate = "Job %s has failed because it was active longer than specified deadline"

	// ErrUpdateJobConditionsFailed is a fixed error message (no format verbs).
	ErrUpdateJobConditionsFailed = "failed to update job conditions"

	// Error message templates, each taking exactly one format argument (%v or %s).
	// Note that ErrReconcileGangTemplate's text reads "ReconcilePodGroups".
	ErrUpdateJobErrorTemplate                    = "UpdateJobStatus error %v"
	ErrAppendJobConditionTemplate                = "Append job condition error %v"
	ErrReconcilePodsTemplate                     = "ReconcilePods error %v"
	ErrReconcileServicesTemplate                 = "ReconcileServices error %v"
	ErrReconcileGangTemplate                     = "ReconcilePodGroups error %v"
	ErrGetReplicasStatusFromStatusFailedTemplate = "failed to get ReplicasStatus for %s from status"

	// Warning templates. WarnDefaultImplementationTemplate takes one %s argument
	// (presumably a JobReconciler method name — confirm); WarnNotCountedInBackoffLimit
	// takes two %v arguments: the replica, then the job.
	WarnDefaultImplementationTemplate = "Warning: executing default implementation for JobReconciler.%s"
	WarnNotCountedInBackoffLimit      = "The restart policy of replica %v of the job %v is not OnFailure or Always. Not counted in backoff limit."
)
View Source
// DefaultContainerName is the default name of the container in a Pod, "kubeflow".
const DefaultContainerName = "kubeflow"

DefaultContainerName defines the default name for container in Pod

View Source
// ReconcilerName is the reconciler name "common-reconciler".
// NOTE(review): presumably the value returned by the default GetReconcilerName — confirm.
const ReconcilerName = "common-reconciler"
View Source
const (
	// VolcanoPodGroupAnnotation is the Pod annotation key whose value identifies
	// the PodGroup linked to that Pod.
	VolcanoPodGroupAnnotation = "scheduling.k8s.io/group-name"
)

Variables

This section is empty.

Functions

This section is empty.

Types

type BaseGangReconciler

type BaseGangReconciler struct {
	// Enabled is a gang-scheduling on/off flag.
	// NOTE(review): presumably the value reported by GangSchedulingEnabled — confirm.
	Enabled bool
}

BaseGangReconciler defines a basic gang reconciler

func (*BaseGangReconciler) GangSchedulingEnabled

func (r *BaseGangReconciler) GangSchedulingEnabled() bool

GangSchedulingEnabled returns if gang-scheduling is enabled for all jobs

func (*BaseGangReconciler) GetPodGroupName

func (r *BaseGangReconciler) GetPodGroupName(job client.Object) string

GetPodGroupName returns the name of PodGroup for this job

type GangSchedulingInterface

type GangSchedulingInterface interface {
	// OverrideForGangSchedulingInterface MUST NOT be overridden, as it resets the ReconcilerUtilInterface.
	OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface)

	// GangSchedulingEnabled CAN be overridden if the definition of "gang-scheduling enabled" changes.
	GangSchedulingEnabled() bool

	// GetGangSchedulerName CAN be overridden to customize the name of the gang scheduler. This name is used to
	// check the value of podTemplateSpec.Spec.SchedulerName. For volcano, it is "volcano".
	GetGangSchedulerName() string

	// GetPodGroupName CAN be overridden to customize the name of the PodGroup generated for the job. For example:
	// podGroupName := fmt.Sprintf("%s-podgroup", job.GetName()) or podGroupName := job.GetName()
	GetPodGroupName(job client.Object) string

	// GetPodGroupForJob SHOULD be overridden if the Group, APIVersion or Kind of the PodGroup changes. The
	// PodGroup is defined by different gang schedulers as:
	// Kube-Batch:          "scheduling.incubator.k8s.io/v1alpha1/PodGroup", "scheduling.sigs.dev/v1alpha2/PodGroup"
	// Volcano:             "scheduling.volcano.sh/v1beta1/PodGroup"
	// Scheduler-Framework: "scheduling.sigs.k8s.io/v1alpha1/PodGroup"
	GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error)

	// DeletePodGroup SHOULD be overridden if the Group, APIVersion or Kind of the PodGroup changes.
	DeletePodGroup(ctx context.Context, job client.Object) error

	// ReconcilePodGroup CAN be overridden if the logic to reconcile the PodGroup changes.
	ReconcilePodGroup(ctx context.Context, job client.Object, runPolicy *commonv1.RunPolicy,
		replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error

	// DecoratePodForGangScheduling SHOULD be overridden if the gang scheduler requires Pods associated with a
	// PodGroup to be decorated with specific requests.
	DecoratePodForGangScheduling(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)
}

GangSchedulingInterface defines the abstract interface for gang-scheduling related actions, such as getting, creating or deleting a PodGroup

type JobInterface

type JobInterface interface {
	// OverrideForJobInterface MUST NOT be overridden, as it resets the ReconcilerUtilInterface, PodInterface,
	// ServiceInterface and GangSchedulingInterface.
	OverrideForJobInterface(ui ReconcilerUtilInterface, pi PodInterface, si ServiceInterface, gi GangSchedulingInterface)

	// GenLabels CAN be overridden to customize the generic labels generated for Pods and Services.
	GenLabels(jobName string) map[string]string

	// GetGroupNameLabelValue CAN be overridden to customize the value used in labels for the Group of the job
	// being processed.
	GetGroupNameLabelValue() string

	// GetJob MUST be overridden to get jobs of the specified kind.
	GetJob(ctx context.Context, req ctrl.Request) (client.Object, error)

	// ExtractReplicasSpec MUST be overridden to extract the ReplicasSpec map from a job.
	ExtractReplicasSpec(job client.Object) (map[commonv1.ReplicaType]*commonv1.ReplicaSpec, error)

	// ExtractRunPolicy MUST be overridden to extract a pointer to the RunPolicy from a job.
	ExtractRunPolicy(job client.Object) (*commonv1.RunPolicy, error)

	// ExtractJobStatus MUST be overridden to extract a pointer to the JobStatus from a job.
	ExtractJobStatus(job client.Object) (*commonv1.JobStatus, error)

	// IsMasterRole MUST be overridden to determine whether the given ReplicaType at the given index is a master
	// role. A master-role Pod will have "job-role=master" set in its labels.
	IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, rtype commonv1.ReplicaType, index int) bool

	// ReconcileJob CAN be overridden to customize how a job is reconciled.
	ReconcileJob(
		ctx context.Context,
		job client.Object,
		replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec,
		status *commonv1.JobStatus,
		runPolicy *commonv1.RunPolicy) error

	// DeleteJob CAN be overridden to customize how a job is deleted.
	DeleteJob(job client.Object) error

	// UpdateJobStatus CAN be overridden to customize how the job status is updated without submitting it to the
	// APIServer.
	UpdateJobStatus(
		job client.Object,
		replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec,
		jobStatus *commonv1.JobStatus) error

	// UpdateJobStatusInAPIServer CAN be overridden to customize how the job status is updated directly in the
	// APIServer.
	UpdateJobStatusInAPIServer(ctx context.Context, job client.Object) error

	// CleanupResources CAN be overridden to customize how all resources associated with this job are deleted.
	CleanupResources(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error

	// CleanupJob CAN be overridden to customize how this job is cleaned up.
	CleanupJob(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error

	// RecordAbnormalPods CAN be overridden to customize how abnormal pods are recorded.
	RecordAbnormalPods(activePods []*corev1.Pod, object client.Object)

	// SetStatusForSuccessJob CAN be overridden to customize how the status is set for a successful job.
	SetStatusForSuccessJob(status *commonv1.JobStatus)

	// IsFlagReplicaTypeForJobStatus CAN be overridden to customize how to determine whether this ReplicaType is
	// the flag ReplicaType for the status of this kind of job.
	IsFlagReplicaTypeForJobStatus(rtype string) bool

	// IsJobSucceeded CAN be overridden to customize how to determine whether this job has succeeded.
	IsJobSucceeded(status commonv1.JobStatus) bool

	// IsJobFailed CAN be overridden to customize how to determine whether this job has failed.
	IsJobFailed(status commonv1.JobStatus) bool

	// ShouldCleanUp CAN be overridden to customize how to determine whether this job should be cleaned up.
	ShouldCleanUp(status commonv1.JobStatus) bool

	// PastBackoffLimit CAN be overridden to customize how to determine whether this job is past its backoff limit.
	PastBackoffLimit(jobName string, runPolicy *commonv1.RunPolicy,
		replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, pods []*corev1.Pod) (bool, error)

	// PastActiveDeadline CAN be overridden to customize how to determine whether this job is past its active
	// deadline.
	PastActiveDeadline(runPolicy *commonv1.RunPolicy, jobStatus *commonv1.JobStatus) bool
}

JobInterface defines the abstract interface for Job related actions, such as getting, creating or deleting a TFJob, PyTorchJob or KFJob, etc.

type JobReconciler

type JobReconciler struct {
	// client.Client is embedded, so its methods are promoted onto JobReconciler.
	client.Client
	// The embedded interfaces below are the ones reset by OverrideForJobInterface.
	ReconcilerUtilInterface
	PodInterface
	ServiceInterface
	GangSchedulingInterface
	// contains filtered or unexported fields
}

JobReconciler defines a Reconciler dealing with generic training job

func BareJobReconciler

func BareJobReconciler(client client.Client) *JobReconciler

BareJobReconciler returns a pointer to a JobReconciler with a minimal implementation

func (*JobReconciler) CleanupJob

func (r *JobReconciler) CleanupJob(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error

CleanupJob cleans up all resources associated with this generic training job as well as the job itself

func (*JobReconciler) CleanupResources

func (r *JobReconciler) CleanupResources(runPolicy *commonv1.RunPolicy, status commonv1.JobStatus, job client.Object) error

CleanupResources cleans up all resources associated with this generic training job

func (*JobReconciler) DeleteJob

func (r *JobReconciler) DeleteJob(job client.Object) error

DeleteJob deletes this generic training job

func (*JobReconciler) ExtractJobStatus

func (r *JobReconciler) ExtractJobStatus(job client.Object) (*commonv1.JobStatus, error)

func (*JobReconciler) ExtractReplicasSpec

func (r *JobReconciler) ExtractReplicasSpec(job client.Object) (map[commonv1.ReplicaType]*commonv1.ReplicaSpec, error)

func (*JobReconciler) ExtractRunPolicy

func (r *JobReconciler) ExtractRunPolicy(job client.Object) (*commonv1.RunPolicy, error)

func (*JobReconciler) GenLabels

func (r *JobReconciler) GenLabels(jobName string) map[string]string

GenLabels returns labels used for this job (based on the name of this generic training job)

func (*JobReconciler) GetGroupNameLabelValue

func (r *JobReconciler) GetGroupNameLabelValue() string

GetGroupNameLabelValue returns the Group Name for the generic training job, which is "kubeflow.org"

func (*JobReconciler) GetJob

func (r *JobReconciler) GetJob(ctx context.Context, req ctrl.Request) (client.Object, error)

func (*JobReconciler) IsFlagReplicaTypeForJobStatus

func (r *JobReconciler) IsFlagReplicaTypeForJobStatus(rtype string) bool

IsFlagReplicaTypeForJobStatus checks if this replicaType is the flag replicaType for the status of generic training job

func (*JobReconciler) IsJobFailed

func (r *JobReconciler) IsJobFailed(status commonv1.JobStatus) bool

IsJobFailed checks if this generic training job failed

func (*JobReconciler) IsJobSucceeded

func (r *JobReconciler) IsJobSucceeded(status commonv1.JobStatus) bool

IsJobSucceeded checks if this generic training job succeeded

func (*JobReconciler) IsMasterRole

func (r *JobReconciler) IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, rtype commonv1.ReplicaType, index int) bool

func (*JobReconciler) OverrideForJobInterface

OverrideForJobInterface resets ReconcilerUtilInterface, PodInterface, ServiceInterface, GangSchedulingInterface used in JobReconciler

func (*JobReconciler) PastActiveDeadline

func (r *JobReconciler) PastActiveDeadline(runPolicy *commonv1.RunPolicy, jobStatus *commonv1.JobStatus) bool

PastActiveDeadline checks if this generic training job has ActiveDeadlineSeconds field set and if it is exceeded.

func (*JobReconciler) PastBackoffLimit

func (r *JobReconciler) PastBackoffLimit(jobName string, runPolicy *commonv1.RunPolicy,
	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, pods []*corev1.Pod) (bool, error)

PastBackoffLimit checks if this generic training job has past backoff limit

func (*JobReconciler) ReconcileJob

func (r *JobReconciler) ReconcileJob(
	ctx context.Context,
	job client.Object,
	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec,
	status *commonv1.JobStatus,
	runPolicy *commonv1.RunPolicy) error

ReconcileJob reconciles generic training job

func (*JobReconciler) RecordAbnormalPods

func (r *JobReconciler) RecordAbnormalPods(activePods []*corev1.Pod, object client.Object)

RecordAbnormalPods records abnormal pods during the reconciliation of jobs

func (*JobReconciler) SetStatusForSuccessJob

func (r *JobReconciler) SetStatusForSuccessJob(status *commonv1.JobStatus)

SetStatusForSuccessJob sets the status for a job that succeeded

func (*JobReconciler) ShouldCleanUp

func (r *JobReconciler) ShouldCleanUp(status commonv1.JobStatus) bool

ShouldCleanUp checks if resources associated with this generic training job should be cleaned up

func (*JobReconciler) UpdateJobStatus

func (r *JobReconciler) UpdateJobStatus(
	job client.Object,
	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec,
	jobStatus *commonv1.JobStatus) error

UpdateJobStatus updates the status of this generic training job WITHOUT pushing the updated status to the APIServer

func (*JobReconciler) UpdateJobStatusInAPIServer

func (r *JobReconciler) UpdateJobStatusInAPIServer(ctx context.Context, job client.Object) error

UpdateJobStatusInAPIServer updates the status of this generic training job in APIServer

type PodInterface

type PodInterface interface {
	// OverrideForPodInterface MUST NOT be overridden, as it resets the ReconcilerUtilInterface,
	// GangSchedulingInterface and JobInterface.
	OverrideForPodInterface(ui ReconcilerUtilInterface, gi GangSchedulingInterface, ji JobInterface)

	// GetDefaultContainerName CAN be overridden if the default container name is not "kubeflow".
	GetDefaultContainerName() string

	// GenPodName CAN be overridden to customize the Pod name.
	GenPodName(jobName string, rtype string, index string) string

	// GetPodsForJob CAN be overridden to customize how all pods associated with the job are listed.
	GetPodsForJob(ctx context.Context, job client.Object) ([]*corev1.Pod, error)

	// FilterPodsForReplicaType CAN be overridden if the linking approach between pods and replicaType changes, as
	// this function filters out the pods for a specific replica type from all pods associated with the job.
	FilterPodsForReplicaType(pods []*corev1.Pod, replicaType string) ([]*corev1.Pod, error)

	// GetPodSlices SHOULD NOT be overridden, as it generates the pod slices used for further pod processing.
	GetPodSlices(pods []*corev1.Pod, replicas int, logger *logrus.Entry) [][]*corev1.Pod

	// ReconcilePods CAN be overridden if the logic to reconcile all Pods for the job changes.
	ReconcilePods(
		ctx context.Context,
		job client.Object,
		jobStatus *commonv1.JobStatus,
		pods []*corev1.Pod,
		rtype commonv1.ReplicaType,
		spec *commonv1.ReplicaSpec,
		replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error

	// CreateNewPod CAN be overridden to customize how a new pod is created.
	CreateNewPod(job client.Object, rt string, index string,
		spec *commonv1.ReplicaSpec, masterRole bool, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error

	// DeletePod CAN be overridden to customize how the pod named {name} in namespace {ns} is deleted.
	DeletePod(ctx context.Context, ns string, name string) error

	// DecoratePod CAN be overridden if customization of the pod is needed. The default implementation applies
	// nothing to the pod.
	DecoratePod(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)
}

PodInterface defines the abstract interface for Pod related actions, such as getting, creating or deleting a Pod

type PodReconciler

PodReconciler defines a Pod Reconciler for generic training job

func BarePodReconciler

func BarePodReconciler(client client.Client) *PodReconciler

BarePodReconciler returns a pointer to a PodReconciler with a minimal implementation

func (*PodReconciler) CreateNewPod

func (r *PodReconciler) CreateNewPod(job client.Object, rt string, index string,
	spec *commonv1.ReplicaSpec, masterRole bool, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error

CreateNewPod generates a Pod for this job and submits the creation request to the APIServer

func (*PodReconciler) DecoratePod

func (r *PodReconciler) DecoratePod(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)

DecoratePod decorates podTemplate before a Pod is submitted to the APIServer

func (*PodReconciler) DeletePod

func (r *PodReconciler) DeletePod(ctx context.Context, ns string, name string) error

DeletePod deletes a Pod specified by name and namespace

func (*PodReconciler) FilterPodsForReplicaType

func (r *PodReconciler) FilterPodsForReplicaType(pods []*corev1.Pod, replicaType string) ([]*corev1.Pod, error)

FilterPodsForReplicaType filters out Pods for this replicaType

func (*PodReconciler) GenPodName

func (r *PodReconciler) GenPodName(jobName string, rtype string, index string) string

GenPodName returns the name of the Pod based on jobName, replicaType and its index

func (*PodReconciler) GetDefaultContainerName

func (r *PodReconciler) GetDefaultContainerName() string

GetDefaultContainerName returns the default name of the container

func (*PodReconciler) GetPodSlices

func (r *PodReconciler) GetPodSlices(pods []*corev1.Pod, replicas int, logger *log.Entry) [][]*corev1.Pod

GetPodSlices generates podSlice from all Pods listed for this job

func (*PodReconciler) GetPodsForJob

func (r *PodReconciler) GetPodsForJob(ctx context.Context, job client.Object) ([]*corev1.Pod, error)

GetPodsForJob returns all Pods associated with this job

func (*PodReconciler) OverrideForPodInterface

func (r *PodReconciler) OverrideForPodInterface(ui ReconcilerUtilInterface, gi GangSchedulingInterface, ji JobInterface)

OverrideForPodInterface resets ReconcilerUtilInterface, GangSchedulingInterface, JobInterface for PodReconciler

func (*PodReconciler) ReconcilePods

func (r *PodReconciler) ReconcilePods(
	ctx context.Context,
	job client.Object,
	jobStatus *commonv1.JobStatus,
	pods []*corev1.Pod,
	rType commonv1.ReplicaType,
	spec *commonv1.ReplicaSpec,
	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error

ReconcilePods reconciles Pods for this job

type ReconcilerUtil

type ReconcilerUtil struct {
	// Recorder is the event recorder (presumably what GetRecorder returns — confirm).
	Recorder record.EventRecorder
	// Log is the logr logger (presumably the base for GetLogger — confirm).
	Log      logr.Logger
	// Scheme is the runtime scheme (presumably what GetScheme returns — confirm).
	Scheme   *runtime.Scheme
}

ReconcilerUtil defines a reconciler with utility features

func BareUtilReconciler

func BareUtilReconciler(
	recorder record.EventRecorder,
	log logr.Logger,
	scheme *runtime.Scheme) *ReconcilerUtil

BareUtilReconciler returns a pointer of ReconcilerUtil with default implementation

func (*ReconcilerUtil) GetLogger

func (r *ReconcilerUtil) GetLogger(job client.Object) logr.Logger

GetLogger returns a logr.Logger

func (*ReconcilerUtil) GetReconcilerName

func (r *ReconcilerUtil) GetReconcilerName() string

GetReconcilerName returns the name of this reconciler, which is "common-reconciler"

func (*ReconcilerUtil) GetRecorder

func (r *ReconcilerUtil) GetRecorder() record.EventRecorder

GetRecorder returns a record.EventRecorder

func (*ReconcilerUtil) GetScheme

func (r *ReconcilerUtil) GetScheme() *runtime.Scheme

GetScheme returns a pointer to the runtime.Scheme that is used in this reconciler

type ReconcilerUtilInterface

type ReconcilerUtilInterface interface {
	// GetReconcilerName SHOULD be overridden if a new Reconciler is defined. The default implementation returns
	// "common-reconciler".
	GetReconcilerName() string

	// GetRecorder CAN be overridden to customize the EventRecorder.
	GetRecorder() record.EventRecorder

	// GetLogger CAN be overridden to customize the logger returned for the given job.
	GetLogger(job client.Object) logr.Logger

	// GetScheme CAN be overridden to customize the runtime scheme.
	GetScheme() *runtime.Scheme
}

ReconcilerUtilInterface defines the abstract interface of a reconciler's utility features, such as getting the event recorder or logger

type SchedulerFrameworkReconciler

type SchedulerFrameworkReconciler struct {
	// BaseGangReconciler is embedded by value, so its Enabled field is promoted.
	BaseGangReconciler
	// ReconcilerUtilInterface is the utility interface reset by OverrideForGangSchedulingInterface.
	ReconcilerUtilInterface
	// client.Client is embedded, so its methods are promoted onto this reconciler.
	client.Client
	// SchedulerName is the scheduler name (presumably what GetGangSchedulerName returns — confirm).
	SchedulerName string
}

SchedulerFrameworkReconciler defines a gang-scheduling reconciler for Kubernetes Scheduler Framework

func BareSchedulerFrameworkReconciler

func BareSchedulerFrameworkReconciler(client client.Client, bgReconciler *BaseGangReconciler, enabled bool) *SchedulerFrameworkReconciler

func (*SchedulerFrameworkReconciler) DecoratePodForGangScheduling

func (r *SchedulerFrameworkReconciler) DecoratePodForGangScheduling(
	_ string,
	podTemplate *corev1.PodTemplateSpec,
	job client.Object,
)

DecoratePodForGangScheduling decorates the podTemplate before it's used to generate a pod with information for gang-scheduling

func (*SchedulerFrameworkReconciler) DeletePodGroup

func (r *SchedulerFrameworkReconciler) DeletePodGroup(ctx context.Context, job client.Object) error

DeletePodGroup deletes the PodGroup associated with this job

func (*SchedulerFrameworkReconciler) GangSchedulingEnabled

func (r *SchedulerFrameworkReconciler) GangSchedulingEnabled() bool

GangSchedulingEnabled returns if gang-scheduling is enabled for all jobs

func (*SchedulerFrameworkReconciler) GetGangSchedulerName

func (r *SchedulerFrameworkReconciler) GetGangSchedulerName() string

GetGangSchedulerName returns the name of the gang scheduler that will be used.

func (*SchedulerFrameworkReconciler) GetPodGroupForJob

func (r *SchedulerFrameworkReconciler) GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error)

GetPodGroupForJob returns the PodGroup associated with this job

func (*SchedulerFrameworkReconciler) GetPodGroupName

func (r *SchedulerFrameworkReconciler) GetPodGroupName(job client.Object) string

GetPodGroupName returns the name of PodGroup for this job

func (*SchedulerFrameworkReconciler) OverrideForGangSchedulingInterface

func (r *SchedulerFrameworkReconciler) OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface)

func (*SchedulerFrameworkReconciler) ReconcilePodGroup

func (r *SchedulerFrameworkReconciler) ReconcilePodGroup(
	ctx context.Context,
	job client.Object,
	runPolicy *commonv1.RunPolicy,
	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec,
) error

ReconcilePodGroup reconciles the PodGroup resource for this job

type ServiceInterface

type ServiceInterface interface {
	// OverrideForServiceInterface MUST NOT be overridden, as it resets the ReconcilerUtilInterface, PodInterface
	// and JobInterface.
	OverrideForServiceInterface(ui ReconcilerUtilInterface, pi PodInterface, ji JobInterface)

	// GetPortsFromJob CAN be overridden to customize how the ports defined in the ReplicaSpec are found.
	GetPortsFromJob(spec *commonv1.ReplicaSpec) (map[string]int32, error)

	// GetServicesForJob CAN be overridden to customize how all services associated with this job are found.
	GetServicesForJob(ctx context.Context, job client.Object) ([]*corev1.Service, error)

	// FilterServicesForReplicaType CAN be overridden to customize how the services for this replica type are
	// filtered out.
	FilterServicesForReplicaType(services []*corev1.Service, replicaType string) ([]*corev1.Service, error)

	// GetServiceSlices CAN be overridden to customize how service slices are generated.
	GetServiceSlices(services []*corev1.Service, replicas int, logger *logrus.Entry) [][]*corev1.Service

	// ReconcileServices CAN be overridden to customize how services are reconciled for this job.
	ReconcileServices(
		job client.Object,
		services []*corev1.Service,
		rtype commonv1.ReplicaType,
		spec *commonv1.ReplicaSpec) error

	// CreateNewService CAN be overridden to customize how a new service is created.
	CreateNewService(job client.Object, rtype commonv1.ReplicaType,
		spec *commonv1.ReplicaSpec, index string) error

	// DeleteService CAN be overridden to customize how the service named {name} in namespace {ns} is deleted.
	DeleteService(ns string, name string, job client.Object) error

	// DecorateService CAN be overridden to customize the service right before it is created.
	DecorateService(rtype string, svc *corev1.Service, job client.Object)
}

ServiceInterface defines the abstract interface for Service related actions, such as getting, creating or deleting a Service

type ServiceReconciler

type ServiceReconciler struct {
	// client.Client is embedded, so its methods are promoted onto ServiceReconciler.
	client.Client
	// The embedded interfaces below are the ones reset by OverrideForServiceInterface.
	ReconcilerUtilInterface
	PodInterface
	JobInterface
}

ServiceReconciler defines a Service Reconciler for generic training job

func BareServiceReconciler

func BareServiceReconciler(client client.Client) *ServiceReconciler

BareServiceReconciler returns a pointer of ServiceReconciler with minimal implementation

func (*ServiceReconciler) CreateNewService

func (r *ServiceReconciler) CreateNewService(job client.Object, rtype commonv1.ReplicaType,
	spec *commonv1.ReplicaSpec, index string) error

CreateNewService generates a Service based on the job, replica info and index, and submits it to the APIServer

func (*ServiceReconciler) DecorateService

func (r *ServiceReconciler) DecorateService(rtype string, svc *corev1.Service, job client.Object)

DecorateService decorates the Service before it's submitted to APIServer

func (*ServiceReconciler) DeleteService

func (r *ServiceReconciler) DeleteService(ns string, name string, job client.Object) error

DeleteService deletes a Service specified by its name and namespace from APIServer

func (*ServiceReconciler) FilterServicesForReplicaType

func (r *ServiceReconciler) FilterServicesForReplicaType(services []*corev1.Service,
	replicaType string) ([]*corev1.Service, error)

FilterServicesForReplicaType returns the services belonging to a replicaType.

func (*ServiceReconciler) GetPortsFromJob

func (r *ServiceReconciler) GetPortsFromJob(spec *commonv1.ReplicaSpec) (map[string]int32, error)

GetPortsFromJob gets the ports of the job container. The result can be nil if the distributed communication strategy does not need a port and no other ports need to be exposed.

func (*ServiceReconciler) GetServiceSlices

func (r *ServiceReconciler) GetServiceSlices(services []*corev1.Service, replicas int, logger *log.Entry) [][]*corev1.Service

GetServiceSlices returns the serviceSlice based on all Services listed for this job

func (*ServiceReconciler) GetServicesForJob

func (r *ServiceReconciler) GetServicesForJob(ctx context.Context, job client.Object) ([]*corev1.Service, error)

GetServicesForJob returns all services associated with this job

func (*ServiceReconciler) OverrideForServiceInterface

func (r *ServiceReconciler) OverrideForServiceInterface(ui ReconcilerUtilInterface, pi PodInterface, ji JobInterface)

OverrideForServiceInterface resets ReconcilerUtilInterface, PodInterface, JobInterface for ServiceReconciler

func (*ServiceReconciler) ReconcileServices

func (r *ServiceReconciler) ReconcileServices(
	job client.Object,
	services []*corev1.Service,
	rtype commonv1.ReplicaType,
	spec *commonv1.ReplicaSpec) error

ReconcileServices reconciles the Services for this job

type VolcanoReconciler

type VolcanoReconciler struct {
	// BaseGangReconciler is embedded by value, so its Enabled field is promoted.
	BaseGangReconciler
	// ReconcilerUtilInterface is the utility interface reset by OverrideForGangSchedulingInterface.
	ReconcilerUtilInterface
	// client.Client is embedded, so its methods are promoted onto VolcanoReconciler.
	client.Client
}

VolcanoReconciler defines a gang-scheduling reconciler for volcano.sh/volcano

func BareVolcanoReconciler

func BareVolcanoReconciler(client client.Client, bgReconciler *BaseGangReconciler, enabled bool) *VolcanoReconciler

BareVolcanoReconciler returns a VolcanoReconciler pointer with minimal components defined

func (*VolcanoReconciler) DecoratePodForGangScheduling

func (r *VolcanoReconciler) DecoratePodForGangScheduling(rtype string, podTemplate *corev1.PodTemplateSpec, job client.Object)

DecoratePodForGangScheduling decorates the podTemplate before it's used to generate a pod with information for gang-scheduling

func (*VolcanoReconciler) DeletePodGroup

func (r *VolcanoReconciler) DeletePodGroup(ctx context.Context, job client.Object) error

DeletePodGroup deletes the PodGroup associated with this job

func (*VolcanoReconciler) GangSchedulingEnabled

func (r *VolcanoReconciler) GangSchedulingEnabled() bool

GangSchedulingEnabled returns if gang-scheduling is enabled for all jobs

func (*VolcanoReconciler) GetGangSchedulerName

func (r *VolcanoReconciler) GetGangSchedulerName() string

GetGangSchedulerName returns the name of the gang scheduler that will be used, which is "volcano" for VolcanoReconciler

func (*VolcanoReconciler) GetPodGroupForJob

func (r *VolcanoReconciler) GetPodGroupForJob(ctx context.Context, job client.Object) (client.Object, error)

GetPodGroupForJob returns the PodGroup associated with this job

func (*VolcanoReconciler) GetPodGroupName

func (r *VolcanoReconciler) GetPodGroupName(job client.Object) string

GetPodGroupName returns the name of PodGroup for this job

func (*VolcanoReconciler) OverrideForGangSchedulingInterface

func (r *VolcanoReconciler) OverrideForGangSchedulingInterface(ui ReconcilerUtilInterface)

OverrideForGangSchedulingInterface resets the ReconcilerUtilInterface used in this VolcanoReconciler

func (*VolcanoReconciler) ReconcilePodGroup

func (r *VolcanoReconciler) ReconcilePodGroup(
	ctx context.Context,
	job client.Object,
	runPolicy *commonv1.RunPolicy,
	replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error

ReconcilePodGroup reconciles the PodGroup resource for this job

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL