common

package
v1.4.7-rc1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 28, 2023 License: Apache-2.0 Imports: 43 Imported by: 0

Documentation

Overview

Copyright 2019 The Kubeflow Authors

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Index

Constants

This section is empty.

Variables

View Source
// GenNonGangSchedulerSetupFunc returns a GangSchedulingSetupFunc that resets
// the controller's gang-scheduling state: Config.GangScheduling becomes the
// empty string and PodGroupControl becomes nil.
var GenNonGangSchedulerSetupFunc = func() GangSchedulingSetupFunc {
	reset := func(jc *JobController) {
		// The two assignments are independent of each other.
		jc.PodGroupControl = nil
		jc.Config.GangScheduling = ""
	}
	return reset
}
View Source
// GenSchedulerPluginsSetupFunc returns a GangSchedulingSetupFunc that records
// gangSchedulerName as the controller's gang scheduler and installs a
// PodGroupControl built by control.NewSchedulerPluginsControl from c and
// gangSchedulerName.
var GenSchedulerPluginsSetupFunc = func(c client.Client, gangSchedulerName string) GangSchedulingSetupFunc {
	// The string-to-GangScheduler conversion is pure, so do it once up front.
	scheduler := GangScheduler(gangSchedulerName)
	setup := func(jc *JobController) {
		jc.Config.GangScheduling = scheduler
		// A new control is constructed on every invocation of the setup func.
		jc.PodGroupControl = control.NewSchedulerPluginsControl(c, gangSchedulerName)
	}
	return setup
}
View Source
// GenVolcanoSetupFunc returns a GangSchedulingSetupFunc that selects
// GangSchedulerVolcano as the controller's gang scheduler and installs a
// PodGroupControl built by control.NewVolcanoControl from vci.
var GenVolcanoSetupFunc = func(vci volcanoclient.Interface) GangSchedulingSetupFunc {
	setup := func(jc *JobController) {
		jc.Config.GangScheduling = GangSchedulerVolcano
		// A new control is constructed on every invocation of the setup func.
		jc.PodGroupControl = control.NewVolcanoControl(vci)
	}
	return setup
}
View Source
// KeyFunc is a short alias for cache.DeletionHandlingMetaNamespaceKeyFunc.
// IndexerInformer uses a delta queue, so delete events have to be keyed with
// this function; it is also fine for non-delete events.
var KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc

Functions

func AddResourceList

func AddResourceList(list, req, limit v1.ResourceList)

func CalcPGMinResources

func CalcPGMinResources(minMember int32, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, pcGetFunc PriorityClassGetFunc) *v1.ResourceList

func GenGeneralName

func GenGeneralName(jobName string, rtype string, index string) string

func MaxInt

func MaxInt(x, y int) int

func RecheckDeletionTimestamp

func RecheckDeletionTimestamp(getObject func() (metav1.Object, error)) func() error

RecheckDeletionTimestamp returns a CanAdopt() function to recheck deletion.

The CanAdopt() function calls getObject() to fetch the latest value, and denies adoption attempts if that object has a non-nil DeletionTimestamp.

Types

type FillPodGroupSpecFunc

// FillPodGroupSpecFunc is a callback that receives a metav1.Object and returns
// an error. SyncPodGroup accepts one as its specFunc argument.
// NOTE(review): presumably the callback populates the PodGroup's spec —
// confirm against the SyncPodGroup implementation.
type FillPodGroupSpecFunc func(object metav1.Object) error

type GangScheduler

// GangScheduler names a gang scheduler. It is the type of
// JobControllerConfiguration.GangScheduling.
type GangScheduler string
// Predefined GangScheduler values.
const (
	// GangSchedulerNone is the "None" choice and GangSchedulerVolcano is the
	// "volcano" choice (the latter is what GenVolcanoSetupFunc configures).
	GangSchedulerNone    GangScheduler = "None"
	GangSchedulerVolcano GangScheduler = "volcano"
	// GangSchedulerSchedulerPlugins is the "scheduler-plugins" choice. Using this
	// scheduler name, or any scheduler name other than volcano, uses the
	// scheduler-plugins PodGroup.
	GangSchedulerSchedulerPlugins GangScheduler = "scheduler-plugins"
)

type GangSchedulingSetupFunc

// GangSchedulingSetupFunc applies a gang-scheduling setup to a JobController.
// The Gen*SetupFunc variables in this package each return one (they set
// Config.GangScheduling and PodGroupControl), and NewJobController accepts one
// as its setupPodGroup argument.
type GangSchedulingSetupFunc func(jc *JobController)

type JobController

// JobController bundles the clients, listers, informer-sync checks,
// expectations cache, work queue, and event recorder used to manage the
// lifecycle of Jobs on behalf of an operator-specific ControllerInterface
// implementation.
type JobController struct {
	// Controller is the operator-specific ControllerInterface implementation
	// that this JobController works on behalf of.
	Controller apiv1.ControllerInterface

	// Config holds the controller configuration (the gang-scheduling choice).
	Config JobControllerConfiguration

	// PodControl is used to add or delete pods.
	PodControl control.PodControlInterface

	// ServiceControl is used to add or delete services.
	ServiceControl control.ServiceControlInterface

	// KubeClientSet is a standard kubernetes clientset.
	KubeClientSet kubeclientset.Interface

	// PodGroupControl is used to add or delete PodGroup.
	// It can be nil: GenNonGangSchedulerSetupFunc sets it to nil.
	PodGroupControl control.PodGroupControlInterface

	// PodLister can list/get pods from the shared informer's store.
	PodLister corelisters.PodLister

	// ServiceLister can list/get services from the shared informer's store.
	ServiceLister corelisters.ServiceLister

	// PriorityClassLister can list/get priorityClasses from the shared informer's store.
	PriorityClassLister schedulinglisters.PriorityClassLister

	// PodInformerSynced returns true if the pod store has been synced at least once.
	PodInformerSynced cache.InformerSynced

	// ServiceInformerSynced returns true if the service store has been synced at least once.
	ServiceInformerSynced cache.InformerSynced

	// PriorityClassInformerSynced returns true if the priority class store has been synced at least once.
	PriorityClassInformerSynced cache.InformerSynced

	// Expectations is a TTLCache of the pod/service creates and deletes that each job expects to see.
	// The expectation key is Job namespace/name + ReplicaType + pods/services.
	// For example, given a TFJob with namespace "tf-operator" and name "tfjob-abc":
	// {
	//     "PS": {
	//         "Replicas": 2,
	//     },
	//     "Worker": {
	//         "Replicas": 4,
	//     }
	// }
	// We will create 4 expectations:
	// - "tf-operator/tfjob-abc/ps/services", expects 2 adds.
	// - "tf-operator/tfjob-abc/ps/pods", expects 2 adds.
	// - "tf-operator/tfjob-abc/worker/services", expects 4 adds.
	// - "tf-operator/tfjob-abc/worker/pods", expects 4 adds.
	Expectations expectation.ControllerExpectationsInterface

	// WorkQueue is a rate limited work queue. This is used to queue work to be
	// processed instead of performing it as soon as a change happens. This
	// means we can ensure we only process a fixed amount of resources at a
	// time, and makes it easy to ensure we are never processing the same item
	// simultaneously in two different workers.
	WorkQueue workqueue.RateLimitingInterface

	// Recorder is an event recorder for recording Event resources to the
	// Kubernetes API.
	Recorder record.EventRecorder
}

JobController abstracts other operators to manage the lifecycle of Jobs. Users first need to implement the ControllerInterface (objectA) and then initialize a JobController (objectB) struct with objectA as the parameter. They then call objectB.ReconcileJobs as shown below; the ReconcileJobs method is the entrypoint that triggers the reconcile logic of the job controller:

ReconcileJobs(

job interface{},
replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec,
jobStatus apiv1.JobStatus,
runPolicy *apiv1.RunPolicy) error

func NewJobController

func NewJobController(
	controllerImpl apiv1.ControllerInterface,
	reconcilerSyncPeriod metav1.Duration,
	kubeClientSet kubeclientset.Interface,
	setupPodGroup GangSchedulingSetupFunc,
	kubeInformerFactory kubeinformers.SharedInformerFactory,
	workQueueName string) JobController

func (*JobController) AddPod

func (jc *JobController) AddPod(obj interface{})

When a pod is created, enqueue the job that manages it and update its expectations.

func (*JobController) AddService

func (jc *JobController) AddService(obj interface{})

When a service is created, enqueue the controller that manages it and update its expectations.

func (*JobController) CleanupJob

func (jc *JobController) CleanupJob(runPolicy *apiv1.RunPolicy, jobStatus apiv1.JobStatus, job interface{}) error

func (*JobController) CreateNewService

func (jc *JobController) CreateNewService(job metav1.Object, rtype apiv1.ReplicaType,
	spec *apiv1.ReplicaSpec, index string) error

CreateNewService creates a new service for the given index and type.

func (*JobController) DeletePdb

func (jc *JobController) DeletePdb(job metav1.Object) error

func (*JobController) DeletePod

func (jc *JobController) DeletePod(obj interface{})

When a pod is deleted, enqueue the job that manages the pod and update its expectations. obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item.

func (*JobController) DeletePodGroup

func (jc *JobController) DeletePodGroup(job metav1.Object) error

func (*JobController) DeletePodsAndServices

func (jc *JobController) DeletePodsAndServices(runPolicy *apiv1.RunPolicy, job interface{}, pods []*corev1.Pod) error

func (*JobController) DeleteService

func (jc *JobController) DeleteService(obj interface{})

When a service is deleted, enqueue the job that manages the service and update its expectations. obj could be an *v1.Service, or a DeletionFinalStateUnknown marker item.

func (*JobController) FilterPodsForReplicaType

func (jc *JobController) FilterPodsForReplicaType(pods []*v1.Pod, replicaType string) ([]*v1.Pod, error)

FilterPodsForReplicaType returns the pods that belong to a replicaType.

func (*JobController) FilterServicesForReplicaType

func (jc *JobController) FilterServicesForReplicaType(services []*v1.Service, replicaType string) ([]*v1.Service, error)

FilterServicesForReplicaType returns the services that belong to a replicaType.

func (*JobController) GenLabels

func (jc *JobController) GenLabels(jobName string) map[string]string

func (*JobController) GenOwnerReference

func (jc *JobController) GenOwnerReference(obj metav1.Object) *metav1.OwnerReference

func (*JobController) GetPodSlices

func (jc *JobController) GetPodSlices(pods []*v1.Pod, replicas int, logger *log.Entry) [][]*v1.Pod

GetPodSlices returns a slice whose elements are slices of pods. It gives the caller enough information to decide whether to scale resources up or down.

func (*JobController) GetPodsForJob

func (jc *JobController) GetPodsForJob(jobObject interface{}) ([]*v1.Pod, error)

GetPodsForJob returns the set of pods that this job should manage. It also reconciles ControllerRef by adopting/orphaning. Note that the returned Pods are pointers into the cache.

func (*JobController) GetPortsFromJob

func (jc *JobController) GetPortsFromJob(spec *apiv1.ReplicaSpec) (map[string]int32, error)

GetPortsFromJob gets the ports of the job container. The ports may be nil if the distributed communication strategy does not need any and there are no other ports that need to be exposed.

func (*JobController) GetServiceSlices

func (jc *JobController) GetServiceSlices(services []*v1.Service, replicas int, logger *log.Entry) [][]*v1.Service

GetServiceSlices returns a slice whose elements are slices of services. If the returned object is serviceSlices, then serviceSlices[i] is an array of pointers to the services corresponding to replica i.

func (*JobController) GetServicesForJob

func (jc *JobController) GetServicesForJob(jobObject interface{}) ([]*v1.Service, error)

GetServicesForJob returns the set of services that this job should manage. It also reconciles ControllerRef by adopting/orphaning. Note that the returned services are pointers into the cache.

func (*JobController) PastActiveDeadline

func (jc *JobController) PastActiveDeadline(runPolicy *apiv1.RunPolicy, jobStatus apiv1.JobStatus) bool

PastActiveDeadline checks if job has ActiveDeadlineSeconds field set and if it is exceeded.

func (*JobController) PastBackoffLimit

func (jc *JobController) PastBackoffLimit(jobName string, runPolicy *apiv1.RunPolicy,
	replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, pods []*corev1.Pod) (bool, error)

PastBackoffLimit checks if the sum of container restartCounts exceeds BackoffLimit. This method applies only to pods whose restartPolicy is one of OnFailure, Always or ExitCode.

func (*JobController) ReconcileJobs

func (jc *JobController) ReconcileJobs(
	job interface{},
	replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec,
	jobStatus apiv1.JobStatus,
	runPolicy *apiv1.RunPolicy) error

ReconcileJobs checks and updates replicas for each given ReplicaSpec. It will requeue the job in case of an error while creating/deleting pods/services.

func (*JobController) ReconcilePods

func (jc *JobController) ReconcilePods(
	job interface{},
	jobStatus *apiv1.JobStatus,
	pods []*v1.Pod,
	rType apiv1.ReplicaType,
	spec *apiv1.ReplicaSpec,
	replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error

ReconcilePods checks and updates pods for each given ReplicaSpec. It will requeue the job in case of an error while creating/deleting pods.

func (*JobController) ReconcileServices

func (jc *JobController) ReconcileServices(
	job metav1.Object,
	services []*v1.Service,
	rtype apiv1.ReplicaType,
	spec *apiv1.ReplicaSpec) error

ReconcileServices checks and updates services for each given ReplicaSpec. It will requeue the job in case of an error while creating/deleting services.

func (*JobController) ResetExpectations

func (jc *JobController) ResetExpectations(jobKey string, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error

ResetExpectations resets the expectations for creates and deletes of pods/services to zero.

func (*JobController) SyncPdb

func (jc *JobController) SyncPdb(job metav1.Object, minAvailableReplicas int32) (*policyapi.PodDisruptionBudget, error)

SyncPdb will create a PDB for gang scheduling.

func (*JobController) SyncPodGroup

func (jc *JobController) SyncPodGroup(job metav1.Object, specFunc FillPodGroupSpecFunc) (metav1.Object, error)

func (*JobController) UpdatePod

func (jc *JobController) UpdatePod(old, cur interface{})

When a pod is updated, figure out what job is managing it and wake it up. If the labels of the pod have changed we need to awaken both the old and new replica set. old and cur must be *v1.Pod types.

func (*JobController) UpdateService

func (jc *JobController) UpdateService(old, cur interface{})

When a service is updated, figure out what job/s manage it and wake them up. If the labels of the service have changed we need to awaken both the old and new replica set. old and cur must be *v1.Service types.

type JobControllerConfiguration

// JobControllerConfiguration contains the configuration of the operator.
type JobControllerConfiguration struct {
	// GangScheduling is the gang scheduler choice: None, volcano, or scheduler-plugins.
	GangScheduling GangScheduler
}

JobControllerConfiguration contains configuration of operator.

func (*JobControllerConfiguration) EnableGangScheduling

func (c *JobControllerConfiguration) EnableGangScheduling() bool

type PriorityClassGetFunc

// PriorityClassGetFunc returns a PriorityClass (or an error) for a string
// argument. CalcPGMinResources accepts one as its pcGetFunc argument.
// NOTE(review): presumably the string is the priority class name — confirm
// against callers.
type PriorityClassGetFunc func(string) (*schedulingv1.PriorityClass, error)

type ReplicaPriority

// ReplicaPriority embeds an apiv1.ReplicaSpec alongside fields that are not
// shown in this listing. ReplicasPriority is a slice of this type.
type ReplicaPriority struct {
	apiv1.ReplicaSpec
	// contains filtered or unexported fields
}

type ReplicasPriority

// ReplicasPriority is a slice of ReplicaPriority. It declares Len, Less, and
// Swap methods (the sort.Interface method set).
type ReplicasPriority []ReplicaPriority

ReplicasPriority is a slice of ReplicaPriority.

func (ReplicasPriority) Len

func (p ReplicasPriority) Len() int

func (ReplicasPriority) Less

func (p ReplicasPriority) Less(i, j int) bool

func (ReplicasPriority) Swap

func (p ReplicasPriority) Swap(i, j int)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL