Documentation ¶
Overview ¶
Package v2beta1 is the v2beta1 version of the API. +groupName=kubeflow.org
Index ¶
- Constants
- Variables
- func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
- func RegisterDefaults(scheme *runtime.Scheme) error
- func Resource(resource string) schema.GroupResource
- func SetDefaults_MPIJob(mpiJob *MPIJob)
- func SetObjectDefaults_MPIJob(in *MPIJob)
- func SetObjectDefaults_MPIJobList(in *MPIJobList)
- type CleanPodPolicy
- type JobCondition
- type JobConditionType
- type JobStatus
- type MPIImplementation
- type MPIJob
- type MPIJobList
- type MPIJobSpec
- type MPIReplicaType
- type ReplicaStatus
- type RunPolicy
- type SchedulingPolicy
Constants ¶
// Package-level defaults and names used by the MPIJob API.
const (
	// EnvKubeflowNamespace is the name of the environment variable that
	// carries the kubeflow namespace specified by the user.
	EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE"
	// DefaultRestartPolicy is the default RestartPolicy for a ReplicaSpec.
	DefaultRestartPolicy = common.RestartPolicyNever
	// DefaultLauncherRestartPolicy is the default RestartPolicy for the Launcher Job.
	DefaultLauncherRestartPolicy = common.RestartPolicyOnFailure
	// OperatorName is the name of the operator, used as the value of the
	// label common.OperatorLabelName.
	OperatorName = "mpi-operator"
)
// API group, kind and version identifiers for this package.
const (
	// GroupName is the group name used in this package.
	GroupName = "kubeflow.org"
	// Kind is the kind name.
	Kind = "MPIJob"
	// GroupVersion is the version.
	GroupVersion = "v2beta1"
)
Variables ¶
// Scheme registration helpers and group/version identifiers for this package.
var (
	// SchemeBuilder is created by runtime.NewSchemeBuilder from
	// addKnownTypes and addDefaultingFuncs.
	SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes, addDefaultingFuncs)
	// AddToScheme is the AddToScheme method of SchemeBuilder.
	AddToScheme = SchemeBuilder.AddToScheme
	// SchemeGroupVersion is the group/version pair built from GroupName and GroupVersion.
	SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion}
	// SchemeGroupVersionKind is the group/version/kind triple built from
	// GroupName, GroupVersion and Kind.
	SchemeGroupVersionKind = schema.GroupVersionKind{Group: GroupName, Version: GroupVersion, Kind: Kind}
)
Functions ¶
func GetOpenAPIDefinitions ¶
func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
func RegisterDefaults ¶
RegisterDefaults adds defaulter functions to the given scheme. Public to allow building arbitrary schemes. All generated defaulters are covering - they call all nested defaulters.
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource takes an unqualified resource and returns a Group qualified GroupResource.
func SetDefaults_MPIJob ¶
func SetDefaults_MPIJob(mpiJob *MPIJob)
func SetObjectDefaults_MPIJob ¶
func SetObjectDefaults_MPIJob(in *MPIJob)
func SetObjectDefaults_MPIJobList ¶
func SetObjectDefaults_MPIJobList(in *MPIJobList)
Types ¶
type CleanPodPolicy ¶
// CleanPodPolicy is a string-typed policy describing how to deal with pods
// when the job is finished.
type CleanPodPolicy string
CleanPodPolicy describes how to deal with pods when the job is finished.
// Values of CleanPodPolicy.
// NOTE(review): the per-value descriptions below are inferred from the
// constant names only; confirm against the controller before relying on them.
const (
	// CleanPodPolicyUndefined is the empty-string value, i.e. no policy set.
	CleanPodPolicyUndefined CleanPodPolicy = ""
	// CleanPodPolicyAll presumably applies cleanup to all pods of the job.
	CleanPodPolicyAll CleanPodPolicy = "All"
	// CleanPodPolicyRunning presumably applies cleanup only to pods still running.
	CleanPodPolicyRunning CleanPodPolicy = "Running"
	// CleanPodPolicyNone presumably leaves all pods in place.
	CleanPodPolicyNone CleanPodPolicy = "None"
)
type JobCondition ¶
// JobCondition describes the state of the job at a certain point.
type JobCondition struct {
	// Type is the type of the job condition.
	Type JobConditionType `json:"type"`

	// Status is the status of the condition, one of True, False, Unknown.
	// +kubebuilder:validation:Enum:=True;False;Unknown
	Status v1.ConditionStatus `json:"status"`

	// Reason is the reason for the condition's last transition.
	// +optional
	Reason string `json:"reason,omitempty"`

	// Message is a human-readable message indicating details about the transition.
	// +optional
	Message string `json:"message,omitempty"`

	// LastUpdateTime is the last time this condition was updated.
	// +optional
	LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"`

	// LastTransitionTime is the last time the condition transitioned from
	// one status to another.
	// +optional
	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
}
JobCondition describes the state of the job at a certain point.
func (*JobCondition) DeepCopy ¶
func (in *JobCondition) DeepCopy() *JobCondition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobCondition.
func (*JobCondition) DeepCopyInto ¶
func (in *JobCondition) DeepCopyInto(out *JobCondition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type JobConditionType ¶
// JobConditionType is the string-typed name of a job condition; it is the
// type of the JobCondition.Type field.
type JobConditionType string
JobConditionType enumerates the possible types of a JobCondition.
// Values of JobConditionType.
const (
	// JobCreated means the job has been accepted by the system,
	// but one or more of the pods/services has not been started.
	// This includes the time before pods are scheduled and launched.
	JobCreated JobConditionType = "Created"

	// JobRunning means all sub-resources (e.g. services/pods) of this job
	// have been successfully scheduled and launched.
	// The training is running without error.
	JobRunning JobConditionType = "Running"

	// JobRestarting means one or more sub-resources (e.g. services/pods) of this job
	// reached phase failed but may be restarted according to its restart policy,
	// which is specified by the user in v1.PodTemplateSpec.
	// The training is freezing/pending.
	JobRestarting JobConditionType = "Restarting"

	// JobSucceeded means all sub-resources (e.g. services/pods) of this job
	// have terminated successfully.
	// The training is complete without error.
	JobSucceeded JobConditionType = "Succeeded"

	// JobSuspended means the job has been suspended.
	JobSuspended JobConditionType = "Suspended"

	// JobFailed means one or more sub-resources (e.g. services/pods) of this job
	// reached phase failed with no restarting.
	// The training has failed its execution.
	JobFailed JobConditionType = "Failed"
)
type JobStatus ¶
// JobStatus represents the current observed state of the training Job.
type JobStatus struct {
	// conditions is a list of current observed job conditions.
	// +optional
	// +listType=map
	// +listMapKey=type
	Conditions []JobCondition `json:"conditions,omitempty"`

	// replicaStatuses is a map from ReplicaType to ReplicaStatus that
	// specifies the status of each replica.
	// +optional
	ReplicaStatuses map[MPIReplicaType]*ReplicaStatus `json:"replicaStatuses,omitempty"`

	// Represents the time when the job was acknowledged by the job controller.
	// It is not guaranteed to be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	StartTime *metav1.Time `json:"startTime,omitempty"`

	// Represents the time when the job was completed. It is not guaranteed to
	// be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	CompletionTime *metav1.Time `json:"completionTime,omitempty"`

	// Represents the last time when the job was reconciled. It is not guaranteed to
	// be set in happens-before order across separate operations.
	// It is represented in RFC3339 form and is in UTC.
	// +optional
	LastReconcileTime *metav1.Time `json:"lastReconcileTime,omitempty"`
}
JobStatus represents the current observed state of the training Job.
func (*JobStatus) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobStatus.
func (*JobStatus) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIImplementation ¶
// MPIImplementation is the string-typed name of an MPI implementation; it is
// the type of the MPIJobSpec.MPIImplementation field.
type MPIImplementation string
// Values of MPIImplementation.
const (
	// MPIImplementationOpenMPI is the "OpenMPI" implementation value.
	MPIImplementationOpenMPI MPIImplementation = "OpenMPI"
	// MPIImplementationIntel is the "Intel" implementation value.
	MPIImplementationIntel MPIImplementation = "Intel"
)
type MPIJob ¶
// MPIJob is the API object of kind "MPIJob": it embeds the standard type and
// object metadata and carries a spec and a status.
type MPIJob struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
	// Spec is the MPIJobSpec of this job.
	Spec MPIJobSpec `json:"spec,omitempty"`
	// Status is the JobStatus of this job.
	Status JobStatus `json:"status,omitempty"`
}
func (*MPIJob) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJob.
func (*MPIJob) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJob) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobList ¶
// MPIJobList is a list of MPIJob objects together with list metadata.
type MPIJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata"`
	// Items holds the MPIJob objects in the list.
	Items []MPIJob `json:"items"`
}
func (*MPIJobList) DeepCopy ¶
func (in *MPIJobList) DeepCopy() *MPIJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobList.
func (*MPIJobList) DeepCopyInto ¶
func (in *MPIJobList) DeepCopyInto(out *MPIJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJobList) DeepCopyObject ¶
func (in *MPIJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobSpec ¶
// MPIJobSpec is the spec of an MPIJob: slots per worker, run policy,
// per-replica-type specs, SSH key mount path and MPI implementation.
type MPIJobSpec struct {

	// Specifies the number of slots per worker used in the hostfile.
	// Defaults to 1.
	// +optional
	// +kubebuilder:default:=1
	SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"`

	// RunPolicy encapsulates various runtime policies of the job.
	RunPolicy RunPolicy `json:"runPolicy,omitempty"`

	// MPIReplicaSpecs is a map from `MPIReplicaType` to `ReplicaSpec` that
	// specifies the MPI replicas to run.
	MPIReplicaSpecs map[MPIReplicaType]*common.ReplicaSpec `json:"mpiReplicaSpecs"`

	// SSHAuthMountPath is the directory where SSH keys are mounted.
	// Defaults to "/root/.ssh".
	// +kubebuilder:default:="/root/.ssh"
	SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`

	// MPIImplementation is the MPI implementation.
	// Options are "OpenMPI" (default) and "Intel".
	// +kubebuilder:validation:Enum:=OpenMPI;Intel
	// +kubebuilder:default:=OpenMPI
	MPIImplementation MPIImplementation `json:"mpiImplementation,omitempty"`
}
func (*MPIJobSpec) DeepCopy ¶
func (in *MPIJobSpec) DeepCopy() *MPIJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobSpec.
func (*MPIJobSpec) DeepCopyInto ¶
func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIReplicaType ¶
// MPIReplicaType is the string-typed name of an MPI replica type; it is used
// as the key of the MPIReplicaSpecs and ReplicaStatuses maps.
type MPIReplicaType string
MPIReplicaType is the type for MPIReplica.
// Values of MPIReplicaType.
const (
	// MPIReplicaTypeLauncher is the type for the launcher replica.
	MPIReplicaTypeLauncher MPIReplicaType = "Launcher"

	// MPIReplicaTypeWorker is the type for worker replicas.
	MPIReplicaTypeWorker MPIReplicaType = "Worker"
)
type ReplicaStatus ¶
// ReplicaStatus represents the current observed state of the replica.
type ReplicaStatus struct {
	// The number of actively running pods.
	// +optional
	Active int32 `json:"active,omitempty"`

	// The number of pods which reached phase succeeded.
	// +optional
	Succeeded int32 `json:"succeeded,omitempty"`

	// The number of pods which reached phase failed.
	// +optional
	Failed int32 `json:"failed,omitempty"`

	// Deprecated: Use selector instead
	// +optional
	LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`

	// A selector is a label query over a set of resources. The results of matchLabels and
	// matchExpressions are ANDed. An empty selector matches all objects. A null
	// selector matches no objects.
	// +optional
	Selector string `json:"selector,omitempty"`
}
ReplicaStatus represents the current observed state of the replica.
func (*ReplicaStatus) DeepCopy ¶
func (in *ReplicaStatus) DeepCopy() *ReplicaStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaStatus.
func (*ReplicaStatus) DeepCopyInto ¶
func (in *ReplicaStatus) DeepCopyInto(out *ReplicaStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type RunPolicy ¶
// RunPolicy encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay active.
type RunPolicy struct {
	// CleanPodPolicy defines the policy to kill pods after the job completes.
	// Defaults to Running.
	// NOTE(review): the stated default is not established by this declaration;
	// confirm it against SetDefaults_MPIJob.
	CleanPodPolicy *CleanPodPolicy `json:"cleanPodPolicy,omitempty"`

	// TTLSecondsAfterFinished is the TTL to clean up jobs.
	// It may take an extra ReconcilePeriod seconds for the cleanup, since
	// reconcile gets called periodically.
	// Defaults to infinite.
	TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"`

	// Specifies the duration in seconds relative to the startTime that the job may be active
	// before the system tries to terminate it; the value must be a positive integer.
	// +optional
	ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"`

	// Optional number of retries before marking this job failed.
	// +optional
	BackoffLimit *int32 `json:"backoffLimit,omitempty"`

	// SchedulingPolicy defines the policy related to scheduling, e.g. gang-scheduling
	// +optional
	SchedulingPolicy *SchedulingPolicy `json:"schedulingPolicy,omitempty"`

	// suspend specifies whether the MPIJob controller should create Pods or not.
	// If an MPIJob is created with suspend set to true, no Pods are created by
	// the MPIJob controller. If an MPIJob is suspended after creation (i.e. the
	// flag goes from false to true), the MPIJob controller will delete all
	// active Pods and PodGroups associated with this MPIJob. Also, it will suspend the
	// Launcher Job. Users must design their workload to gracefully handle this.
	// Suspending a Job will reset the StartTime field of the MPIJob.
	//
	// Defaults to false.
	// +kubebuilder:default:=false
	Suspend *bool `json:"suspend,omitempty"`
}
RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.
func (*RunPolicy) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunPolicy.
func (*RunPolicy) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingPolicy ¶
// SchedulingPolicy encapsulates various scheduling policies of the distributed
// training job, for example `minAvailable` for gang-scheduling.
type SchedulingPolicy struct {
	// MinAvailable defines the minimal number of members to run the PodGroup.
	// If the gang-scheduling isn't empty, input is passed to `.spec.minMember` in PodGroup.
	// Note that, when using this field,
	// you need to make sure the application supports resizing (e.g., Elastic Horovod).
	//
	// If not set, it defaults to the number of workers.
	// +optional
	MinAvailable *int32 `json:"minAvailable,omitempty"`

	// Queue defines the queue name to allocate resource for PodGroup.
	// If the gang-scheduling is set to the volcano,
	// input is passed to `.spec.queue` in PodGroup for the volcano,
	// and if it is set to the scheduler-plugins,
	// input isn't passed to PodGroup.
	// +optional
	Queue string `json:"queue,omitempty"`

	// MinResources defines the minimal resources of members to run the PodGroup.
	// If the gang-scheduling isn't empty,
	// input is passed to `.spec.minResources` in PodGroup for scheduler-plugins.
	// +optional
	MinResources *v1.ResourceList `json:"minResources,omitempty"`

	// PriorityClass defines the PodGroup's PriorityClass.
	// If the gang-scheduling is set to the volcano,
	// input is passed to `.spec.priorityClassName` in PodGroup for volcano,
	// and if it is set to the scheduler-plugins,
	// input isn't passed to PodGroup for scheduler-plugins.
	// +optional
	PriorityClass string `json:"priorityClass,omitempty"`

	// ScheduleTimeoutSeconds defines the maximal time members wait before running the PodGroup.
	// If the gang-scheduling is set to the scheduler-plugins,
	// input is passed to `.spec.scheduleTimeoutSeconds` in PodGroup for the scheduler-plugins,
	// and if it is set to the volcano, input isn't passed to PodGroup.
	// +optional
	ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"`
}
SchedulingPolicy encapsulates various scheduling policies of the distributed training job, for example `minAvailable` for gang-scheduling. Currently, only volcano and scheduler-plugins are supported.
func (*SchedulingPolicy) DeepCopy ¶
func (in *SchedulingPolicy) DeepCopy() *SchedulingPolicy
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingPolicy.
func (*SchedulingPolicy) DeepCopyInto ¶
func (in *SchedulingPolicy) DeepCopyInto(out *SchedulingPolicy)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.