Documentation ¶
Overview ¶
Package v1alpha1 contains API Schema definitions for the training v1alpha1 API group +k8s:defaulter-gen=TypeMeta +groupName=training.kubedl.io
Package v1alpha1 contains API Schema definitions for the training v1alpha1 API group +kubebuilder:object:generate=true +groupName=training.kubedl.io
Index ¶
- Constants
- Variables
- func IsTFJobChieforMaster(typ commonv1.ReplicaType) bool
- func IsTFJobEvaluator(typ commonv1.ReplicaType) bool
- func IsTFJobWorker(typ commonv1.ReplicaType) bool
- func RegisterDefaults(scheme *runtime.Scheme) error
- func Resource(resource string) schema.GroupResource
- func SetDefaults_MPIJob(mpiJob *MPIJob)
- func SetDefaults_MarsJob(job *MarsJob)
- func SetDefaults_PyTorchJob(job *PyTorchJob)
- func SetDefaults_TFJob(tfjob *TFJob)
- func SetDefaults_XDLJob(xdlJob *XDLJob)
- func SetDefaults_XGBoostJob(xgbJob *XGBoostJob)
- func SetObjectDefaults_MPIJob(in *MPIJob)
- func SetObjectDefaults_MPIJobList(in *MPIJobList)
- func SetObjectDefaults_MarsJob(in *MarsJob)
- func SetObjectDefaults_MarsJobList(in *MarsJobList)
- func SetObjectDefaults_PyTorchJob(in *PyTorchJob)
- func SetObjectDefaults_PyTorchJobList(in *PyTorchJobList)
- func SetObjectDefaults_TFJob(in *TFJob)
- func SetObjectDefaults_TFJobList(in *TFJobList)
- func SetObjectDefaults_XDLJob(in *XDLJob)
- func SetObjectDefaults_XDLJobList(in *XDLJobList)
- func SetObjectDefaults_XGBoostJob(in *XGBoostJob)
- func SetObjectDefaults_XGBoostJobList(in *XGBoostJobList)
- type ElasticDLJob
- type ElasticDLJobList
- type ElasticDLJobSpec
- type LegacyV1Alpha1
- type LegacyV1Alpha2
- type MPIDistributionType
- type MPIJob
- type MPIJobLegacySpec
- type MPIJobList
- type MPIJobSpec
- type MarsJob
- type MarsJobList
- type MarsJobSpec
- type MarsJobStatus
- type MarsWorkerMemoryTuningPolicy
- type PyTorchJob
- type PyTorchJobList
- type PyTorchJobSpec
- type PyTorchJobStatus
- type TFJob
- type TFJobList
- type TFJobSpec
- type TFReplicaType
- type XDLJob
- type XDLJobList
- type XDLJobSpec
- type XDLJobStatus
- type XGBoostJob
- type XGBoostJobList
- type XGBoostJobSpec
- type XGBoostJobStatus
Constants ¶
const ( ElasticDLJobKind = "ElasticDLJob" ElasticDLJobDefaultContainerName = "elasticdl" ElasticDLJobDefaultPortName = "elasticdl-port" ElasticDLJobDefaultPort = 11111 )
const ( MarsJobKind = "MarsJob" MarsJobDefaultContainerName = "mars" MarsJobDefaultPortName = "mars-port" MarsJobDefaultPort = 11111 MarsJobDefaultCacheMountPath = "/dev/shm" MarsJobDefaultSchedulerRestartPolicy = v1.RestartPolicyNever MarsJobDefaultWebServiceRestartPolicy = v1.RestartPolicyAlways MarsJobDefaultWorkerRestartPolicy = v1.RestartPolicyExitCode )
const ( // MarsReplicaTypeScheduler is the type for the scheduler role in MarsJob, which schedules // graph-based workflows including 'operand' and 'chunk' to workers. MarsReplicaTypeScheduler commonv1.ReplicaType = "Scheduler" // MarsReplicaTypeWorker is the type for workers, which accept the scheduled operands from // the scheduler and do the real execution; a worker pulls data (chunks) from mounted storage // or other workers, and notifies the scheduler of its execution result by callback. MarsReplicaTypeWorker commonv1.ReplicaType = "Worker" // MarsReplicaTypeWebService is the type for web-service instances, accepting // requests from end users and forwarding the whole tensor-graph to the scheduler. // WebService provides end users with a dashboard so that they can track job // status and submit tensor-graph tasks interactively. MarsReplicaTypeWebService commonv1.ReplicaType = "WebService" )
const ( MPIJobKind = "MPIJob" // MPIJobDefaultRestartPolicy is the default RestartPolicy for ReplicaSpec. MPIJobDefaultRestartPolicy = v1.RestartPolicyNever MPIJobDefaultCleanPodPolicy = v1.CleanPodPolicyRunning MPIJobDefaultContainerName = "mpi" MPIJobDefaultPortName = "mpi-port" MPIJobDefaultPort = 2222 )
const ( // MPIReplicaTypeLauncher is the type for launcher replica. MPIReplicaTypeLauncher apiv1.ReplicaType = "Launcher" // MPIReplicaTypeWorker is the type for worker replicas. MPIReplicaTypeWorker apiv1.ReplicaType = "Worker" )
const ( PyTorchJobKind = "PyTorchJob" // PyTorchJobDefaultPortName is the name of the port used to communicate between Master and // workers. PyTorchJobDefaultPortName = "pytorchjob-port" // PyTorchJobDefaultContainerName is the name of the PyTorchJob container. PyTorchJobDefaultContainerName = "pytorch" // PyTorchJobDefaultPort is the default value of the port. PyTorchJobDefaultPort = 23456 // PyTorchJobDefaultMasterRestartPolicy is the default RestartPolicy for Master PyTorchReplicaSpec. PyTorchJobDefaultMasterRestartPolicy = common.RestartPolicyExitCode // PyTorchJobDefaultWorkerRestartPolicy is the default RestartPolicy for Worker PyTorchReplicaSpec. PyTorchJobDefaultWorkerRestartPolicy = common.RestartPolicyOnFailure )
const ( // PyTorchReplicaTypeMaster is the type of Master of distributed PyTorch PyTorchReplicaTypeMaster common.ReplicaType = "Master" // PyTorchReplicaTypeWorker is the type for workers of distributed PyTorch. PyTorchReplicaTypeWorker common.ReplicaType = "Worker" )
const ( TFJobKind = "TFJob" // TFJobDefaultPortName is the name of the port used to communicate between PS and // workers. TFJobDefaultPortName = "tfjob-port" // TFJobDefaultContainerName is the name of the TFJob container. TFJobDefaultContainerName = "tensorflow" // TFJobDefaultPort is the default value of the port. TFJobDefaultPort = 2222 // TFJobDefaultRestartPolicy is the default RestartPolicy for TFReplicaSpec. TFJobDefaultRestartPolicy = common.RestartPolicyExitCode )
const ( // TFReplicaTypePS is the type for parameter servers of distributed TensorFlow. TFReplicaTypePS commonv1.ReplicaType = "PS" // TFReplicaTypeWorker is the type for workers of distributed TensorFlow. // This is also used for non-distributed TensorFlow. TFReplicaTypeWorker commonv1.ReplicaType = "Worker" // TFReplicaTypeChief is the type for chief worker of distributed TensorFlow. // If there is "chief" replica type, it's the "chief worker". // Else, worker:0 is the chief worker. TFReplicaTypeChief commonv1.ReplicaType = "Chief" // TFReplicaTypeMaster is the type for master worker of distributed TensorFlow. // This is similar to chief, and kept just for backwards compatibility. TFReplicaTypeMaster commonv1.ReplicaType = "Master" // TFReplicaTypeEval is the type for evaluation replica in TensorFlow. TFReplicaTypeEval commonv1.ReplicaType = "Evaluator" // TFReplicaTypeGraphLearn is the type for graph-learn server replica in TensorFlow. TFReplicaTypeGraphLearn commonv1.ReplicaType = "GraphLearn" )
const ( // Field of XDLJobSpec; 0 indicates that the job does not finish until all workers are done. XDLJobDefaultMinFinishWorkNum int32 = 0 // Field of XDLJobSpec; 90 indicates that the job does not finish until 90% of workers are done. XDLJobDefaultMinFinishWorkRate int32 = 90 // Field of XDLJobSpec; the default total number of failovers for a job is 20. XDLJobDefaultBackoffLimit int32 = 20 // TODO(qiukai.cqk): ensure default names XDLJobDefaultContainerName = "xdl" XDLJobDefaultContainerPortName = "xdljob-port" XDLJobDefaultPort = 2222 XDLJobDefaultRestartPolicy = v1.RestartPolicyNever XDLJobKind = "XDLJob" )
const ( // XDLReplicaTypePS is the type for parameter servers of distributed XDL. XDLReplicaTypePS v1.ReplicaType = "PS" // XDLReplicaTypeWorker is the type for workers of distributed XDL. // This is also used for non-distributed XDL. XDLReplicaTypeWorker v1.ReplicaType = "Worker" // XDLReplicaTypeScheduler is the type for code auto-generation scheduler of // distributed XDL. XDLReplicaTypeScheduler v1.ReplicaType = "Scheduler" // XDLReplicaTypeExtendRole is the extended replica type of distributed XDL. // ExtendRole may participate in computing and be seen as another kind of // worker. XDLReplicaTypeExtendRole v1.ReplicaType = "ExtendRole" )
const ( // XGBoostJobKind is the kind name. XGBoostJobKind = "XGBoostJob" XGBoostJobDefaultContainerName = "xgboostjob" XGBoostJobDefaultContainerPortName = "xgboostjob-port" XGBoostJobDefaultPort = 9999 XGBoostJobDefaultTTLseconds = int32(100) XGBoostJobDefaultCleanPodPolicy = v1.CleanPodPolicyNone )
const ( // XGBoostReplicaTypeMaster is the type for the Master of distributed XGBoost. XGBoostReplicaTypeMaster commonv1.ReplicaType = "Master" // XGBoostReplicaTypeWorker is the type for workers of distributed XGBoost. XGBoostReplicaTypeWorker commonv1.ReplicaType = "Worker" )
const ( // ElasticDLReplicaTypeMaster is the type of Master of distributed ElasticDL ElasticDLReplicaTypeMaster common.ReplicaType = "Master" )
Variables ¶
var ( // GroupVersion is group version used to register these objects GroupVersion = schema.GroupVersion{Group: "training.kubedl.io", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
var SchemeGroupVersion = GroupVersion
Functions ¶
func IsTFJobChieforMaster ¶
func IsTFJobChieforMaster(typ commonv1.ReplicaType) bool
IsTFJobChieforMaster returns true if the type is Master or Chief.
func IsTFJobEvaluator ¶
func IsTFJobEvaluator(typ commonv1.ReplicaType) bool
IsTFJobEvaluator returns true if the type is Evaluator.
func IsTFJobWorker ¶
func IsTFJobWorker(typ commonv1.ReplicaType) bool
IsTFJobWorker returns true if the type is Worker.
func RegisterDefaults ¶
RegisterDefaults adds defaulters functions to the given scheme. Public to allow building arbitrary schemes. All generated defaulters are covering - they call all nested defaulters.
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource is required by pkg/client/listers/...
func SetDefaults_MPIJob ¶
func SetDefaults_MPIJob(mpiJob *MPIJob)
func SetDefaults_MarsJob ¶
func SetDefaults_MarsJob(job *MarsJob)
SetDefaults_MarsJob sets any unspecified values to defaults.
func SetDefaults_PyTorchJob ¶
func SetDefaults_PyTorchJob(job *PyTorchJob)
SetDefaults_PyTorchJob sets any unspecified values to defaults.
func SetDefaults_TFJob ¶
func SetDefaults_TFJob(tfjob *TFJob)
SetDefaults_TFJob sets any unspecified values to defaults.
func SetDefaults_XDLJob ¶
func SetDefaults_XDLJob(xdlJob *XDLJob)
SetDefaults_XDLJob sets any unspecified values to defaults.
func SetDefaults_XGBoostJob ¶
func SetDefaults_XGBoostJob(xgbJob *XGBoostJob)
SetDefaults_XGBoostJob sets any unspecified values to defaults.
func SetObjectDefaults_MPIJob ¶
func SetObjectDefaults_MPIJob(in *MPIJob)
func SetObjectDefaults_MPIJobList ¶
func SetObjectDefaults_MPIJobList(in *MPIJobList)
func SetObjectDefaults_MarsJob ¶
func SetObjectDefaults_MarsJob(in *MarsJob)
func SetObjectDefaults_MarsJobList ¶
func SetObjectDefaults_MarsJobList(in *MarsJobList)
func SetObjectDefaults_PyTorchJob ¶
func SetObjectDefaults_PyTorchJob(in *PyTorchJob)
func SetObjectDefaults_PyTorchJobList ¶
func SetObjectDefaults_PyTorchJobList(in *PyTorchJobList)
func SetObjectDefaults_TFJob ¶
func SetObjectDefaults_TFJob(in *TFJob)
func SetObjectDefaults_TFJobList ¶
func SetObjectDefaults_TFJobList(in *TFJobList)
func SetObjectDefaults_XDLJob ¶
func SetObjectDefaults_XDLJob(in *XDLJob)
func SetObjectDefaults_XDLJobList ¶
func SetObjectDefaults_XDLJobList(in *XDLJobList)
func SetObjectDefaults_XGBoostJob ¶
func SetObjectDefaults_XGBoostJob(in *XGBoostJob)
func SetObjectDefaults_XGBoostJobList ¶
func SetObjectDefaults_XGBoostJobList(in *XGBoostJobList)
Types ¶
type ElasticDLJob ¶
type ElasticDLJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec ElasticDLJobSpec `json:"spec,omitempty"` Status common.JobStatus `json:"status,omitempty"` }
ElasticDLJob is the Schema for the elasticdljobs API
func (*ElasticDLJob) DeepCopy ¶
func (in *ElasticDLJob) DeepCopy() *ElasticDLJob
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticDLJob.
func (*ElasticDLJob) DeepCopyInto ¶
func (in *ElasticDLJob) DeepCopyInto(out *ElasticDLJob)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ElasticDLJob) DeepCopyObject ¶
func (in *ElasticDLJob) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ElasticDLJobList ¶
type ElasticDLJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []ElasticDLJob `json:"items"` }
ElasticDLJobList contains a list of ElasticDLJob
func (*ElasticDLJobList) DeepCopy ¶
func (in *ElasticDLJobList) DeepCopy() *ElasticDLJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticDLJobList.
func (*ElasticDLJobList) DeepCopyInto ¶
func (in *ElasticDLJobList) DeepCopyInto(out *ElasticDLJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*ElasticDLJobList) DeepCopyObject ¶
func (in *ElasticDLJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type ElasticDLJobSpec ¶
type ElasticDLJobSpec struct { // RunPolicy encapsulates various runtime policies of the distributed training // job, for example how to clean up resources and how long the job can stay // active. common.RunPolicy `json:",inline"` // A map of ElasticDLReplicaType (type) to ReplicaSpec (value). Specifies the ElasticDL cluster configuration. // For example, // { // "Master": ElasticDLReplicaSpec, // } ElasticDLReplicaSpecs map[common.ReplicaType]*common.ReplicaSpec `json:"elasticdlReplicaSpecs"` }
ElasticDLJobSpec defines the desired state of ElasticDLJob
func (*ElasticDLJobSpec) DeepCopy ¶
func (in *ElasticDLJobSpec) DeepCopy() *ElasticDLJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticDLJobSpec.
func (*ElasticDLJobSpec) DeepCopyInto ¶
func (in *ElasticDLJobSpec) DeepCopyInto(out *ElasticDLJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type LegacyV1Alpha1 ¶
type LegacyV1Alpha1 struct { // Deprecated. Specifies the desired number of GPUs the MPIJob should run on. // Mutually exclusive with the `Replicas` field. // Note that this is deprecated in favor of the `ProcessingUnits` field. // +optional DeprecatedGPUs *int32 `json:"gpus,omitempty"` // The maximum number of GPUs available per node. // Note that this will be ignored if the GPU resources are explicitly // specified in the MPIJob pod spec. // This is deprecated in favor of the `ProcessingUnitsPerNode` field. GPUsPerNode *int32 `json:"gpusPerNode,omitempty"` // Specifies the desired number of processing units the MPIJob should run on. // Mutually exclusive with the `Replicas` field. // +optional ProcessingUnits *int32 `json:"processingUnits,omitempty"` // The maximum number of processing units available per node. // Note that this will be ignored if the processing resources are explicitly // specified in the MPIJob pod spec. // +optional ProcessingUnitsPerNode *int32 `json:"processingUnitsPerNode,omitempty"` // The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'. // Defaults to 'nvidia.com/gpu' // +optional ProcessingResourceType string `json:"processingResourceType,omitempty"` // Run the launcher on the master. // Defaults to false. // +optional LauncherOnMaster bool `json:"launcherOnMaster,omitempty"` // Specifies the desired number of replicas the MPIJob should run on. // The `PodSpec` should specify the number of processing units. // Mutually exclusive with the `GPUs` or `ProcessingUnits` fields. // +optional Replicas *int32 `json:"replicas,omitempty"` // Describes the pod that will be created when executing an MPIJob. Template corev1.PodTemplateSpec `json:"template,omitempty"` }
func (*LegacyV1Alpha1) DeepCopy ¶
func (in *LegacyV1Alpha1) DeepCopy() *LegacyV1Alpha1
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LegacyV1Alpha1.
func (*LegacyV1Alpha1) DeepCopyInto ¶
func (in *LegacyV1Alpha1) DeepCopyInto(out *LegacyV1Alpha1)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type LegacyV1Alpha2 ¶
type LegacyV1Alpha2 struct { // MPIDistribution specifies the name of the MPI framework which is used. // Defaults to "OpenMPI". // Options include "OpenMPI", "IntelMPI" and "MPICH". MPIDistribution *MPIDistributionType `json:"mpiDistribution,omitempty"` }
func (*LegacyV1Alpha2) DeepCopy ¶
func (in *LegacyV1Alpha2) DeepCopy() *LegacyV1Alpha2
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LegacyV1Alpha2.
func (*LegacyV1Alpha2) DeepCopyInto ¶
func (in *LegacyV1Alpha2) DeepCopyInto(out *LegacyV1Alpha2)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIDistributionType ¶
type MPIDistributionType string
MPIDistributionType is the type for MPIDistribution.
const ( // MPIDistributionTypeOpenMPI is the type for Open MPI. MPIDistributionTypeOpenMPI MPIDistributionType = "OpenMPI" // MPIDistributionTypeIntelMPI is the type for Intel MPI. MPIDistributionTypeIntelMPI MPIDistributionType = "IntelMPI" // MPIDistributionTypeMPICH is the type for MPICH. MPIDistributionTypeMPICH MPIDistributionType = "MPICH" )
type MPIJob ¶
type MPIJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec MPIJobSpec `json:"spec,omitempty"` Status apiv1.JobStatus `json:"status,omitempty"` }
MPIJob is the Schema for the mpijobs API
func (*MPIJob) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJob.
func (*MPIJob) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJob) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobLegacySpec ¶
type MPIJobLegacySpec struct { // RunPolicy is inline embedded in MPIJobSpec in both v1alpha1 and v1alpha2. *apiv1.RunPolicy `json:",inline"` // LegacyV1Alpha1 is legacy fields in v1alpha1 api definition. *LegacyV1Alpha1 `json:",inline"` // LegacyV1Alpha2 is legacy fields in v1alpha2 api definition. *LegacyV1Alpha2 `json:",inline"` }
MPIJobLegacySpec is a collection of legacy fields that were used in v1alpha1/v1alpha2 but deprecated in the v1 version; the legacy fields are retained for backward compatibility.
func (*MPIJobLegacySpec) DeepCopy ¶
func (in *MPIJobLegacySpec) DeepCopy() *MPIJobLegacySpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobLegacySpec.
func (*MPIJobLegacySpec) DeepCopyInto ¶
func (in *MPIJobLegacySpec) DeepCopyInto(out *MPIJobLegacySpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MPIJobList ¶
type MPIJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []MPIJob `json:"items"` }
MPIJobList contains a list of MPIJob
func (*MPIJobList) DeepCopy ¶
func (in *MPIJobList) DeepCopy() *MPIJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobList.
func (*MPIJobList) DeepCopyInto ¶
func (in *MPIJobList) DeepCopyInto(out *MPIJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MPIJobList) DeepCopyObject ¶
func (in *MPIJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MPIJobSpec ¶
type MPIJobSpec struct { // Specifies the number of slots per worker used in hostfile. // Defaults to 1. // +optional SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"` // `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that // specify the MPI replicas to run. MPIReplicaSpecs map[apiv1.ReplicaType]*apiv1.ReplicaSpec `json:"mpiReplicaSpecs"` // MainContainer specifies name of the main container which // executes the MPI code. MainContainer string `json:"mainContainer,omitempty"` // `RunPolicy` encapsulates various runtime policies of the distributed training // job, for example how to clean up resources and how long the job can stay // active. RunPolicy apiv1.RunPolicy `json:"runPolicy,omitempty"` // LegacySpec reserves the deprecated fields for backward compatibility. *MPIJobLegacySpec `json:",inline"` }
MPIJobSpec defines the desired state of MPIJob
func (*MPIJobSpec) DeepCopy ¶
func (in *MPIJobSpec) DeepCopy() *MPIJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobSpec.
func (*MPIJobSpec) DeepCopyInto ¶
func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MarsJob ¶
type MarsJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec MarsJobSpec `json:"spec,omitempty"` Status MarsJobStatus `json:"status,omitempty"` }
MarsJob is the Schema for the marsjobs API
func (*MarsJob) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJob.
func (*MarsJob) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MarsJob) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MarsJobList ¶
type MarsJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []MarsJob `json:"items"` }
MarsJobList contains a list of MarsJob
func (*MarsJobList) DeepCopy ¶
func (in *MarsJobList) DeepCopy() *MarsJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJobList.
func (*MarsJobList) DeepCopyInto ¶
func (in *MarsJobList) DeepCopyInto(out *MarsJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*MarsJobList) DeepCopyObject ¶
func (in *MarsJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type MarsJobSpec ¶
type MarsJobSpec struct { // RunPolicy encapsulates various runtime policies of the distributed training // job, for example how to clean up resources and how long the job can stay // active. commonv1.RunPolicy `json:",inline"` // WorkerMemoryTuningPolicy provides multiple memory tuning policies to mars worker // spec, such as cache size, cold data paths... WorkerMemoryTuningPolicy *MarsWorkerMemoryTuningPolicy `json:"workerMemoryTuningPolicy,omitempty"` // WebHost is the domain address of webservice that expose to external users. WebHost *string `json:"webHost,omitempty"` // MarsReplicaSpecs is a map of MarsReplicaType(key) to ReplicaSpec(value), // specifying replicas and template of each type. MarsReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"marsReplicaSpecs"` }
MarsJobSpec defines the desired state of MarsJob
func (*MarsJobSpec) DeepCopy ¶
func (in *MarsJobSpec) DeepCopy() *MarsJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJobSpec.
func (*MarsJobSpec) DeepCopyInto ¶
func (in *MarsJobSpec) DeepCopyInto(out *MarsJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MarsJobStatus ¶
type MarsJobStatus struct { commonv1.JobStatus `json:",inline"` // WebServiceAddresses is a list of available webservice addresses for users; its length // equals WebServices.Replicas. WebServiceAddresses []string `json:"webServiceAddresses,omitempty"` }
MarsJobStatus defines the observed state of MarsJob
func (*MarsJobStatus) DeepCopy ¶
func (in *MarsJobStatus) DeepCopy() *MarsJobStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJobStatus.
func (*MarsJobStatus) DeepCopyInto ¶
func (in *MarsJobStatus) DeepCopyInto(out *MarsJobStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type MarsWorkerMemoryTuningPolicy ¶
type MarsWorkerMemoryTuningPolicy struct { // PlasmaStore specifies the socket path of the plasma store that handles shared memory // between all worker processes. PlasmaStore *string `json:"plasmaStore,omitempty"` // LockFreeFileIO indicates whether spill dirs are dedicated or not. LockFreeFileIO *bool `json:"lockFreeFileIO,omitempty"` // SpillDirs specifies multiple directory paths; when the size of in-memory objects is // about to reach the limit, mars workers will swap cold data out to spill dirs // and persist it in ephemeral-storage. SpillDirs []string `json:"spillDirs,omitempty"` // WorkerCachePercentage specifies the percentage of total available memory that can // be used as cache; it will be overridden by workerCacheSize if that has been set. WorkerCachePercentage *int32 `json:"workerCachePercentage,omitempty"` // WorkerCacheSize specifies the exact cache quantity that can be used. WorkerCacheSize *resource.Quantity `json:"workerCacheSize,omitempty"` }
MarsWorkerMemoryTuningPolicy defines memory tuning policies that will be applied to workers.
func (*MarsWorkerMemoryTuningPolicy) DeepCopy ¶
func (in *MarsWorkerMemoryTuningPolicy) DeepCopy() *MarsWorkerMemoryTuningPolicy
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsWorkerMemoryTuningPolicy.
func (*MarsWorkerMemoryTuningPolicy) DeepCopyInto ¶
func (in *MarsWorkerMemoryTuningPolicy) DeepCopyInto(out *MarsWorkerMemoryTuningPolicy)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PyTorchJob ¶
type PyTorchJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec PyTorchJobSpec `json:"spec,omitempty"` Status common.JobStatus `json:"status,omitempty"` }
PyTorchJob is the Schema for the pytorchjobs API
func (*PyTorchJob) DeepCopy ¶
func (in *PyTorchJob) DeepCopy() *PyTorchJob
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJob.
func (*PyTorchJob) DeepCopyInto ¶
func (in *PyTorchJob) DeepCopyInto(out *PyTorchJob)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*PyTorchJob) DeepCopyObject ¶
func (in *PyTorchJob) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type PyTorchJobList ¶
type PyTorchJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []PyTorchJob `json:"items"` }
PyTorchJobList contains a list of PyTorchJob
func (*PyTorchJobList) DeepCopy ¶
func (in *PyTorchJobList) DeepCopy() *PyTorchJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobList.
func (*PyTorchJobList) DeepCopyInto ¶
func (in *PyTorchJobList) DeepCopyInto(out *PyTorchJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*PyTorchJobList) DeepCopyObject ¶
func (in *PyTorchJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type PyTorchJobSpec ¶
type PyTorchJobSpec struct { // RunPolicy encapsulates various runtime policies of the distributed training // job, for example how to clean up resources and how long the job can stay // active. common.RunPolicy `json:",inline"` // SuccessPolicy defines the policy to mark the PytorchJob as succeeded when the job contains master role. // Value "" means the default policy that the job is succeeded if all workers are succeeded or master completed, // Value "AllWorkers" means the job is succeeded if all workers *AND* master are succeeded. // Default to "" // +optional SuccessPolicy *common.SuccessPolicy `json:"successPolicy,omitempty"` // A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. // For example, // { // "Master": PyTorchReplicaSpec, // "Worker": PyTorchReplicaSpec, // } PyTorchReplicaSpecs map[common.ReplicaType]*common.ReplicaSpec `json:"pytorchReplicaSpecs"` }
PyTorchJobSpec defines the desired state of PyTorchJob
func (*PyTorchJobSpec) DeepCopy ¶
func (in *PyTorchJobSpec) DeepCopy() *PyTorchJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobSpec.
func (*PyTorchJobSpec) DeepCopyInto ¶
func (in *PyTorchJobSpec) DeepCopyInto(out *PyTorchJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type PyTorchJobStatus ¶
type PyTorchJobStatus struct { }
PyTorchJobStatus defines the observed state of PyTorchJob
func (*PyTorchJobStatus) DeepCopy ¶
func (in *PyTorchJobStatus) DeepCopy() *PyTorchJobStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobStatus.
func (*PyTorchJobStatus) DeepCopyInto ¶
func (in *PyTorchJobStatus) DeepCopyInto(out *PyTorchJobStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TFJob ¶
type TFJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec TFJobSpec `json:"spec,omitempty"` Status commonv1.JobStatus `json:"status,omitempty"` }
TFJob is the Schema for the tfjobs API
func (*TFJob) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJob.
func (*TFJob) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TFJob) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TFJobList ¶
type TFJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []TFJob `json:"items"` }
TFJobList contains a list of TFJob
func (*TFJobList) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJobList.
func (*TFJobList) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*TFJobList) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type TFJobSpec ¶
type TFJobSpec struct { // RunPolicy encapsulates various runtime policies of the distributed training // job, for example how to clean up resources and how long the job can stay // active. commonv1.RunPolicy `json:",inline"` // SuccessPolicy defines the policy to mark the TFJob as succeeded when the job does not contain chief or master // role. // Value "" means the default policy that the job is succeeded if all workers are succeeded or worker 0 completed, // Value "AllWorkers" means the job is succeeded if all workers are succeeded. // Default to "" // +optional SuccessPolicy *commonv1.SuccessPolicy `json:"successPolicy,omitempty"` // A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. // For example, // { // "PS": ReplicaSpec, // "Worker": ReplicaSpec, // } TFReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"tfReplicaSpecs"` }
TFJobSpec defines the desired state of TFJob
func (*TFJobSpec) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJobSpec.
func (*TFJobSpec) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type TFReplicaType ¶
type TFReplicaType commonv1.ReplicaType
TFReplicaType is the type for TFReplica. Can be one of: "Chief"/"Master" (semantically equivalent), "Worker", "PS", or "Evaluator".
type XDLJob ¶
type XDLJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec XDLJobSpec `json:"spec,omitempty"` Status v1.JobStatus `json:"status,omitempty"` }
XDLJob is the Schema for the xdljobs API
func (*XDLJob) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJob.
func (*XDLJob) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*XDLJob) DeepCopyObject ¶
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type XDLJobList ¶
type XDLJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []XDLJob `json:"items"` }
XDLJobList contains a list of XDLJob
func (*XDLJobList) DeepCopy ¶
func (in *XDLJobList) DeepCopy() *XDLJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJobList.
func (*XDLJobList) DeepCopyInto ¶
func (in *XDLJobList) DeepCopyInto(out *XDLJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*XDLJobList) DeepCopyObject ¶
func (in *XDLJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type XDLJobSpec ¶
type XDLJobSpec struct {
	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active.
	v1.RunPolicy `json:",inline"`

	// XDLReplicaSpecs is a map of ReplicaType to ReplicaSpec that
	// specifies the XDL replicas to run.
	// For example,
	//   {
	//     "PS": ReplicaSpec,
	//     "Worker": ReplicaSpec,
	//   }
	XDLReplicaSpecs map[v1.ReplicaType]*v1.ReplicaSpec `json:"xdlReplicaSpecs"`

	// MinFinishWorkerNum specifies the minimum number of successfully finished
	// workers such that the job is treated as successful. Not specifying this
	// value means all workers should be successfully finished.
	MinFinishWorkerNum *int32 `json:"minFinishWorkNum,omitempty"`

	// MinFinishWorkerPercentage specifies the minimum percentage of all workers
	// that should be finished successfully such that the job is treated as successful.
	// MinFinishWorkerPercentage takes precedence over MinFinishWorkerNum if both are
	// specified.
	MinFinishWorkerPercentage *int32 `json:"minFinishWorkRate,omitempty"`
}
XDLJobSpec defines the desired state of XDLJob
func (*XDLJobSpec) DeepCopy ¶
func (in *XDLJobSpec) DeepCopy() *XDLJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJobSpec.
func (*XDLJobSpec) DeepCopyInto ¶
func (in *XDLJobSpec) DeepCopyInto(out *XDLJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type XDLJobStatus ¶
type XDLJobStatus struct { }
XDLJobStatus defines the observed state of XDLJob
func (*XDLJobStatus) DeepCopy ¶
func (in *XDLJobStatus) DeepCopy() *XDLJobStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJobStatus.
func (*XDLJobStatus) DeepCopyInto ¶
func (in *XDLJobStatus) DeepCopyInto(out *XDLJobStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type XGBoostJob ¶
type XGBoostJob struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec XGBoostJobSpec `json:"spec,omitempty"` Status XGBoostJobStatus `json:"status,omitempty"` }
XGBoostJob is the Schema for the xgboostjobs API
func (*XGBoostJob) DeepCopy ¶
func (in *XGBoostJob) DeepCopy() *XGBoostJob
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJob.
func (*XGBoostJob) DeepCopyInto ¶
func (in *XGBoostJob) DeepCopyInto(out *XGBoostJob)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*XGBoostJob) DeepCopyObject ¶
func (in *XGBoostJob) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type XGBoostJobList ¶
type XGBoostJobList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []XGBoostJob `json:"items"` }
XGBoostJobList contains a list of XGBoostJob
func (*XGBoostJobList) DeepCopy ¶
func (in *XGBoostJobList) DeepCopy() *XGBoostJobList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobList.
func (*XGBoostJobList) DeepCopyInto ¶
func (in *XGBoostJobList) DeepCopyInto(out *XGBoostJobList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*XGBoostJobList) DeepCopyObject ¶
func (in *XGBoostJobList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type XGBoostJobSpec ¶
type XGBoostJobSpec struct {
	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active.
	RunPolicy commonv1.RunPolicy `json:",inline"`

	// XGBReplicaSpecs is a map of ReplicaType to ReplicaSpec that
	// specifies the XGBoost replicas to run.
	// For example,
	//   {
	//     "PS": ReplicaSpec,
	//     "Worker": ReplicaSpec,
	//   }
	XGBReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"xgbReplicaSpecs"`
}
XGBoostJobSpec defines the desired state of XGBoostJob
func (*XGBoostJobSpec) DeepCopy ¶
func (in *XGBoostJobSpec) DeepCopy() *XGBoostJobSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobSpec.
func (*XGBoostJobSpec) DeepCopyInto ¶
func (in *XGBoostJobSpec) DeepCopyInto(out *XGBoostJobSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type XGBoostJobStatus ¶
XGBoostJobStatus defines the observed state of XGBoostJob
func (*XGBoostJobStatus) DeepCopy ¶
func (in *XGBoostJobStatus) DeepCopy() *XGBoostJobStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobStatus.
func (*XGBoostJobStatus) DeepCopyInto ¶
func (in *XGBoostJobStatus) DeepCopyInto(out *XGBoostJobStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
Source Files ¶
- common.go
- doc.go
- elasticdljob_constant.go
- elasticdljob_types.go
- groupversion_info.go
- marsjob_constant.go
- marsjob_defaults.go
- marsjob_types.go
- mpijob_constants.go
- mpijob_default.go
- mpijob_types.go
- pytorchjob_constants.go
- pytorchjob_defaults.go
- pytorchjob_types.go
- tfjob_constants.go
- tfjob_defaults.go
- tfjob_types.go
- tfjob_util.go
- xdljob_constants.go
- xdljob_defaults.go
- xdljob_types.go
- xgboostjob_constants.go
- xgboostjob_defaults.go
- xgboostjob_types.go
- zz_generated.deepcopy.go
- zz_generated.defaults.go