v1alpha1

package
v0.0.0-...-e91ad15 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 27, 2024 License: Apache-2.0 Imports: 9 Imported by: 0

Documentation

Overview

Package v1alpha1 contains API Schema definitions for the training v1alpha1 API group +k8s:defaulter-gen=TypeMeta +groupName=training.kubedl.io

Package v1alpha1 contains API Schema definitions for the training v1alpha1 API group +kubebuilder:object:generate=true +groupName=training.kubedl.io

Index

Constants

View Source
// Defaults for ElasticDLJob: its kind name, and the default container name,
// port name, and port number (11111).
const (
	ElasticDLJobKind                 = "ElasticDLJob"
	ElasticDLJobDefaultContainerName = "elasticdl"
	ElasticDLJobDefaultPortName      = "elasticdl-port"
	ElasticDLJobDefaultPort          = 11111
)
View Source
// Defaults for MarsJob: its kind name; the default container name, port name,
// and port number (11111); the default cache mount path ("/dev/shm"); and the
// default restart policy per replica role (Scheduler: Never, WebService:
// Always, Worker: ExitCode).
const (
	MarsJobKind                           = "MarsJob"
	MarsJobDefaultContainerName           = "mars"
	MarsJobDefaultPortName                = "mars-port"
	MarsJobDefaultPort                    = 11111
	MarsJobDefaultCacheMountPath          = "/dev/shm"
	MarsJobDefaultSchedulerRestartPolicy  = v1.RestartPolicyNever
	MarsJobDefaultWebServiceRestartPolicy = v1.RestartPolicyAlways
	MarsJobDefaultWorkerRestartPolicy     = v1.RestartPolicyExitCode
)
View Source
// Replica types declared for MarsJob.
const (
	// MarsReplicaTypeScheduler is the type for the scheduler role in MarsJob, which
	// schedules the graph-based workflow, including 'operand' and 'chunk', to workers.
	MarsReplicaTypeScheduler commonv1.ReplicaType = "Scheduler"

	// MarsReplicaTypeWorker is the type for the worker role, which accepts the
	// operands scheduled by the scheduler and does the real execution. It pulls
	// data (chunks) from mounted storage or from other workers, and notifies the
	// scheduler of its execution result by callback.
	MarsReplicaTypeWorker commonv1.ReplicaType = "Worker"

	// MarsReplicaTypeWebService is the type for the web-service instance, which
	// accepts requests from end users and forwards the whole tensor-graph to the
	// scheduler. WebService provides end users with a dashboard so that they can
	// track job status and submit tensor-graph tasks interactively.
	MarsReplicaTypeWebService commonv1.ReplicaType = "WebService"
)
View Source
const (
	// MPIJobKind is the kind name of MPIJob.
	MPIJobKind = "MPIJob"
	// MPIJobDefaultRestartPolicy is the default RestartPolicy for ReplicaSpec
	// (Never); MPIJobDefaultCleanPodPolicy is the default CleanPodPolicy (Running).
	MPIJobDefaultRestartPolicy  = v1.RestartPolicyNever
	MPIJobDefaultCleanPodPolicy = v1.CleanPodPolicyRunning

	// Default container name, port name, and port number (2222) for MPIJob.
	MPIJobDefaultContainerName = "mpi"
	MPIJobDefaultPortName      = "mpi-port"
	MPIJobDefaultPort          = 2222
)
View Source
// Replica types declared for MPIJob.
const (
	// MPIReplicaTypeLauncher is the type for the launcher replica.
	MPIReplicaTypeLauncher apiv1.ReplicaType = "Launcher"

	// MPIReplicaTypeWorker is the type for worker replicas.
	MPIReplicaTypeWorker apiv1.ReplicaType = "Worker"
)
View Source
const (
	// PyTorchJobKind is the kind name of PyTorchJob.
	PyTorchJobKind = "PyTorchJob"
	// PyTorchJobDefaultPortName is the name of the port used to communicate
	// between the Master and the workers.
	PyTorchJobDefaultPortName = "pytorchjob-port"
	// PyTorchJobDefaultContainerName is the name of the PyTorchJob container.
	PyTorchJobDefaultContainerName = "pytorch"
	// PyTorchJobDefaultPort is the default value of the port.
	PyTorchJobDefaultPort = 23456
	// PyTorchJobDefaultMasterRestartPolicy is the default RestartPolicy for the Master PyTorchReplicaSpec.
	PyTorchJobDefaultMasterRestartPolicy = common.RestartPolicyExitCode
	// PyTorchJobDefaultWorkerRestartPolicy is the default RestartPolicy for the Worker PyTorchReplicaSpec.
	PyTorchJobDefaultWorkerRestartPolicy = common.RestartPolicyOnFailure
)
View Source
// Replica types declared for PyTorchJob.
const (
	// PyTorchReplicaTypeMaster is the type for the Master of distributed PyTorch.
	PyTorchReplicaTypeMaster common.ReplicaType = "Master"

	// PyTorchReplicaTypeWorker is the type for workers of distributed PyTorch.
	PyTorchReplicaTypeWorker common.ReplicaType = "Worker"
)
View Source
const (
	// TFJobKind is the kind name of TFJob.
	TFJobKind = "TFJob"
	// TFJobDefaultPortName is the name of the port used to communicate between
	// PS and workers.
	TFJobDefaultPortName = "tfjob-port"
	// TFJobDefaultContainerName is the name of the TFJob container.
	TFJobDefaultContainerName = "tensorflow"
	// TFJobDefaultPort is the default value of the port.
	TFJobDefaultPort = 2222
	// TFJobDefaultRestartPolicy is the default RestartPolicy for TFReplicaSpec.
	TFJobDefaultRestartPolicy = common.RestartPolicyExitCode
)
View Source
// Replica types declared for TFJob.
const (
	// TFReplicaTypePS is the type for parameter servers of distributed TensorFlow.
	TFReplicaTypePS commonv1.ReplicaType = "PS"

	// TFReplicaTypeWorker is the type for workers of distributed TensorFlow.
	// This is also used for non-distributed TensorFlow.
	TFReplicaTypeWorker commonv1.ReplicaType = "Worker"

	// TFReplicaTypeChief is the type for the chief worker of distributed TensorFlow.
	// If there is a "chief" replica type, it is the "chief worker".
	// Otherwise, worker:0 is the chief worker.
	TFReplicaTypeChief commonv1.ReplicaType = "Chief"

	// TFReplicaTypeMaster is the type for the master worker of distributed TensorFlow.
	// This is similar to chief, and is kept just for backwards compatibility.
	TFReplicaTypeMaster commonv1.ReplicaType = "Master"

	// TFReplicaTypeEval is the type for the evaluation replica in TensorFlow.
	TFReplicaTypeEval commonv1.ReplicaType = "Evaluator"

	// TFReplicaTypeGraphLearn is the type for the graph-learn server replica in TensorFlow.
	TFReplicaTypeGraphLearn commonv1.ReplicaType = "GraphLearn"
)
View Source
const (
	// XDLJobDefaultMinFinishWorkNum is the default for a field of XDLJobSpec; 0
	// indicates that the job finishes only when all workers are done.
	XDLJobDefaultMinFinishWorkNum int32 = 0
	// XDLJobDefaultMinFinishWorkRate is the default for a field of XDLJobSpec; 90
	// indicates that the job finishes once 90% of workers are done.
	XDLJobDefaultMinFinishWorkRate int32 = 90
	// XDLJobDefaultBackoffLimit is the default for a field of XDLJobSpec; the
	// default total number of failovers for a job is 20.
	XDLJobDefaultBackoffLimit int32 = 20
	// TODO(qiukai.cqk): ensure default names
	XDLJobDefaultContainerName     = "xdl"
	XDLJobDefaultContainerPortName = "xdljob-port"
	XDLJobDefaultPort              = 2222
	XDLJobDefaultRestartPolicy     = v1.RestartPolicyNever
	XDLJobKind                     = "XDLJob"
)
View Source
// Replica types declared for XDLJob.
const (
	// XDLReplicaTypePS is the type for parameter servers of distributed XDL.
	XDLReplicaTypePS v1.ReplicaType = "PS"

	// XDLReplicaTypeWorker is the type for workers of distributed XDL.
	// This is also used for non-distributed XDL.
	XDLReplicaTypeWorker v1.ReplicaType = "Worker"

	// XDLReplicaTypeScheduler is the type for the code auto-generation scheduler
	// of distributed XDL.
	XDLReplicaTypeScheduler v1.ReplicaType = "Scheduler"

	// XDLReplicaTypeExtendRole is the extended replica type of distributed XDL.
	// ExtendRole may participate in computing and be seen as another kind of
	// worker.
	XDLReplicaTypeExtendRole v1.ReplicaType = "ExtendRole"
)
View Source
const (
	// XGBoostJobKind is the kind name. The remaining constants are the default
	// container name, container port name, port number (9999), TTL in seconds
	// (int32 100), and clean-pod policy (None) for XGBoostJob.
	XGBoostJobKind                     = "XGBoostJob"
	XGBoostJobDefaultContainerName     = "xgboostjob"
	XGBoostJobDefaultContainerPortName = "xgboostjob-port"
	XGBoostJobDefaultPort              = 9999
	XGBoostJobDefaultTTLseconds        = int32(100)
	XGBoostJobDefaultCleanPodPolicy    = v1.CleanPodPolicyNone
)
View Source
// Replica types declared for XGBoostJob.
const (
	// XGBoostReplicaTypeMaster is the type for the Master of distributed XGBoost.
	XGBoostReplicaTypeMaster commonv1.ReplicaType = "Master"

	// XGBoostReplicaTypeWorker is the type for workers of distributed XGBoost.
	XGBoostReplicaTypeWorker commonv1.ReplicaType = "Worker"
)
View Source
// Replica types declared for ElasticDLJob.
const (
	// ElasticDLReplicaTypeMaster is the type for the Master of distributed ElasticDL.
	ElasticDLReplicaTypeMaster common.ReplicaType = "Master"
)

Variables

View Source
var (
	// GroupVersion is the group version used to register these objects:
	// group "training.kubedl.io", version "v1alpha1".
	GroupVersion = schema.GroupVersion{Group: "training.kubedl.io", Version: "v1alpha1"}

	// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

	// AddToScheme adds the types in this group-version to the given scheme.
	AddToScheme = SchemeBuilder.AddToScheme
)
View Source
// SchemeGroupVersion is initialised to the value of GroupVersion.
// NOTE(review): presumably kept for generated client code that expects this
// name — confirm against callers.
var SchemeGroupVersion = GroupVersion

Functions

func IsTFJobChieforMaster

func IsTFJobChieforMaster(typ commonv1.ReplicaType) bool

IsTFJobChieforMaster returns true if the type is Master or Chief.

func IsTFJobEvaluator

func IsTFJobEvaluator(typ commonv1.ReplicaType) bool

IsTFJobEvaluator returns true if the type is Evaluator.

func IsTFJobWorker

func IsTFJobWorker(typ commonv1.ReplicaType) bool

IsTFJobWorker returns true if the type is Worker.

func RegisterDefaults

func RegisterDefaults(scheme *runtime.Scheme) error

RegisterDefaults adds defaulter functions to the given scheme. It is public to allow building arbitrary schemes. All generated defaulters are covering: they call all nested defaulters.

func Resource

func Resource(resource string) schema.GroupResource

Resource is required by pkg/client/listers/...

func SetDefaults_MPIJob

func SetDefaults_MPIJob(mpiJob *MPIJob)

func SetDefaults_MarsJob

func SetDefaults_MarsJob(job *MarsJob)

SetDefaults_MarsJob sets any unspecified values to defaults.

func SetDefaults_PyTorchJob

func SetDefaults_PyTorchJob(job *PyTorchJob)

SetDefaults_PyTorchJob sets any unspecified values to defaults.

func SetDefaults_TFJob

func SetDefaults_TFJob(tfjob *TFJob)

SetDefaults_TFJob sets any unspecified values to defaults.

func SetDefaults_XDLJob

func SetDefaults_XDLJob(xdlJob *XDLJob)

SetDefaults_XDLJob sets any unspecified values to defaults.

func SetDefaults_XGBoostJob

func SetDefaults_XGBoostJob(xgbJob *XGBoostJob)

SetDefaults_XGBoostJob sets any unspecified values to defaults.

func SetObjectDefaults_MPIJob

func SetObjectDefaults_MPIJob(in *MPIJob)

func SetObjectDefaults_MPIJobList

func SetObjectDefaults_MPIJobList(in *MPIJobList)

func SetObjectDefaults_MarsJob

func SetObjectDefaults_MarsJob(in *MarsJob)

func SetObjectDefaults_MarsJobList

func SetObjectDefaults_MarsJobList(in *MarsJobList)

func SetObjectDefaults_PyTorchJob

func SetObjectDefaults_PyTorchJob(in *PyTorchJob)

func SetObjectDefaults_PyTorchJobList

func SetObjectDefaults_PyTorchJobList(in *PyTorchJobList)

func SetObjectDefaults_TFJob

func SetObjectDefaults_TFJob(in *TFJob)

func SetObjectDefaults_TFJobList

func SetObjectDefaults_TFJobList(in *TFJobList)

func SetObjectDefaults_XDLJob

func SetObjectDefaults_XDLJob(in *XDLJob)

func SetObjectDefaults_XDLJobList

func SetObjectDefaults_XDLJobList(in *XDLJobList)

func SetObjectDefaults_XGBoostJob

func SetObjectDefaults_XGBoostJob(in *XGBoostJob)

func SetObjectDefaults_XGBoostJobList

func SetObjectDefaults_XGBoostJobList(in *XGBoostJobList)

Types

type ElasticDLJob

// ElasticDLJob is the Schema for the elasticdljobs API.
type ElasticDLJob struct {
	// Embedded type metadata (inlined) and object metadata (JSON key "metadata").
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the job specification (JSON key "spec"); Status is the job
	// status (JSON key "status").
	Spec   ElasticDLJobSpec `json:"spec,omitempty"`
	Status common.JobStatus `json:"status,omitempty"`
}

ElasticDLJob is the Schema for the elasticdljobs API

func (*ElasticDLJob) DeepCopy

func (in *ElasticDLJob) DeepCopy() *ElasticDLJob

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticDLJob.

func (*ElasticDLJob) DeepCopyInto

func (in *ElasticDLJob) DeepCopyInto(out *ElasticDLJob)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ElasticDLJob) DeepCopyObject

func (in *ElasticDLJob) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type ElasticDLJobList

// ElasticDLJobList contains a list of ElasticDLJob. It embeds type metadata
// (inlined) and list metadata (JSON key "metadata"), and carries the jobs in
// Items (JSON key "items").
type ElasticDLJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []ElasticDLJob `json:"items"`
}

ElasticDLJobList contains a list of ElasticDLJob

func (*ElasticDLJobList) DeepCopy

func (in *ElasticDLJobList) DeepCopy() *ElasticDLJobList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticDLJobList.

func (*ElasticDLJobList) DeepCopyInto

func (in *ElasticDLJobList) DeepCopyInto(out *ElasticDLJobList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*ElasticDLJobList) DeepCopyObject

func (in *ElasticDLJobList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type ElasticDLJobSpec

// ElasticDLJobSpec defines the desired state of ElasticDLJob.
type ElasticDLJobSpec struct {

	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active. It is embedded and inlined into the spec's JSON.
	common.RunPolicy `json:",inline"`

	// ElasticDLReplicaSpecs is a map of ElasticDLReplicaType (key) to ReplicaSpec
	// (value). It specifies the ElasticDL cluster configuration.
	// For example,
	//   {
	//     "Master": ElasticDLReplicaSpec,
	//   }
	ElasticDLReplicaSpecs map[common.ReplicaType]*common.ReplicaSpec `json:"elasticdlReplicaSpecs"`
}

ElasticDLJobSpec defines the desired state of ElasticDLJob

func (*ElasticDLJobSpec) DeepCopy

func (in *ElasticDLJobSpec) DeepCopy() *ElasticDLJobSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticDLJobSpec.

func (*ElasticDLJobSpec) DeepCopyInto

func (in *ElasticDLJobSpec) DeepCopyInto(out *ElasticDLJobSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type LegacyV1Alpha1

// LegacyV1Alpha1 holds the legacy MPIJob fields from the v1alpha1 API definition.
type LegacyV1Alpha1 struct {
	// DeprecatedGPUs specifies the desired number of GPUs the MPIJob should run on.
	// Mutually exclusive with the `Replicas` field.
	// Note that this is deprecated in favor of the `ProcessingUnits` field.
	// +optional
	DeprecatedGPUs *int32 `json:"gpus,omitempty"`

	// GPUsPerNode is the maximum number of GPUs available per node.
	// Note that this will be ignored if the GPU resources are explicitly
	// specified in the MPIJob pod spec.
	// This is deprecated in favor of the `ProcessingUnitsPerNode` field.
	GPUsPerNode *int32 `json:"gpusPerNode,omitempty"`

	// ProcessingUnits specifies the desired number of processing units the
	// MPIJob should run on.
	// Mutually exclusive with the `Replicas` field.
	// +optional
	ProcessingUnits *int32 `json:"processingUnits,omitempty"`

	// ProcessingUnitsPerNode is the maximum number of processing units available
	// per node.
	// Note that this will be ignored if the processing resources are explicitly
	// specified in the MPIJob pod spec.
	// +optional
	ProcessingUnitsPerNode *int32 `json:"processingUnitsPerNode,omitempty"`

	// ProcessingResourceType is the processing resource type, e.g.
	// 'nvidia.com/gpu' or 'cpu'.
	// Defaults to 'nvidia.com/gpu'.
	// +optional
	ProcessingResourceType string `json:"processingResourceType,omitempty"`

	// LauncherOnMaster indicates whether to run the launcher on the master.
	// Defaults to false.
	// +optional
	LauncherOnMaster bool `json:"launcherOnMaster,omitempty"`

	// Replicas specifies the desired number of replicas the MPIJob should run on.
	// The `PodSpec` should specify the number of processing units.
	// Mutually exclusive with the `GPUs` or `ProcessingUnits` fields.
	// +optional
	Replicas *int32 `json:"replicas,omitempty"`

	// Template describes the pod that will be created when executing an MPIJob.
	Template corev1.PodTemplateSpec `json:"template,omitempty"`
}

func (*LegacyV1Alpha1) DeepCopy

func (in *LegacyV1Alpha1) DeepCopy() *LegacyV1Alpha1

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LegacyV1Alpha1.

func (*LegacyV1Alpha1) DeepCopyInto

func (in *LegacyV1Alpha1) DeepCopyInto(out *LegacyV1Alpha1)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type LegacyV1Alpha2

// LegacyV1Alpha2 holds the legacy MPIJob fields from the v1alpha2 API definition.
type LegacyV1Alpha2 struct {
	// MPIDistribution specifies the name of the MPI framework which is used.
	// Defaults to "OpenMPI".
	// Options include "OpenMPI", "IntelMPI" and "MPICH".
	MPIDistribution *MPIDistributionType `json:"mpiDistribution,omitempty"`
}

func (*LegacyV1Alpha2) DeepCopy

func (in *LegacyV1Alpha2) DeepCopy() *LegacyV1Alpha2

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LegacyV1Alpha2.

func (*LegacyV1Alpha2) DeepCopyInto

func (in *LegacyV1Alpha2) DeepCopyInto(out *LegacyV1Alpha2)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type MPIDistributionType

// MPIDistributionType is the string type for MPIDistribution.
type MPIDistributionType string

MPIDistributionType is the type for MPIDistribution.

// Values declared for MPIDistributionType.
const (
	// MPIDistributionTypeOpenMPI is the type for Open MPI.
	MPIDistributionTypeOpenMPI MPIDistributionType = "OpenMPI"

	// MPIDistributionTypeIntelMPI is the type for Intel MPI.
	MPIDistributionTypeIntelMPI MPIDistributionType = "IntelMPI"

	// MPIDistributionTypeMPICH is the type for MPICH.
	MPIDistributionTypeMPICH MPIDistributionType = "MPICH"
)

type MPIJob

// MPIJob is the Schema for the mpijobs API.
type MPIJob struct {
	// Embedded type metadata (inlined) and object metadata (JSON key "metadata").
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the job specification (JSON key "spec"); Status is the job
	// status (JSON key "status").
	Spec   MPIJobSpec      `json:"spec,omitempty"`
	Status apiv1.JobStatus `json:"status,omitempty"`
}

MPIJob is the Schema for the mpijobs API

func (*MPIJob) DeepCopy

func (in *MPIJob) DeepCopy() *MPIJob

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJob.

func (*MPIJob) DeepCopyInto

func (in *MPIJob) DeepCopyInto(out *MPIJob)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*MPIJob) DeepCopyObject

func (in *MPIJob) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type MPIJobLegacySpec

// MPIJobLegacySpec is a collection of legacy fields that were used in
// v1alpha1/v1alpha2 but are deprecated in the v1 version. The legacy fields are
// reserved for backward compatibility. All three members are embedded pointers
// whose fields are inlined into the JSON.
type MPIJobLegacySpec struct {
	// RunPolicy was embedded inline in MPIJobSpec in the legacy API versions.
	// NOTE(review): the original comment read "in both v1alpha1", which looks
	// truncated — presumably v1alpha1 and v1alpha2; confirm.
	*apiv1.RunPolicy `json:",inline"`

	// LegacyV1Alpha1 holds the legacy fields of the v1alpha1 API definition.
	*LegacyV1Alpha1 `json:",inline"`

	// LegacyV1Alpha2 holds the legacy fields of the v1alpha2 API definition.
	*LegacyV1Alpha2 `json:",inline"`
}

MPIJobLegacySpec is a collection of legacy fields that were used in v1alpha1/v1alpha2 but are deprecated in the v1 version; the legacy fields are reserved for backward compatibility.

func (*MPIJobLegacySpec) DeepCopy

func (in *MPIJobLegacySpec) DeepCopy() *MPIJobLegacySpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobLegacySpec.

func (*MPIJobLegacySpec) DeepCopyInto

func (in *MPIJobLegacySpec) DeepCopyInto(out *MPIJobLegacySpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type MPIJobList

// MPIJobList contains a list of MPIJob. It embeds type metadata (inlined) and
// list metadata (JSON key "metadata"), and carries the jobs in Items (JSON key
// "items").
type MPIJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []MPIJob `json:"items"`
}

MPIJobList contains a list of MPIJob

func (*MPIJobList) DeepCopy

func (in *MPIJobList) DeepCopy() *MPIJobList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobList.

func (*MPIJobList) DeepCopyInto

func (in *MPIJobList) DeepCopyInto(out *MPIJobList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*MPIJobList) DeepCopyObject

func (in *MPIJobList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type MPIJobSpec

// MPIJobSpec defines the desired state of MPIJob.
type MPIJobSpec struct {

	// SlotsPerWorker specifies the number of slots per worker used in the hostfile.
	// Defaults to 1.
	// +optional
	SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"`

	// `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that
	// specify the MPI replicas to run.
	MPIReplicaSpecs map[apiv1.ReplicaType]*apiv1.ReplicaSpec `json:"mpiReplicaSpecs"`

	// MainContainer specifies the name of the main container, which
	// executes the MPI code.
	MainContainer string `json:"mainContainer,omitempty"`

	// `RunPolicy` encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active.
	RunPolicy apiv1.RunPolicy `json:"runPolicy,omitempty"`

	// MPIJobLegacySpec reserves the deprecated fields for backward compatibility.
	// It is an embedded pointer whose fields are inlined into the JSON.
	*MPIJobLegacySpec `json:",inline"`
}

MPIJobSpec defines the desired state of MPIJob

func (*MPIJobSpec) DeepCopy

func (in *MPIJobSpec) DeepCopy() *MPIJobSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MPIJobSpec.

func (*MPIJobSpec) DeepCopyInto

func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type MarsJob

// MarsJob is the Schema for the marsjobs API.
type MarsJob struct {
	// Embedded type metadata (inlined) and object metadata (JSON key "metadata").
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the job specification (JSON key "spec"); Status is the
	// Mars-specific job status (JSON key "status").
	Spec   MarsJobSpec   `json:"spec,omitempty"`
	Status MarsJobStatus `json:"status,omitempty"`
}

MarsJob is the Schema for the marsjobs API

func (*MarsJob) DeepCopy

func (in *MarsJob) DeepCopy() *MarsJob

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJob.

func (*MarsJob) DeepCopyInto

func (in *MarsJob) DeepCopyInto(out *MarsJob)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*MarsJob) DeepCopyObject

func (in *MarsJob) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type MarsJobList

// MarsJobList contains a list of MarsJob. It embeds type metadata (inlined) and
// list metadata (JSON key "metadata"), and carries the jobs in Items (JSON key
// "items").
type MarsJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []MarsJob `json:"items"`
}

MarsJobList contains a list of MarsJob

func (*MarsJobList) DeepCopy

func (in *MarsJobList) DeepCopy() *MarsJobList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJobList.

func (*MarsJobList) DeepCopyInto

func (in *MarsJobList) DeepCopyInto(out *MarsJobList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*MarsJobList) DeepCopyObject

func (in *MarsJobList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type MarsJobSpec

// MarsJobSpec defines the desired state of MarsJob.
type MarsJobSpec struct {

	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active. It is embedded and inlined into the spec's JSON.
	commonv1.RunPolicy `json:",inline"`

	// WorkerMemoryTuningPolicy provides multiple memory tuning policies for the
	// mars worker spec, such as cache size and cold data paths.
	WorkerMemoryTuningPolicy *MarsWorkerMemoryTuningPolicy `json:"workerMemoryTuningPolicy,omitempty"`

	// WebHost is the domain address of the webservice that is exposed to external users.
	WebHost *string `json:"webHost,omitempty"`

	// MarsReplicaSpecs is a map of MarsReplicaType (key) to ReplicaSpec (value),
	// specifying the replicas and template of each type.
	MarsReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"marsReplicaSpecs"`
}

MarsJobSpec defines the desired state of MarsJob

func (*MarsJobSpec) DeepCopy

func (in *MarsJobSpec) DeepCopy() *MarsJobSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJobSpec.

func (*MarsJobSpec) DeepCopyInto

func (in *MarsJobSpec) DeepCopyInto(out *MarsJobSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type MarsJobStatus

// MarsJobStatus defines the observed state of MarsJob. It embeds
// commonv1.JobStatus, inlined into the JSON.
type MarsJobStatus struct {
	commonv1.JobStatus `json:",inline"`
	// WebServiceAddresses is a list of available webservice addresses for users;
	// its length equals WebServices.Replicas.
	WebServiceAddresses []string `json:"webServiceAddresses,omitempty"`
}

MarsJobStatus defines the observed state of MarsJob

func (*MarsJobStatus) DeepCopy

func (in *MarsJobStatus) DeepCopy() *MarsJobStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsJobStatus.

func (*MarsJobStatus) DeepCopyInto

func (in *MarsJobStatus) DeepCopyInto(out *MarsJobStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type MarsWorkerMemoryTuningPolicy

// MarsWorkerMemoryTuningPolicy defines memory tuning policies that will be
// applied to workers.
type MarsWorkerMemoryTuningPolicy struct {
	// PlasmaStore specifies the socket path of the plasma store that handles
	// shared memory between all worker processes.
	PlasmaStore *string `json:"plasmaStore,omitempty"`

	// LockFreeFileIO indicates whether spill dirs are dedicated or not.
	LockFreeFileIO *bool `json:"lockFreeFileIO,omitempty"`

	// SpillDirs specifies multiple directory paths. When the size of in-memory
	// objects is about to reach the limit, mars workers will swap cold data out
	// to the spill dirs and persist it in ephemeral-storage.
	SpillDirs []string `json:"spillDirs,omitempty"`

	// WorkerCachePercentage specifies the percentage of total available memory
	// that can be used as cache. It is overridden by workerCacheSize if that is set.
	WorkerCachePercentage *int32 `json:"workerCachePercentage,omitempty"`

	// WorkerCacheSize specifies the exact cache quantity that can be used.
	WorkerCacheSize *resource.Quantity `json:"workerCacheSize,omitempty"`
}

MarsWorkerMemoryTuningPolicy defines memory tuning policies that will be applied to workers.

func (*MarsWorkerMemoryTuningPolicy) DeepCopy

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MarsWorkerMemoryTuningPolicy.

func (*MarsWorkerMemoryTuningPolicy) DeepCopyInto

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type PyTorchJob

// PyTorchJob is the Schema for the pytorchjobs API.
type PyTorchJob struct {
	// Embedded type metadata (inlined) and object metadata (JSON key "metadata").
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the job specification (JSON key "spec"); Status is the job
	// status (JSON key "status").
	Spec   PyTorchJobSpec   `json:"spec,omitempty"`
	Status common.JobStatus `json:"status,omitempty"`
}

PyTorchJob is the Schema for the pytorchjobs API

func (*PyTorchJob) DeepCopy

func (in *PyTorchJob) DeepCopy() *PyTorchJob

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJob.

func (*PyTorchJob) DeepCopyInto

func (in *PyTorchJob) DeepCopyInto(out *PyTorchJob)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*PyTorchJob) DeepCopyObject

func (in *PyTorchJob) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type PyTorchJobList

// PyTorchJobList contains a list of PyTorchJob. It embeds type metadata
// (inlined) and list metadata (JSON key "metadata"), and carries the jobs in
// Items (JSON key "items").
type PyTorchJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []PyTorchJob `json:"items"`
}

PyTorchJobList contains a list of PyTorchJob

func (*PyTorchJobList) DeepCopy

func (in *PyTorchJobList) DeepCopy() *PyTorchJobList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobList.

func (*PyTorchJobList) DeepCopyInto

func (in *PyTorchJobList) DeepCopyInto(out *PyTorchJobList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*PyTorchJobList) DeepCopyObject

func (in *PyTorchJobList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type PyTorchJobSpec

// PyTorchJobSpec defines the desired state of PyTorchJob.
type PyTorchJobSpec struct {

	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active. It is embedded and inlined into the spec's JSON.
	common.RunPolicy `json:",inline"`

	// SuccessPolicy defines the policy to mark the PyTorchJob as succeeded when the job contains the master role.
	// Value "" means the default policy: the job is succeeded if all workers are succeeded or the master completed.
	// Value "AllWorkers" means the job is succeeded if all workers *AND* the master are succeeded.
	// Defaults to "".
	// +optional
	SuccessPolicy *common.SuccessPolicy `json:"successPolicy,omitempty"`

	// PyTorchReplicaSpecs is a map of PyTorchReplicaType (key) to ReplicaSpec
	// (value). It specifies the PyTorch cluster configuration.
	// For example,
	//   {
	//     "Master": PyTorchReplicaSpec,
	//     "Worker": PyTorchReplicaSpec,
	//   }
	PyTorchReplicaSpecs map[common.ReplicaType]*common.ReplicaSpec `json:"pytorchReplicaSpecs"`
}

PyTorchJobSpec defines the desired state of PyTorchJob

func (*PyTorchJobSpec) DeepCopy

func (in *PyTorchJobSpec) DeepCopy() *PyTorchJobSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobSpec.

func (*PyTorchJobSpec) DeepCopyInto

func (in *PyTorchJobSpec) DeepCopyInto(out *PyTorchJobSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type PyTorchJobStatus

// PyTorchJobStatus defines the observed state of PyTorchJob. It declares no
// fields; the Status field of PyTorchJob is typed common.JobStatus rather than
// this type.
type PyTorchJobStatus struct {
}

PyTorchJobStatus defines the observed state of PyTorchJob

func (*PyTorchJobStatus) DeepCopy

func (in *PyTorchJobStatus) DeepCopy() *PyTorchJobStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PyTorchJobStatus.

func (*PyTorchJobStatus) DeepCopyInto

func (in *PyTorchJobStatus) DeepCopyInto(out *PyTorchJobStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type TFJob

// TFJob is the Schema for the tfjobs API.
type TFJob struct {
	// Embedded type metadata (inlined) and object metadata (JSON key "metadata").
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the job specification (JSON key "spec"); Status is the job
	// status (JSON key "status").
	Spec   TFJobSpec          `json:"spec,omitempty"`
	Status commonv1.JobStatus `json:"status,omitempty"`
}

TFJob is the Schema for the tfjobs API

func (*TFJob) DeepCopy

func (in *TFJob) DeepCopy() *TFJob

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJob.

func (*TFJob) DeepCopyInto

func (in *TFJob) DeepCopyInto(out *TFJob)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*TFJob) DeepCopyObject

func (in *TFJob) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type TFJobList

// TFJobList contains a list of TFJob. It embeds type metadata (inlined) and
// list metadata (JSON key "metadata"), and carries the jobs in Items (JSON key
// "items").
type TFJobList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []TFJob `json:"items"`
}

TFJobList contains a list of TFJob

func (*TFJobList) DeepCopy

func (in *TFJobList) DeepCopy() *TFJobList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJobList.

func (*TFJobList) DeepCopyInto

func (in *TFJobList) DeepCopyInto(out *TFJobList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*TFJobList) DeepCopyObject

func (in *TFJobList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type TFJobSpec

// TFJobSpec defines the desired state of TFJob.
type TFJobSpec struct {

	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active. It is embedded and inlined into the spec's JSON.
	commonv1.RunPolicy `json:",inline"`

	// SuccessPolicy defines the policy to mark the TFJob as succeeded when the job does not contain a chief or master
	// role.
	// Value "" means the default policy: the job is succeeded if all workers are succeeded or worker 0 completed.
	// Value "AllWorkers" means the job is succeeded if all workers are succeeded.
	// Defaults to "".
	// +optional
	SuccessPolicy *commonv1.SuccessPolicy `json:"successPolicy,omitempty"`

	// TFReplicaSpecs is a map of TFReplicaType (key) to ReplicaSpec (value). It
	// specifies the TF cluster configuration.
	// For example,
	//   {
	//     "PS": ReplicaSpec,
	//     "Worker": ReplicaSpec,
	//   }
	TFReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"tfReplicaSpecs"`
}

TFJobSpec defines the desired state of TFJob

func (*TFJobSpec) DeepCopy

func (in *TFJobSpec) DeepCopy() *TFJobSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TFJobSpec.

func (*TFJobSpec) DeepCopyInto

func (in *TFJobSpec) DeepCopyInto(out *TFJobSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type TFReplicaType

// TFReplicaType is the type for TFReplica. Can be one of: "Chief"/"Master"
// (semantically equivalent), "Worker", "PS", or "Evaluator".
type TFReplicaType commonv1.ReplicaType

TFReplicaType is the type for TFReplica. Can be one of: "Chief"/"Master" (semantically equivalent), "Worker", "PS", or "Evaluator".

type XDLJob

// XDLJob is the Schema for the xdljobs API.
type XDLJob struct {
	// TypeMeta and ObjectMeta are embedded; ObjectMeta is serialized under the
	// "metadata" key.
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the desired state of the XDLJob.
	Spec XDLJobSpec `json:"spec,omitempty"`
	// Status is typed as v1.JobStatus; the XDLJobStatus type declared in this
	// package is not used here.
	Status v1.JobStatus `json:"status,omitempty"`
}

XDLJob is the Schema for the xdljobs API

func (*XDLJob) DeepCopy

func (in *XDLJob) DeepCopy() *XDLJob

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJob.

func (*XDLJob) DeepCopyInto

func (in *XDLJob) DeepCopyInto(out *XDLJob)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*XDLJob) DeepCopyObject

func (in *XDLJob) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type XDLJobList

// XDLJobList contains a list of XDLJob.
type XDLJobList struct {
	// TypeMeta and ListMeta are embedded; ListMeta is serialized under the
	// "metadata" key.
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	// Items holds the XDLJob objects in this list.
	Items           []XDLJob `json:"items"`
}

XDLJobList contains a list of XDLJob

func (*XDLJobList) DeepCopy

func (in *XDLJobList) DeepCopy() *XDLJobList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJobList.

func (*XDLJobList) DeepCopyInto

func (in *XDLJobList) DeepCopyInto(out *XDLJobList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*XDLJobList) DeepCopyObject

func (in *XDLJobList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type XDLJobSpec

// XDLJobSpec defines the desired state of XDLJob.
type XDLJobSpec struct {

	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active. It is embedded, so its fields are promoted onto XDLJobSpec.
	v1.RunPolicy `json:",inline"`

	// XDLReplicaSpecs is a map of ReplicaType to ReplicaSpec that
	// specifies the XDL replicas to run.
	// For example,
	//   {
	//     "PS": ReplicaSpec,
	//     "Worker": ReplicaSpec,
	//   }
	XDLReplicaSpecs map[v1.ReplicaType]*v1.ReplicaSpec `json:"xdlReplicaSpecs"`

	// MinFinishWorkerNum specifies the minimum number of successfully finished
	// workers such that the job is treated as successful. Not specifying this
	// value means all workers should be successfully finished.
	// Note that the JSON key is "minFinishWorkNum", which differs from the Go
	// field name.
	MinFinishWorkerNum *int32 `json:"minFinishWorkNum,omitempty"`

	// MinFinishWorkerPercentage specifies the minimum percentage of all workers
	// that should be finished successfully such that the job is treated as successful.
	// MinFinishWorkerPercentage takes precedence over MinFinishWorkerNum if both are
	// specified.
	// Note that the JSON key is "minFinishWorkRate", which differs from the Go
	// field name.
	MinFinishWorkerPercentage *int32 `json:"minFinishWorkRate,omitempty"`
}

XDLJobSpec defines the desired state of XDLJob

func (*XDLJobSpec) DeepCopy

func (in *XDLJobSpec) DeepCopy() *XDLJobSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJobSpec.

func (*XDLJobSpec) DeepCopyInto

func (in *XDLJobSpec) DeepCopyInto(out *XDLJobSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type XDLJobStatus

// XDLJobStatus defines the observed state of XDLJob.
type XDLJobStatus struct {
	// No fields are declared. XDLJob.Status is typed as v1.JobStatus rather
	// than XDLJobStatus.
}

XDLJobStatus defines the observed state of XDLJob

func (*XDLJobStatus) DeepCopy

func (in *XDLJobStatus) DeepCopy() *XDLJobStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XDLJobStatus.

func (*XDLJobStatus) DeepCopyInto

func (in *XDLJobStatus) DeepCopyInto(out *XDLJobStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type XGBoostJob

// XGBoostJob is the Schema for the xgboostjobs API.
type XGBoostJob struct {
	// TypeMeta and ObjectMeta are embedded; ObjectMeta is serialized under the
	// "metadata" key.
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the desired state of the XGBoostJob.
	Spec XGBoostJobSpec `json:"spec,omitempty"`
	// Status is the observed state of the XGBoostJob.
	Status XGBoostJobStatus `json:"status,omitempty"`
}

XGBoostJob is the Schema for the xgboostjobs API

func (*XGBoostJob) DeepCopy

func (in *XGBoostJob) DeepCopy() *XGBoostJob

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJob.

func (*XGBoostJob) DeepCopyInto

func (in *XGBoostJob) DeepCopyInto(out *XGBoostJob)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*XGBoostJob) DeepCopyObject

func (in *XGBoostJob) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type XGBoostJobList

// XGBoostJobList contains a list of XGBoostJob.
type XGBoostJobList struct {
	// TypeMeta and ListMeta are embedded; ListMeta is serialized under the
	// "metadata" key.
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	// Items holds the XGBoostJob objects in this list.
	Items           []XGBoostJob `json:"items"`
}

XGBoostJobList contains a list of XGBoostJob

func (*XGBoostJobList) DeepCopy

func (in *XGBoostJobList) DeepCopy() *XGBoostJobList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobList.

func (*XGBoostJobList) DeepCopyInto

func (in *XGBoostJobList) DeepCopyInto(out *XGBoostJobList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*XGBoostJobList) DeepCopyObject

func (in *XGBoostJobList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type XGBoostJobSpec

// XGBoostJobSpec defines the desired state of XGBoostJob.
type XGBoostJobSpec struct {

	// RunPolicy encapsulates various runtime policies of the distributed training
	// job, for example how to clean up resources and how long the job can stay
	// active.
	//
	// NOTE(review): RunPolicy is declared as a named field here, whereas
	// TFJobSpec and XDLJobSpec embed it. encoding/json has no "inline" option
	// and only flattens embedded structs, so this field presumably serializes
	// under a "RunPolicy" key rather than inline; confirm that this is intended.
	RunPolicy commonv1.RunPolicy `json:",inline"`

	// XGBReplicaSpecs is a map of ReplicaType to ReplicaSpec that
	// specifies the XGBoost replicas to run.
	// For example,
	//   {
	//     "PS": ReplicaSpec,
	//     "Worker": ReplicaSpec,
	//   }
	XGBReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"xgbReplicaSpecs"`
}

XGBoostJobSpec defines the desired state of XGBoostJob

func (*XGBoostJobSpec) DeepCopy

func (in *XGBoostJobSpec) DeepCopy() *XGBoostJobSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobSpec.

func (*XGBoostJobSpec) DeepCopyInto

func (in *XGBoostJobSpec) DeepCopyInto(out *XGBoostJobSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type XGBoostJobStatus

// XGBoostJobStatus defines the observed state of XGBoostJob.
type XGBoostJobStatus struct {
	// JobStatus is embedded, so its fields are promoted onto XGBoostJobStatus.
	commonv1.JobStatus `json:",inline"`
}

XGBoostJobStatus defines the observed state of XGBoostJob

func (*XGBoostJobStatus) DeepCopy

func (in *XGBoostJobStatus) DeepCopy() *XGBoostJobStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new XGBoostJobStatus.

func (*XGBoostJobStatus) DeepCopyInto

func (in *XGBoostJobStatus) DeepCopyInto(out *XGBoostJobStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL