Documentation ¶
Overview ¶
Package v1alpha1 contains API Schema definitions for the kubecluster.org v1alpha1 API group. Code-generation markers: +kubebuilder:object:generate=true +groupName=kubecluster.org
Index ¶
- Constants
- Variables
- func GetDefaultContainerIndex(spec *corev1.PodSpec, defaultContainerName string) int
- func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
- func HasDefaultPort(spec *corev1.PodSpec, containerIndex int, defaultPortName string) bool
- func RegisterDefaults(scheme *runtime.Scheme) error
- func Resource(resource string) schema.GroupResource
- func SetDefaultPort(spec *corev1.PodSpec, defaultPortName string, defaultPort int32, ...)
- func SetDefaultRestartPolicy(replicaSpec *ReplicaSpec, defaultRestartPolicy RestartPolicy)
- func SetDefaults_KubeCluster(kcluster *KubeCluster)
- func SetDefaults_KubeClusterList(in *KubeClusterList)
- func SetTypeNameToCamelCase(replicaSpecs map[ReplicaType]*ReplicaSpec, typ ReplicaType)
- func ValidateV1alphaCluster(cluster *KubeCluster) error
- type CleanKubeNodePolicy
- type ClusterCondition
- type ClusterConditionType
- type ClusterSpec
- type ClusterStatus
- type ClusterType
- type KubeCluster
- type KubeClusterList
- type KubeNode
- type ReplicaSpec
- type ReplicaStatus
- type ReplicaTemplate
- type ReplicaType
- type RestartPolicy
- type RunPolicy
- type SchedulingPolicy
Constants ¶
const ( // KubeClusterKind is the kind name. KubeClusterKind = "KubeCluster" // KubeClusterPlural is the plural for KubeCluster. KubeClusterPlural = "KubeClusters" // KubeClusterSingular is the singular for KubeCluster. KubeClusterSingular = "KubeCluster" // ControllerNameLabel represents the label key for the operator name, e.g. tf-operator, mpi-operator, etc. ControllerNameLabel = "kubeclusetr.org/controller-name" // ClusterNameLabel represents the label key for the cluster name; the value is the cluster name. ClusterNameLabel = "kubeclusetr.org/clusetr-name" ClusterDefaultContainerName = "kubenode" )
const ( // ClusterTypeLabel represents the label key for the cluster type. ClusterTypeLabel = "kubeclusetr.org/clusetr-type" // ReplicaIndexLabel represents the label key for the replica-index, e.g. 0, 1, 2, etc. ReplicaIndexLabel = "kubeclusetr.org/replica-index" // ReplicaTypeLabel represents the label key for the replica-type, e.g. ps, worker, etc. ReplicaTypeLabel = "kubeclusetr.org/replica-type" // ClusterRoleLabel represents the label key for the cluster role, e.g. master. ClusterRoleLabel = "kubeclusetr.org/clusetr-role" )
Variables ¶
var ( // GroupVersion is group version used to register these objects GroupVersion = schema.GroupVersion{Group: "kubecluster.org", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} // AddToScheme adds the types in this group-version to the given scheme. AddToScheme = SchemeBuilder.AddToScheme )
var SchemeGroupVersion = GroupVersion
SchemeGroupVersion is group version used to register these objects.
Functions ¶
func GetOpenAPIDefinitions ¶
func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenAPIDefinition
func HasDefaultPort ¶
func RegisterDefaults ¶
RegisterDefaults adds defaulters functions to the given scheme. Public to allow building arbitrary schemes. All generated defaulters are covering - they call all nested defaulters.
func Resource ¶
func Resource(resource string) schema.GroupResource
Resource takes an unqualified resource and returns a Group-qualified GroupResource.
func SetDefaultPort ¶
func SetDefaultRestartPolicy ¶
func SetDefaultRestartPolicy(replicaSpec *ReplicaSpec, defaultRestartPolicy RestartPolicy)
func SetDefaults_KubeCluster ¶
func SetDefaults_KubeCluster(kcluster *KubeCluster)
func SetDefaults_KubeClusterList ¶
func SetDefaults_KubeClusterList(in *KubeClusterList)
func SetTypeNameToCamelCase ¶
func SetTypeNameToCamelCase(replicaSpecs map[ReplicaType]*ReplicaSpec, typ ReplicaType)
SetTypeNameToCamelCase sets the name of the replica type from any case to correct case. E.g. from server to Server; from WORKER to Worker.
func ValidateV1alphaCluster ¶
func ValidateV1alphaCluster(cluster *KubeCluster) error
Types ¶
type CleanKubeNodePolicy ¶
type CleanKubeNodePolicy string
CleanKubeNodePolicy describes how to deal with pods when the KubeCluster is finished.
const ( CleanKubeNodePolicyUndefined CleanKubeNodePolicy = "" CleanKubeNodePolicyAll CleanKubeNodePolicy = "All" CleanKubeNodePolicyRunning CleanKubeNodePolicy = "Running" CleanKubeNodePolicyNone CleanKubeNodePolicy = "None" )
func CleanKubeNodePolicyPointer ¶
func CleanKubeNodePolicyPointer(cleanKubeNodePolicy CleanKubeNodePolicy) *CleanKubeNodePolicy
type ClusterCondition ¶
type ClusterCondition struct { // Type of KubeCluster condition. Type ClusterConditionType `json:"type"` // Status of the condition, one of True, False, Unknown. Status v1.ConditionStatus `json:"status"` // The reason for the condition's last transition. Reason string `json:"reason,omitempty"` // A human readable message indicating details about the transition. Message string `json:"message,omitempty"` // The last time this condition was updated. LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` // Last time the condition transitioned from one status to another. LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` }
ClusterCondition describes the state of the KubeCluster at a certain point.
func (*ClusterCondition) DeepCopy ¶
func (in *ClusterCondition) DeepCopy() *ClusterCondition
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterCondition.
func (*ClusterCondition) DeepCopyInto ¶
func (in *ClusterCondition) DeepCopyInto(out *ClusterCondition)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClusterConditionType ¶
type ClusterConditionType string
ClusterConditionType enumerates the condition types that a ClusterCondition can report.
const ( // ClusterCreated means the KubeCluster has been accepted by the system, // but one or more of the pods/services has not been started. // This includes the time before pods are scheduled and launched. ClusterCreated ClusterConditionType = "Created" // ClusterRunning means all sub-resources (e.g. services/pods) of this KubeCluster // have been successfully scheduled and launched. // The training is running without error. ClusterRunning ClusterConditionType = "Running" // ClusterRestarting means one or more sub-resources (e.g. services/pods) of this KubeCluster // reached phase failed but may be restarted according to its restart policy, // which is specified by the user in v1.PodTemplateSpec. // The training is frozen/pending. ClusterRestarting ClusterConditionType = "Restarting" // ClusterSuspended means the KubeCluster has been suspended. ClusterSuspended ClusterConditionType = "Suspended" // ClusterFailed means one or more sub-resources (e.g. services/pods) of this KubeCluster // reached phase failed and will not be restarted. // The training has failed its execution. ClusterFailed ClusterConditionType = "Failed" )
type ClusterSpec ¶
type ClusterSpec struct { // ClusterType defines the type of the cluster to be created. ClusterType ClusterType `json:"clusterType"` // ClusterReplicaSpec defines, per replica type, the template of the cluster to be created. ClusterReplicaSpec map[ReplicaType]*ReplicaSpec `json:"clusterReplicaSpec"` // MainContainer specifies the name of the main container which // runs as kubenode. MainContainer string `json:"mainContainer,omitempty"` // `RunPolicy` encapsulates various runtime policies of the distributed training // cluster, for example how to clean up resources and how long the cluster can stay // active. RunPolicy RunPolicy `json:"runPolicy,omitempty"` }
ClusterSpec defines the desired state of KubeCluster
func (*ClusterSpec) DeepCopy ¶
func (in *ClusterSpec) DeepCopy() *ClusterSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterSpec.
func (*ClusterSpec) DeepCopyInto ¶
func (in *ClusterSpec) DeepCopyInto(out *ClusterSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClusterStatus ¶
type ClusterStatus struct { // Conditions is an array of current observed KubeCluster conditions. Conditions []ClusterCondition `json:"conditions,omitempty"` // ReplicaStatuses is map of ReplicaType and ReplicaStatus, // specifies the status of each replica. ReplicaStatuses map[ReplicaType]*ReplicaStatus `json:"replicaStatuses,omitempty"` // Represents time when the KubeCluster was acknowledged by the KubeCluster controller. // It is not guaranteed to be set in happens-before order across separate operations. // It is represented in RFC3339 form and is in UTC. StartTime *metav1.Time `json:"startTime,omitempty"` // Represents last time when the KubeCluster was reconciled. It is not guaranteed to // be set in happens-before order across separate operations. // It is represented in RFC3339 form and is in UTC. LastReconcileTime *metav1.Time `json:"lastReconcileTime,omitempty"` // Represents time when the cluster was completed. It is not guaranteed to // be set in happens-before order across separate operations. // It is represented in RFC3339 form and is in UTC. CompletionTime *metav1.Time `json:"completionTime,omitempty"` }
ClusterStatus defines the observed state of KubeCluster
func (*ClusterStatus) DeepCopy ¶
func (in *ClusterStatus) DeepCopy() *ClusterStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterStatus.
func (*ClusterStatus) DeepCopyInto ¶
func (in *ClusterStatus) DeepCopyInto(out *ClusterStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ClusterType ¶
type ClusterType string
type KubeCluster ¶
type KubeCluster struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec ClusterSpec `json:"spec,omitempty"` Status ClusterStatus `json:"status,omitempty"` }
KubeCluster is the Schema for the clusters API
func (*KubeCluster) DeepCopy ¶
func (in *KubeCluster) DeepCopy() *KubeCluster
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeCluster.
func (*KubeCluster) DeepCopyInto ¶
func (in *KubeCluster) DeepCopyInto(out *KubeCluster)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*KubeCluster) DeepCopyObject ¶
func (in *KubeCluster) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type KubeClusterList ¶
type KubeClusterList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` Items []KubeCluster `json:"items"` }
KubeClusterList contains a list of KubeCluster
func (*KubeClusterList) DeepCopy ¶
func (in *KubeClusterList) DeepCopy() *KubeClusterList
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeClusterList.
func (*KubeClusterList) DeepCopyInto ¶
func (in *KubeClusterList) DeepCopyInto(out *KubeClusterList)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (*KubeClusterList) DeepCopyObject ¶
func (in *KubeClusterList) DeepCopyObject() runtime.Object
DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
type KubeNode ¶
type KubeNode v1.PodTemplateSpec
KubeNode is the pod template for a node: a pod is used as the replica, so the replica spec is the pod spec.
func (*KubeNode) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeNode.
func (*KubeNode) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReplicaSpec ¶
type ReplicaSpec struct { // Replicas is the desired number of replicas of the given template. // If unspecified, defaults to 1. Replicas *int32 `json:"replicas,omitempty"` // Template is the object that describes the pod that // will be created for this replica. RestartPolicy in PodTemplateSpec // will be overridden by RestartPolicy in ReplicaSpec. Template ReplicaTemplate `json:"template"` // Restart policy for all replicas within the cluster. // One of Always, OnFailure, Never and ExitCode. // Defaults to Never. RestartPolicy RestartPolicy `json:"restartPolicy,omitempty"` }
ReplicaSpec is a description of the replica
func (*ReplicaSpec) DeepCopy ¶
func (in *ReplicaSpec) DeepCopy() *ReplicaSpec
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaSpec.
func (*ReplicaSpec) DeepCopyInto ¶
func (in *ReplicaSpec) DeepCopyInto(out *ReplicaSpec)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReplicaStatus ¶
type ReplicaStatus struct { // The number of actively running pods. Active int32 `json:"active,omitempty"` // The number of pods which are activating. Activating int32 `json:"activating,omitempty"` // The number of pods which reached phase Failed. Failed int32 `json:"failed,omitempty"` // A Selector is a label query over a set of resources. The result of matchLabels and // matchExpressions are ANDed. An empty Selector matches all objects. A null // Selector matches no objects. Selector string `json:"selector,omitempty"` }
ReplicaStatus represents the current observed state of the replica.
func (*ReplicaStatus) DeepCopy ¶
func (in *ReplicaStatus) DeepCopy() *ReplicaStatus
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaStatus.
func (*ReplicaStatus) DeepCopyInto ¶
func (in *ReplicaStatus) DeepCopyInto(out *ReplicaStatus)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReplicaTemplate ¶
type ReplicaTemplate KubeNode
ReplicaTemplate describes the data a replica (or a node) should have when created from a template.
func (*ReplicaTemplate) DeepCopy ¶
func (in *ReplicaTemplate) DeepCopy() *ReplicaTemplate
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaTemplate.
func (*ReplicaTemplate) DeepCopyInto ¶
func (in *ReplicaTemplate) DeepCopyInto(out *ReplicaTemplate)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type ReplicaType ¶
type ReplicaType string
ReplicaType represents the type of the replica. Each operator needs to define its own set of ReplicaTypes.
type RestartPolicy ¶
type RestartPolicy string
RestartPolicy describes how the replicas should be restarted. Only one of the following restart policies may be specified. If none of the following policies is specified, the default one is RestartPolicyAlways.
const ( RestartPolicyAlways RestartPolicy = "Always" RestartPolicyOnFailure RestartPolicy = "OnFailure" RestartPolicyNever RestartPolicy = "Never" // RestartPolicyExitCode policy means that user should add exit code by themselves, // The KubeCluster operator will check these exit codes to // determine the behavior when an error occurs: // - 1-127: permanent error, do not restart. // - 128-255: retryable error, will restart the pod. RestartPolicyExitCode RestartPolicy = "ExitCode" )
type RunPolicy ¶
type RunPolicy struct { // CleanKubeNodePolicy defines the policy to kill pods after the KubeCluster completes. // Default to None. CleanKubeNodePolicy *CleanKubeNodePolicy `json:"CleanKubeNodePolicy,omitempty"` // TTLSecondsAfterFinished is the TTL to clean up clusters. // It may take extra ReconcilePeriod seconds for the cleanup, since // reconcile gets called periodically. // Default to infinite. TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"` // Specifies the duration in seconds relative to the startTime that the KubeCluster may be active // before the system tries to terminate it; value must be positive integer. // +optional ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"` // Optional number of retries before marking this KubeCluster failed. // +optional BackoffLimit *int32 `json:"backoffLimit,omitempty"` // SchedulingPolicy defines the policy related to scheduling, e.g. gang-scheduling // +optional SchedulingPolicy *SchedulingPolicy `json:"schedulingPolicy,omitempty"` // suspend specifies whether the KubeCluster controller should create Pods or not. // If a KubeCluster is created with suspend set to true, no Pods are created by // the KubeCluster controller. If a KubeCluster is suspended after creation (i.e. the // flag goes from false to true), the KubeCluster controller will delete all // active Pods and PodGroups associated with this KubeCluster. // Users must design their workload to gracefully handle this. // Suspending a KubeCluster will reset the StartTime field of the KubeCluster. // // Defaults to false. // +kubebuilder:default:=false // +optional Suspend *bool `json:"suspend,omitempty"` }
RunPolicy encapsulates various runtime policies of the distributed training KubeCluster, for example how to clean up resources and how long the KubeCluster can stay active.
func (*RunPolicy) DeepCopy ¶
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunPolicy.
func (*RunPolicy) DeepCopyInto ¶
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
type SchedulingPolicy ¶
type SchedulingPolicy struct { MinAvailable *int32 `json:"minAvailable,omitempty"` Queue string `json:"queue,omitempty"` MinResources *map[v1.ResourceName]resource.Quantity `json:"minResources,omitempty"` PriorityClass string `json:"priorityClass,omitempty"` ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"` }
SchedulingPolicy encapsulates various scheduling policies of the distributed training KubeCluster, for example `minAvailable` for gang-scheduling.
func (*SchedulingPolicy) DeepCopy ¶
func (in *SchedulingPolicy) DeepCopy() *SchedulingPolicy
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingPolicy.
func (*SchedulingPolicy) DeepCopyInto ¶
func (in *SchedulingPolicy) DeepCopyInto(out *SchedulingPolicy)
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.