Documentation ¶
Index ¶
- Constants
- Variables
- func CalcPodResources(pod *v1.Pod) *pfResources.Resource
- func ConvertToStatus(obj interface{}, gvk k8sschema.GroupVersionKind) (interface{}, error)
- func GPUDeviceIDX(idx int) int64
- func GPUSharedDevices(deviceIDX int64) []int
- func GetJobFrameworkVersion(jobType commomschema.JobType, framework commomschema.Framework) commomschema.FrameworkVersion
- func GetJobTypeAndFramework(gvk schema.GroupVersionKind) (commomschema.JobType, commomschema.Framework)
- func GetTaskMessage(podStatus *v1.PodStatus) string
- func GetTaskStatus(podStatus *v1.PodStatus) (schema.TaskStatus, error)
- func IsAttachableVolumeResourceName(name v1.ResourceName) bool
- func IsExtendedResourceName(name v1.ResourceName) bool
- func IsGPUX(rName string) bool
- func IsHugePageResourceName(name v1.ResourceName) bool
- func IsNativeResource(name v1.ResourceName) bool
- func IsPrefixedNativeResource(name v1.ResourceName) bool
- func IsScalarResourceName(name v1.ResourceName) bool
- func IsSharedGPUX(rName string, rValue int64) (int64, bool)
- func NewMinResourceList() v1.ResourceList
- func NewResource(rl v1.ResourceList) *pfResources.Resource
- func NewResourceList(r *pfResources.Resource) v1.ResourceList
- func SharedGPUIDX(pod *v1.Pod) int64
- func SubQuota(r *pfResources.Resource, pod *v1.Pod) error
- func SubWithGPUX(total *pfResources.Resource, rr map[string]int64) map[string]interface{}
- type ContainerStatusMessage
- type PodStatusMessage
- type StatusInfo
- func ArgoWorkflowStatus(obj interface{}) (StatusInfo, error)
- func MPIJobStatus(obj interface{}) (StatusInfo, error)
- func MXNetJobStatus(obj interface{}) (StatusInfo, error)
- func PaddleJobStatus(obj interface{}) (StatusInfo, error)
- func PytorchJobStatus(obj interface{}) (StatusInfo, error)
- func RayJobStatus(obj interface{}) (StatusInfo, error)
- func SingleJobStatus(obj interface{}) (StatusInfo, error)
- func SparkAppStatus(obj interface{}) (StatusInfo, error)
- func TFJobStatus(obj interface{}) (StatusInfo, error)
- func VCJobStatus(obj interface{}) (StatusInfo, error)
Constants ¶
// Default resource requests and GPU index settings shared across the package.
const (
	// DefaultCpuRequest is the default CPU request value.
	// NOTE(review): presumably expressed in whole cores — confirm against callers.
	DefaultCpuRequest = 1
	// DefaultMemRequest is the default memory request in bytes:
	// 1Gi = 1024*1024*1024B = 1073741824B.
	DefaultMemRequest = 1073741824
	// MaxGPUIndex is the GPU index limit.
	// NOTE(review): presumably an upper bound on GPU device indexes — confirm
	// whether callers treat it as inclusive or exclusive.
	MaxGPUIndex = 16
	// GPUIndexResources is the string key "gpuDeviceIDX".
	// NOTE(review): presumably the resource name under which the GPU device
	// index is recorded — confirm against callers.
	GPUIndexResources = "gpuDeviceIDX"
)
Variables ¶
var ( PodGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"} VCJobGVK = schema.GroupVersionKind{Group: "batch.volcano.sh", Version: "v1alpha1", Kind: "Job"} PodGroupGVK = schema.GroupVersionKind{Group: "scheduling.volcano.sh", Version: "v1beta1", Kind: "PodGroup"} VCQueueGVK = schema.GroupVersionKind{Group: "scheduling.volcano.sh", Version: "v1beta1", Kind: "Queue"} EQuotaGVK = schema.GroupVersionKind{Group: "scheduling.volcano.sh", Version: "v1beta1", Kind: "ElasticResourceQuota"} SparkAppGVK = schema.GroupVersionKind{Group: "sparkoperator.k8s.io", Version: "v1beta2", Kind: "SparkApplication"} PaddleJobGVK = schema.GroupVersionKind{Group: "batch.paddlepaddle.org", Version: "v1", Kind: "PaddleJob"} // PyTorchJobGVK TFJobGVK defines GVK for kubeflow jobs PyTorchJobGVK = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1", Kind: "PyTorchJob"} TFJobGVK = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1", Kind: "TFJob"} MPIJobGVK = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1", Kind: "MPIJob"} MXNetJobGVK = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1", Kind: "MXJob"} XGBoostJobGVK = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1", Kind: "XGBoostJob"} RayJobGVK = schema.GroupVersionKind{Group: "ray.io", Version: "v1alpha1", Kind: "RayJob"} // ArgoWorkflowGVK defines GVK for argo Workflow ArgoWorkflowGVK = schema.GroupVersionKind{Group: "argoproj.io", Version: "v1alpha1", Kind: "Workflow"} // GVKJobStatusMap contains GroupVersionKind and convertStatus function to sync job status GVKJobStatusMap = map[schema.GroupVersionKind]bool{ SparkAppGVK: true, PaddleJobGVK: true, PodGVK: true, ArgoWorkflowGVK: true, PyTorchJobGVK: true, TFJobGVK: true, MXNetJobGVK: true, MPIJobGVK: true, RayJobGVK: true, } )
// GPU-related key and resource-name strings.
var (
	// GPUCorePodKey is the string key "GPU_CORE_POD".
	// NOTE(review): presumably an environment-variable or annotation key — confirm.
	GPUCorePodKey = "GPU_CORE_POD"
	// GPUIdxKey is the string key "GPU_IDX".
	// NOTE(review): presumably carries the GPU index assigned to a pod — confirm.
	GPUIdxKey = "GPU_IDX"
	// GPURNamePrefix is the string "/gpu".
	// NOTE(review): despite the name it looks like a fragment matched inside GPU
	// resource names (e.g. "<vendor>/gpu") — confirm how callers match it.
	GPURNamePrefix = "/gpu"
	// GPUXName is the base name "cgpu".
	GPUXName = "cgpu"
	// GPUXCoreName is GPUXName with a "_core" suffix, i.e. "cgpu_core".
	GPUXCoreName = fmt.Sprintf("%s_core", GPUXName)
)
var DiscoveryHandlerFunc = http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { var obj interface{} switch req.URL.Path { case "/apis/batch.volcano.sh/v1alpha1": obj = &metav1.APIResourceList{ GroupVersion: "batch.volcano.sh/v1alpha1", APIResources: []metav1.APIResource{ {Name: "jobs", Namespaced: true, Kind: "Job"}, }, } case "/apis/scheduling.volcano.sh/v1beta1": obj = &metav1.APIResourceList{ GroupVersion: "scheduling.volcano.sh/v1beta1", APIResources: []metav1.APIResource{ {Name: "queues", Namespaced: false, Kind: "Queue"}, {Name: "elasticresourcequotas", Namespaced: false, Kind: "ElasticResourceQuota"}, }, } case "/apis/sparkoperator.k8s.io/v1beta2": obj = &metav1.APIResourceList{ GroupVersion: "sparkoperator.k8s.io/v1beta2", APIResources: []metav1.APIResource{ {Name: "sparkapplications", Namespaced: true, Kind: "SparkApplication"}, }, } case "/apis/batch.paddlepaddle.org/v1": obj = &metav1.APIResourceList{ GroupVersion: "batch.paddlepaddle.org/v1", APIResources: []metav1.APIResource{ {Name: "paddlejobs", Namespaced: true, Kind: "PaddleJob"}, }, } case "/apis/kubeflow.org/v1": obj = &metav1.APIResourceList{ GroupVersion: "kubeflow.org/v1", APIResources: []metav1.APIResource{ {Name: "pytorchjobs", Namespaced: true, Kind: "PyTorchJob"}, {Name: "tfjobs", Namespaced: true, Kind: "TFJob"}, {Name: "mpijobs", Namespaced: true, Kind: "MPIJob"}, }, } case "/apis/argoproj.io/v1alpha1": obj = &metav1.APIResourceList{ GroupVersion: "argoproj.io/v1alpha1", APIResources: []metav1.APIResource{ {Name: "workflows", Namespaced: true, Kind: "Workflow"}, }, } case "/apis/ray.io/v1alpha1": obj = &metav1.APIResourceList{ GroupVersion: "ray.io/v1alpha1", APIResources: []metav1.APIResource{ {Name: "rayjobs", Namespaced: true, Kind: "RayJob"}, }, } case "/api/v1": obj = &metav1.APIResourceList{ GroupVersion: "v1", APIResources: []metav1.APIResource{ {Name: "pods", Namespaced: true, Kind: "Pod"}, {Name: "namespaces", Namespaced: false, Kind: "Namespace"}, {Name: 
"configmaps", Namespaced: true, Kind: "ConfigMap"}, }, } case "/api": obj = &metav1.APIVersions{ Versions: []string{ "v1", }, } case "/apis": obj = &metav1.APIGroupList{ Groups: []metav1.APIGroup{ { Name: "batch.volcano.sh", Versions: []metav1.GroupVersionForDiscovery{ {GroupVersion: "batch.volcano.sh/v1alpha1", Version: "v1alpha1"}, }, }, { Name: "scheduling.volcano.sh", Versions: []metav1.GroupVersionForDiscovery{ {GroupVersion: "scheduling.volcano.sh/v1beta1", Version: "v1beta1"}, }, }, { Name: "sparkoperator.k8s.io", Versions: []metav1.GroupVersionForDiscovery{ {GroupVersion: "sparkoperator.k8s.io/v1beta2", Version: "v1beta2"}, }, }, { Name: "batch.paddlepaddle.org", Versions: []metav1.GroupVersionForDiscovery{ {GroupVersion: "batch.paddlepaddle.org/v1", Version: "v1"}, }, }, { Name: "kubeflow.org", Versions: []metav1.GroupVersionForDiscovery{ {GroupVersion: "kubeflow.org/v1", Version: "v1"}, }, }, { Name: "argoproj.io", Versions: []metav1.GroupVersionForDiscovery{ {GroupVersion: "argoproj.io/v1alpha1", Version: "v1alpha1"}, }, }, { Name: "ray.io", Versions: []metav1.GroupVersionForDiscovery{ {GroupVersion: "ray.io/v1alpha1", Version: "v1alpha1"}, }, }, }, } default: w.WriteHeader(http.StatusNotFound) return } output, err := json.Marshal(obj) if err != nil { fmt.Printf("unexpected encoding error: %v", err) return } w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) if _, err := w.Write(output); err != nil { fmt.Printf("unexpected w.Write error: %v", err) return } })
DiscoveryHandlerFunc is an HTTP handler that serves canned API discovery responses for mock resources.
Functions ¶
func CalcPodResources ¶ added in v0.14.3
func CalcPodResources(pod *v1.Pod) *pfResources.Resource
CalcPodResources calculates the pod's minimum resources.
func ConvertToStatus ¶
func ConvertToStatus(obj interface{}, gvk k8sschema.GroupVersionKind) (interface{}, error)
func GPUDeviceIDX ¶ added in v0.14.6
GPUDeviceIDX process virtual gpu
func GPUSharedDevices ¶ added in v0.14.6
GPUSharedDevices get shared gpu device index
func GetJobFrameworkVersion ¶ added in v0.14.5
func GetJobFrameworkVersion(jobType commomschema.JobType, framework commomschema.Framework) commomschema.FrameworkVersion
func GetJobTypeAndFramework ¶
func GetJobTypeAndFramework(gvk schema.GroupVersionKind) (commomschema.JobType, commomschema.Framework)
func GetTaskMessage ¶
GetTaskMessage constructs a message from the pod status.
func GetTaskStatus ¶
func GetTaskStatus(podStatus *v1.PodStatus) (schema.TaskStatus, error)
func IsAttachableVolumeResourceName ¶
func IsAttachableVolumeResourceName(name v1.ResourceName) bool
IsAttachableVolumeResourceName returns true when the resource name is prefixed in attachable volume
func IsExtendedResourceName ¶
func IsExtendedResourceName(name v1.ResourceName) bool
IsExtendedResourceName returns true if: 1. the resource name is not in the default namespace; 2. resource name does not have "requests." prefix, to avoid confusion with the convention in quota 3. it satisfies the rules in IsQualifiedName() after converted into quota resource name
func IsHugePageResourceName ¶
func IsHugePageResourceName(name v1.ResourceName) bool
IsHugePageResourceName returns true if the resource name has the huge page resource prefix.
func IsNativeResource ¶
func IsNativeResource(name v1.ResourceName) bool
IsNativeResource returns true if the resource name is in the *kubernetes.io/ namespace. Partially-qualified (unprefixed) names are implicitly in the kubernetes.io/ namespace.
func IsPrefixedNativeResource ¶
func IsPrefixedNativeResource(name v1.ResourceName) bool
IsPrefixedNativeResource returns true if the resource name is in the *kubernetes.io/ namespace.
func IsScalarResourceName ¶
func IsScalarResourceName(name v1.ResourceName) bool
IsScalarResourceName validates the resource for Extended, Hugepages, Native and AttachableVolume resources
func NewMinResourceList ¶ added in v0.14.3
func NewMinResourceList() v1.ResourceList
func NewResource ¶
func NewResource(rl v1.ResourceList) *pfResources.Resource
func NewResourceList ¶
func NewResourceList(r *pfResources.Resource) v1.ResourceList
NewResourceList creates a new resource list from a resource object
func SharedGPUIDX ¶ added in v0.14.6
func SubWithGPUX ¶ added in v0.14.6
func SubWithGPUX(total *pfResources.Resource, rr map[string]int64) map[string]interface{}
Types ¶
type ContainerStatusMessage ¶
// ContainerStatusMessage is the JSON-serializable status summary of a single
// container. Every field is tagged omitempty, so zero values are left out of
// the encoded output.
// NOTE(review): presumably populated from v1.ContainerStatus — confirm against
// the code that builds it.
type ContainerStatusMessage struct {
	// Name is encoded under the JSON key "name".
	Name string `json:"name,omitempty"`
	// ContainerID is encoded under the JSON key "containerID".
	ContainerID string `json:"containerID,omitempty"`
	// RestartCount is encoded under the JSON key "restartCount"; a count of 0
	// is omitted from the output.
	RestartCount int32 `json:"restartCount,omitempty"`
	// WaitingState is encoded under "waitingState"; omitted when nil.
	WaitingState *v1.ContainerStateWaiting `json:"waitingState,omitempty"`
	// TerminatedState is encoded under "terminatedState"; omitted when nil.
	TerminatedState *v1.ContainerStateTerminated `json:"terminatedState,omitempty"`
}
func (*ContainerStatusMessage) String ¶
func (cs *ContainerStatusMessage) String() string
type PodStatusMessage ¶
// PodStatusMessage is the JSON-serializable status summary of a pod, together
// with one ContainerStatusMessage per reported container. Every field is
// tagged omitempty, so zero values are left out of the encoded output.
// NOTE(review): presumably populated from v1.PodStatus — confirm against the
// code that builds it.
type PodStatusMessage struct {
	// Phase is encoded under the JSON key "phase".
	Phase v1.PodPhase `json:"phase,omitempty"`
	// Message is encoded under the JSON key "message".
	Message string `json:"message,omitempty"`
	// Reason is encoded under the JSON key "reason".
	Reason string `json:"reason,omitempty"`
	// ContainerMessages is encoded under "containerMessages"; omitted when empty.
	ContainerMessages []ContainerStatusMessage `json:"containerMessages,omitempty"`
}
func (*PodStatusMessage) String ¶
func (ps *PodStatusMessage) String() string
type StatusInfo ¶
// StatusInfo is the result type returned by the per-framework status
// functions in this package (ArgoWorkflowStatus, MPIJobStatus, TFJobStatus,
// VCJobStatus, and so on).
type StatusInfo struct {
	// OriginStatus is a status string.
	// NOTE(review): presumably the status as reported by the underlying
	// framework object, before conversion — confirm.
	OriginStatus string
	// Status is the job status expressed as a commomschema.JobStatus.
	Status commomschema.JobStatus
	// Message is the accompanying status message.
	Message string
}
func ArgoWorkflowStatus ¶
func ArgoWorkflowStatus(obj interface{}) (StatusInfo, error)
ArgoWorkflowStatus gets the argo workflow status and message from interface{}, and converts them to JobStatus
func MPIJobStatus ¶ added in v0.14.4
func MPIJobStatus(obj interface{}) (StatusInfo, error)
MPIJobStatus get job status, message for MPIJob
func MXNetJobStatus ¶ added in v0.14.4
func MXNetJobStatus(obj interface{}) (StatusInfo, error)
MXNetJobStatus get job status, message for MXNetJob
func PaddleJobStatus ¶
func PaddleJobStatus(obj interface{}) (StatusInfo, error)
PaddleJobStatus gets the paddle job status and message from interface{}, and converts them to JobStatus
func PytorchJobStatus ¶ added in v0.14.4
func PytorchJobStatus(obj interface{}) (StatusInfo, error)
PytorchJobStatus get job status, message for PyTorchJob
func RayJobStatus ¶ added in v0.14.5
func RayJobStatus(obj interface{}) (StatusInfo, error)
RayJobStatus get job status, message for RayJob
func SingleJobStatus ¶
func SingleJobStatus(obj interface{}) (StatusInfo, error)
SingleJobStatus gets the single job status and message from interface{}, and converts them to JobStatus
func SparkAppStatus ¶
func SparkAppStatus(obj interface{}) (StatusInfo, error)
SparkAppStatus gets the spark application status and message from interface{}, and converts them to JobStatus
func TFJobStatus ¶ added in v0.14.4
func TFJobStatus(obj interface{}) (StatusInfo, error)
TFJobStatus get job status, message for TFJob
func VCJobStatus ¶
func VCJobStatus(obj interface{}) (StatusInfo, error)
VCJobStatus gets the vc job status and message from interface{}, and converts them to JobStatus