Documentation ¶
Index ¶
- Constants
- Variables
- func NewClient() (kubernetes.Interface, error)
- func ResetDeviceMetrics(UUID string, nodeName string, memory float64)
- type ContainerDevice
- type ContainerDeviceRequest
- type ContainerDevices
- type GPUDevice
- type GPUDevices
- func NewGPUDevices(name string, node *v1.Node) *GPUDevices
- func (gs *GPUDevices) AddPodMetrics(index int, PodName string)
- func (gs *GPUDevices) AddResource(pod *v1.Pod)
- func (gs *GPUDevices) Allocate(kubeClient kubernetes.Interface, pod *v1.Pod) error
- func (gs *GPUDevices) FilterNode(pod *v1.Pod, schedulePolicy string) (int, string, error)
- func (gs *GPUDevices) GetIgnoredDevices() []string
- func (gs *GPUDevices) GetStatus() string
- func (gs *GPUDevices) HasDeviceRequest(pod *v1.Pod) bool
- func (gs *GPUDevices) Release(kubeClient kubernetes.Interface, pod *v1.Pod) error
- func (gs *GPUDevices) ScoreNode(pod *v1.Pod, schedulePolicy string) float64
- func (gs *GPUDevices) SubPodMetrics(index int, PodName string)
- func (gs *GPUDevices) SubResource(pod *v1.Pod)
- type GPUUsage
Constants ¶
View Source
// Metric namespace and session-event label values.
const (
	// VolcanoNamespace is the namespace in Prometheus used by Volcano.
	VolcanoNamespace = "volcano"
	// OnSessionOpen is the label value for the session-open event.
	OnSessionOpen = "OnSessionOpen"
	// OnSessionClose is the label value for the session-close event.
	OnSessionClose = "OnSessionClose"
)
View Source
// String keys (annotation and resource names) and defaults used by the vGPU
// device code. The meaning of the undocumented keys is not shown here; see
// their call sites.
const (
	GPUInUse                         = "nvidia.com/use-gputype"
	GPUNoUse                         = "nvidia.com/nouse-gputype"
	AssignedTimeAnnotations          = "volcano.sh/vgpu-time"
	AssignedIDsAnnotations           = "volcano.sh/vgpu-ids-new"
	AssignedIDsToAllocateAnnotations = "volcano.sh/devices-to-allocate"
	AssignedNodeAnnotations          = "volcano.sh/vgpu-node"
	BindTimeAnnotations              = "volcano.sh/bind-time"
	DeviceBindPhase                  = "volcano.sh/bind-phase"
	NvidiaGPUDevice                  = "NVIDIA"

	// VolcanoVGPUMemory extended gpu memory
	VolcanoVGPUMemory = "volcano.sh/vgpu-memory"
	// VolcanoVGPUMemoryPercentage extends gpu memory
	VolcanoVGPUMemoryPercentage = "volcano.sh/vgpu-memory-percentage"
	// VolcanoVGPUCores indicates utilization percentage of vgpu
	VolcanoVGPUCores = "volcano.sh/vgpu-cores"
	// VolcanoVGPUNumber virtual GPU card number
	VolcanoVGPUNumber = "volcano.sh/vgpu-number"
	// VolcanoVGPURegister virtual gpu information registered from device-plugin to scheduler
	VolcanoVGPURegister = "volcano.sh/node-vgpu-register"
	// VolcanoVGPUHandshake for vgpu
	VolcanoVGPUHandshake = "volcano.sh/node-vgpu-handshake"

	// PredicateTime is the key of predicate time
	PredicateTime = "volcano.sh/predicate-time"
	// GPUIndex is the key of gpu index
	GPUIndex = "volcano.sh/gpu-index"

	// UnhealthyGPUIDs list of unhealthy gpu ids
	UnhealthyGPUIDs = "volcano.sh/gpu-unhealthy-ids"

	// DeviceName used to indicate this device
	DeviceName = "hamivgpu"
	// DefaultMemPercentage is the default memory percentage.
	// NOTE(review): 101 is above 100%, so this is presumably a "not set"
	// sentinel — confirm against the code that reads it.
	DefaultMemPercentage = 101
)
Variables ¶
View Source
var ( prometheus.GaugeOpts{ Subsystem: VolcanoNamespace, Name: "vgpu_device_shared_number", Help: "The number of vgpu tasks sharing this card", }, []string{"devID", "NodeName"}, ) VGPUDevicesAllocatedMemory = promauto.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: VolcanoNamespace, Name: "vgpu_device_allocated_memory", Help: "The number of vgpu memory allocated in this card", }, []string{"devID", "NodeName"}, ) VGPUDevicesAllocatedCores = promauto.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: VolcanoNamespace, Name: "vgpu_device_allocated_cores", Help: "The percentage of gpu compute cores allocated in this card", }, []string{"devID", "NodeName"}, ) VGPUDevicesMemoryTotal = promauto.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: VolcanoNamespace, Name: "vgpu_device_memory_limit", Help: "The number of total device memory in this card", }, []string{"devID", "NodeName"}, ) VGPUPodMemoryAllocated = promauto.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: VolcanoNamespace, Name: "vgpu_device_memory_allocation_for_a_certain_pod", Help: "The vgpu device memory allocated for a certain pod", }, []string{"devID", "NodeName", "podName"}, ) VGPUPodCoreAllocated = promauto.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: VolcanoNamespace, Name: "vgpu_device_core_allocation_for_a_certain_pod", Help: "The vgpu device core allocated for a certain pod", }, []string{"devID", "NodeName", "podName"}, ) )
View Source
var NodeLockEnable bool
View Source
var VGPUEnable bool
Functions ¶
func ResetDeviceMetrics ¶ added in v1.10.0
func ResetDeviceMetrics(UUID string, nodeName string, memory float64)
Types ¶
type ContainerDevice ¶
type ContainerDeviceRequest ¶
type ContainerDevices ¶
// ContainerDevices is a slice of ContainerDevice values.
type ContainerDevices []ContainerDevice
type GPUDevice ¶
type GPUDevice struct { // GPU ID ID int // Node this GPU Device belongs Node string // GPU Unique ID UUID string // The resource usage by pods that are sharing this GPU PodMap map[string]*GPUUsage // memory per card Memory uint // max sharing number Number uint // type of this number Type string // Health condition of this GPU Health bool // number of allocated UsedNum uint // number of device memory allocated UsedMem uint // number of core used UsedCore uint }
GPUDevice includes the GPU ID, its memory, and the pods that are sharing it.
type GPUDevices ¶
type GPUDevices struct { Name string // We cache score in filter step according to schedulePolicy, to avoid recalculating in score Score float64 Device map[int]*GPUDevice }
func NewGPUDevices ¶
func NewGPUDevices(name string, node *v1.Node) *GPUDevices
func (*GPUDevices) AddPodMetrics ¶ added in v1.10.0
func (gs *GPUDevices) AddPodMetrics(index int, PodName string)
func (*GPUDevices) AddResource ¶
func (gs *GPUDevices) AddResource(pod *v1.Pod)
AddResource adds the pod to the GPU pool if it is assigned.
func (*GPUDevices) Allocate ¶
func (gs *GPUDevices) Allocate(kubeClient kubernetes.Interface, pod *v1.Pod) error
func (*GPUDevices) FilterNode ¶
func (gs *GPUDevices) FilterNode(pod *v1.Pod, schedulePolicy string) (int, string, error)
func (*GPUDevices) GetIgnoredDevices ¶
func (gs *GPUDevices) GetIgnoredDevices() []string
func (*GPUDevices) GetStatus ¶
func (gs *GPUDevices) GetStatus() string
func (*GPUDevices) HasDeviceRequest ¶
func (gs *GPUDevices) HasDeviceRequest(pod *v1.Pod) bool
func (*GPUDevices) Release ¶
func (gs *GPUDevices) Release(kubeClient kubernetes.Interface, pod *v1.Pod) error
func (*GPUDevices) ScoreNode ¶ added in v1.9.0
func (gs *GPUDevices) ScoreNode(pod *v1.Pod, schedulePolicy string) float64
func (*GPUDevices) SubPodMetrics ¶ added in v1.10.0
func (gs *GPUDevices) SubPodMetrics(index int, PodName string)
func (*GPUDevices) SubResource ¶
func (gs *GPUDevices) SubResource(pod *v1.Pod)
SubResource frees the GPU held by the pod.
Click to show internal directories.
Click to hide internal directories.