Documentation ¶
Index ¶
- func ExtractMemoryGBFromMigFormat(migFormatResourceName v1.ResourceName) (int64, error)
- func GetGPUAnnotationsFromNode(node v1.Node) (GPUStatusAnnotationList, GPUSpecAnnotationList)
- func GetKnownGeometries() map[GPUModel][]Geometry
- func GetRequestedMigResources(pod v1.Pod) map[ProfileName]int
- func IsNvidiaMigDevice(resourceName v1.ResourceName) bool
- func SetKnownGeometries(configs map[GPUModel][]Geometry) error
- func SpecMatchesStatus(specAnnotations []GPUSpecAnnotation, statusAnnotations []GPUStatusAnnotation) bool
- type Client
- type DeviceResource
- type DeviceResourceList
- func (l DeviceResourceList) GetFree() DeviceResourceList
- func (l DeviceResourceList) GetUsed() DeviceResourceList
- func (l DeviceResourceList) GroupBy(keyFunc func(resource DeviceResource) string) map[string]DeviceResourceList
- func (l DeviceResourceList) GroupByGpuIndex() map[int]DeviceResourceList
- func (l DeviceResourceList) GroupByMigProfile() map[Profile]DeviceResourceList
- func (l DeviceResourceList) SortByDeviceId() DeviceResourceList
- type GPU
- func (g *GPU) AddPod(pod v1.Pod) error
- func (g *GPU) AllowsGeometry(geometry Geometry) bool
- func (g *GPU) ApplyGeometry(geometry Geometry) error
- func (g *GPU) CanApplyGeometry(geometry Geometry) (bool, string)
- func (g *GPU) Clone() GPU
- func (g *GPU) GetAllowedGeometries() []Geometry
- func (g *GPU) GetFreeMigDevices() map[ProfileName]int
- func (g *GPU) GetGeometry() Geometry
- func (g *GPU) GetIndex() int
- func (g *GPU) GetModel() GPUModel
- func (g *GPU) GetUsedMigDevices() map[ProfileName]int
- func (g *GPU) HasFreeMigDevices() bool
- func (g *GPU) UpdateGeometryFor(requiredProfiles map[ProfileName]int) bool
- type GPUModel
- type GPUSpecAnnotation
- type GPUSpecAnnotationList
- type GPUStatusAnnotation
- func (a GPUStatusAnnotation) GetGPUIndex() int
- func (a GPUStatusAnnotation) GetGPUIndexWithMigProfile() string
- func (a GPUStatusAnnotation) GetMigProfileName() ProfileName
- func (a GPUStatusAnnotation) GetValue() string
- func (a GPUStatusAnnotation) IsFree() bool
- func (a GPUStatusAnnotation) IsUsed() bool
- type GPUStatusAnnotationList
- func (l GPUStatusAnnotationList) Equal(other *GPUStatusAnnotationList) bool
- func (l GPUStatusAnnotationList) Filter(filteringFunc func(annotation GPUStatusAnnotation) bool) GPUStatusAnnotationList
- func (l GPUStatusAnnotationList) GetFree() GPUStatusAnnotationList
- func (l GPUStatusAnnotationList) GetUsed() GPUStatusAnnotationList
- func (l GPUStatusAnnotationList) GroupByGpuIndex() map[int]GPUStatusAnnotationList
- func (l GPUStatusAnnotationList) GroupByMigProfile() map[Profile]GPUStatusAnnotationList
- type Geometry
- type Node
- type Profile
- type ProfileList
- type ProfileName
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ExtractMemoryGBFromMigFormat ¶
func ExtractMemoryGBFromMigFormat(migFormatResourceName v1.ResourceName) (int64, error)
func GetGPUAnnotationsFromNode ¶
func GetGPUAnnotationsFromNode(node v1.Node) (GPUStatusAnnotationList, GPUSpecAnnotationList)
func GetKnownGeometries ¶
func GetRequestedMigResources ¶
func GetRequestedMigResources(pod v1.Pod) map[ProfileName]int
func IsNvidiaMigDevice ¶
func IsNvidiaMigDevice(resourceName v1.ResourceName) bool
func SetKnownGeometries ¶
func SpecMatchesStatus ¶
func SpecMatchesStatus(specAnnotations []GPUSpecAnnotation, statusAnnotations []GPUStatusAnnotation) bool
Types ¶
type Client ¶
type Client interface { GetMigDeviceResources(ctx context.Context) (DeviceResourceList, gpu.Error) GetUsedMigDeviceResources(ctx context.Context) (DeviceResourceList, gpu.Error) GetAllocatableMigDeviceResources(ctx context.Context) (DeviceResourceList, gpu.Error) CreateMigResources(ctx context.Context, profileList ProfileList) (ProfileList, error) DeleteMigResource(ctx context.Context, resource DeviceResource) gpu.Error DeleteAllExcept(ctx context.Context, resources DeviceResourceList) error }
type DeviceResource ¶
type DeviceResource struct { resource.Device // GpuIndex is the index of the parent GPU to which the MIG device belongs GpuIndex int }
func (DeviceResource) FullResourceName ¶
func (m DeviceResource) FullResourceName() string
FullResourceName returns the full resource name of the MIG device, including the name of the resource corresponding to the MIG profile and the index of the GPU to which it belongs.
func (DeviceResource) GetMigProfileName ¶
func (m DeviceResource) GetMigProfileName() ProfileName
GetMigProfileName returns the name of the MIG profile associated with the device.
Example:
Resource name: nvidia.com/mig-1g.10gb GetMigProfileName() -> 1g.10gb
type DeviceResourceList ¶
type DeviceResourceList []DeviceResource
func (DeviceResourceList) GetFree ¶
func (l DeviceResourceList) GetFree() DeviceResourceList
func (DeviceResourceList) GetUsed ¶
func (l DeviceResourceList) GetUsed() DeviceResourceList
func (DeviceResourceList) GroupBy ¶
func (l DeviceResourceList) GroupBy(keyFunc func(resource DeviceResource) string) map[string]DeviceResourceList
func (DeviceResourceList) GroupByGpuIndex ¶
func (l DeviceResourceList) GroupByGpuIndex() map[int]DeviceResourceList
func (DeviceResourceList) GroupByMigProfile ¶
func (l DeviceResourceList) GroupByMigProfile() map[Profile]DeviceResourceList
func (DeviceResourceList) SortByDeviceId ¶
func (l DeviceResourceList) SortByDeviceId() DeviceResourceList
type GPU ¶
type GPU struct {
// contains filtered or unexported fields
}
func NewGpuOrPanic ¶
func NewGpuOrPanic(model GPUModel, index int, usedMigDevices, freeMigDevices map[ProfileName]int) GPU
func (*GPU) AddPod ¶
AddPod adds a Pod to the GPU by updating the free and used MIG devices according to the MIG resources requested by the Pod.
AddPod returns an error if the GPU does not have enough free MIG resources for the Pod.
func (*GPU) AllowsGeometry ¶
AllowsGeometry returns true if the geometry provided as argument is allowed by the GPU model
func (*GPU) ApplyGeometry ¶
ApplyGeometry applies the MIG geometry provided as argument by changing the free devices of the GPU. It returns an error if the provided geometry is not allowed or if applying it would require deleting any used device of the GPU.
func (*GPU) CanApplyGeometry ¶
CanApplyGeometry returns true if the geometry provided as argument can be applied to the GPU, otherwise it returns false and the reason why the geometry cannot be applied.
func (*GPU) GetAllowedGeometries ¶
GetAllowedGeometries returns the MIG geometries allowed by the GPU model
func (*GPU) GetFreeMigDevices ¶
func (g *GPU) GetFreeMigDevices() map[ProfileName]int
func (*GPU) GetGeometry ¶
func (*GPU) GetUsedMigDevices ¶
func (g *GPU) GetUsedMigDevices() map[ProfileName]int
func (*GPU) HasFreeMigDevices ¶
func (*GPU) UpdateGeometryFor ¶
func (g *GPU) UpdateGeometryFor(requiredProfiles map[ProfileName]int) bool
UpdateGeometryFor tries to update the geometry of the GPU in order to create the highest possible number of required profiles provided as argument, without deleting any of the used profiles.
The method returns true if the GPU geometry gets updated, false otherwise.
type GPUSpecAnnotation ¶
func NewGPUSpecAnnotationFromNodeAnnotation ¶
func NewGPUSpecAnnotationFromNodeAnnotation(key, value string) (GPUSpecAnnotation, error)
func NewGpuSpecAnnotation ¶
func NewGpuSpecAnnotation(gpuIndex int, profile ProfileName, quantity int) GPUSpecAnnotation
func (GPUSpecAnnotation) GetGPUIndex ¶
func (a GPUSpecAnnotation) GetGPUIndex() int
func (GPUSpecAnnotation) GetGPUIndexWithMigProfile ¶
func (a GPUSpecAnnotation) GetGPUIndexWithMigProfile() string
GetGPUIndexWithMigProfile returns the GPU index included in the annotation together with the respective MIG profile. Example:
Annotation
"n8s.nebuly.ai/spec-gpu-0-1g.10gb"
Result
"0-1g.10gb"
func (GPUSpecAnnotation) GetMigProfileName ¶
func (a GPUSpecAnnotation) GetMigProfileName() ProfileName
func (GPUSpecAnnotation) GetValue ¶
func (a GPUSpecAnnotation) GetValue() string
type GPUSpecAnnotationList ¶
type GPUSpecAnnotationList []GPUSpecAnnotation
func (GPUSpecAnnotationList) GroupByGpuIndex ¶
func (l GPUSpecAnnotationList) GroupByGpuIndex() map[int]GPUSpecAnnotationList
func (GPUSpecAnnotationList) GroupByMigProfile ¶
func (l GPUSpecAnnotationList) GroupByMigProfile() map[Profile]GPUSpecAnnotationList
type GPUStatusAnnotation ¶
func ComputeStatusAnnotations ¶
func ComputeStatusAnnotations(used []DeviceResource, free []DeviceResource) []GPUStatusAnnotation
func NewGPUStatusAnnotation ¶
func NewGPUStatusAnnotation(key, value string) (GPUStatusAnnotation, error)
func (GPUStatusAnnotation) GetGPUIndex ¶
func (a GPUStatusAnnotation) GetGPUIndex() int
func (GPUStatusAnnotation) GetGPUIndexWithMigProfile ¶
func (a GPUStatusAnnotation) GetGPUIndexWithMigProfile() string
GetGPUIndexWithMigProfile returns the GPU index included in the annotation together with the respective MIG profile. Example:
Annotation
"n8s.nebuly.ai/status-gpu-0-1g.10gb-used"
Result
"0-1g.10gb"
func (GPUStatusAnnotation) GetMigProfileName ¶
func (a GPUStatusAnnotation) GetMigProfileName() ProfileName
func (GPUStatusAnnotation) GetValue ¶
func (a GPUStatusAnnotation) GetValue() string
func (GPUStatusAnnotation) IsFree ¶
func (a GPUStatusAnnotation) IsFree() bool
IsFree returns true if the annotation refers to a free device
func (GPUStatusAnnotation) IsUsed ¶
func (a GPUStatusAnnotation) IsUsed() bool
IsUsed returns true if the annotation refers to a used device
type GPUStatusAnnotationList ¶
type GPUStatusAnnotationList []GPUStatusAnnotation
func (GPUStatusAnnotationList) Equal ¶
func (l GPUStatusAnnotationList) Equal(other *GPUStatusAnnotationList) bool
func (GPUStatusAnnotationList) Filter ¶
func (l GPUStatusAnnotationList) Filter(filteringFunc func(annotation GPUStatusAnnotation) bool) GPUStatusAnnotationList
func (GPUStatusAnnotationList) GetFree ¶
func (l GPUStatusAnnotationList) GetFree() GPUStatusAnnotationList
GetFree returns a new GPUStatusAnnotationList containing the annotations referring to free devices.
func (GPUStatusAnnotationList) GetUsed ¶
func (l GPUStatusAnnotationList) GetUsed() GPUStatusAnnotationList
GetUsed returns a new GPUStatusAnnotationList containing the annotations referring to used devices.
func (GPUStatusAnnotationList) GroupByGpuIndex ¶
func (l GPUStatusAnnotationList) GroupByGpuIndex() map[int]GPUStatusAnnotationList
func (GPUStatusAnnotationList) GroupByMigProfile ¶
func (l GPUStatusAnnotationList) GroupByMigProfile() map[Profile]GPUStatusAnnotationList
type Geometry ¶
type Geometry map[ProfileName]int
Geometry corresponds to the MIG Geometry of a GPU, namely the MIG profiles of the GPU with the respective quantity.
func GetAllowedGeometries ¶
func (Geometry) AsResources ¶
func (g Geometry) AsResources() map[v1.ResourceName]int
type Node ¶
func NewNode ¶
NewNode creates a new MIG Node starting from the node provided as argument.
The function constructs the MIG GPUs of the provided node using both the n8s.nebuly.ai MIG status annotations and the labels exposed by the NVIDIA gpu-feature-discovery tool. Specifically, the following labels are used: - GPU product ("nvidia.com/gpu.product") - GPU count ("nvidia.com/gpu.count")
If the v1.Node provided as argument does not have the GPU Product label, the returned node will not contain any mig.GPU.
func (*Node) AddPod ¶
AddPod adds a Pod to the node by updating the free and used MIG devices of the Node GPUs according to the MIG resources requested by the Pod.
AddPod returns an error if the node does not have any GPU providing enough free MIG resources for the Pod.
func (*Node) GetGeometry ¶
GetGeometry returns the overall MIG geometry of the node, which corresponds to the sum of the MIG geometry of all the GPUs present in the Node.
func (*Node) HasFreeMigCapacity ¶
HasFreeMigCapacity returns true if the Node has at least one GPU with free MIG capacity, namely it either has a free MIG device or its allowed MIG geometries allow creating at least one more MIG device.
func (*Node) UpdateGeometryFor ¶
func (n *Node) UpdateGeometryFor(profiles map[ProfileName]int) bool
UpdateGeometryFor tries to update the MIG geometry of each single GPU of the node in order to create the MIG profiles provided as argument.
The method returns true if it updates the MIG geometry of any GPU, false otherwise.
type Profile ¶
type Profile struct { GpuIndex int Name ProfileName }
type ProfileList ¶
type ProfileList []Profile
func (ProfileList) GroupByGPU ¶
func (p ProfileList) GroupByGPU() map[int]ProfileList
type ProfileName ¶
type ProfileName string
const ( Profile1g6gb ProfileName = "1g.6gb" Profile2g12gb ProfileName = "2g.12gb" Profile4g24gb ProfileName = "4g.24gb" Profile1g5gb ProfileName = "1g.5gb" Profile2g10gb ProfileName = "2g.10gb" Profile3g20gb ProfileName = "3g.20gb" Profile4g20gb ProfileName = "4g.20gb" Profile7g40gb ProfileName = "7g.40gb" Profile1g10gb ProfileName = "1g.10gb" Profile2g20gb ProfileName = "2g.20gb" Profile3g40gb ProfileName = "3g.40gb" Profile4g40gb ProfileName = "4g.40gb" Profile7g79gb ProfileName = "7g.79gb" )
func ExtractMigProfile ¶
func ExtractMigProfile(migFormatResourceName v1.ResourceName) (ProfileName, error)
ExtractMigProfile extracts the name of the MIG profile from the provided resource name, and returns an error if the resource name is not a valid NVIDIA MIG resource.
Example:
nvidia.com/mig-1g.10gb => 1g.10gb
func (ProfileName) AsResourceName ¶
func (p ProfileName) AsResourceName() v1.ResourceName
func (ProfileName) AsString ¶
func (p ProfileName) AsString() string
func (ProfileName) SmallerThan ¶
func (p ProfileName) SmallerThan(other ProfileName) bool