Documentation ¶
Index ¶
- Constants
- func NewSharedGPUManager(enableMPS, healthCheck bool, bp ShareUnit) *sharedGPUManager
- func StackTrace(all bool) string
- type NvidiaDevicePlugin
- func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error)
- func (m *NvidiaDevicePlugin) GetDeviceNameByIndex(index uint) (name string, found bool)
- func (m *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error)
- func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error
- func (m *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error)
- func (m *NvidiaDevicePlugin) Register(kubeletEndpoint, resourceName string) error
- func (m *NvidiaDevicePlugin) Serve() error
- func (m *NvidiaDevicePlugin) Start() error
- func (m *NvidiaDevicePlugin) Stop() error
- type ShareUnit
Constants ¶
View Source
const (
	OptimisticLockErrorMsg     = "the object has been modified; please apply your changes to the latest version and try again"
	EnvResourceIndex           = "OPENXPU_XPU_SHARES_INDEX"
	EnvResourceByPod           = "OPENXPU_XPU_SHARES_POD"
	EnvResourceByContainer     = "OPENXPU_XPU_SHARES"
	EnvResourceByDev           = "OPENXPU_XPU_SHARES_TOTAL"
	EnvAssignedFlag            = "OPENXPU_XPU_SHARES_ALLOCATED"
	EnvResourceAssumeTime      = "OPENXPU_XPU_SHARES_FILTER_STAMP"
	EnvResourceAssignTime      = "OPENXPU_XPU_SHARES_ALLOCATED_STAMP"
	EnvNodeLabelForDisableCGPU = "xpu.disable.isolation"
	GiBPrefix                  = ShareUnit("GiB")
	MiBPrefix                  = ShareUnit("MiB")
)
Variables ¶
This section is empty.
Functions ¶
func NewSharedGPUManager ¶
func StackTrace ¶
Types ¶
type NvidiaDevicePlugin ¶
NvidiaDevicePlugin implements the Kubernetes device plugin API
func NewNvidiaDevicePlugin ¶
func NewNvidiaDevicePlugin(mps, healthCheck bool) (*NvidiaDevicePlugin, error)
NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
func (*NvidiaDevicePlugin) Allocate ¶
func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error)
Allocate returns a list of devices.
func (*NvidiaDevicePlugin) GetDeviceNameByIndex ¶
func (m *NvidiaDevicePlugin) GetDeviceNameByIndex(index uint) (name string, found bool)
func (*NvidiaDevicePlugin) GetDevicePluginOptions ¶
func (m *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error)
func (*NvidiaDevicePlugin) ListAndWatch ¶
func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error
ListAndWatch lists devices and updates that list according to the health status.
func (*NvidiaDevicePlugin) PreStartContainer ¶
func (m *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error)
func (*NvidiaDevicePlugin) Register ¶
func (m *NvidiaDevicePlugin) Register(kubeletEndpoint, resourceName string) error
Register registers the device plugin for the given resourceName with Kubelet.
func (*NvidiaDevicePlugin) Serve ¶
func (m *NvidiaDevicePlugin) Serve() error
Serve starts the gRPC server and registers the device plugin with Kubelet.
func (*NvidiaDevicePlugin) Start ¶
func (m *NvidiaDevicePlugin) Start() error
Start starts the gRPC server of the device plugin
func (*NvidiaDevicePlugin) Stop ¶
func (m *NvidiaDevicePlugin) Stop() error
Stop stops the gRPC server
Click to show internal directories.
Click to hide internal directories.