Documentation ¶
Index ¶
- Constants
- Variables
- type Config
- type NvidiaDevice
- func NewNvidiaDevice(_ context.Context, log log.Logger) *NvidiaDevice
- func (d *NvidiaDevice) ConfigSchema() (*hclspec.Spec, error)
- func (d *NvidiaDevice) Fingerprint(ctx context.Context) (<-chan *device.FingerprintResponse, error)
- func (d *NvidiaDevice) PluginInfo() (*base.PluginInfoResponse, error)
- func (d *NvidiaDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, error)
- func (d *NvidiaDevice) SetConfig(cfg *base.Config) error
- func (d *NvidiaDevice) Stats(ctx context.Context, interval time.Duration) (<-chan *device.StatsResponse, error)
Constants ¶
const ( // Attribute names and units for reporting Fingerprint output MemoryAttr = "memory" PowerAttr = "power" BAR1Attr = "bar1" DriverVersionAttr = "driver_version" CoresClockAttr = "cores_clock" MemoryClockAttr = "memory_clock" PCIBandwidthAttr = "pci_bandwidth" DisplayStateAttr = "display_state" PersistenceModeAttr = "persistence_mode" )
const ( // Attribute names for reporting stats output PowerUsageAttr = "Power usage" PowerUsageUnit = "W" PowerUsageDesc = "Power usage for this GPU in watts and " + "its associated circuitry (e.g. memory) / Maximum GPU Power" GPUUtilizationAttr = "GPU utilization" GPUUtilizationUnit = "%" GPUUtilizationDesc = "Percent of time over the past sample period " + "during which one or more kernels were executing on the GPU." MemoryUtilizationAttr = "Memory utilization" MemoryUtilizationUnit = "%" MemoryUtilizationDesc = "Percentage of bandwidth used during the past sample period" EncoderUtilizationAttr = "Encoder utilization" EncoderUtilizationUnit = "%" EncoderUtilizationDesc = "Percent of time over the past sample period " + "during which GPU Encoder was used" DecoderUtilizationAttr = "Decoder utilization" DecoderUtilizationUnit = "%" DecoderUtilizationDesc = "Percent of time over the past sample period " + "during which GPU Decoder was used" TemperatureAttr = "Temperature" TemperatureUnit = "C" // Celsius degrees TemperatureDesc = "Temperature of the Unit" MemoryStateAttr = "Memory state" MemoryStateUnit = "MiB" // Mebibytes MemoryStateDesc = "UsedMemory / TotalMemory" BAR1StateAttr = "BAR1 buffer state" BAR1StateUnit = "MiB" // Mebibytes BAR1StateDesc = "UsedBAR1 / TotalBAR1" ECCErrorsL1CacheAttr = "ECC L1 errors" ECCErrorsL1CacheUnit = "#" // number of errors ECCErrorsL1CacheDesc = "Requested L1Cache error counter for the device" ECCErrorsL2CacheAttr = "ECC L2 errors" ECCErrorsL2CacheUnit = "#" // number of errors ECCErrorsL2CacheDesc = "Requested L2Cache error counter for the device" ECCErrorsDeviceAttr = "ECC memory errors" ECCErrorsDeviceUnit = "#" // number of errors ECCErrorsDeviceDesc = "Requested memory error counter for the device" )
const (
// Nvidia-container-runtime environment variable names
NvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
)
Variables ¶
var ( // PluginID is the nvidia plugin metadata registered in the plugin // catalog. PluginID = loader.PluginID{ Name: pluginName, PluginType: base.PluginTypeDevice, } // PluginConfig is the nvidia factory function registered in the // plugin catalog. PluginConfig = &loader.InternalPluginConfig{ Factory: func(ctx context.Context, l log.Logger) interface{} { return NewNvidiaDevice(ctx, l) }, } )
Functions ¶
This section is empty.
Types ¶
type Config ¶
type Config struct { IgnoredGPUIDs []string `codec:"ignored_gpu_ids"` FingerprintPeriod string `codec:"fingerprint_period"` }
Config contains configuration information for the plugin.
type NvidiaDevice ¶
type NvidiaDevice struct {
// contains filtered or unexported fields
}
NvidiaDevice contains all plugin-specific data.
func NewNvidiaDevice ¶
func NewNvidiaDevice(_ context.Context, log log.Logger) *NvidiaDevice
NewNvidiaDevice returns a new nvidia device plugin.
func (*NvidiaDevice) ConfigSchema ¶
func (d *NvidiaDevice) ConfigSchema() (*hclspec.Spec, error)
ConfigSchema returns the plugin's configuration schema.
func (*NvidiaDevice) Fingerprint ¶
func (d *NvidiaDevice) Fingerprint(ctx context.Context) (<-chan *device.FingerprintResponse, error)
Fingerprint streams detected devices. If device changes are detected or a device's health changes, messages will be emitted.
func (*NvidiaDevice) PluginInfo ¶
func (d *NvidiaDevice) PluginInfo() (*base.PluginInfoResponse, error)
PluginInfo returns information describing the plugin.
func (*NvidiaDevice) Reserve ¶
func (d *NvidiaDevice) Reserve(deviceIDs []string) (*device.ContainerReservation, error)
Reserve returns information on how to mount the given devices. It assumes that the Nomad server is responsible for the correctness of GPU allocations, including handling tricky cases such as double-allocation of a single GPU.
func (*NvidiaDevice) SetConfig ¶
func (d *NvidiaDevice) SetConfig(cfg *base.Config) error
SetConfig is used to set the configuration of the plugin.
func (*NvidiaDevice) Stats ¶
func (d *NvidiaDevice) Stats(ctx context.Context, interval time.Duration) (<-chan *device.StatsResponse, error)
Stats streams statistics for the detected devices.