v1

package
v0.17.0-rc.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 28, 2024 License: Apache-2.0 Imports: 14 Imported by: 1

Documentation

Index

Constants

View Source
// Constants related to resource names.
//
//   - ResourceNamePrefix is the "nvidia.com" prefix string.
//   - DefaultSharedResourceNameSuffix is the ".shared" suffix string
//     (presumably what ResourceName.DefaultSharedRename appends; confirm).
//   - MaxResourceNameLength is the numeric limit 63
//     (NOTE(review): confirm where, and against which part of the name, it is enforced).
const (
	ResourceNamePrefix              = "nvidia.com"
	DefaultSharedResourceNameSuffix = ".shared"
	MaxResourceNameLength           = 63
)

Constants related to resource names

View Source
// Constants representing the various MIG strategies.
//
// These are untyped string constants; presumably they are the accepted values
// of CommandLineFlags.MigStrategy (TODO confirm against the flag validation).
const (
	MigStrategyNone   = "none"
	MigStrategySingle = "single"
	MigStrategyMixed  = "mixed"
)

Constants representing the various MIG strategies

View Source
// Constants to represent the various device list strategies.
//
// These are untyped string constants; presumably they are the strategy names
// accepted by NewDeviceListStrategies and DeviceListStrategies.Includes
// (TODO confirm against those implementations).
const (
	DeviceListStrategyEnvVar         = "envvar"
	DeviceListStrategyVolumeMounts   = "volume-mounts"
	DeviceListStrategyCDIAnnotations = "cdi-annotations"
	DeviceListStrategyCDICRI         = "cdi-cri"
)

Constants to represent the various device list strategies

View Source
// Constants to represent the various device ID strategies.
//
// These are untyped string constants; presumably they are the accepted values
// of PluginCommandLineFlags.DeviceIDStrategy (TODO confirm).
const (
	DeviceIDStrategyUUID  = "uuid"
	DeviceIDStrategyIndex = "index"
)

Constants to represent the various device id strategies

View Source
// Constants related to generating CDI specifications.
//
//   - DefaultCDIAnnotationPrefix reuses the annotation prefix exported by the
//     cdiapi package.
//   - DefaultNvidiaCTKPath is the path "/usr/bin/nvidia-ctk".
//   - DefaultContainerDriverRoot is the path "/driver-root".
//
// NOTE(review): presumably these are the defaults for the CDIAnnotationPrefix,
// NvidiaCTKPath and ContainerDriverRoot plugin flags; confirm at the flag
// definitions.
const (
	DefaultCDIAnnotationPrefix = cdiapi.AnnotationPrefix
	DefaultNvidiaCTKPath       = "/usr/bin/nvidia-ctk"
	DefaultContainerDriverRoot = "/driver-root"
)

Constants related to generating CDI specifications

View Source
// Constants representing the sharing strategies, each a typed SharingStrategy
// value: "mps", "none" and "time-slicing".
const (
	SharingStrategyMPS         = SharingStrategy("mps")
	SharingStrategyNone        = SharingStrategy("none")
	SharingStrategyTimeSlicing = SharingStrategy("time-slicing")
)
View Source
// ImexChannelEnvVar is the environment variable name "NVIDIA_IMEX_CHANNELS".
// NOTE(review): presumably used to pass the configured IMEX channel IDs to
// containers; confirm at the usage site.
const (
	ImexChannelEnvVar = "NVIDIA_IMEX_CHANNELS"
)
View Source
// Version indicates the version of the 'Config' struct used to hold
// configuration information.
const Version = "v1"

Version indicates the version of the 'Config' struct used to hold configuration information.

Variables

This section is empty.

Functions

func AssertChannelIDsValid added in v0.17.0

func AssertChannelIDsValid(ids []int) error

AssertChannelIDsValid checks whether the specified list of channel IDs is valid.

func DisableResourceNamingInConfig added in v0.15.0

func DisableResourceNamingInConfig(logger logger, config *Config)

DisableResourceNamingInConfig temporarily disables the resource renaming feature of the plugin. This may be reenabled in a future release.

Types

type CommandLineFlags

// CommandLineFlags holds the list of command line flags used to configure the
// device plugin and GFD.
//
// Every field is a pointer. NOTE(review): presumably this lets an unset flag
// (nil) be distinguished from an explicitly provided zero value; confirm
// against UpdateFromCLIFlags.
//
// The Plugin and GFD fields carry the flags specific to the device plugin and
// to GFD respectively. They, along with MpsRoot, NvidiaDriverRoot and
// NvidiaDevRoot, are tagged omitempty in both JSON and YAML.
type CommandLineFlags struct {
	MigStrategy             *string                 `json:"migStrategy"                yaml:"migStrategy"`
	FailOnInitError         *bool                   `json:"failOnInitError"            yaml:"failOnInitError"`
	MpsRoot                 *string                 `json:"mpsRoot,omitempty"          yaml:"mpsRoot,omitempty"`
	NvidiaDriverRoot        *string                 `json:"nvidiaDriverRoot,omitempty" yaml:"nvidiaDriverRoot,omitempty"`
	NvidiaDevRoot           *string                 `json:"nvidiaDevRoot,omitempty"    yaml:"nvidiaDevRoot,omitempty"`
	GDSEnabled              *bool                   `json:"gdsEnabled"                 yaml:"gdsEnabled"`
	MOFEDEnabled            *bool                   `json:"mofedEnabled"               yaml:"mofedEnabled"`
	UseNodeFeatureAPI       *bool                   `json:"useNodeFeatureAPI"          yaml:"useNodeFeatureAPI"`
	DeviceDiscoveryStrategy *string                 `json:"deviceDiscoveryStrategy"    yaml:"deviceDiscoveryStrategy"`
	Plugin                  *PluginCommandLineFlags `json:"plugin,omitempty"           yaml:"plugin,omitempty"`
	GFD                     *GFDCommandLineFlags    `json:"gfd,omitempty"              yaml:"gfd,omitempty"`
}

CommandLineFlags holds the list of command line flags used to configure the device plugin and GFD.

type Config

// Config is a versioned struct used to hold configuration information.
//
// It groups the configuration version string, the flags, the resource
// definitions (full GPUs and MIG devices), the sharing options, and the IMEX
// options. NOTE(review): Version is presumably expected to match the package's
// Version constant; confirm where it is validated.
type Config struct {
	Version   string    `json:"version"             yaml:"version"`
	Flags     Flags     `json:"flags,omitempty"     yaml:"flags,omitempty"`
	Resources Resources `json:"resources,omitempty" yaml:"resources,omitempty"`
	Sharing   Sharing   `json:"sharing,omitempty"   yaml:"sharing,omitempty"`
	Imex      Imex      `json:"imex,omitempty"      yaml:"imex,omitempty"`
}

Config is a versioned struct used to hold configuration information.

func NewConfig

func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error)

NewConfig builds out a Config struct from a config file (or command line flags). The data stored in the config will be populated in order of precedence from (1) command line, (2) environment variable, (3) config file.

type DeviceListStrategies added in v0.14.0

// DeviceListStrategies defines which strategies are enabled and should be used
// when passing the device list to the container runtime. It is a map keyed by
// strategy name with a bool value per strategy.
type DeviceListStrategies map[string]bool

DeviceListStrategies defines which strategies are enabled and should be used when passing the device list to the container runtime.

func NewDeviceListStrategies added in v0.14.0

func NewDeviceListStrategies(strategies []string) (DeviceListStrategies, error)

NewDeviceListStrategies constructs a new DeviceListStrategies from the given list of strategies.

func (DeviceListStrategies) AllCDIEnabled added in v0.16.0

func (s DeviceListStrategies) AllCDIEnabled() bool

AllCDIEnabled returns whether all strategies being used require CDI.

func (DeviceListStrategies) AnyCDIEnabled added in v0.16.0

func (s DeviceListStrategies) AnyCDIEnabled() bool

AnyCDIEnabled returns whether any of the strategies being used require CDI.

func (DeviceListStrategies) Includes added in v0.14.0

func (s DeviceListStrategies) Includes(strategy string) bool

Includes returns whether the given strategy is present in the set of strategies.

type Duration

// Duration wraps a time.Duration with custom JSON marshaling/unmarshaling.
type Duration time.Duration

Duration wraps a time.Duration with custom JSON marshaling/unmarshaling.

func (Duration) MarshalJSON

func (d Duration) MarshalJSON() ([]byte, error)

MarshalJSON marshals 'Duration' to its raw bytes representation

func (*Duration) UnmarshalJSON

func (d *Duration) UnmarshalJSON(b []byte) error

UnmarshalJSON unmarshals raw bytes into a 'Duration' type.

type Flags

// Flags holds the full list of flags used to configure the device plugin and
// GFD. It embeds CommandLineFlags, so all of that struct's fields are promoted
// onto Flags.
type Flags struct {
	CommandLineFlags
}

Flags holds the full list of flags used to configure the device plugin and GFD.

func (*Flags) UpdateFromCLIFlags

func (f *Flags) UpdateFromCLIFlags(c *cli.Context, flags []cli.Flag)

UpdateFromCLIFlags updates Flags from settings in the cli Flags if they are set.

type GFDCommandLineFlags

// GFDCommandLineFlags holds the list of command line flags specific to GFD.
//
// Every field is a pointer. NOTE(review): presumably nil means "flag not set";
// confirm. SleepInterval uses the package's Duration type, which provides
// custom JSON (un)marshaling.
type GFDCommandLineFlags struct {
	Oneshot         *bool     `json:"oneshot"         yaml:"oneshot"`
	NoTimestamp     *bool     `json:"noTimestamp"     yaml:"noTimestamp"`
	SleepInterval   *Duration `json:"sleepInterval"   yaml:"sleepInterval"`
	OutputFile      *string   `json:"outputFile"      yaml:"outputFile"`
	MachineTypeFile *string   `json:"machineTypeFile" yaml:"machineTypeFile"`
}

GFDCommandLineFlags holds the list of command line flags specific to GFD.

type Imex added in v0.17.0

// Imex stores the configuration options for fabric-attached devices.
type Imex struct {
	// ChannelIDs defines a list of channel IDs to inject into containers that request NVIDIA devices.
	// If a channel ID is specified and the associated channel device node exists, the corresponding
	// channel will be added to the ContainerAllocateResponse for containers with access to NVIDIA
	// devices.
	ChannelIDs []int `json:"channelIDs,omitempty" yaml:"channelIDs,omitempty"`
	// Required specifies whether the requested IMEX channel IDs are required or not.
	// If a channel is required, it is expected to exist when the device plugin starts.
	// If it is not required, its injection is skipped if the device nodes do not exist or if
	// their existence cannot be queried.
	Required bool `json:"required,omitempty" yaml:"required,omitempty"`
}

Imex stores the configuration options for fabric-attached devices.

type PluginCommandLineFlags

// PluginCommandLineFlags holds the list of command line flags specific to the
// device plugin.
//
// Every field is a pointer. NOTE(review): presumably nil means "flag not set";
// confirm. DeviceListStrategy uses the unexported deviceListStrategyFlag type,
// whose definition is not visible here.
type PluginCommandLineFlags struct {
	PassDeviceSpecs     *bool                   `json:"passDeviceSpecs"     yaml:"passDeviceSpecs"`
	DeviceListStrategy  *deviceListStrategyFlag `json:"deviceListStrategy"  yaml:"deviceListStrategy"`
	DeviceIDStrategy    *string                 `json:"deviceIDStrategy"    yaml:"deviceIDStrategy"`
	CDIAnnotationPrefix *string                 `json:"cdiAnnotationPrefix" yaml:"cdiAnnotationPrefix"`
	NvidiaCTKPath       *string                 `json:"nvidiaCTKPath"       yaml:"nvidiaCTKPath"`
	ContainerDriverRoot *string                 `json:"containerDriverRoot" yaml:"containerDriverRoot"`
}

PluginCommandLineFlags holds the list of command line flags specific to the device plugin.

type ReplicatedDeviceRef

// ReplicatedDeviceRef can be a full GPU index, a MIG index, or a UUID
// (full GPU or MIG), stored in string form.
type ReplicatedDeviceRef string

ReplicatedDeviceRef can either be a full GPU index, a MIG index, or a UUID (full GPU or MIG)

func (ReplicatedDeviceRef) IsGPUIndex

func (d ReplicatedDeviceRef) IsGPUIndex() bool

IsGPUIndex checks if a ReplicatedDeviceRef is a full GPU index

func (ReplicatedDeviceRef) IsGpuUUID

func (d ReplicatedDeviceRef) IsGpuUUID() bool

IsGpuUUID checks if a ReplicatedDeviceRef is a GPU UUID. A GPU UUID must be of the form GPU-b1028956-cfa2-0990-bf4a-5da9abb51763.

func (ReplicatedDeviceRef) IsMigIndex

func (d ReplicatedDeviceRef) IsMigIndex() bool

IsMigIndex checks if a ReplicatedDeviceRef is a MIG index

func (ReplicatedDeviceRef) IsMigUUID

func (d ReplicatedDeviceRef) IsMigUUID() bool

IsMigUUID checks if a ReplicatedDeviceRef is a MIG UUID. A MIG UUID can take one of two forms:

  • MIG-b1028956-cfa2-0990-bf4a-5da9abb51763
  • MIG-GPU-b1028956-cfa2-0990-bf4a-5da9abb51763/3/0

func (ReplicatedDeviceRef) IsUUID

func (d ReplicatedDeviceRef) IsUUID() bool

IsUUID checks if a ReplicatedDeviceRef is a UUID

type ReplicatedDevices

// ReplicatedDevices encapsulates the set of devices that should be replicated
// for a given resource. This struct should be treated as a 'union' and only
// one of the fields in this struct should be set at any given time.
//
// The fields carry no struct tags; JSON encoding and decoding go through the
// type's own MarshalJSON and UnmarshalJSON methods.
type ReplicatedDevices struct {
	All   bool
	Count int
	List  []ReplicatedDeviceRef
}

ReplicatedDevices encapsulates the set of devices that should be replicated for a given resource. This struct should be treated as a 'union' and only one of the fields in this struct should be set at any given time.

func (*ReplicatedDevices) MarshalJSON

func (s *ReplicatedDevices) MarshalJSON() ([]byte, error)

MarshalJSON marshals ReplicatedDevices to its raw bytes representation

func (*ReplicatedDevices) UnmarshalJSON

func (s *ReplicatedDevices) UnmarshalJSON(b []byte) error

UnmarshalJSON unmarshals raw bytes into a 'ReplicatedDevices' struct.

type ReplicatedResource

// ReplicatedResource represents a resource to be replicated.
//
// Name and Rename are both ResourceName values; Rename is tagged omitempty.
// Devices is a ReplicatedDevices value and is emitted in YAML flow style
// (the ",flow" tag option). Replicas is an int.
// NOTE(review): presumably Replicas is the number of replicas to create per
// device; confirm against the replication logic.
type ReplicatedResource struct {
	Name     ResourceName      `json:"name"             yaml:"name"`
	Rename   ResourceName      `json:"rename,omitempty" yaml:"rename,omitempty"`
	Devices  ReplicatedDevices `json:"devices"          yaml:"devices,flow"`
	Replicas int               `json:"replicas"         yaml:"replicas"`
}

ReplicatedResource represents a resource to be replicated.

func (*ReplicatedResource) UnmarshalJSON

func (s *ReplicatedResource) UnmarshalJSON(b []byte) error

UnmarshalJSON unmarshals raw bytes into a 'ReplicatedResource' struct.

type ReplicatedResources added in v0.15.0

// ReplicatedResources defines generic options for replicating devices.
//
// It holds two boolean options (RenameByDefault, FailRequestsGreaterThanOne)
// and the list of ReplicatedResource entries; all three are tagged omitempty.
// NOTE(review): the exact effect of the two booleans is not visible here;
// confirm against the code that consumes them.
type ReplicatedResources struct {
	RenameByDefault            bool                 `json:"renameByDefault,omitempty"            yaml:"renameByDefault,omitempty"`
	FailRequestsGreaterThanOne bool                 `json:"failRequestsGreaterThanOne,omitempty" yaml:"failRequestsGreaterThanOne,omitempty"`
	Resources                  []ReplicatedResource `json:"resources,omitempty"                  yaml:"resources,omitempty"`
}

ReplicatedResources defines generic options for replicating devices.

func (*ReplicatedResources) UnmarshalJSON added in v0.15.0

func (s *ReplicatedResources) UnmarshalJSON(b []byte) error

UnmarshalJSON unmarshals raw bytes into a 'ReplicatedResources' struct.

type Resource

// Resource pairs a pattern matcher with a resource name.
type Resource struct {
	Pattern ResourcePattern `json:"pattern" yaml:"pattern"`
	Name    ResourceName    `json:"name"    yaml:"name"`
}

Resource pairs a pattern matcher with a resource name.

func NewResource

func NewResource(pattern, name string) (*Resource, error)

NewResource builds a resource from a name and pattern

func (*Resource) UnmarshalJSON

func (r *Resource) UnmarshalJSON(b []byte) error

UnmarshalJSON unmarshals raw bytes into a 'Resource' struct.

type ResourceName

// ResourceName represents a valid resource name in Kubernetes.
type ResourceName string

ResourceName represents a valid resource name in Kubernetes

func NewResourceName

func NewResourceName(n string) (ResourceName, error)

NewResourceName builds a resource name from the standard prefix and a name. An error is returned if the format is incorrect.

func (ResourceName) DefaultSharedRename

func (r ResourceName) DefaultSharedRename() ResourceName

DefaultSharedRename returns the default renaming to apply when this resource is shared

func (ResourceName) Split

func (r ResourceName) Split() (string, string)

Split splits a full resource name into prefix and name

func (*ResourceName) UnmarshalJSON

func (r *ResourceName) UnmarshalJSON(b []byte) error

UnmarshalJSON unmarshals raw bytes into a 'ResourceName' type.

type ResourcePattern

// ResourcePattern is used to match a resource name to a specific pattern.
// NOTE(review): the pattern syntax is not visible here; see Matches.
type ResourcePattern string

ResourcePattern is used to match a resource name to a specific pattern

func (ResourcePattern) Matches

func (p ResourcePattern) Matches(s string) bool

Matches checks if the provided string matches the ResourcePattern or not.

type Resources

// Resources lists full GPUs and MIG devices separately.
//
// Note that the MIGs field is serialized under the singular key "mig" and is
// tagged omitempty, whereas GPUs is serialized under "gpus" without omitempty.
type Resources struct {
	GPUs []Resource `json:"gpus"           yaml:"gpus"`
	MIGs []Resource `json:"mig,omitempty"  yaml:"mig,omitempty"`
}

Resources lists full GPUs and MIG devices separately.

func (*Resources) AddGPUResource

func (r *Resources) AddGPUResource(pattern, name string) error

AddGPUResource adds a GPU resource to the list of GPU resources.

func (*Resources) AddMIGResource

func (r *Resources) AddMIGResource(pattern, name string) error

AddMIGResource adds a MIG resource to the list of MIG resources.

type Sharing

// Sharing encapsulates the set of sharing strategies that are supported.
type Sharing struct {
	// TimeSlicing defines the set of replicas to be made available when resources are shared using time-slicing.
	TimeSlicing ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"`
	// MPS defines the set of replicas to be shared using MPS.
	// NOTE(review): this is a pointer, presumably nil when MPS sharing is not configured; confirm.
	MPS *ReplicatedResources `json:"mps,omitempty"         yaml:"mps,omitempty"`
}

Sharing encapsulates the set of sharing strategies that are supported.

func (*Sharing) ReplicatedResources added in v0.15.0

func (s *Sharing) ReplicatedResources() *ReplicatedResources

ReplicatedResources returns the resources associated with the active sharing strategy.

func (*Sharing) SharingStrategy added in v0.15.0

func (s *Sharing) SharingStrategy() SharingStrategy

SharingStrategy returns the active sharing strategy.

type SharingStrategy added in v0.15.0

// SharingStrategy is a string type naming a sharing strategy; the package
// defines the values SharingStrategyMPS, SharingStrategyNone and
// SharingStrategyTimeSlicing.
type SharingStrategy string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL