device

package
v0.31.0-h Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 12, 2023 License: MPL-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

View Source
// ApiVersion010 is the version string of the initial device plugin API.
const ApiVersion010 = "v0.1.0"
View Source
// DeviceTypeGPU is the canonical device type string used for a GPU.
const DeviceTypeGPU = "gpu"

Variables

View Source
// ErrPluginDisabled is the sentinel error indicating that the device plugin
// is disabled.
var ErrPluginDisabled = fmt.Errorf("device is not enabled")

Functions

func Serve

func Serve(dev DevicePlugin, logger log.Logger)

Serve is used to serve a device plugin

Types

type ContainerReservation

// ContainerReservation describes how to mount a device into a container. A
// container is an isolated environment that shares the host's OS.
type ContainerReservation struct {
	// Envs are a set of environment variables to set for the task.
	Envs map[string]string

	// Mounts are used to mount host volumes into a container that may include
	// libraries, etc.
	Mounts []*Mount

	// Devices are the set of devices to mount into the container.
	Devices []*DeviceSpec
}

ContainerReservation describes how to mount a device into a container. A container is an isolated environment that shares the host's OS.

type Device

// Device is an instance of a particular device.
type Device struct {
	// ID is the identifier for the device.
	ID string

	// Healthy marks whether the device is healthy and can be used for
	// scheduling.
	Healthy bool

	// HealthDesc describes why the device may be unhealthy.
	HealthDesc string

	// HwLocality captures hardware locality information for the device.
	HwLocality *DeviceLocality
}

Device is an instance of a particular device.

func (*Device) Validate

func (d *Device) Validate() error

Validate validates that the device is valid

type DeviceGroup

// DeviceGroup is a grouping of devices that share a common vendor, device
// type and name.
type DeviceGroup struct {
	// Vendor is the vendor providing the device (nvidia, intel, etc).
	Vendor string

	// Type is the type of the device (gpu, fpga, etc).
	Type string

	// Name is the device's model name.
	Name string

	// Devices is the set of device instances.
	Devices []*Device

	// Attributes are a set of attributes shared for all the devices.
	Attributes map[string]*structs.Attribute
}

DeviceGroup is a grouping of devices that share a common vendor, device type and name.

func (*DeviceGroup) Validate

func (d *DeviceGroup) Validate() error

Validate validates that the device group is valid

type DeviceGroupStats

// DeviceGroupStats contains statistics for each device of a particular device
// group, identified by the vendor, type and name of the device.
type DeviceGroupStats struct {
	// Vendor, Type and Name together identify the device group that these
	// statistics belong to.
	Vendor string
	Type   string
	Name   string

	// InstanceStats is a mapping of each device ID to its statistics.
	InstanceStats map[string]*DeviceStats
}

DeviceGroupStats contains statistics for each device of a particular device group, identified by the vendor, type and name of the device.

type DeviceLocality

// DeviceLocality captures hardware locality information for a device.
type DeviceLocality struct {
	// PciBusID is the PCI bus ID of the device.
	PciBusID string
}

DeviceLocality captures hardware locality information for a device.

type DevicePlugin

// DevicePlugin is the interface for a plugin that can expose detected devices
// to Nomad and inform it how to mount them.
type DevicePlugin interface {
	// BasePlugin is embedded, so an implementation must also provide every
	// method of the base plugin interface.
	base.BasePlugin

	// Fingerprint returns a stream of devices that are detected.
	Fingerprint(ctx context.Context) (<-chan *FingerprintResponse, error)

	// Reserve is used to reserve a set of devices and retrieve mount
	// instructions.
	Reserve(deviceIDs []string) (*ContainerReservation, error)

	// Stats returns a stream of statistics per device collected at the passed
	// interval.
	Stats(ctx context.Context, interval time.Duration) (<-chan *StatsResponse, error)
}

DevicePlugin is the interface for a plugin that can expose detected devices to Nomad and inform it how to mount them.

type DeviceSpec

// DeviceSpec captures how to mount a device into a container.
type DeviceSpec struct {
	// TaskPath is the location to mount the device in the task's file system.
	TaskPath string

	// HostPath is the host location of the device.
	HostPath string

	// CgroupPerms defines the permissions to use when mounting the device.
	CgroupPerms string
}

DeviceSpec captures how to mount a device into a container.

type DeviceStats

// DeviceStats is the statistics for an individual device.
type DeviceStats struct {
	// Summary exposes a single summary metric that should be the most
	// informative to users.
	Summary *structs.StatValue

	// Stats contains the verbose statistics for the device.
	Stats *structs.StatObject

	// Timestamp is the time the statistics were collected.
	Timestamp time.Time
}

DeviceStats is the statistics for an individual device

type FingerprintFn

// FingerprintFn is a function type with the same shape as
// DevicePlugin.Fingerprint: it takes a context and returns a receive-only
// channel of fingerprint responses together with an error.
type FingerprintFn func(context.Context) (<-chan *FingerprintResponse, error)

func ErrorChFingerprinter

func ErrorChFingerprinter(err error) FingerprintFn

ErrorChFingerprinter returns a FingerprintFn that reports the passed error over the fingerprint channel.

func StaticFingerprinter

func StaticFingerprinter(devices []*DeviceGroup) FingerprintFn

StaticFingerprinter fingerprints the passed devices just once

type FingerprintResponse

// FingerprintResponse includes a set of detected devices or an error in the
// process of fingerprinting.
type FingerprintResponse struct {
	// Devices is a set of devices that have been detected.
	Devices []*DeviceGroup

	// Error is populated when fingerprinting has failed.
	Error error
}

FingerprintResponse includes a set of detected devices or an error in the process of fingerprinting.

func NewFingerprint

func NewFingerprint(devices ...*DeviceGroup) *FingerprintResponse

NewFingerprint takes a set of device groups and returns a fingerprint response

func NewFingerprintError

func NewFingerprintError(err error) *FingerprintResponse

NewFingerprintError takes an error and returns a fingerprint response

type MockDevicePlugin

// MockDevicePlugin is used for testing. Each function can be set as a closure
// to make assertions about how data is passed through the base plugin layer.
type MockDevicePlugin struct {
	// MockPlugin is the embedded mock from the base package.
	*base.MockPlugin
	// FingerprintF, ReserveF and StatsF are the closures a test can set.
	// NOTE(review): presumably each one backs the Fingerprint, Reserve and
	// Stats method of the same name — confirm against the method bodies.
	FingerprintF FingerprintFn
	ReserveF     ReserveFn
	StatsF       StatsFn
}

MockDevicePlugin is used for testing. Each function can be set as a closure to make assertions about how data is passed through the base plugin layer.

func (*MockDevicePlugin) Fingerprint

func (p *MockDevicePlugin) Fingerprint(ctx context.Context) (<-chan *FingerprintResponse, error)

func (*MockDevicePlugin) Reserve

func (p *MockDevicePlugin) Reserve(devices []string) (*ContainerReservation, error)

func (*MockDevicePlugin) Stats

func (p *MockDevicePlugin) Stats(ctx context.Context, interval time.Duration) (<-chan *StatsResponse, error)

type Mount

// Mount is used to mount a host directory into a container.
type Mount struct {
	// TaskPath is the location in the task's file system to mount.
	TaskPath string

	// HostPath is the host directory path to mount.
	HostPath string

	// ReadOnly defines whether the mount should be read only to the task.
	ReadOnly bool
}

Mount is used to mount a host directory into a container.

type PluginDevice

// PluginDevice wraps a DevicePlugin and implements go-plugin's GRPCPlugin
// interface to expose the interface over gRPC.
type PluginDevice struct {
	// NOTE(review): presumably embedded so that the net/rpc half of the
	// go-plugin interface is satisfied without being supported — confirm
	// against the go-plugin documentation.
	plugin.NetRPCUnsupportedPlugin
	// Impl is the DevicePlugin being wrapped.
	Impl DevicePlugin
}

PluginDevice wraps a DevicePlugin and implements go-plugin's GRPCPlugin interface to expose the interface over gRPC.

func (*PluginDevice) GRPCClient

func (p *PluginDevice) GRPCClient(ctx context.Context, broker *plugin.GRPCBroker, c *grpc.ClientConn) (interface{}, error)

func (*PluginDevice) GRPCServer

func (p *PluginDevice) GRPCServer(broker *plugin.GRPCBroker, s *grpc.Server) error

type ReserveFn

// ReserveFn is a function type with the same shape as DevicePlugin.Reserve:
// it takes a slice of strings (device IDs in DevicePlugin.Reserve) and
// returns a container reservation together with an error.
type ReserveFn func([]string) (*ContainerReservation, error)

func ErrorReserve

func ErrorReserve(err error) ReserveFn

ErrorReserve returns the passed error

func StaticReserve

func StaticReserve(out *ContainerReservation) ReserveFn

StaticReserve returns the passed container reservation

type StatsFn

// StatsFn is a function type with the same shape as DevicePlugin.Stats: it
// takes a context and a duration (the collection interval in
// DevicePlugin.Stats) and returns a receive-only channel of stats responses
// together with an error.
type StatsFn func(context.Context, time.Duration) (<-chan *StatsResponse, error)

func ErrorChStats

func ErrorChStats(err error) StatsFn

ErrorChStats returns a StatsFn that reports the passed error over the stats channel.

func StaticStats

func StaticStats(out []*DeviceGroupStats) StatsFn

StaticStats returns the passed statistics

type StatsResponse

// StatsResponse returns statistics for each device group.
type StatsResponse struct {
	// Groups contains statistics for each device group.
	Groups []*DeviceGroupStats

	// Error is populated when collecting statistics has failed.
	Error error
}

StatsResponse returns statistics for each device group.

func NewStatsError

func NewStatsError(err error) *StatsResponse

NewStatsError takes an error and returns a stats response

Directories

Path Synopsis
cmd

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL