config

package
v0.2.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 21, 2024 License: Apache-2.0 Imports: 59 Imported by: 0

Documentation

Overview

Package config provides the gpud configuration data for the server.

Index

Constants

View Source
// DefaultAPIVersion is the default API version string.
const DefaultAPIVersion = "v1"

// DefaultGPUdPort is the default gpud port number.
const DefaultGPUdPort = 15132

Variables

View Source
// DefaultRefreshPeriod is the default refresh period: one minute.
var DefaultRefreshPeriod = metav1.Duration{Duration: time.Minute}

// DefaultRetentionPeriod is the default retention period: thirty minutes.
var DefaultRetentionPeriod = metav1.Duration{Duration: 30 * time.Minute}

// DefaultRefreshComponentsInterval is the default interval for refreshing
// components: one minute.
var DefaultRefreshComponentsInterval = metav1.Duration{Duration: time.Minute}
View Source
// DefaultNVIDIALibraries lists the default NVIDIA shared library file names.
var DefaultNVIDIALibraries = []string{"libnvidia-ml.so", "libcuda.so"}

// DefaultNVIDIALibrariesSearchDirs lists the default library search
// directories, in order.
// NOTE(review): presumably these are the directories scanned for the names in
// DefaultNVIDIALibraries; confirm against the callers.
var DefaultNVIDIALibrariesSearchDirs = []string{
	// Filesystem root.
	"/",

	// Directories under /usr (x86_64 and aarch64 variants).
	"/usr/lib64",
	"/usr/lib/x86_64-linux-gnu",
	"/usr/lib/aarch64-linux-gnu",
	"/usr/lib/x86_64-linux-gnu/nvidia/current",
	"/usr/lib/aarch64-linux-gnu/nvidia/current",

	// Directories directly under the root (x86_64 and aarch64 variants).
	"/lib64",
	"/lib/x86_64-linux-gnu",
	"/lib/aarch64-linux-gnu",
	"/lib/x86_64-linux-gnu/nvidia/current",
	"/lib/aarch64-linux-gnu/nvidia/current",
}
View Source
// ErrInvalidAutoUpdateExitCode reports that auto_update_exit_code was set
// while auto update is not enabled.
// NOTE(review): presumably returned by (*Config).Validate; confirm.
var ErrInvalidAutoUpdateExitCode = errors.New("auto_update_exit_code is only valid when auto_update is enabled")

Functions

func DefaultConfigFile

func DefaultConfigFile() (string, error)

func DefaultContainerdComponent added in v0.0.4

func DefaultContainerdComponent(ctx context.Context) (any, bool)

func DefaultDmesgComponent added in v0.0.4

func DefaultDmesgComponent(ctx context.Context) (any, bool, error)

func DefaultDockerContainerComponent added in v0.0.4

func DefaultDockerContainerComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)

func DefaultFifoFile

func DefaultFifoFile() (string, error)

func DefaultK8sPodComponent added in v0.0.4

func DefaultK8sPodComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)

func DefaultStateFile

func DefaultStateFile() (string, error)

Types

type Config

// Config holds the gpud configuration data for the server.
// Serialized field names are given by the json struct tags.
type Config struct {
	// API version of this configuration.
	// NOTE(review): presumably defaults to DefaultAPIVersion; confirm in DefaultConfig.
	APIVersion string `json:"api_version"`

	// Basic server annotations (e.g., machine id, host name, etc.).
	Annotations map[string]string `json:"annotations,omitempty"`

	// Address for the server to listen on.
	Address string `json:"address"`

	// Component-specific configurations, keyed by string.
	// NOTE(review): keys are presumably component names; confirm against callers.
	Components map[string]any `json:"components,omitempty"`

	// State file that persists the latest status.
	// If empty, the states are not persisted to file.
	State string `json:"state"`

	// Amount of time to retain states/metrics for.
	// Once elapsed, old states/metrics are purged/compacted.
	RetentionPeriod metav1.Duration `json:"retention_period"`

	// Interval at which to refresh selected components.
	// Disables refresh if not set.
	RefreshComponentsInterval metav1.Duration `json:"refresh_components_interval"`

	// Set true to enable the profiler.
	Pprof bool `json:"pprof"`

	// Settings for the local web interface (see Web).
	Web *Web `json:"web,omitempty"`

	// Set false to disable auto update.
	EnableAutoUpdate bool `json:"enable_auto_update"`

	// Exit code to exit with when auto updating.
	// Only valid when the auto update is enabled.
	// Set -1 to disable the auto update by exit code.
	AutoUpdateExitCode int `json:"auto_update_exit_code"`
}

Config provides the gpud configuration data for the server.

func DefaultConfig

func DefaultConfig(ctx context.Context, opts ...OpOption) (*Config, error)

func LoadConfigYAML

func LoadConfigYAML(file string) (*Config, error)

func ParseConfigYAML

func ParseConfigYAML(data []byte) (*Config, error)

func (*Config) SyncYAML

func (config *Config) SyncYAML(file string) error

func (*Config) Validate

func (config *Config) Validate() error

func (*Config) YAML

func (config *Config) YAML() ([]byte, error)

type Op added in v0.0.4

// Op holds the optional settings that OpOption functions receive and modify.
type Op struct {
	// Files to check.
	// NOTE(review): presumably populated by WithFilesToCheck; confirm.
	FilesToCheck                  []string
	// Kernel modules to check.
	// NOTE(review): presumably populated by WithKernelModulesToCheck; confirm.
	KernelModulesToCheck          []string
	// Expected infiniband port states; nil when not set.
	// NOTE(review): presumably populated by WithExpectedPortStates; confirm.
	ExpectedPortStates            *infiniband.ExpectedPortStates
	// Whether to ignore Docker connection errors.
	// NOTE(review): presumably populated by WithDockerIgnoreConnectionErrors; confirm.
	DockerIgnoreConnectionErrors  bool
	// Whether to ignore kubelet connection errors.
	// NOTE(review): presumably populated by WithKubeletIgnoreConnectionErrors; confirm.
	KubeletIgnoreConnectionErrors bool
}

func (*Op) ApplyOpts added in v0.0.5

func (op *Op) ApplyOpts(opts []OpOption) error

type OpOption added in v0.0.4

// OpOption is a function that receives an *Op to modify.
// The With* constructors in this package return values of this type.
type OpOption func(*Op)

func WithDockerIgnoreConnectionErrors added in v0.0.5

func WithDockerIgnoreConnectionErrors(b bool) OpOption

func WithExpectedPortStates added in v0.2.0

func WithExpectedPortStates(exp infiniband.ExpectedPortStates) OpOption

func WithFilesToCheck added in v0.0.4

func WithFilesToCheck(files ...string) OpOption

func WithKernelModulesToCheck added in v0.2.0

func WithKernelModulesToCheck(modules ...string) OpOption

func WithKubeletIgnoreConnectionErrors added in v0.0.5

func WithKubeletIgnoreConnectionErrors(b bool) OpOption

type Web

// Web holds the settings for the local web interface.
type Web struct {
	// Enable the web interface.
	Enable bool `json:"enable"`

	// Enable the admin interface.
	Admin bool `json:"admin"`

	// RefreshPeriod is the time period to refresh metrics.
	// NOTE(review): presumably defaults to DefaultRefreshPeriod; confirm in DefaultConfig.
	RefreshPeriod metav1.Duration `json:"refresh_period"`

	// SincePeriod is the time period to start displaying metrics from.
	SincePeriod metav1.Duration `json:"since_period"`
}

Web configures the local web interface.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL