Documentation ¶
Overview ¶
Package config provides the gpud configuration data for the server.
Index ¶
- Constants
- Variables
- func DefaultConfigFile() (string, error)
- func DefaultContainerdComponent(ctx context.Context) (any, bool)
- func DefaultDmesgComponent(ctx context.Context) (any, bool, error)
- func DefaultDockerContainerComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)
- func DefaultFifoFile() (string, error)
- func DefaultK8sPodComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)
- func DefaultStateFile() (string, error)
- type Config
- type Op
- type OpOption
- type Web
Constants ¶
View Source
// Package-level defaults for the gpud server configuration.
const (
	// DefaultAPIVersion is the API version string "v1".
	// Presumably the default for Config.APIVersion — confirm against the code that builds Config.
	DefaultAPIVersion = "v1"

	// DefaultGPUdPort is the port number 15132.
	// Presumably the port used for Config.Address when none is given — confirm against the caller.
	DefaultGPUdPort = 15132
)
Variables ¶
View Source
var ( DefaultRefreshPeriod = metav1.Duration{Duration: time.Minute} DefaultRetentionPeriod = metav1.Duration{Duration: 30 * time.Minute} DefaultRefreshComponentsInterval = metav1.Duration{Duration: time.Minute} )
View Source
// Default NVIDIA shared-library names and directory list.
var (
	// DefaultNVIDIALibraries lists the NVIDIA shared-library file names.
	DefaultNVIDIALibraries = []string{
		"libnvidia-ml.so",
		"libcuda.so",
	}

	// DefaultNVIDIALibrariesSearchDirs lists directories, presumably the ones
	// searched for DefaultNVIDIALibraries — confirm against the caller.
	// NOTE(review): the list includes "/"; if the search is recursive this
	// would walk the whole filesystem — confirm how the caller uses it.
	DefaultNVIDIALibrariesSearchDirs = []string{
		"/",
		"/usr/lib64",
		"/usr/lib/x86_64-linux-gnu",
		"/usr/lib/aarch64-linux-gnu",
		"/usr/lib/x86_64-linux-gnu/nvidia/current",
		"/usr/lib/aarch64-linux-gnu/nvidia/current",
		"/lib64",
		"/lib/x86_64-linux-gnu",
		"/lib/aarch64-linux-gnu",
		"/lib/x86_64-linux-gnu/nvidia/current",
		"/lib/aarch64-linux-gnu/nvidia/current",
	}
)
View Source
// ErrInvalidAutoUpdateExitCode is the sentinel error stating that an auto
// update exit code may only be set when auto update is enabled.
// Presumably returned by configuration validation — confirm against the caller.
// NOTE(review): the message names "auto_update", while the Config JSON key is
// "enable_auto_update" — confirm which name users actually see.
var ErrInvalidAutoUpdateExitCode = errors.New("auto_update_exit_code is only valid when auto_update is enabled")
Functions ¶
func DefaultConfigFile ¶
func DefaultContainerdComponent ¶ added in v0.0.4
func DefaultDmesgComponent ¶ added in v0.0.4
func DefaultDockerContainerComponent ¶ added in v0.0.4
func DefaultFifoFile ¶
func DefaultK8sPodComponent ¶ added in v0.0.4
func DefaultStateFile ¶
Types ¶
type Config ¶
type Config struct { APIVersion string `json:"api_version"` // Basic server annotations (e.g., machine id, host name, etc.). Annotations map[string]string `json:"annotations,omitempty"` // Address for the server to listen on. Address string `json:"address"` // Component specific configurations. Components map[string]any `json:"components,omitempty"` // State file that persists the latest status. // If empty, the states are not persisted to file. State string `json:"state"` // Amount of time to retain states/metrics for. // Once elapsed, old states/metrics are purged/compacted. RetentionPeriod metav1.Duration `json:"retention_period"` // Interval at which to refresh selected components. // Disables refresh if not set. RefreshComponentsInterval metav1.Duration `json:"refresh_components_interval"` // Set true to enable profiler. Pprof bool `json:"pprof"` // Configures the local web configuration. Web *Web `json:"web,omitempty"` // Set false to disable auto update EnableAutoUpdate bool `json:"enable_auto_update"` // Exit code to exit with when auto updating. // Only valid when the auto update is enabled. // Set -1 to disable the auto update by exit code. AutoUpdateExitCode int `json:"auto_update_exit_code"` }
Config provides gpud configuration data for the server.
func LoadConfigYAML ¶
func ParseConfigYAML ¶
type Op ¶ added in v0.0.4
type Op struct { FilesToCheck []string KernelModulesToCheck []string ExpectedPortStates *infiniband.ExpectedPortStates DockerIgnoreConnectionErrors bool KubeletIgnoreConnectionErrors bool }
type OpOption ¶ added in v0.0.4
// OpOption is a function that receives a pointer to an Op, which lets it
// modify that Op in place.
type OpOption func(*Op)
func WithDockerIgnoreConnectionErrors ¶ added in v0.0.5
func WithExpectedPortStates ¶ added in v0.2.0
func WithExpectedPortStates(exp infiniband.ExpectedPortStates) OpOption
func WithFilesToCheck ¶ added in v0.0.4
func WithKernelModulesToCheck ¶ added in v0.2.0
func WithKubeletIgnoreConnectionErrors ¶ added in v0.0.5
type Web ¶
type Web struct { // Enable the web interface. Enable bool `json:"enable"` // Enable the admin interface. Admin bool `json:"admin"` // RefreshPeriod is the time period to refresh metrics. RefreshPeriod metav1.Duration `json:"refresh_period"` // SincePeriod is the time period to start displaying metrics from. SincePeriod metav1.Duration `json:"since_period"` }
Web holds the settings for the local web interface.
Click to show internal directories.
Click to hide internal directories.