config

package
v0.4.4-rc.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 20, 2025 License: Apache-2.0 Imports: 66 Imported by: 0

Documentation

Overview

Package config provides the gpud configuration data for the server.

Index

Constants

View Source
// Package-level defaults: DefaultAPIVersion is the API version string "v1"
// and DefaultGPUdPort is the port number 15132.
// NOTE(review): presumably these seed Config.APIVersion and the port part of
// Config.Address — confirm against DefaultConfig.
const (
	DefaultAPIVersion = "v1"
	DefaultGPUdPort   = 15132
)

Variables

View Source
var (
	// DefaultRefreshPeriod is a one-minute duration.
	// NOTE(review): presumably the default for Web.RefreshPeriod — confirm against DefaultConfig.
	DefaultRefreshPeriod = metav1.Duration{Duration: time.Minute}

	// DefaultRetentionPeriod is three hours:
	// keep the metrics only for the last 3 hours.
	DefaultRetentionPeriod = metav1.Duration{Duration: 3 * time.Hour}

	// DefaultCompactPeriod is a zero duration.
	// Compact/vacuum is disruptive to existing queries (including reads)
	// but necessary to keep the state database from growing indefinitely.
	// TODO: disabled for now, until we have a better way to detect the performance issue
	DefaultCompactPeriod = metav1.Duration{Duration: 0}

	// DefaultRefreshComponentsInterval is a one-minute duration.
	// NOTE(review): presumably the default for Config.RefreshComponentsInterval — confirm against DefaultConfig.
	DefaultRefreshComponentsInterval = metav1.Duration{Duration: time.Minute}
)
View Source
// ErrInvalidAutoUpdateExitCode is the sentinel error stating that
// auto_update_exit_code is only valid when auto update is enabled.
// NOTE(review): presumably returned by (*Config).Validate — confirm at the call site.
var ErrInvalidAutoUpdateExitCode = errors.New("auto_update_exit_code is only valid when auto_update is enabled")

Functions

func DefaultConfigFile

func DefaultConfigFile() (string, error)

func DefaultContainerdComponent

func DefaultContainerdComponent(ctx context.Context) (any, bool)

func DefaultDockerContainerComponent

func DefaultDockerContainerComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)

func DefaultFifoFile

func DefaultFifoFile() (string, error)

func DefaultK8sPodComponent

func DefaultK8sPodComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)

func DefaultStateFile

func DefaultStateFile() (string, error)

Types

type Config

// Config provides gpud configuration data for the server.
// Every field is serialized to JSON under the snake_case key given in its tag.
type Config struct {
	// API version of this configuration.
	// NOTE(review): presumably DefaultAPIVersion by default — confirm in DefaultConfig.
	APIVersion string `json:"api_version"`

	// Basic server annotations (e.g., machine id, host name, etc.).
	// Omitted from the JSON output when empty.
	Annotations map[string]string `json:"annotations,omitempty"`

	// Address for the server to listen on.
	Address string `json:"address"`

	// Component specific configurations.
	// Omitted from the JSON output when empty.
	// NOTE(review): keys are presumably component names — confirm against the callers.
	Components map[string]any `json:"components,omitempty"`

	// State file that persists the latest status.
	// If empty, the states are not persisted to file.
	State string `json:"state"`

	// Amount of time to retain states/metrics for.
	// Once elapsed, old states/metrics are purged/compacted.
	RetentionPeriod metav1.Duration `json:"retention_period"`

	// Interval at which to compact the state database.
	CompactPeriod metav1.Duration `json:"compact_period"`

	// Interval at which to refresh selected components.
	// Disables refresh if not set.
	RefreshComponentsInterval metav1.Duration `json:"refresh_components_interval"`

	// Set true to enable profiler.
	Pprof bool `json:"pprof"`

	// Configures the local web configuration.
	// Omitted from the JSON output when nil.
	Web *Web `json:"web,omitempty"`

	// Overwrites the tool binaries for testing.
	ToolOverwriteOptions ToolOverwriteOptions `json:"tool_overwrite_options"`

	// Set false to disable auto update.
	EnableAutoUpdate bool `json:"enable_auto_update"`

	// Exit code to exit with when auto updating.
	// Only valid when the auto update is enabled.
	// Set -1 to disable the auto update by exit code.
	AutoUpdateExitCode int `json:"auto_update_exit_code"`
}

Config provides gpud configuration data for the server.

func DefaultConfig

func DefaultConfig(ctx context.Context, opts ...OpOption) (*Config, error)

func (*Config) Validate

func (config *Config) Validate() error

type Op

// Op is the options struct that OpOption functions operate on.
//
// It carries the lists FilesToCheck and KernelModulesToCheck, the flags
// DockerIgnoreConnectionErrors and KubeletIgnoreConnectionErrors, and the
// embedded nvidia_common.ToolOverwrites.
// NOTE(review): presumably the two flags are forwarded to the docker and
// kubelet default components, and ToolOverwrites holds the nvidia-smi/ibstat
// path overrides — confirm against the With* option constructors and DefaultConfig.
type Op struct {
	FilesToCheck                  []string
	KernelModulesToCheck          []string
	DockerIgnoreConnectionErrors  bool
	KubeletIgnoreConnectionErrors bool

	nvidia_common.ToolOverwrites
}

func (*Op) ApplyOpts

func (op *Op) ApplyOpts(opts []OpOption) error

type OpOption

// OpOption is a function that receives a pointer to an Op.
// NOTE(review): presumably each OpOption sets one or more Op fields
// (functional-options style) — confirm against the With* constructors.
type OpOption func(*Op)

func WithDockerIgnoreConnectionErrors

func WithDockerIgnoreConnectionErrors(b bool) OpOption

func WithFilesToCheck

func WithFilesToCheck(files ...string) OpOption

func WithIbstatCommand

func WithIbstatCommand(p string) OpOption

Specifies the ibstat binary path to overwrite the default path.

func WithKernelModulesToCheck

func WithKernelModulesToCheck(modules ...string) OpOption

func WithKubeletIgnoreConnectionErrors

func WithKubeletIgnoreConnectionErrors(b bool) OpOption

func WithNvidiaSMICommand

func WithNvidiaSMICommand(p string) OpOption

Specifies the nvidia-smi binary path to overwrite the default path.

func WithNvidiaSMIQueryCommand

func WithNvidiaSMIQueryCommand(p string) OpOption

type ToolOverwriteOptions

// ToolOverwriteOptions holds three string settings, serialized to JSON as
// nvidia_smi_command, nvidia_smi_query_command and ibstat_command.
// Config describes its field of this type as overwriting the tool binaries
// for testing.
// NOTE(review): presumably an empty string means "use the default tool" — confirm at the call sites.
type ToolOverwriteOptions struct {
	NvidiaSMICommand      string `json:"nvidia_smi_command"`
	NvidiaSMIQueryCommand string `json:"nvidia_smi_query_command"`
	IbstatCommand         string `json:"ibstat_command"`
}

type Web

// Web holds the settings of the local web interface.
type Web struct {
	// Enable the web interface.
	Enable bool `json:"enable"`

	// Enable the admin interface.
	Admin bool `json:"admin"`

	// RefreshPeriod is the time period to refresh metrics.
	RefreshPeriod metav1.Duration `json:"refresh_period"`

	// SincePeriod is the time period to start displaying metrics from.
	SincePeriod metav1.Duration `json:"since_period"`
}

Web holds the configuration of the local web interface.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL