config

package

v0.7.9 Latest Latest Go to latest Published: Apr 8, 2024 License: Apache-2.0 Imports: 9 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/sustainable-computing-io/kepler

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func GetBindAddress(cmdSet string) string
func GetCGroupVersion() int
func GetDefaultPowerModelURL(modelOutputType, energySource string) string
func GetKernelSourceDirs() []string
func GetMetricPath(cmdSet string) string
func GetModelConfigMap() map[string]string
func GetRedfishCredFilePath() string
func GetRedfishProbeIntervalInSeconds() int
func GetRedfishSkipSSLVerify() bool
func InitModelConfigMap()
func IsCgroupMetricsEnabled() bool
func IsExposeCPUFrequencyMetricsEnabled() bool
func IsExposeContainerStatsEnabled() bool
func IsExposeProcessStatsEnabled() bool
func IsExposeQATMetricsEnabled() bool
func IsExposeVMStatsEnabled() bool
func IsHCMetricsEnabled() bool
func IsIRQCounterMetricsEnabled() bool
func IsIdlePowerEnabled() bool
func LogConfigs()
func SetEnableAPIServer(enabled bool)
func SetEnabledEBPFCgroupID(enabled bool)
func SetEnabledGPU(enabled bool)
func SetEnabledHardwareCounterMetrics(enabled bool)
func SetEnabledIdlePower(enabled bool)
func SetEnabledQAT(enabled bool)
func SetEstimatorConfig(modelName, selectFilter string)
func SetGpuUsageMetric(metric string)
func SetKernelSourceDir(dir string) error
func SetKubeConfig(k string)
func SetModelServerEndpoint(serverEndpoint string)
func SetModelServerReqEndpoint() (modelServerReqEndpoint string)
func SetRedfishCredFilePath(credFilePath string)
func SetRedfishProbeIntervalInSeconds(interval string)
func SetRedfishSkipSSLVerify(skipSSLVerify bool)
type Client

Constants ¶

View Source

// String constants naming energy components, resource-usage counters and
// energy metrics. The values are runtime identifiers; the section comments
// below record which package each group is associated with.
const (
	// NOTE(review): presumably the labels of the energy sources/components
	// that the Abs/Dyn/Idle energy metrics further down are broken out by —
	// confirm against callers.
	CORE      = "core"
	DRAM      = "dram"
	UNCORE    = "uncore"
	PKG       = "package"
	GPU       = "gpu"
	OTHER     = "other"
	PLATFORM  = "platform"
	FREQUENCY = "frequency"

	// counter - attacher package
	CPUCycle       = "cpu_cycles"
	CPURefCycle    = "cpu_ref_cycles"
	CPUInstruction = "cpu_instructions"
	CacheMiss      = "cache_miss"
	TaskClock      = "task_clock_ms"

	// bpf - attacher package
	CPUTime       = "bpf_cpu_time_ms"
	PageCacheHit  = "bpf_page_cache_hit"
	IRQNetTXLabel = "bpf_net_tx_irq"
	IRQNetRXLabel = "bpf_net_rx_irq"
	IRQBlockLabel = "bpf_block_irq"

	// cgroup - cgroup package
	CgroupfsMemory       = "cgroupfs_memory_usage_bytes"
	CgroupfsKernelMemory = "cgroupfs_kernel_memory_usage_bytes"
	CgroupfsTCPMemory    = "cgroupfs_tcp_memory_usage_bytes"
	CgroupfsCPU          = "cgroupfs_cpu_usage_us"
	CgroupfsSystemCPU    = "cgroupfs_system_cpu_usage_us"
	CgroupfsUserCPU      = "cgroupfs_user_cpu_usage_us"
	CgroupfsReadIO       = "cgroupfs_ioread_bytes"
	CgroupfsWriteIO      = "cgroupfs_iowrite_bytes"
	BytesReadIO          = "bytes_read"
	// NOTE(review): "bytes_writes" is plural while BytesReadIO is "bytes_read";
	// the value is a runtime string, so confirm with consumers before changing it.
	BytesWriteIO         = "bytes_writes"
	BlockDevicesIO       = "block_devices_used"

	// system
	CPUFrequency = "avg_cpu_frequency"

	// NVIDIA GPU
	GPUComputeUtilization = "gpu_compute_util"
	GPUMemUtilization     = "gpu_mem_util"

	// Intel QuickAssist Technology (QAT)
	// TODO: test if different request has different energy consumption.
	// NOTE(review): the value is spelled "qta", not "qat". It is a runtime
	// string, so confirm whether the spelling is intentional before changing it.
	QATUtilization = "qta_sample_cnt"

	// Energy Metrics
	// Each of the three families below (absolute, dynamic, idle) has one name
	// per Core, DRAM, UnCore, Pkg, GPU, Other and Platform.
	// Absolute energy and power
	AbsEnergyInCore     = "abs_energy_in_core"
	AbsEnergyInDRAM     = "abs_energy_in_dram"
	AbsEnergyInUnCore   = "abs_energy_in_uncore"
	AbsEnergyInPkg      = "abs_energy_in_pkg"
	AbsEnergyInGPU      = "abs_energy_in_gpu"
	AbsEnergyInOther    = "abs_energy_in_other"
	AbsEnergyInPlatform = "abs_energy_in_platform"
	// Dynamic energy and power
	DynEnergyInCore     = "dyn_energy_in_core"
	DynEnergyInDRAM     = "dyn_energy_in_dram"
	DynEnergyInUnCore   = "dyn_energy_in_uncore"
	DynEnergyInPkg      = "dyn_energy_in_pkg"
	DynEnergyInGPU      = "dyn_energy_in_gpu"
	DynEnergyInOther    = "dyn_energy_in_other"
	DynEnergyInPlatform = "dyn_energy_in_platform"
	// Idle energy and power
	IdleEnergyInCore     = "idle_energy_in_core"
	IdleEnergyInDRAM     = "idle_energy_in_dram"
	IdleEnergyInUnCore   = "idle_energy_in_uncore"
	IdleEnergyInPkg      = "idle_energy_in_pkg"
	IdleEnergyInGPU      = "idle_energy_in_gpu"
	IdleEnergyInOther    = "idle_energy_in_other"
	IdleEnergyInPlatform = "idle_energy_in_platform"
)

View Source

const (

	// MaxIRQ is the maximum number of IRQs to be monitored
	MaxIRQ = 10
)

Variables ¶

View Source

// Package-level configuration variables. Many are initialized at package
// initialization by getConfig / getBoolConfig / getIntConfig, each called with
// a key string and a second value.
// NOTE(review): presumably those helpers read the key from the environment and
// fall back to the second argument — confirm against their definitions.
var (
	EnabledMSR            = false
	EnabledBPFBatchDelete = true

	KernelVersion = float32(0)

	KeplerNamespace              = getConfig("KEPLER_NAMESPACE", defaultNamespace)
	UseLibBPFAttacher            = false
	EnabledEBPFCgroupID          = getBoolConfig("ENABLE_EBPF_CGROUPID", true)
	EnabledGPU                   = getBoolConfig("ENABLE_GPU", false)
	EnabledQAT                   = getBoolConfig("ENABLE_QAT", false)
	EnableProcessStats           = getBoolConfig("ENABLE_PROCESS_METRICS", false)
	ExposeContainerStats         = getBoolConfig("EXPOSE_CONTAINER_METRICS", true)
	ExposeVMStats                = getBoolConfig("EXPOSE_VM_METRICS", true)
	ExposeHardwareCounterMetrics = getBoolConfig("EXPOSE_HW_COUNTER_METRICS", true)
	ExposeCgroupMetrics          = getBoolConfig("EXPOSE_CGROUP_METRICS", true)
	ExposeIRQCounterMetrics      = getBoolConfig("EXPOSE_IRQ_COUNTER_METRICS", true)
	ExposeIdlePowerMetrics       = getBoolConfig("EXPOSE_ESTIMATED_IDLE_POWER_METRICS", false)
	ExposeCPUFrequencyMetrics    = getBoolConfig("EXPOSE_CPU_FREQUENCY_METRICS", false)

	// MetricPathKey and BindAddressKey hold key names rather than resolved values.
	MetricPathKey   = "METRIC_PATH"
	BindAddressKey  = "BIND_ADDRESS"
	CPUArchOverride = getConfig("CPU_ARCH_OVERRIDE", "")
	MaxLookupRetry  = getIntConfig("MAX_LOOKUP_RETRY", defaultMaxLookupRetry)
	BPFSampleRate   = getIntConfig("EXPERIMENTAL_BPF_SAMPLE_RATE", 0)

	EstimatorModel        = getConfig("ESTIMATOR_MODEL", defaultMetricValue)         // auto-select
	EstimatorSelectFilter = getConfig("ESTIMATOR_SELECT_FILTER", defaultMetricValue) // no filter
	CoreUsageMetric       = getConfig("CORE_USAGE_METRIC", CPUInstruction)
	DRAMUsageMetric       = getConfig("DRAM_USAGE_METRIC", CacheMiss)
	UncoreUsageMetric     = getConfig("UNCORE_USAGE_METRIC", defaultMetricValue)  // no metric (evenly divided)
	GpuUsageMetric        = getConfig("GPU_USAGE_METRIC", GPUComputeUtilization)  // uses GPUComputeUtilization rather than defaultMetricValue
	GeneralUsageMetric    = getConfig("GENERAL_USAGE_METRIC", defaultMetricValue) // for uncategorized energy

	// NOTE(review): if getIntConfig can return a negative value, the uint64
	// conversion wraps to a very large period — confirm the helper validates it.
	SamplePeriodSec = uint64(getIntConfig("SAMPLE_PERIOD_SEC", defaultSamplePeriodSec))

	// nvidia dcgm hostengine endpoint
	DCGMHostEngineEndpoint = getConfig("NVIDIA_HOSTENGINE_ENDPOINT", "localhost:5555")

	////////////////////////////////////
	// Model server settings. ModelServerEndpoint is initialized with the result
	// of calling SetModelServerReqEndpoint during package initialization.
	ModelServerEnable   = getBoolConfig("MODEL_SERVER_ENABLE", false)
	ModelServerEndpoint = SetModelServerReqEndpoint()
	// for model config
	// Declared without an initializer, so it is a nil map until assigned
	// (presumably by InitModelConfigMap — confirm).
	ModelConfigValues map[string]string
	// model_parameter_prefix
	NodePlatformPowerKey        = "NODE_TOTAL"
	NodeComponentsPowerKey      = "NODE_COMPONENTS"
	ContainerPlatformPowerKey   = "CONTAINER_TOTAL"
	ContainerComponentsPowerKey = "CONTAINER_COMPONENTS"
	ProcessPlatformPowerKey     = "PROCESS_TOTAL"
	ProcessComponentsPowerKey   = "PROCESS_COMPONENTS"

	// model_parameter_attribute
	// NOTE(review): the comment on RatioEnabledKey mentions LINEAR_REGRESSION,
	// but the key declared below is "LOCAL_REGRESSOR" — confirm which is current.
	RatioEnabledKey          = "RATIO" // the default container power model is RATIO but ESTIMATOR or LINEAR_REGRESSION can be used
	EstimatorEnabledKey      = "ESTIMATOR"
	LocalRegressorEnabledKey = "LOCAL_REGRESSOR"
	InitModelURLKey          = "INIT_URL"
	FixedTrainerNameKey      = "TRAINER"
	FixedNodeTypeKey         = "NODE_TYPE"
	ModelFiltersKey          = "FILTERS"
	DefaultTrainerName       = "SGDRegressorTrainer"

	// KubeConfig is used to start k8s client with the pod running outside the cluster
	KubeConfig      = ""
	EnableAPIServer = false
)

Functions ¶

func GetBindAddress ¶

func GetBindAddress(cmdSet string) string

func GetDefaultPowerModelURL ¶ added in v0.5.5

func GetDefaultPowerModelURL(modelOutputType, energySource string) string

GetDefaultPowerModelURL returns the local path to the power model weights, e.g., /var/lib/kepler/data/acpi_AbsPowerModel.json

func GetKernelSourceDirs ¶ added in v0.5.1

func GetKernelSourceDirs() []string

func GetMetricPath ¶

func GetMetricPath(cmdSet string) string

func GetModelConfigMap ¶ added in v0.5.4

func GetModelConfigMap() map[string]string

func GetRedfishCredFilePath ¶ added in v0.5.2

func GetRedfishCredFilePath() string

func GetRedfishProbeIntervalInSeconds ¶ added in v0.5.2

func GetRedfishProbeIntervalInSeconds() int

func GetRedfishSkipSSLVerify ¶ added in v0.5.2

func GetRedfishSkipSSLVerify() bool

func InitModelConfigMap ¶

func InitModelConfigMap()

InitModelConfigMap initializes the map of model configs from MODEL_CONFIG.

func IsCgroupMetricsEnabled ¶ added in v0.5.5

func IsCgroupMetricsEnabled() bool

func IsExposeCPUFrequencyMetricsEnabled ¶ added in v0.7.3

func IsExposeCPUFrequencyMetricsEnabled() bool

IsExposeCPUFrequencyMetricsEnabled returns false if CPUFrequency metrics are disabled to minimize overhead.

func IsExposeContainerStatsEnabled ¶ added in v0.7.3

func IsExposeContainerStatsEnabled() bool

IsExposeContainerStatsEnabled returns false if container metrics are disabled to minimize overhead in the Kepler standalone mode.

func IsExposeProcessStatsEnabled ¶ added in v0.7.3

func IsExposeProcessStatsEnabled() bool

IsExposeProcessStatsEnabled returns false if process metrics are disabled to minimize overhead in the Kepler standalone mode.

func IsExposeQATMetricsEnabled ¶ added in v0.7.3

func IsExposeQATMetricsEnabled() bool

IsExposeQATMetricsEnabled returns false if QAT metrics are disabled to minimize overhead.

func IsExposeVMStatsEnabled ¶ added in v0.7.3

func IsExposeVMStatsEnabled() bool

IsExposeVMStatsEnabled returns false if VM metrics are disabled to minimize overhead.

func IsHCMetricsEnabled ¶ added in v0.7.3

func IsHCMetricsEnabled() bool

func IsIRQCounterMetricsEnabled ¶ added in v0.5.5

func IsIRQCounterMetricsEnabled() bool

func IsIdlePowerEnabled ¶ added in v0.6.1

func IsIdlePowerEnabled() bool

IsIdlePowerEnabled always returns true if Kepler has access to system power metrics. However, if pre-trained power models are being used, Kepler should only expose metrics if the user is aware of the implications.

func LogConfigs ¶

func LogConfigs()

func SetEnableAPIServer ¶ added in v0.5.1

func SetEnableAPIServer(enabled bool)

SetEnableAPIServer enables Kepler to watch apiserver

func SetEnabledEBPFCgroupID ¶

func SetEnabledEBPFCgroupID(enabled bool)

SetEnabledEBPFCgroupID enables the eBPF code to collect cgroup id if the system has kernel version > 4.18

func SetEnabledGPU ¶

func SetEnabledGPU(enabled bool)

SetEnabledGPU enables the exposure of gpu metrics

func SetEnabledHardwareCounterMetrics ¶

func SetEnabledHardwareCounterMetrics(enabled bool)

SetEnabledHardwareCounterMetrics enables the exposure of hardware counter metrics

func SetEnabledIdlePower ¶ added in v0.6.1

func SetEnabledIdlePower(enabled bool)

SetEnabledIdlePower allows enabling idle power exposure in Kepler's metrics. When direct power metrics access is available, idle power exposure is automatic. With pre-trained power models, awareness of implications is crucial. Estimated idle power is useful for bare-metal or single VM setups. In VM environments, accurately distributing idle power is tough due to unknown co-running VMs. Wrong division results in significant accuracy errors, duplicating the host idle power across all VMs. Container pre-trained models focus on dynamic power. Estimating idle power in limited information scenarios (like VMs) is complex. Idle power prediction is limited to bare-metal or single VM setups. Knowing the number of running VMs becomes crucial for achieving a fair distribution of idle power, particularly when following the GHG (Greenhouse Gas) protocol.

func SetEnabledQAT ¶ added in v0.5.4

func SetEnabledQAT(enabled bool)

SetEnabledQAT enables the exposure of qat metrics

func SetEstimatorConfig ¶

func SetEstimatorConfig(modelName, selectFilter string)

func SetGpuUsageMetric ¶ added in v0.7.4

func SetGpuUsageMetric(metric string)

func SetKernelSourceDir ¶ added in v0.5.1

func SetKernelSourceDir(dir string) error

SetKernelSourceDir sets the directory for all kernel source. This is used for bcc. Only the top level directory is needed.

func SetKubeConfig ¶ added in v0.5.1

func SetKubeConfig(k string)

SetKubeConfig sets the kubeconfig file.

func SetModelServerEndpoint ¶

func SetModelServerEndpoint(serverEndpoint string)

func SetModelServerReqEndpoint ¶

func SetModelServerReqEndpoint() (modelServerReqEndpoint string)

func SetRedfishCredFilePath ¶ added in v0.5.2

func SetRedfishCredFilePath(credFilePath string)

func SetRedfishProbeIntervalInSeconds ¶ added in v0.5.2

func SetRedfishProbeIntervalInSeconds(interval string)

func SetRedfishSkipSSLVerify ¶ added in v0.5.2

func SetRedfishSkipSSLVerify(skipSSLVerify bool)

Types ¶

type Client ¶

type Client interface {
	// contains filtered or unexported methods
}

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func GetBindAddress ¶

func GetCGroupVersion ¶

func GetDefaultPowerModelURL ¶ added in v0.5.5

func GetKernelSourceDirs ¶ added in v0.5.1

func GetMetricPath ¶

func GetModelConfigMap ¶ added in v0.5.4

func GetRedfishCredFilePath ¶ added in v0.5.2

func GetRedfishProbeIntervalInSeconds ¶ added in v0.5.2

func GetRedfishSkipSSLVerify ¶ added in v0.5.2

func InitModelConfigMap ¶

func IsCgroupMetricsEnabled ¶ added in v0.5.5

func IsExposeCPUFrequencyMetricsEnabled ¶ added in v0.7.3

func IsExposeContainerStatsEnabled ¶ added in v0.7.3

func IsExposeProcessStatsEnabled ¶ added in v0.7.3

func IsExposeQATMetricsEnabled ¶ added in v0.7.3

func IsExposeVMStatsEnabled ¶ added in v0.7.3

func IsHCMetricsEnabled ¶ added in v0.7.3

func IsIRQCounterMetricsEnabled ¶ added in v0.5.5

func IsIdlePowerEnabled ¶ added in v0.6.1

func LogConfigs ¶

func SetEnableAPIServer ¶ added in v0.5.1

func SetEnabledEBPFCgroupID ¶

func SetEnabledGPU ¶

func SetEnabledHardwareCounterMetrics ¶

func SetEnabledIdlePower ¶ added in v0.6.1

func SetEnabledQAT ¶ added in v0.5.4

func SetEstimatorConfig ¶

func SetGpuUsageMetric ¶ added in v0.7.4

func SetKernelSourceDir ¶ added in v0.5.1

func SetKubeConfig ¶ added in v0.5.1

func SetModelServerEndpoint ¶

func SetModelServerReqEndpoint ¶

func SetRedfishCredFilePath ¶ added in v0.5.2

func SetRedfishProbeIntervalInSeconds ¶ added in v0.5.2

func SetRedfishSkipSSLVerify ¶ added in v0.5.2

Types ¶

type Client ¶

Source Files ¶