Documentation ¶
Index ¶
- Constants
- Variables
- func GetBindAddress(cmdSet string) string
- func GetCGroupVersion() int
- func GetDefaultPowerModelURL(modelOutputType, energySource string) string
- func GetKernelSourceDirs() []string
- func GetMetricPath(cmdSet string) string
- func GetModelConfigMap() map[string]string
- func GetRedfishCredFilePath() string
- func GetRedfishProbeIntervalInSeconds() int
- func GetRedfishSkipSSLVerify() bool
- func InitModelConfigMap()
- func IsCgroupMetricsEnabled() bool
- func IsIRQCounterMetricsEnabled() bool
- func IsIdlePowerEnabled() bool
- func LogConfigs()
- func SetEnableAPIServer(enabled bool)
- func SetEnabledEBPFCgroupID(enabled bool)
- func SetEnabledGPU(enabled bool)
- func SetEnabledHardwareCounterMetrics(enabled bool)
- func SetEnabledIdlePower(enabled bool)
- func SetEnabledQAT(enabled bool)
- func SetEstimatorConfig(modelName, selectFilter string)
- func SetKernelSourceDir(dir string) error
- func SetKubeConfig(k string)
- func SetModelServerEndpoint(serverEndpoint string)
- func SetModelServerReqEndpoint() (modelServerReqEndpoint string)
- func SetRedfishCredFilePath(credFilePath string)
- func SetRedfishProbeIntervalInSeconds(interval string)
- func SetRedfishSkipSSLVerify(skipSSLVerify bool)
- type Client
Constants ¶
const ( // counter - attacher package CPUCycle = "cpu_cycles" CPURefCycle = "cpu_ref_cycles" CPUInstruction = "cpu_instructions" CacheMiss = "cache_miss" // bpf - attacher package CPUTime = "bpf_cpu_time_us" PageCacheHit = "bpf_page_cache_hit" IRQNetTXLabel = "bpf_net_tx_irq" IRQNetRXLabel = "bpf_net_rx_irq" IRQBlockLabel = "bpf_block_irq" // cgroup - cgroup package CgroupfsMemory = "cgroupfs_memory_usage_bytes" CgroupfsKernelMemory = "cgroupfs_kernel_memory_usage_bytes" CgroupfsTCPMemory = "cgroupfs_tcp_memory_usage_bytes" CgroupfsCPU = "cgroupfs_cpu_usage_us" CgroupfsSystemCPU = "cgroupfs_system_cpu_usage_us" CgroupfsUserCPU = "cgroupfs_user_cpu_usage_us" CgroupfsReadIO = "cgroupfs_ioread_bytes" CgroupfsWriteIO = "cgroupfs_iowrite_bytes" BytesReadIO = "bytes_read" BytesWriteIO = "bytes_writes" BlockDevicesIO = "block_devices_used" // kubelet - package KubeletCPUUsage = "kubelet_cpu_usage" KubeletMemoryUsage = "kubelet_memory_bytes" KubeletContainerCPU = "container_cpu_usage_seconds_total" KubeletContainerMemory = "container_memory_working_set_bytes" KubeletNodeCPU = "node_cpu_usage_seconds_total" KubeletNodeMemory = "node_memory_working_set_bytes" // system CPUFrequency = "avg_cpu_frequency" // GPU GPUSMUtilization = "gpu_sm_util" GPUMemUtilization = "gpu_mem_util" // Metric suffix AggregatedUsageSuffix = "total" AggregatedEnergySuffix = "joules_total" )
const (
// MaxIRQ is the maximum number of IRQs to be monitored
MaxIRQ = 10
)
Variables ¶
var ( EnabledMSR = false EnabledBPFBatchDelete = true KernelVersion = float32(0) KeplerNamespace = getConfig("KEPLER_NAMESPACE", defaultNamespace) UseLibBPFAttacher = false EnabledEBPFCgroupID = getBoolConfig("ENABLE_EBPF_CGROUPID", true) EnabledGPU = getBoolConfig("ENABLE_GPU", false) EnabledQAT = getBoolConfig("ENABLE_QAT", false) EnableProcessMetrics = getBoolConfig("ENABLE_PROCESS_METRICS", false) ExposeHardwareCounterMetrics = getBoolConfig("EXPOSE_HW_COUNTER_METRICS", true) ExposeCgroupMetrics = getBoolConfig("EXPOSE_CGROUP_METRICS", true) ExposeIRQCounterMetrics = getBoolConfig("EXPOSE_IRQ_COUNTER_METRICS", true) ExposeIdlePowerMetrics = getBoolConfig("EXPOSE_ESTIMATED_IDLE_POWER_METRICS", false) MetricPathKey = "METRIC_PATH" BindAddressKey = "BIND_ADDRESS" CPUArchOverride = getConfig("CPU_ARCH_OVERRIDE", "") MaxLookupRetry = getIntConfig("MAX_LOOKUP_RETRY", defaultMaxLookupRetry) BPFSampleRate = getIntConfig("EXPERIMENTAL_BPF_SAMPLE_RATE", 0) EstimatorModel = getConfig("ESTIMATOR_MODEL", defaultMetricValue) // auto-select EstimatorSelectFilter = getConfig("ESTIMATOR_SELECT_FILTER", defaultMetricValue) // no filter CoreUsageMetric = getConfig("CORE_USAGE_METRIC", CPUInstruction) DRAMUsageMetric = getConfig("DRAM_USAGE_METRIC", CacheMiss) UncoreUsageMetric = getConfig("UNCORE_USAGE_METRIC", defaultMetricValue) // no metric (evenly divided) GpuUsageMetric = getConfig("GPU_USAGE_METRIC", GPUSMUtilization) // no metric (evenly divided) GeneralUsageMetric = getConfig("GENERAL_USAGE_METRIC", defaultMetricValue) // for uncategorized energy SamplePeriodSec = uint64(getIntConfig("SAMPLE_PERIOD_SEC", defaultSamplePeriodSec)) //////////////////////////////////// ModelServerEnable = getBoolConfig("MODEL_SERVER_ENABLE", false) ModelServerEndpoint = SetModelServerReqEndpoint() // for model config ModelConfigValues map[string]string // model_parameter_prefix NodePlatformPowerKey = "NODE_TOTAL" NodeComponentsPowerKey = "NODE_COMPONENTS" ContainerPlatformPowerKey = 
"CONTAINER_TOTAL" ContainerComponentsPowerKey = "CONTAINER_COMPONENTS" ProcessPlatformPowerKey = "PROCESS_TOTAL" ProcessComponentsPowerKey = "PROCESS_COMPONENTS" // model_parameter_attribute RatioEnabledKey = "RATIO" // the default container power model is RATIO but ESTIMATOR or LINEAR_REGRESSION can be used EstimatorEnabledKey = "ESTIMATOR" LinearRegressionEnabledKey = "LINEAR_REGRESSION" InitModelURLKey = "INIT_URL" FixedTrainerNameKey = "TRAINER" FixedNodeTypeKey = "NODE_TYPE" ModelFiltersKey = "FILTERS" // KubeConfig is used to start k8s client with the pod running outside the cluster KubeConfig = "" EnableAPIServer = false )
Functions ¶
func GetBindAddress ¶
func GetDefaultPowerModelURL ¶ added in v0.5.5
GetDefaultPowerModelURL returns the local path to the power model weights, e.g., /var/lib/kepler/data/acpi_AbsPowerModel.json
func GetKernelSourceDirs ¶ added in v0.5.1
func GetKernelSourceDirs() []string
func GetMetricPath ¶
func GetModelConfigMap ¶ added in v0.5.4
func GetRedfishCredFilePath ¶ added in v0.5.2
func GetRedfishCredFilePath() string
func GetRedfishProbeIntervalInSeconds ¶ added in v0.5.2
func GetRedfishProbeIntervalInSeconds() int
func GetRedfishSkipSSLVerify ¶ added in v0.5.2
func GetRedfishSkipSSLVerify() bool
func InitModelConfigMap ¶
func InitModelConfigMap()
InitModelConfigMap initializes map of config from MODEL_CONFIG
func IsCgroupMetricsEnabled ¶ added in v0.5.5
func IsCgroupMetricsEnabled() bool
func IsIRQCounterMetricsEnabled ¶ added in v0.5.5
func IsIRQCounterMetricsEnabled() bool
func IsIdlePowerEnabled ¶ added in v0.6.1
func IsIdlePowerEnabled() bool
IsIdlePowerEnabled always returns true if Kepler has access to system power metrics. However, if pre-trained power models are being used, Kepler should only expose metrics if the user is aware of the implications.
func LogConfigs ¶
func LogConfigs()
func SetEnableAPIServer ¶ added in v0.5.1
func SetEnableAPIServer(enabled bool)
SetEnableAPIServer enables Kepler to watch apiserver
func SetEnabledEBPFCgroupID ¶
func SetEnabledEBPFCgroupID(enabled bool)
SetEnabledEBPFCgroupID enables the eBPF code to collect cgroup id if the system has kernel version > 4.18
func SetEnabledGPU ¶
func SetEnabledGPU(enabled bool)
SetEnabledGPU enables the exposure of gpu metrics
func SetEnabledHardwareCounterMetrics ¶
func SetEnabledHardwareCounterMetrics(enabled bool)
SetEnabledHardwareCounterMetrics enables the exposure of hardware counter metrics
func SetEnabledIdlePower ¶ added in v0.6.1
func SetEnabledIdlePower(enabled bool)
SetEnabledIdlePower allows enabling idle power exposure in Kepler's metrics. When direct power metrics access is available, idle power exposure is automatic. With pre-trained power models, awareness of implications is crucial. Estimated idle power is useful for bare-metal or single VM setups. In VM environments, accurately distributing idle power is tough due to unknown co-running VMs. Wrong division results in significant accuracy errors, duplicating the host idle power across all VMs. Container pre-trained models focus on dynamic power. Estimating idle power in limited information scenarios (like VMs) is complex. Idle power prediction is limited to bare-metal or single VM setups. Knowing the number of running VMs becomes crucial for achieving a fair distribution of idle power, particularly when following the GHG (Greenhouse Gas) protocol.
func SetEnabledQAT ¶ added in v0.5.4
func SetEnabledQAT(enabled bool)
SetEnabledQAT enables the exposure of qat metrics
func SetEstimatorConfig ¶
func SetEstimatorConfig(modelName, selectFilter string)
func SetKernelSourceDir ¶ added in v0.5.1
SetKernelSourceDir sets the directory for all kernel source. This is used for bcc. Only the top level directory is needed.
func SetModelServerEndpoint ¶
func SetModelServerEndpoint(serverEndpoint string)
func SetModelServerReqEndpoint ¶
func SetModelServerReqEndpoint() (modelServerReqEndpoint string)
func SetRedfishCredFilePath ¶ added in v0.5.2
func SetRedfishCredFilePath(credFilePath string)
func SetRedfishProbeIntervalInSeconds ¶ added in v0.5.2
func SetRedfishProbeIntervalInSeconds(interval string)
func SetRedfishSkipSSLVerify ¶ added in v0.5.2
func SetRedfishSkipSSLVerify(skipSSLVerify bool)