Documentation
¶
Index ¶
- Constants
- Variables
- func GetNodeName() string
- func InitAvailableParamAndMetrics()
- type CPUModelData
- type CPUS
- type ContainerMetrics
- func (c *ContainerMetrics) GetDynEnergyStat(component string) (energyStat *types.UInt64Stat)
- func (c *ContainerMetrics) GetIdleEnergyStat(component string) (energyStat *types.UInt64Stat)
- func (c *ContainerMetrics) ResetDeltaValues()
- func (c *ContainerMetrics) SetLatestProcess(cgroupPID, pid uint64, comm string)
- func (c *ContainerMetrics) String() string
- func (c *ContainerMetrics) SumAllDynAggrValues() uint64
- func (c *ContainerMetrics) SumAllDynDeltaValues() uint64
- func (c *ContainerMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
- func (c *ContainerMetrics) ToPrometheusValue(metric string) string
- func (c *ContainerMetrics) UpdateCgroupMetrics() error
- type NodeMetrics
- func (ne *NodeMetrics) AddNodeResUsageFromContainerResUsage(containersMetrics map[string]*ContainerMetrics)
- func (ne *NodeMetrics) CalcDynEnergy(component, id string)
- func (ne *NodeMetrics) CalcIdleEnergy(component string)
- func (ne *NodeMetrics) GetAggrDynEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetAggrIdleEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetDeltaDynEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetDeltaIdleEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetNodeResUsagePerResType(resource string) (float64, error)
- func (ne *NodeMetrics) GetSumAggrDynEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) GetSumAggrIdleEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) GetSumDeltaDynEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) GetSumDeltaIdleEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) ResetDeltaValues()
- func (ne *NodeMetrics) SetNodeComponentsEnergy(componentsEnergy map[int]source.NodeComponentsEnergy, gauge, isIdleEnergy bool)
- func (ne *NodeMetrics) SetNodeGPUEnergy(gpuEnergy []uint32, isIdleEnergy bool)
- func (ne *NodeMetrics) SetNodeOtherComponentsEnergy()
- func (ne *NodeMetrics) SetNodePlatformEnergy(platformEnergy map[string]float64, gauge, isIdleEnergy bool)
- func (ne *NodeMetrics) String() string
- func (ne *NodeMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) []float64
- func (ne *NodeMetrics) UpdateDynEnergy()
- func (ne *NodeMetrics) UpdateIdleEnergyWithMinValue(isComponentsSystemCollectionSupported bool)
- type ProcessMetrics
- func (p *ProcessMetrics) GetDynEnergyStat(component string) *types.UInt64Stat
- func (p *ProcessMetrics) GetIdleEnergyStat(component string) *types.UInt64Stat
- func (p *ProcessMetrics) ResetDeltaValues()
- func (p *ProcessMetrics) String() string
- func (p *ProcessMetrics) SumAllDynAggrValues() uint64
- func (p *ProcessMetrics) SumAllDynDeltaValues() uint64
- func (p *ProcessMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
- type VMMetrics
Constants ¶
const ( CORE = "core" DRAM = "dram" UNCORE = "uncore" PKG = "package" GPU = "gpu" OTHER = "other" PLATFORM = "platform" FREQUENCY = "frequency" DYN = "_DYN" IDLE = "_IDLE" )
const ( // TO-DO: merge to cgroup stat ByteReadLabel = config.BytesReadIO ByteWriteLabel = config.BytesWriteIO DeltaPrefix = "curr_" AggrPrefix = "total_" )
const ( CPUModelDataPath = "/var/lib/kepler/data/cpus.yaml" CPUPmuNamePath = "/sys/devices/cpu/caps/pmu_name" CPUTopologyPath = "/sys/devices/system/cpu/cpu%d/topology/physical_package_id" )
Variables ¶
var ( // ContainerFloatFeatureNames holds the feature names of the container float collector_metric. This is specific for the machine-learning based models. ContainerFloatFeatureNames []string = []string{} // ContainerUintFeaturesNames holds the feature names of the container uint collector_metric. This is specific for the machine-learning based models. ContainerUintFeaturesNames []string = []string{} // ContainerFeaturesNames holds all the feature names of the container collector_metric. This is specific for the machine-learning based models. ContainerFeaturesNames []string = []string{} )
var ( NodeName = GetNodeName() NodeCPUArchitecture = getCPUArch() NodeCPUPackageMap = getCPUPackageMap() // NodeMetadataFeatureNames holds the names of the system metadata information. NodeMetadataFeatureNames []string = []string{"cpu_architecture"} // NodeMetadataFeatureValues holds the metadata values regarding the system information. NodeMetadataFeatureValues []string = []string{NodeCPUArchitecture} )
var ( // ProcessMetricNames holds the list of names of the process metrics ProcessMetricNames []string // ProcessFloatFeatureNames holds the feature names of the process float collector_metric. This is specific for the machine-learning based models. ProcessFloatFeatureNames []string = []string{} // ProcessUintFeaturesNames holds the feature names of the process uint collector_metric. This is specific for the machine-learning based models. ProcessUintFeaturesNames []string // ProcessFeaturesNames holds all the feature names of the process collector_metric. This is specific for the machine-learning based models. ProcessFeaturesNames []string )
var ( // AvailableBPFSWCounters holds a list of eBPF counters that might be collected AvailableBPFSWCounters []string // AvailableBPFHWCounters holds a list of hardware counters that might be collected AvailableBPFHWCounters []string // AvailableCGroupMetrics holds a list of cgroup metrics exposed by the cgroup that might be collected AvailableCGroupMetrics []string // AvailableKubeletMetrics holds a list of cgroup metrics exposed by kubelet that might be collected AvailableKubeletMetrics []string // AvailableContainerKubeletMetrics holds a list of cgroup metrics exposed by kubelet specific to container AvailableContainerKubeletMetrics []string // AvailableNodeKubeletMetrics holds a list of cgroup metrics exposed by kubelet specific to node AvailableNodeKubeletMetrics []string // CPUHardwareCounterEnabled defines whether hardware counters should be accounted and exported CPUHardwareCounterEnabled = false )
var ( // VMMetricNames holds the list of names of the vm metrics VMMetricNames []string // VMFloatFeatureNames holds the feature names of the vm float collector_metric. This is specific for the machine-learning based models. VMFloatFeatureNames []string = []string{} // VMUintFeaturesNames holds the feature names of the vm uint collector_metric. This is specific for the machine-learning based models. VMUintFeaturesNames []string // VMFeaturesNames holds all the feature names of the vm collector_metric. This is specific for the machine-learning based models. VMFeaturesNames []string )
Functions ¶
func GetNodeName ¶ added in v0.5.2
func GetNodeName() string
func InitAvailableParamAndMetrics ¶
func InitAvailableParamAndMetrics()
Types ¶
type CPUModelData ¶
type ContainerMetrics ¶
type ContainerMetrics struct { ProcessMetrics CGroupPID uint64 PIDS []uint64 ContainerName string PodName string Namespace string ContainerID string CurrProcesses int CgroupStatHandler cgroup.CCgroupStatHandler CgroupStatMap map[string]*types.UInt64StatCollection }
func NewContainerMetrics ¶
func NewContainerMetrics(containerName, podName, podNamespace, containerID string) *ContainerMetrics
NewContainerMetrics creates a new ContainerMetrics instance
func (*ContainerMetrics) GetDynEnergyStat ¶ added in v0.5.1
func (c *ContainerMetrics) GetDynEnergyStat(component string) (energyStat *types.UInt64Stat)
func (*ContainerMetrics) GetIdleEnergyStat ¶ added in v0.5.1
func (c *ContainerMetrics) GetIdleEnergyStat(component string) (energyStat *types.UInt64Stat)
func (*ContainerMetrics) ResetDeltaValues ¶
func (c *ContainerMetrics) ResetDeltaValues()
ResetDeltaValues resets all current (delta) values to 0.
func (*ContainerMetrics) SetLatestProcess ¶
func (c *ContainerMetrics) SetLatestProcess(cgroupPID, pid uint64, comm string)
SetLatestProcess sets cgroupPID, PID, and command to those of the latest captured process. NOTICE: this can lose the main container info for a multi-container pod.
func (*ContainerMetrics) String ¶
func (c *ContainerMetrics) String() string
func (*ContainerMetrics) SumAllDynAggrValues ¶
func (c *ContainerMetrics) SumAllDynAggrValues() uint64
func (*ContainerMetrics) SumAllDynDeltaValues ¶
func (c *ContainerMetrics) SumAllDynDeltaValues() uint64
func (*ContainerMetrics) ToEstimatorValues ¶
func (c *ContainerMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
ToEstimatorValues returns the values for the requested metric names (featuresName). Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, whereas the power models are trained to estimate power over a 1-second interval, so it is necessary to normalize the resource utilization by SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.
func (*ContainerMetrics) ToPrometheusValue ¶
func (c *ContainerMetrics) ToPrometheusValue(metric string) string
ToPrometheusValue returns the value for the given metric label.
func (*ContainerMetrics) UpdateCgroupMetrics ¶ added in v0.5.1
func (c *ContainerMetrics) UpdateCgroupMetrics() error
type NodeMetrics ¶
type NodeMetrics struct { ResourceUsage map[string]float64 // Absolute energy is the sum of Idle + Dynamic energy. AbsEnergyInCore *types.UInt64StatCollection AbsEnergyInDRAM *types.UInt64StatCollection AbsEnergyInUncore *types.UInt64StatCollection AbsEnergyInPkg *types.UInt64StatCollection AbsEnergyInGPU *types.UInt64StatCollection AbsEnergyInOther *types.UInt64StatCollection AbsEnergyInPlatform *types.UInt64StatCollection DynEnergyInCore *types.UInt64StatCollection DynEnergyInDRAM *types.UInt64StatCollection DynEnergyInUncore *types.UInt64StatCollection DynEnergyInPkg *types.UInt64StatCollection DynEnergyInGPU *types.UInt64StatCollection DynEnergyInOther *types.UInt64StatCollection DynEnergyInPlatform *types.UInt64StatCollection IdleEnergyInCore *types.UInt64StatCollection IdleEnergyInDRAM *types.UInt64StatCollection IdleEnergyInUncore *types.UInt64StatCollection IdleEnergyInPkg *types.UInt64StatCollection IdleEnergyInGPU *types.UInt64StatCollection IdleEnergyInOther *types.UInt64StatCollection IdleEnergyInPlatform *types.UInt64StatCollection CPUFrequency map[int32]uint64 // IdleCPUUtilization is used to determine idle periods IdleCPUUtilization uint64 FoundNewIdleState bool // Accelerator-QAT Utilization QATUtilization map[string]qat.DeviceUtilizationSample }
func NewNodeMetrics ¶
func NewNodeMetrics() *NodeMetrics
func (*NodeMetrics) AddNodeResUsageFromContainerResUsage ¶
func (ne *NodeMetrics) AddNodeResUsageFromContainerResUsage(containersMetrics map[string]*ContainerMetrics)
AddNodeResUsageFromContainerResUsage adds the sum of all container resource usage as the node resource usage.
func (*NodeMetrics) CalcDynEnergy ¶
func (ne *NodeMetrics) CalcDynEnergy(component, id string)
func (*NodeMetrics) CalcIdleEnergy ¶
func (ne *NodeMetrics) CalcIdleEnergy(component string)
func (*NodeMetrics) GetAggrDynEnergyPerID ¶
func (ne *NodeMetrics) GetAggrDynEnergyPerID(component, id string) uint64
GetAggrDynEnergyPerID returns the aggr dynamic energy for a given id (e.g. a package or gpu id)
func (*NodeMetrics) GetAggrIdleEnergyPerID ¶
func (ne *NodeMetrics) GetAggrIdleEnergyPerID(component, id string) uint64
GetAggrIdleEnergyPerID returns the aggr idle energy for a given id
func (*NodeMetrics) GetDeltaDynEnergyPerID ¶
func (ne *NodeMetrics) GetDeltaDynEnergyPerID(component, id string) uint64
GetDeltaDynEnergyPerID returns the delta dynamic energy for a given id (e.g. a package or gpu id)
func (*NodeMetrics) GetDeltaIdleEnergyPerID ¶
func (ne *NodeMetrics) GetDeltaIdleEnergyPerID(component, id string) uint64
GetDeltaIdleEnergyPerID returns the delta idle energy for a given id (e.g. a package or gpu id)
func (*NodeMetrics) GetNodeResUsagePerResType ¶
func (ne *NodeMetrics) GetNodeResUsagePerResType(resource string) (float64, error)
func (*NodeMetrics) GetSumAggrDynEnergyFromAllSources ¶
func (ne *NodeMetrics) GetSumAggrDynEnergyFromAllSources(component string) uint64
GetSumAggrDynEnergyFromAllSources returns the sum of aggr dynamic energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) GetSumAggrIdleEnergyFromAllSources ¶ added in v0.5.4
func (ne *NodeMetrics) GetSumAggrIdleEnergyFromAllSources(component string) uint64
GetSumAggrIdleEnergyFromAllSources returns the sum of aggr idle energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) GetSumDeltaDynEnergyFromAllSources ¶
func (ne *NodeMetrics) GetSumDeltaDynEnergyFromAllSources(component string) uint64
GetSumDeltaDynEnergyFromAllSources returns the sum of delta dynamic energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) GetSumDeltaIdleEnergyFromAllSources ¶ added in v0.5.4
func (ne *NodeMetrics) GetSumDeltaIdleEnergyFromAllSources(component string) uint64
GetSumDeltaIdleEnergyFromAllSources returns the sum of delta idle energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) ResetDeltaValues ¶
func (ne *NodeMetrics) ResetDeltaValues()
func (*NodeMetrics) SetNodeComponentsEnergy ¶
func (ne *NodeMetrics) SetNodeComponentsEnergy(componentsEnergy map[int]source.NodeComponentsEnergy, gauge, isIdleEnergy bool)
SetNodeComponentsEnergy adds the idle or absolute energy consumption collected from the node's components (e.g., using RAPL). Absolute energy is the sum of Idle + Dynamic energy.
func (*NodeMetrics) SetNodeGPUEnergy ¶ added in v0.5.4
func (ne *NodeMetrics) SetNodeGPUEnergy(gpuEnergy []uint32, isIdleEnergy bool)
SetNodeGPUEnergy adds the latest energy consumption of each GPU. Right now we don't support other types of accelerators than GPU, but we will in the future.
func (*NodeMetrics) SetNodeOtherComponentsEnergy ¶
func (ne *NodeMetrics) SetNodeOtherComponentsEnergy()
SetNodeOtherComponentsEnergy adds the latest energy consumption collected from the node's components other than CPU and DRAM. Other components energy is a special case where the energy is calculated and not measured.
func (*NodeMetrics) SetNodePlatformEnergy ¶ added in v0.5.4
func (ne *NodeMetrics) SetNodePlatformEnergy(platformEnergy map[string]float64, gauge, isIdleEnergy bool)
SetNodePlatformEnergy adds the idle or absolute energy consumption from the node sensor. Absolute energy is the sum of Idle + Dynamic energy.
func (*NodeMetrics) String ¶
func (ne *NodeMetrics) String() string
func (*NodeMetrics) ToEstimatorValues ¶ added in v0.5.4
func (ne *NodeMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) []float64
ToEstimatorValues returns the values for the requested metric names (featuresName). The metrics can be related to resource utilization or power consumption. Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, whereas the power models are trained to estimate power over a 1-second interval, so it is necessary to normalize the resource utilization by SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.
func (*NodeMetrics) UpdateDynEnergy ¶
func (ne *NodeMetrics) UpdateDynEnergy()
UpdateDynEnergy calculates the dynamic energy
func (*NodeMetrics) UpdateIdleEnergyWithMinValue ¶ added in v0.5.4
func (ne *NodeMetrics) UpdateIdleEnergyWithMinValue(isComponentsSystemCollectionSupported bool)
type ProcessMetrics ¶
type ProcessMetrics struct { PID uint64 Command string // ebpf metrics BPFStats map[string]*types.UInt64Stat // Energy metrics DynEnergyInCore *types.UInt64Stat DynEnergyInDRAM *types.UInt64Stat DynEnergyInUncore *types.UInt64Stat DynEnergyInPkg *types.UInt64Stat DynEnergyInGPU *types.UInt64Stat DynEnergyInOther *types.UInt64Stat DynEnergyInPlatform *types.UInt64Stat IdleEnergyInCore *types.UInt64Stat IdleEnergyInDRAM *types.UInt64Stat IdleEnergyInUncore *types.UInt64Stat IdleEnergyInPkg *types.UInt64Stat IdleEnergyInGPU *types.UInt64Stat IdleEnergyInOther *types.UInt64Stat IdleEnergyInPlatform *types.UInt64Stat }
func NewProcessMetrics ¶
func NewProcessMetrics(pid uint64, command string) *ProcessMetrics
NewProcessMetrics creates a new ProcessMetrics instance
func (*ProcessMetrics) GetDynEnergyStat ¶ added in v0.5.1
func (p *ProcessMetrics) GetDynEnergyStat(component string) *types.UInt64Stat
func (*ProcessMetrics) GetIdleEnergyStat ¶ added in v0.5.1
func (p *ProcessMetrics) GetIdleEnergyStat(component string) *types.UInt64Stat
func (*ProcessMetrics) ResetDeltaValues ¶
func (p *ProcessMetrics) ResetDeltaValues()
ResetDeltaValues resets all current (delta) values to 0.
func (*ProcessMetrics) String ¶
func (p *ProcessMetrics) String() string
func (*ProcessMetrics) SumAllDynAggrValues ¶
func (p *ProcessMetrics) SumAllDynAggrValues() uint64
func (*ProcessMetrics) SumAllDynDeltaValues ¶
func (p *ProcessMetrics) SumAllDynDeltaValues() uint64
func (*ProcessMetrics) ToEstimatorValues ¶
func (p *ProcessMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
ToEstimatorValues returns the values for the requested metric names (featuresName). Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, whereas the power models are trained to estimate power over a 1-second interval, so it is necessary to normalize the resource utilization by SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.
type VMMetrics ¶ added in v0.6.1
type VMMetrics struct { PID uint64 Name string BPFStats map[string]*types.UInt64Stat // ebpf metrics DynEnergyInCore *types.UInt64Stat DynEnergyInDRAM *types.UInt64Stat DynEnergyInUncore *types.UInt64Stat DynEnergyInPkg *types.UInt64Stat DynEnergyInGPU *types.UInt64Stat DynEnergyInOther *types.UInt64Stat DynEnergyInPlatform *types.UInt64Stat IdleEnergyInCore *types.UInt64Stat IdleEnergyInDRAM *types.UInt64Stat IdleEnergyInUncore *types.UInt64Stat IdleEnergyInPkg *types.UInt64Stat IdleEnergyInGPU *types.UInt64Stat IdleEnergyInOther *types.UInt64Stat IdleEnergyInPlatform *types.UInt64Stat }
func NewVMMetrics ¶ added in v0.6.1
NewVMMetrics creates a new VMMetrics instance
func (*VMMetrics) ResetDeltaValues ¶ added in v0.6.1
func (p *VMMetrics) ResetDeltaValues()
ResetDeltaValues resets all current (delta) values to 0.