Documentation ¶
Index ¶
- Constants
- Variables
- func GetNodeName() string
- func InitAvailableParamAndMetrics()
- type CPUModelData
- type ContainerMetrics
- func (c *ContainerMetrics) GetDynEnergyStat(component string) (energyStat *types.UInt64Stat)
- func (c *ContainerMetrics) GetIdleEnergyStat(component string) (energyStat *types.UInt64Stat)
- func (c *ContainerMetrics) ResetDeltaValues()
- func (c *ContainerMetrics) SetLatestProcess(cgroupPID, pid uint64, comm string)
- func (c *ContainerMetrics) String() string
- func (c *ContainerMetrics) SumAllDynAggrValues() uint64
- func (c *ContainerMetrics) SumAllDynDeltaValues() uint64
- func (c *ContainerMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
- func (c *ContainerMetrics) ToPrometheusValue(metric string) string
- func (c *ContainerMetrics) UpdateCgroupMetrics() error
- type NodeMetrics
- func (ne *NodeMetrics) AddNodeResUsageFromContainerResUsage(containersMetrics map[string]*ContainerMetrics)
- func (ne *NodeMetrics) CalcDynEnergy(component, id string)
- func (ne *NodeMetrics) CalcIdleEnergy(component string)
- func (ne *NodeMetrics) GetAggrDynEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetAggrIdleEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetDeltaDynEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetDeltaIdleEnergyPerID(component, id string) uint64
- func (ne *NodeMetrics) GetNodeResUsagePerResType(resource string) (float64, error)
- func (ne *NodeMetrics) GetSumAggrDynEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) GetSumAggrIdleEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) GetSumDeltaDynEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) GetSumDeltaIdleEnergyFromAllSources(component string) uint64
- func (ne *NodeMetrics) ResetDeltaValues()
- func (ne *NodeMetrics) SetNodeComponentsEnergy(componentsEnergy map[int]source.NodeComponentsEnergy, gauge, isIdleEnergy bool)
- func (ne *NodeMetrics) SetNodeGPUEnergy(gpuEnergy []uint32, isIdleEnergy bool)
- func (ne *NodeMetrics) SetNodeOtherComponentsEnergy()
- func (ne *NodeMetrics) SetNodePlatformEnergy(platformEnergy map[string]float64, gauge, isIdleEnergy bool)
- func (ne *NodeMetrics) String() string
- func (ne *NodeMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) []float64
- func (ne *NodeMetrics) UpdateDynEnergy()
- func (ne *NodeMetrics) UpdateIdleEnergyWithMinValue()
- type ProcessMetrics
- func (p *ProcessMetrics) GetDynEnergyStat(component string) *types.UInt64Stat
- func (p *ProcessMetrics) GetIdleEnergyStat(component string) *types.UInt64Stat
- func (p *ProcessMetrics) ResetDeltaValues()
- func (p *ProcessMetrics) String() string
- func (p *ProcessMetrics) SumAllDynAggrValues() uint64
- func (p *ProcessMetrics) SumAllDynDeltaValues() uint64
- func (p *ProcessMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
Constants ¶
const ( CORE = "core" DRAM = "dram" UNCORE = "uncore" PKG = "pkg" GPU = "gpu" OTHER = "other" PLATFORM = "platform" FREQUENCY = "frequency" )
const ( // TO-DO: merge to cgroup stat ByteReadLabel = config.BytesReadIO ByteWriteLabel = config.BytesWriteIO DeltaPrefix = "curr_" AggrPrefix = "total_" )
Variables ¶
var ( // ContainerFloatFeatureNames holds the feature names of the container float collector_metric. This is specific for the machine-learning based models. ContainerFloatFeatureNames []string = []string{} // ContainerUintFeaturesNames holds the feature names of the container uint collector_metric. This is specific for the machine-learning based models. ContainerUintFeaturesNames []string = []string{} // ContainerFeaturesNames holds all the feature names of the container collector_metric. This is specific for the machine-learning based models. ContainerFeaturesNames []string = []string{} )
var ( NodeName = GetNodeName() NodeCPUArchitecture = getCPUArch() NodeCPUPackageMap = getCPUPackageMap() // NodeMetadataFeatureNames holds the names of the system metadata features. NodeMetadataFeatureNames []string = []string{"cpu_architecture"} // NodeMetadataFeatureValues holds the values of the system metadata features. NodeMetadataFeatureValues []string = []string{NodeCPUArchitecture} )
var ( // ProcessMetricNames holds the list of names of the process metrics ProcessMetricNames []string // ProcessFloatFeatureNames holds the feature names of the process float collector_metric. This is specific for the machine-learning based models. ProcessFloatFeatureNames []string = []string{} // ProcessUintFeaturesNames holds the feature names of the process uint collector_metric. This is specific for the machine-learning based models. ProcessUintFeaturesNames []string // ProcessFeaturesNames holds all the feature names of the process collector_metric. This is specific for the machine-learning based models. ProcessFeaturesNames []string )
var ( // AvailableEBPFCounters holds a list of eBPF counters that might be collected AvailableEBPFCounters []string // AvailableHWCounters holds a list of hardware counters that might be collected AvailableHWCounters []string // AvailableCGroupMetrics holds a list of cgroup metrics exposed by the cgroup that might be collected AvailableCGroupMetrics []string // AvailableKubeletMetrics holds a list of cgroup metrics exposed by kubelet that might be collected AvailableKubeletMetrics []string // CPUHardwareCounterEnabled defines whether hardware counters should be accounted for and exported CPUHardwareCounterEnabled = false )
var CPUModelDataPath = "/var/lib/kepler/data/normalized_cpu_arch.csv"
Functions ¶
func GetNodeName ¶ added in v0.5.2
func GetNodeName() string
func InitAvailableParamAndMetrics ¶
func InitAvailableParamAndMetrics()
Types ¶
type CPUModelData ¶
type CPUModelData struct {
Architecture string `csv:"Architecture"`
}
type ContainerMetrics ¶
type ContainerMetrics struct { ProcessMetrics CGroupPID uint64 PIDS []uint64 ContainerName string PodName string Namespace string ContainerID string CurrProcesses int CgroupStatHandler cgroup.CCgroupStatHandler CgroupStatMap map[string]*types.UInt64StatCollection // TODO: kubelet stat metrics is deprecated since it duplicates the cgroup metrics. We will remove it soon. KubeletStats map[string]*types.UInt64Stat }
func NewContainerMetrics ¶
func NewContainerMetrics(containerName, podName, podNamespace, containerID string) *ContainerMetrics
NewContainerMetrics creates a new ContainerMetrics instance
func (*ContainerMetrics) GetDynEnergyStat ¶ added in v0.5.1
func (c *ContainerMetrics) GetDynEnergyStat(component string) (energyStat *types.UInt64Stat)
func (*ContainerMetrics) GetIdleEnergyStat ¶ added in v0.5.1
func (c *ContainerMetrics) GetIdleEnergyStat(component string) (energyStat *types.UInt64Stat)
func (*ContainerMetrics) ResetDeltaValues ¶
func (c *ContainerMetrics) ResetDeltaValues()
ResetDeltaValues resets all current values to 0
func (*ContainerMetrics) SetLatestProcess ¶
func (c *ContainerMetrics) SetLatestProcess(cgroupPID, pid uint64, comm string)
SetLatestProcess sets cgroupPID, PID, and command to those of the latest captured process. NOTICE: this can lose the main container info for a multi-container pod.
func (*ContainerMetrics) String ¶
func (c *ContainerMetrics) String() string
func (*ContainerMetrics) SumAllDynAggrValues ¶
func (c *ContainerMetrics) SumAllDynAggrValues() uint64
func (*ContainerMetrics) SumAllDynDeltaValues ¶
func (c *ContainerMetrics) SumAllDynDeltaValues() uint64
func (*ContainerMetrics) ToEstimatorValues ¶
func (c *ContainerMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
ToEstimatorValues returns the values for the given featuresName. Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, whereas the power models are trained to estimate power over a 1-second interval, so it is necessary to normalize the resource utilization by the SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.
func (*ContainerMetrics) ToPrometheusValue ¶
func (c *ContainerMetrics) ToPrometheusValue(metric string) string
ToPrometheusValue returns the value for the given metric label
func (*ContainerMetrics) UpdateCgroupMetrics ¶ added in v0.5.1
func (c *ContainerMetrics) UpdateCgroupMetrics() error
type NodeMetrics ¶
type NodeMetrics struct { ResourceUsage map[string]float64 // Absolute energy is the sum of Idle + Dynamic energy. AbsEnergyInCore *types.UInt64StatCollection AbsEnergyInDRAM *types.UInt64StatCollection AbsEnergyInUncore *types.UInt64StatCollection AbsEnergyInPkg *types.UInt64StatCollection AbsEnergyInGPU *types.UInt64StatCollection AbsEnergyInOther *types.UInt64StatCollection AbsEnergyInPlatform *types.UInt64StatCollection DynEnergyInCore *types.UInt64StatCollection DynEnergyInDRAM *types.UInt64StatCollection DynEnergyInUncore *types.UInt64StatCollection DynEnergyInPkg *types.UInt64StatCollection DynEnergyInGPU *types.UInt64StatCollection DynEnergyInOther *types.UInt64StatCollection DynEnergyInPlatform *types.UInt64StatCollection IdleEnergyInCore *types.UInt64StatCollection IdleEnergyInDRAM *types.UInt64StatCollection IdleEnergyInUncore *types.UInt64StatCollection IdleEnergyInPkg *types.UInt64StatCollection IdleEnergyInGPU *types.UInt64StatCollection IdleEnergyInOther *types.UInt64StatCollection IdleEnergyInPlatform *types.UInt64StatCollection CPUFrequency map[int32]uint64 // IdleCPUUtilization is used to determine idle periods IdleCPUUtilization uint64 FoundNewIdleState bool // Accelerator-QAT Utilization QATUtilization map[string]qat.DeviceUtilizationSample }
func NewNodeMetrics ¶
func NewNodeMetrics() *NodeMetrics
func (*NodeMetrics) AddNodeResUsageFromContainerResUsage ¶
func (ne *NodeMetrics) AddNodeResUsageFromContainerResUsage(containersMetrics map[string]*ContainerMetrics)
AddNodeResUsageFromContainerResUsage adds the sum of all container resource usage as the node resource usage
func (*NodeMetrics) CalcDynEnergy ¶
func (ne *NodeMetrics) CalcDynEnergy(component, id string)
func (*NodeMetrics) CalcIdleEnergy ¶
func (ne *NodeMetrics) CalcIdleEnergy(component string)
func (*NodeMetrics) GetAggrDynEnergyPerID ¶
func (ne *NodeMetrics) GetAggrDynEnergyPerID(component, id string) uint64
GetAggrDynEnergyPerID returns the aggregated dynamic energy for a given id (e.g. a package or gpu id)
func (*NodeMetrics) GetAggrIdleEnergyPerID ¶
func (ne *NodeMetrics) GetAggrIdleEnergyPerID(component, id string) uint64
GetAggrIdleEnergyPerID returns the aggr idle energy for a given id
func (*NodeMetrics) GetDeltaDynEnergyPerID ¶
func (ne *NodeMetrics) GetDeltaDynEnergyPerID(component, id string) uint64
GetDeltaDynEnergyPerID returns the delta dynamic energy for a given id (e.g. a package or gpu id)
func (*NodeMetrics) GetDeltaIdleEnergyPerID ¶
func (ne *NodeMetrics) GetDeltaIdleEnergyPerID(component, id string) uint64
GetDeltaIdleEnergyPerID returns the delta idle energy for a given id (e.g. a package or gpu id)
func (*NodeMetrics) GetNodeResUsagePerResType ¶
func (ne *NodeMetrics) GetNodeResUsagePerResType(resource string) (float64, error)
func (*NodeMetrics) GetSumAggrDynEnergyFromAllSources ¶
func (ne *NodeMetrics) GetSumAggrDynEnergyFromAllSources(component string) uint64
GetSumAggrDynEnergyFromAllSources returns the sum of the aggregated dynamic energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) GetSumAggrIdleEnergyFromAllSources ¶ added in v0.5.4
func (ne *NodeMetrics) GetSumAggrIdleEnergyFromAllSources(component string) uint64
GetSumAggrIdleEnergyFromAllSources returns the sum of the aggregated idle energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) GetSumDeltaDynEnergyFromAllSources ¶
func (ne *NodeMetrics) GetSumDeltaDynEnergyFromAllSources(component string) uint64
GetSumDeltaDynEnergyFromAllSources returns the sum of the delta dynamic energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) GetSumDeltaIdleEnergyFromAllSources ¶ added in v0.5.4
func (ne *NodeMetrics) GetSumDeltaIdleEnergyFromAllSources(component string) uint64
GetSumDeltaIdleEnergyFromAllSources returns the sum of the delta idle energy of all sources (e.g. package or gpu ids)
func (*NodeMetrics) ResetDeltaValues ¶
func (ne *NodeMetrics) ResetDeltaValues()
func (*NodeMetrics) SetNodeComponentsEnergy ¶
func (ne *NodeMetrics) SetNodeComponentsEnergy(componentsEnergy map[int]source.NodeComponentsEnergy, gauge, isIdleEnergy bool)
SetNodeComponentsEnergy adds the idle or absolute energy consumption collected from the node's components (e.g., using RAPL). Absolute energy is the sum of Idle + Dynamic energy.
func (*NodeMetrics) SetNodeGPUEnergy ¶ added in v0.5.4
func (ne *NodeMetrics) SetNodeGPUEnergy(gpuEnergy []uint32, isIdleEnergy bool)
SetNodeGPUEnergy adds the latest energy consumption of each GPU. Right now we don't support other types of accelerators than GPU, but we will in the future.
func (*NodeMetrics) SetNodeOtherComponentsEnergy ¶
func (ne *NodeMetrics) SetNodeOtherComponentsEnergy()
SetNodeOtherComponentsEnergy adds the latest energy consumption of the node's components other than CPU and DRAM. Other components' energy is a special case where the energy is calculated and not measured.
func (*NodeMetrics) SetNodePlatformEnergy ¶ added in v0.5.4
func (ne *NodeMetrics) SetNodePlatformEnergy(platformEnergy map[string]float64, gauge, isIdleEnergy bool)
SetNodePlatformEnergy adds the idle or absolute energy consumption from the node sensor. Absolute energy is the sum of Idle + Dynamic energy.
func (*NodeMetrics) String ¶
func (ne *NodeMetrics) String() string
func (*NodeMetrics) ToEstimatorValues ¶ added in v0.5.4
func (ne *NodeMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) []float64
ToEstimatorValues returns the values for the given featuresName. The metrics can be related to resource utilization or power consumption. Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, whereas the power models are trained to estimate power over a 1-second interval, so it is necessary to normalize the resource utilization by the SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.
func (*NodeMetrics) UpdateDynEnergy ¶
func (ne *NodeMetrics) UpdateDynEnergy()
UpdateDynEnergy calculates the dynamic energy
func (*NodeMetrics) UpdateIdleEnergyWithMinValue ¶ added in v0.5.4
func (ne *NodeMetrics) UpdateIdleEnergyWithMinValue()
type ProcessMetrics ¶
type ProcessMetrics struct { PID uint64 Command string CounterStats map[string]*types.UInt64Stat // ebpf metrics CPUTime *types.UInt64Stat SoftIRQCount []types.UInt64Stat DynEnergyInCore *types.UInt64Stat DynEnergyInDRAM *types.UInt64Stat DynEnergyInUncore *types.UInt64Stat DynEnergyInPkg *types.UInt64Stat DynEnergyInGPU *types.UInt64Stat DynEnergyInOther *types.UInt64Stat DynEnergyInPlatform *types.UInt64Stat IdleEnergyInCore *types.UInt64Stat IdleEnergyInDRAM *types.UInt64Stat IdleEnergyInUncore *types.UInt64Stat IdleEnergyInPkg *types.UInt64Stat IdleEnergyInGPU *types.UInt64Stat IdleEnergyInOther *types.UInt64Stat IdleEnergyInPlatform *types.UInt64Stat }
func NewProcessMetrics ¶
func NewProcessMetrics(pid uint64, command string) *ProcessMetrics
NewProcessMetrics creates a new ProcessMetrics instance
func (*ProcessMetrics) GetDynEnergyStat ¶ added in v0.5.1
func (p *ProcessMetrics) GetDynEnergyStat(component string) *types.UInt64Stat
func (*ProcessMetrics) GetIdleEnergyStat ¶ added in v0.5.1
func (p *ProcessMetrics) GetIdleEnergyStat(component string) *types.UInt64Stat
func (*ProcessMetrics) ResetDeltaValues ¶
func (p *ProcessMetrics) ResetDeltaValues()
ResetDeltaValues resets all current values to 0
func (*ProcessMetrics) String ¶
func (p *ProcessMetrics) String() string
func (*ProcessMetrics) SumAllDynAggrValues ¶
func (p *ProcessMetrics) SumAllDynAggrValues() uint64
func (*ProcessMetrics) SumAllDynDeltaValues ¶
func (p *ProcessMetrics) SumAllDynDeltaValues() uint64
func (*ProcessMetrics) ToEstimatorValues ¶
func (p *ProcessMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)
ToEstimatorValues returns the values for the given featuresName. Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, whereas the power models are trained to estimate power over a 1-second interval, so it is necessary to normalize the resource utilization by the SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.