metric

package
v0.5.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 16, 2023 License: Apache-2.0 Imports: 18 Imported by: 0

Documentation

Index

Constants

View Source
// Component name strings. They mirror the per-component energy fields of
// NodeMetrics and ProcessMetrics (Core, DRAM, Uncore, Pkg, GPU, Other,
// Platform), plus a name for the frequency metric.
// NOTE(review): presumably these are the values accepted by the `component`
// parameter of methods such as GetDynEnergyStat and CalcDynEnergy — confirm
// against the implementations.
const (
	CORE      = "core"
	DRAM      = "dram"
	UNCORE    = "uncore"
	PKG       = "pkg"
	GPU       = "gpu"
	OTHER     = "other"
	PLATFORM  = "platform"
	FREQUENCY = "frequency"
)
View Source
// Label aliases and name prefixes used by this package.
const (
	// TO-DO: merge to cgroup stat
	// ByteReadLabel and ByteWriteLabel are aliases of config.BytesReadIO and
	// config.BytesWriteIO respectively.
	ByteReadLabel  = config.BytesReadIO
	ByteWriteLabel = config.BytesWriteIO

	// DeltaPrefix ("curr_") and AggrPrefix ("total_") are name prefixes.
	// NOTE(review): presumably prepended to a metric name to select the
	// current-interval (delta) value or the aggregated total — confirm.
	DeltaPrefix = "curr_"
	AggrPrefix  = "total_"
)

Variables

View Source
// Feature-name lists for container metrics. Each list starts out as a
// non-nil, empty slice.
var (
	// ContainerFloatFeatureNames holds the feature names of the container
	// float collector_metric. It is specific to the machine-learning based
	// models.
	ContainerFloatFeatureNames = []string{}
	// ContainerUintFeaturesNames holds the feature names of the container
	// uint collector_metric. It is specific to the machine-learning based
	// models.
	ContainerUintFeaturesNames = []string{}
	// ContainerFeaturesNames holds all the feature names of the container
	// collector_metric. It is specific to the machine-learning based models.
	ContainerFeaturesNames = []string{}
)
View Source
// Node-level metadata, evaluated once during package initialization.
var (
	// NodeName, NodeCPUArchitecture and NodeCPUPackageMap are initialized
	// from GetNodeName, getCPUArch and getCPUPackageMap respectively.
	NodeName            = GetNodeName()
	NodeCPUArchitecture = getCPUArch()
	NodeCPUPackageMap   = getCPUPackageMap()

	// NodeMetadataFeatureNames holds the names of the system metadata
	// features.
	NodeMetadataFeatureNames = []string{"cpu_architecture"}
	// NodeMetadataFeatureValues holds the system metadata values, in the same
	// order as NodeMetadataFeatureNames.
	NodeMetadataFeatureValues = []string{NodeCPUArchitecture}
)
View Source
// Metric- and feature-name lists for process metrics. Only
// ProcessFloatFeatureNames is initialized to an empty slice; the others start
// out nil.
var (
	// ProcessMetricNames holds the list of names of the process metrics.
	ProcessMetricNames []string
	// ProcessFloatFeatureNames holds the feature names of the process float
	// collector_metric. It is specific to the machine-learning based models.
	ProcessFloatFeatureNames = []string{}
	// ProcessUintFeaturesNames holds the feature names of the process uint
	// collector_metric. It is specific to the machine-learning based models.
	ProcessUintFeaturesNames []string
	// ProcessFeaturesNames holds all the feature names of the process
	// collector_metric. It is specific to the machine-learning based models.
	ProcessFeaturesNames []string
)
View Source
// Lists of metrics that might be collected, plus the hardware-counter switch.
// All of them start at their zero value.
var (
	// AvailableEBPFCounters holds the list of eBPF counters that might be
	// collected.
	AvailableEBPFCounters []string
	// AvailableHWCounters holds the list of hardware counters that might be
	// collected.
	AvailableHWCounters []string
	// AvailableCGroupMetrics holds the list of metrics exposed by the cgroup
	// that might be collected.
	AvailableCGroupMetrics []string
	// AvailableKubeletMetrics holds the list of cgroup metrics exposed by the
	// kubelet that might be collected.
	AvailableKubeletMetrics []string

	// CPUHardwareCounterEnabled defines whether hardware counters should be
	// accounted and exported. It defaults to false.
	CPUHardwareCounterEnabled bool
)
View Source
// CPUModelDataPath is the path of the normalized CPU architecture CSV file.
// It is a variable, so it can be reassigned.
// NOTE(review): presumably the file whose rows are decoded into CPUModelData
// when resolving the node CPU architecture — confirm against getCPUArch.
var CPUModelDataPath = "/var/lib/kepler/data/normalized_cpu_arch.csv"

Functions

func GetNodeName added in v0.5.2

func GetNodeName() string

func InitAvailableParamAndMetrics

func InitAvailableParamAndMetrics()

Types

type CPUModelData

// CPUModelData is a single CSV record. Its Architecture field is bound to the
// "Architecture" column through the `csv` struct tag.
// NOTE(review): presumably one row of the file at CPUModelDataPath — confirm.
type CPUModelData struct {
	Architecture string `csv:"Architecture"`
}

type ContainerMetrics

// ContainerMetrics holds the metrics and the identifying metadata of one
// container. It embeds ProcessMetrics, so the ProcessMetrics fields and
// methods are promoted to ContainerMetrics.
type ContainerMetrics struct {
	ProcessMetrics

	// Identification of the container. ContainerName, PodName, Namespace and
	// ContainerID correspond to the arguments of NewContainerMetrics.
	// NOTE(review): CGroupPID and PIDS presumably hold the cgroup ID and the
	// process IDs seen for this container (see SetLatestProcess) — confirm.
	CGroupPID     uint64
	PIDS          []uint64
	ContainerName string
	PodName       string
	Namespace     string
	ContainerID   string

	// NOTE(review): presumably the number of processes currently tracked for
	// this container — confirm.
	CurrProcesses int

	// Cgroup statistics handler and the collected statistics.
	// NOTE(review): CgroupStatMap is presumably keyed by cgroup metric name
	// and refreshed by UpdateCgroupMetrics — confirm.
	CgroupStatHandler cgroup.CCgroupStatHandler
	CgroupStatMap     map[string]*types.UInt64StatCollection
	// TODO: kubelet stat metrics is deprecated since it duplicates the cgroup metrics. We will remove it soon.
	KubeletStats map[string]*types.UInt64Stat
}

func NewContainerMetrics

func NewContainerMetrics(containerName, podName, podNamespace, containerID string) *ContainerMetrics

NewContainerMetrics creates a new ContainerMetrics instance

func (*ContainerMetrics) GetDynEnergyStat added in v0.5.1

func (c *ContainerMetrics) GetDynEnergyStat(component string) (energyStat *types.UInt64Stat)

func (*ContainerMetrics) GetIdleEnergyStat added in v0.5.1

func (c *ContainerMetrics) GetIdleEnergyStat(component string) (energyStat *types.UInt64Stat)

func (*ContainerMetrics) ResetDeltaValues

func (c *ContainerMetrics) ResetDeltaValues()

ResetDeltaValues resets all current (delta) values to 0.

func (*ContainerMetrics) SetLatestProcess

func (c *ContainerMetrics) SetLatestProcess(cgroupPID, pid uint64, comm string)

SetLatestProcess sets cgroupPID, PID, and command to those of the latest captured process. NOTICE: this can lose the main container info for a multi-container pod.

func (*ContainerMetrics) String

func (c *ContainerMetrics) String() string

func (*ContainerMetrics) SumAllDynAggrValues

func (c *ContainerMetrics) SumAllDynAggrValues() uint64

func (*ContainerMetrics) SumAllDynDeltaValues

func (c *ContainerMetrics) SumAllDynDeltaValues() uint64

func (*ContainerMetrics) ToEstimatorValues

func (c *ContainerMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)

ToEstimatorValues returns the values for the given featuresName. Since Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, and the power models are trained to estimate power in a 1-second interval, it is necessary to normalize the resource utilization by the SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.

func (*ContainerMetrics) ToPrometheusValue

func (c *ContainerMetrics) ToPrometheusValue(metric string) string

ToPrometheusValue returns the value for the given metric label.

func (*ContainerMetrics) UpdateCgroupMetrics added in v0.5.1

func (c *ContainerMetrics) UpdateCgroupMetrics() error

type NodeMetrics

// NodeMetrics holds node-level resource usage and, for each component (core,
// DRAM, uncore, package, GPU, other, platform), the absolute, dynamic and idle
// energy statistics.
type NodeMetrics struct {
	// ResourceUsage maps a string key to a float64 usage value.
	// NOTE(review): presumably keyed by resource/metric name (see
	// GetNodeResUsagePerResType) — confirm.
	ResourceUsage map[string]float64

	// Absolute energy is the sum of Idle + Dynamic energy.
	AbsEnergyInCore     *types.UInt64StatCollection
	AbsEnergyInDRAM     *types.UInt64StatCollection
	AbsEnergyInUncore   *types.UInt64StatCollection
	AbsEnergyInPkg      *types.UInt64StatCollection
	AbsEnergyInGPU      *types.UInt64StatCollection
	AbsEnergyInOther    *types.UInt64StatCollection
	AbsEnergyInPlatform *types.UInt64StatCollection

	// Dynamic energy per component.
	// NOTE(review): presumably computed by UpdateDynEnergy/CalcDynEnergy —
	// confirm.
	DynEnergyInCore     *types.UInt64StatCollection
	DynEnergyInDRAM     *types.UInt64StatCollection
	DynEnergyInUncore   *types.UInt64StatCollection
	DynEnergyInPkg      *types.UInt64StatCollection
	DynEnergyInGPU      *types.UInt64StatCollection
	DynEnergyInOther    *types.UInt64StatCollection
	DynEnergyInPlatform *types.UInt64StatCollection

	// Idle energy per component.
	IdleEnergyInCore     *types.UInt64StatCollection
	IdleEnergyInDRAM     *types.UInt64StatCollection
	IdleEnergyInUncore   *types.UInt64StatCollection
	IdleEnergyInPkg      *types.UInt64StatCollection
	IdleEnergyInGPU      *types.UInt64StatCollection
	IdleEnergyInOther    *types.UInt64StatCollection
	IdleEnergyInPlatform *types.UInt64StatCollection

	// CPUFrequency maps an int32 key to a uint64 value.
	// NOTE(review): presumably CPU (core) ID to its frequency — confirm.
	CPUFrequency map[int32]uint64

	// IdleCPUUtilization is used to determine idle periods
	IdleCPUUtilization uint64
	FoundNewIdleState  bool

	// Accelerator-QAT Utilization
	QATUtilization map[string]qat.DeviceUtilizationSample
}

func NewNodeMetrics

func NewNodeMetrics() *NodeMetrics

func (*NodeMetrics) AddNodeResUsageFromContainerResUsage

func (ne *NodeMetrics) AddNodeResUsageFromContainerResUsage(containersMetrics map[string]*ContainerMetrics)

AddNodeResUsageFromContainerResUsage adds the sum of all containers' resource usage as the node resource usage.

func (*NodeMetrics) CalcDynEnergy

func (ne *NodeMetrics) CalcDynEnergy(component, id string)

func (*NodeMetrics) CalcIdleEnergy

func (ne *NodeMetrics) CalcIdleEnergy(component string)

func (*NodeMetrics) GetAggrDynEnergyPerID

func (ne *NodeMetrics) GetAggrDynEnergyPerID(component, id string) uint64

GetAggrDynEnergyPerID returns the aggregated dynamic energy for a given id (e.g. a package or GPU id).

func (*NodeMetrics) GetAggrIdleEnergyPerID

func (ne *NodeMetrics) GetAggrIdleEnergyPerID(component, id string) uint64

GetAggrIdleEnergyPerID returns the aggr idle energy for a given id

func (*NodeMetrics) GetDeltaDynEnergyPerID

func (ne *NodeMetrics) GetDeltaDynEnergyPerID(component, id string) uint64

GetDeltaDynEnergyPerID returns the delta dynamic energy for a given id (e.g. a package or GPU id).

func (*NodeMetrics) GetDeltaIdleEnergyPerID

func (ne *NodeMetrics) GetDeltaIdleEnergyPerID(component, id string) uint64

GetDeltaIdleEnergyPerID returns the delta idle energy for a given id (e.g. a package or GPU id).

func (*NodeMetrics) GetNodeResUsagePerResType

func (ne *NodeMetrics) GetNodeResUsagePerResType(resource string) (float64, error)

func (*NodeMetrics) GetSumAggrDynEnergyFromAllSources

func (ne *NodeMetrics) GetSumAggrDynEnergyFromAllSources(component string) uint64

GetSumAggrDynEnergyFromAllSources returns the sum of the aggregated dynamic energy of all sources (e.g. package or GPU ids).

func (*NodeMetrics) GetSumAggrIdleEnergyFromAllSources added in v0.5.4

func (ne *NodeMetrics) GetSumAggrIdleEnergyFromAllSources(component string) uint64

GetSumAggrIdleEnergyFromAllSources returns the sum of the aggregated idle energy of all sources (e.g. package or GPU ids).

func (*NodeMetrics) GetSumDeltaDynEnergyFromAllSources

func (ne *NodeMetrics) GetSumDeltaDynEnergyFromAllSources(component string) uint64

GetSumDeltaDynEnergyFromAllSources returns the sum of the delta dynamic energy of all sources (e.g. package or GPU ids).

func (*NodeMetrics) GetSumDeltaIdleEnergyFromAllSources added in v0.5.4

func (ne *NodeMetrics) GetSumDeltaIdleEnergyFromAllSources(component string) uint64

GetSumDeltaIdleEnergyFromAllSources returns the sum of the delta idle energy of all sources (e.g. package or GPU ids).

func (*NodeMetrics) ResetDeltaValues

func (ne *NodeMetrics) ResetDeltaValues()

func (*NodeMetrics) SetNodeComponentsEnergy

func (ne *NodeMetrics) SetNodeComponentsEnergy(componentsEnergy map[int]source.NodeComponentsEnergy, gauge, isIdleEnergy bool)

SetNodeComponentsEnergy adds the idle or absolute energy consumption collected from the node's components (e.g., using RAPL). Absolute energy is the sum of Idle + Dynamic energy.

func (*NodeMetrics) SetNodeGPUEnergy added in v0.5.4

func (ne *NodeMetrics) SetNodeGPUEnergy(gpuEnergy []uint32, isIdleEnergy bool)

SetNodeGPUEnergy adds the latest energy consumption of each GPU. Right now we don't support other types of accelerators than GPU, but we will in the future.

func (*NodeMetrics) SetNodeOtherComponentsEnergy

func (ne *NodeMetrics) SetNodeOtherComponentsEnergy()

SetNodeOtherComponentsEnergy adds the latest energy consumption collected from the node's components other than CPU and DRAM. Other components energy is a special case where the energy is calculated and not measured.

func (*NodeMetrics) SetNodePlatformEnergy added in v0.5.4

func (ne *NodeMetrics) SetNodePlatformEnergy(platformEnergy map[string]float64, gauge, isIdleEnergy bool)

SetNodePlatformEnergy adds the idle or absolute energy consumption from the node sensor. Absolute energy is the sum of Idle + Dynamic energy.

func (*NodeMetrics) String

func (ne *NodeMetrics) String() string

func (*NodeMetrics) ToEstimatorValues added in v0.5.4

func (ne *NodeMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) []float64

ToEstimatorValues returns the values for the given featuresName. The metrics can be related to resource utilization or power consumption. Since Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, and the power models are trained to estimate power in a 1-second interval, it is necessary to normalize the resource utilization by the SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.

func (*NodeMetrics) UpdateDynEnergy

func (ne *NodeMetrics) UpdateDynEnergy()

UpdateDynEnergy calculates the dynamic energy

func (*NodeMetrics) UpdateIdleEnergyWithMinValue added in v0.5.4

func (ne *NodeMetrics) UpdateIdleEnergyWithMinValue()

type ProcessMetrics

// ProcessMetrics holds the counters and the per-component dynamic and idle
// energy statistics of one process. It is also embedded in ContainerMetrics.
type ProcessMetrics struct {
	// PID and Command correspond to the arguments of NewProcessMetrics.
	// CounterStats maps a string key to a statistic.
	// NOTE(review): presumably keyed by counter name — confirm.
	PID          uint64
	Command      string
	CounterStats map[string]*types.UInt64Stat
	// ebpf metrics
	// NOTE(review): the "ebpf metrics" label presumably covers CPUTime and
	// SoftIRQCount only; the DynEnergyIn* fields below hold the dynamic energy
	// per component — confirm.
	CPUTime             *types.UInt64Stat
	SoftIRQCount        []types.UInt64Stat
	DynEnergyInCore     *types.UInt64Stat
	DynEnergyInDRAM     *types.UInt64Stat
	DynEnergyInUncore   *types.UInt64Stat
	DynEnergyInPkg      *types.UInt64Stat
	DynEnergyInGPU      *types.UInt64Stat
	DynEnergyInOther    *types.UInt64Stat
	DynEnergyInPlatform *types.UInt64Stat

	// Idle energy per component.
	IdleEnergyInCore     *types.UInt64Stat
	IdleEnergyInDRAM     *types.UInt64Stat
	IdleEnergyInUncore   *types.UInt64Stat
	IdleEnergyInPkg      *types.UInt64Stat
	IdleEnergyInGPU      *types.UInt64Stat
	IdleEnergyInOther    *types.UInt64Stat
	IdleEnergyInPlatform *types.UInt64Stat
}

func NewProcessMetrics

func NewProcessMetrics(pid uint64, command string) *ProcessMetrics

NewProcessMetrics creates a new ProcessMetrics instance

func (*ProcessMetrics) GetDynEnergyStat added in v0.5.1

func (p *ProcessMetrics) GetDynEnergyStat(component string) *types.UInt64Stat

func (*ProcessMetrics) GetIdleEnergyStat added in v0.5.1

func (p *ProcessMetrics) GetIdleEnergyStat(component string) *types.UInt64Stat

func (*ProcessMetrics) ResetDeltaValues

func (p *ProcessMetrics) ResetDeltaValues()

ResetDeltaValues resets all current (delta) values to 0.

func (*ProcessMetrics) String

func (p *ProcessMetrics) String() string

func (*ProcessMetrics) SumAllDynAggrValues

func (p *ProcessMetrics) SumAllDynAggrValues() uint64

func (*ProcessMetrics) SumAllDynDeltaValues

func (p *ProcessMetrics) SumAllDynDeltaValues() uint64

func (*ProcessMetrics) ToEstimatorValues

func (p *ProcessMetrics) ToEstimatorValues(featuresName []string, shouldNormalize bool) (values []float64)

ToEstimatorValues returns the values for the given featuresName. Since Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, and the power models are trained to estimate power in a 1-second interval, it is necessary to normalize the resource utilization by the SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within a 1-second interval.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL