Documentation ¶
Index ¶
- Constants
- Variables
- func ExternalMustRegister(metrics ...prometheus.Collector)
- func RecordBESuppressCores(suppressType string, value float64)
- func RecordBESuppressLSUsedCPU(value float64)
- func RecordCollectNodeCPUInfoStatus(err error)
- func RecordCollectNodeLocalStorageInfoStatus(err error)
- func RecordCollectNodeNUMAInfoStatus(err error)
- func RecordContainerCPI(status *corev1.ContainerStatus, pod *corev1.Pod, cycles, instructions float64)
- func RecordContainerCoreSchedCookie(namespace, podName, podUID, containerName, containerID, groupID string, ...)
- func RecordContainerPSI(status *corev1.ContainerStatus, pod *corev1.Pod, psi *system.PSIByResource)
- func RecordContainerResourceLimits(resourceName string, unit string, status *corev1.ContainerStatus, ...)
- func RecordContainerResourceRequests(resourceName string, unit string, status *corev1.ContainerStatus, ...)
- func RecordContainerScaledCFSBurstUS(podNS, podName, containerID, containerName string, value float64)
- func RecordContainerScaledCFSQuotaUS(podNS, podName, containerID, containerName string, value float64)
- func RecordCoreSchedCookieManageStatus(groupID string, isSucceeded bool)
- func RecordKoordletStartTime(nodeName string, value float64)
- func RecordKubeletRequestDuration(verb, path, code string, seconds float64)
- func RecordNodePredictedResourceReclaimable(resourceName string, unit string, predictor string, value float64)
- func RecordNodeResourceAllocatable(resourceName string, unit string, value float64)
- func RecordNodeResourcePriorityReclaimable(resourceName string, unit string, priority string, value float64)
- func RecordNodeUsedCPU(value float64)
- func RecordPodEviction(namespace, podName, reasonType string)
- func RecordPodPSI(pod *corev1.Pod, psi *system.PSIByResource)
- func RecordResourceUpdateDuration(updaterType, status string, seconds float64)
- func RecordRuntimeHookInvokedDurationMilliSeconds(hookName, stage string, err error, seconds float64)
- func RecordRuntimeHookReconcilerInvokedDurationMilliSeconds(level, resourceType string, err error, seconds float64)
- func Register(node *corev1.Node)
- func ResetCPUBurstCollector()
- func ResetContainerCPI()
- func ResetContainerCoreSchedCookie(namespace, podName, podUID, containerName, containerID, groupID string, ...)
- func ResetContainerPSI()
- func ResetContainerResourceLimits()
- func ResetContainerResourceRequests()
- func ResetPodPSI()
- func SinceInSeconds(start time.Time) float64
- type PSIRecord
Constants ¶
View Source
// Label keys used by the core scheduling metrics.
const (
	// CoreSchedCookieKey is the "core_sched_cookie" label key.
	CoreSchedCookieKey = "core_sched_cookie"
	// CoreSchedGroupKey is the "core_sched_group" label key.
	CoreSchedGroupKey = "core_sched_group"
)
View Source
const (
	// CPIField is the "cpi_field" label key of the container CPI metric.
	CPIField = "cpi_field"

	// Cycles and Instructions are presumably the values recorded under the
	// CPIField label (verify against RecordContainerCPI).
	Cycles       = "cycles"
	Instructions = "instructions"
)
View Source
// HTTP request attribute keys; presumably the label keys of the kubelet
// request duration metric (verify against RecordKubeletRequestDuration).
const (
	HTTPVerbKey = "verb"
	HTTPPathKey = "path"
	HTTPCodeKey = "code"
)
View Source
const (
	// KoordletSubsystem is the Subsystem value set on the koordlet metrics.
	KoordletSubsystem = "koordlet"

	// Generic label keys.
	NodeKey      = "node"
	PriorityKey  = "priority"
	PredictorKey = "predictor"
	StatusKey    = "status"

	// Status values; presumably recorded under StatusKey (verify against the
	// Record*Status callers).
	StatusSucceed = "succeeded"
	StatusFailed  = "failed"

	// EvictionReasonKey is the label key carrying the eviction reason.
	EvictionReasonKey = "reason"
	// BESuppressTypeKey is the label key carrying the BE suppress type.
	BESuppressTypeKey = "type"

	// Pod and container identification label keys.
	ContainerID   = "container_id"
	ContainerName = "container_name"
	PodUID        = "pod_uid"
	PodName       = "pod_name"
	PodNamespace  = "pod_namespace"

	// Resource label keys.
	ResourceKey = "resource"
	UnitKey     = "unit"

	// Unit names; presumably the values recorded under UnitKey (verify
	// against the Record*Resource* callers).
	UnitCore    = "core"
	UnitByte    = "byte"
	UnitInteger = "integer"
)
View Source
// Label keys used by the container and pod PSI metrics.
const (
	PSIDegree        = "psi_degree"
	PSIPrecision     = "psi_precision"
	PSIResourceType  = "psi_resource_type"
	CPUFullSupported = "cpu_full_supported"
)
View Source
// Resource type, precision and degree names; presumably the values recorded
// under the PSI label keys (verify against RecordContainerPSI/RecordPodPSI).
const (
	ResourceTypeCPU = "cpu"
	ResourceTypeMem = "mem"
	ResourceTypeIO  = "io"

	Precision10  = "avg10"
	Precision60  = "avg60"
	Precision300 = "avg300"

	DegreeSome = "some"
	DegreeFull = "full"
)
View Source
const (
	// ResourceUpdaterType represents the type of resource updater, including
	// cgroup files, resctrl files, etc.
	ResourceUpdaterType = "type"
	// ResourceUpdateStatusKey represents the status of resource update.
	ResourceUpdateStatusKey = "status"
)
View Source
// Resource update status names; presumably the values recorded under the
// resource update status label (verify against RecordResourceUpdateDuration).
const (
	ResourceUpdateStatusSuccess = "success"
	ResourceUpdateStatusFailed  = "failed"
)
View Source
const (
	// RuntimeHookName represents the hook plugin name of runtime hook.
	RuntimeHookName = "hook"
	// RuntimeHookStage represents the stage of invoked runtime hook.
	RuntimeHookStage = "stage"
	// RuntimeHookReconcilerLevel represents the level (e.g. pod-level) of
	// invoked runtime hook reconciler.
	RuntimeHookReconcilerLevel = "level"
	// RuntimeHookReconcilerResourceType represents the resource type
	// (e.g. cpu.cfs_quota_us) of invoked runtime hook reconciler.
	RuntimeHookReconcilerResourceType = "resource_type"
)
View Source
// DefaultHTTPPath is the URL path "/metrics".
const DefaultHTTPPath = "/metrics"
View Source
// ExternalHTTPPath is the URL path "/external-metrics".
const ExternalHTTPPath = "/external-metrics"
View Source
// HTTPVerbGet is the lower-case verb name "get".
const HTTPVerbGet = "get"
View Source
// InternalHTTPPath is the URL path "/internal-metrics".
const InternalHTTPPath = "/internal-metrics"
Variables ¶
View Source
var ( KoordletStartTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "start_time", Help: "the start time of koordlet", }, []string{NodeKey}) CollectNodeCPUInfoStatus = prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: KoordletSubsystem, Name: "collect_node_cpu_info_status", Help: "the count of CollectNodeCPUInfo status", }, []string{NodeKey, StatusKey}) CollectNodeNUMAInfoStatus = prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: KoordletSubsystem, Name: "collect_node_numa_info_status", Help: "the count of CollectNodeNUMAInfo status", }, []string{NodeKey, StatusKey}) CollectNodeLocalStorageInfoStatus = prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: KoordletSubsystem, Name: "collect_node_local_storage_info_status", Help: "the count of CollectNodeLocalStorageInfo status", }, []string{NodeKey, StatusKey}) PodEviction = prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: KoordletSubsystem, Name: "pod_eviction", Help: "Number of eviction launched by koordlet", }, []string{NodeKey, EvictionReasonKey}) PodEvictionDetail = metrics.NewGCCounterVec("pod_eviction_detail", prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: KoordletSubsystem, Name: "pod_eviction_detail", Help: "evict detail launched by koordlet", }, []string{NodeKey, PodNamespace, PodName, EvictionReasonKey})) NodeUsedCPU = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "node_used_cpu_cores", Help: "Number of cpu cores used by node in realtime", }, []string{NodeKey}) CommonCollectors = []prometheus.Collector{ KoordletStartTime, CollectNodeCPUInfoStatus, CollectNodeNUMAInfoStatus, CollectNodeLocalStorageInfoStatus, PodEviction, PodEvictionDetail.GetCounterVec(), NodeUsedCPU, } )
View Source
var ( ContainerCoreSchedCookie = metrics.NewGCGaugeVec("container_core_sched_cookie", prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "container_core_sched_cookie", Help: "the core scheduling cookie of the container", }, []string{NodeKey, PodName, PodNamespace, PodUID, ContainerName, ContainerID, CoreSchedGroupKey, CoreSchedCookieKey})) CoreSchedCookieManageStatus = metrics.NewGCCounterVec("core_sched_cookie_manage_status", prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: KoordletSubsystem, Name: "core_sched_cookie_manage_status", Help: "the manage status of the core scheduling cookie", }, []string{NodeKey, CoreSchedGroupKey, StatusKey})) CoreSchedCollector = []prometheus.Collector{ ContainerCoreSchedCookie.GetGaugeVec(), CoreSchedCookieManageStatus.GetCounterVec(), } )
View Source
var ( ContainerCPI = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "container_cpi", Help: "Container cpi collected by koordlet", }, []string{NodeKey, ContainerID, ContainerName, PodUID, PodName, PodNamespace, CPIField}) CPICollectors = []prometheus.Collector{ ContainerCPI, } )
View Source
var ( ContainerScaledCFSBurstUS = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "container_scaled_cfs_burst_us", Help: "The maximum accumulated run-time(in microseconds) in container-level set by koordlet", }, []string{NodeKey, PodNamespace, PodName, ContainerID, ContainerName}) ContainerScaledCFSQuotaUS = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "container_scaled_cfs_quota_us", Help: "Run-time replenished within a period (in microseconds) in container-level set by koordlet", }, []string{NodeKey, PodNamespace, PodName, ContainerID, ContainerName}) CPUBurstCollector = []prometheus.Collector{ ContainerScaledCFSBurstUS, ContainerScaledCFSQuotaUS, } )
View Source
var ( BESuppressCPU = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "be_suppress_cpu_cores", Help: "Number of cores suppress by koordlet", }, []string{NodeKey, BESuppressTypeKey}) BESuppressLSUsedCPU = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "be_suppress_ls_used_cpu_cores", Help: "Number of cpu cores used by LS. We consider non-BE pods and podMeta-missing pods as LS.", }, []string{NodeKey}) CPUSuppressCollector = []prometheus.Collector{ BESuppressCPU, BESuppressLSUsedCPU, } )
View Source
var ( NodeName string Node *corev1.Node )
View Source
var ( NodePredictedResourceReclaimable = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "node_predicted_resource_reclaimable", Help: "the node reclaimable resources predicted by koordinator", }, []string{NodeKey, PredictorKey, ResourceKey, UnitKey}) PredictionCollectors = []prometheus.Collector{ NodePredictedResourceReclaimable, } )
View Source
var ( ContainerPSI = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "container_psi", Help: "Container psi collected by koordlet", }, []string{NodeKey, ContainerID, ContainerName, PodUID, PodName, PodNamespace, PSIResourceType, PSIPrecision, PSIDegree, CPUFullSupported}) PodPSI = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "pod_psi", Help: "Pod psi collected by koordlet", }, []string{NodeKey, PodUID, PodName, PodNamespace, PSIResourceType, PSIPrecision, PSIDegree, CPUFullSupported}) PSICollectors = []prometheus.Collector{ ContainerPSI, PodPSI, } )
View Source
var ( NodeResourceAllocatable = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "node_resource_allocatable", Help: "the node allocatable of resources updated by koordinator", }, []string{NodeKey, ResourceKey, UnitKey}) NodeResourcePriorityReclaimable = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "node_priority_resource_reclaimable", Help: "the node reclaimable of different priorities resources updated by koordinator", }, []string{NodeKey, PriorityKey, ResourceKey, UnitKey}) ContainerResourceRequests = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "container_resource_requests", Help: "the container requests of resources updated by koordinator", }, []string{NodeKey, ResourceKey, UnitKey, PodUID, PodName, PodNamespace, ContainerID, ContainerName}) ContainerResourceLimits = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: KoordletSubsystem, Name: "container_resource_limits", Help: "the container limits of resources updated by koordinator", }, []string{NodeKey, ResourceKey, UnitKey, PodUID, PodName, PodNamespace, ContainerID, ContainerName}) ResourceSummaryCollectors = []prometheus.Collector{ NodeResourceAllocatable, NodeResourcePriorityReclaimable, ContainerResourceRequests, ContainerResourceLimits, } )
View Source
var ( // ExternalRegistry register metrics for users such as PMU or extended resources settings ExternalRegistry = prometheus.NewRegistry() )
View Source
var ( // InternalRegistry only register metrics of koordlet itself for performance and functional monitor // TODO consider using k8s.io/component-base/metrics to replace github.com/prometheus/client_golang/prometheus InternalRegistry = legacyregistry.DefaultGatherer )
View Source
// KubeletStubCollector is a collector list holding only
// kubeletRequestDurationSeconds.
var KubeletStubCollector = []prometheus.Collector{kubeletRequestDurationSeconds}
View Source
// ResourceExecutorCollector is a collector list holding only
// resourceUpdateDurationMilliSeconds.
var ResourceExecutorCollector = []prometheus.Collector{resourceUpdateDurationMilliSeconds}
View Source
// RuntimeHookCollectors is a collector list holding the runtime hook and
// runtime hook reconciler duration collectors, in that order.
var RuntimeHookCollectors = []prometheus.Collector{
	runtimeHookInvokedDurationMilliSeconds,
	runtimeHookReconcilerInvokedDurationMilliSeconds,
}
Functions ¶
func ExternalMustRegister ¶ added in v1.4.1
func ExternalMustRegister(metrics ...prometheus.Collector)
func RecordBESuppressCores ¶
func RecordBESuppressCores(suppressType string, value float64)
func RecordBESuppressLSUsedCPU ¶ added in v1.1.1
func RecordBESuppressLSUsedCPU(value float64)
func RecordCollectNodeCPUInfoStatus ¶
func RecordCollectNodeCPUInfoStatus(err error)
func RecordCollectNodeLocalStorageInfoStatus ¶ added in v1.3.0
func RecordCollectNodeLocalStorageInfoStatus(err error)
func RecordCollectNodeNUMAInfoStatus ¶ added in v1.3.0
func RecordCollectNodeNUMAInfoStatus(err error)
func RecordContainerCPI ¶ added in v1.1.0
func RecordContainerCPI(status *corev1.ContainerStatus, pod *corev1.Pod, cycles, instructions float64)
func RecordContainerCoreSchedCookie ¶ added in v1.4.0
func RecordContainerPSI ¶ added in v1.1.0
func RecordContainerPSI(status *corev1.ContainerStatus, pod *corev1.Pod, psi *system.PSIByResource)
func RecordContainerResourceLimits ¶ added in v1.1.1
func RecordContainerResourceRequests ¶ added in v1.1.1
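func RecordContainerScaledCFSBurstUS ¶ added in v1.1.1
func RecordContainerScaledCFSBurstUS(podNS, podName, containerID, containerName string, value float64)
func RecordContainerScaledCFSQuotaUS ¶ added in v1.1.1
func RecordContainerScaledCFSQuotaUS(podNS, podName, containerID, containerName string, value float64)
func RecordCoreSchedCookieManageStatus ¶ added in v1.4.0
func RecordCoreSchedCookieManageStatus(groupID string, isSucceeded bool)
func RecordKoordletStartTime ¶
func RecordKoordletStartTime(nodeName string, value float64)
func RecordKubeletRequestDuration ¶ added in v1.5.0
func RecordKubeletRequestDuration(verb, path, code string, seconds float64)
RecordKubeletRequestDuration records the duration of a kubelet HTTP request.
func RecordNodePredictedResourceReclaimable ¶ added in v1.3.0
func RecordNodePredictedResourceReclaimable(resourceName string, unit string, predictor string, value float64)
func RecordNodeResourceAllocatable ¶ added in v1.1.1
func RecordNodeResourceAllocatable(resourceName string, unit string, value float64)
func RecordNodeResourcePriorityReclaimable ¶ added in v1.3.0
func RecordNodeResourcePriorityReclaimable(resourceName string, unit string, priority string, value float64)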
func RecordNodeUsedCPU ¶ added in v1.1.1
func RecordNodeUsedCPU(value float64)
func RecordPodEviction ¶
func RecordPodEviction(namespace, podName, reasonType string)
func RecordPodPSI ¶ added in v1.1.0
func RecordPodPSI(pod *corev1.Pod, psi *system.PSIByResource)
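func RecordResourceUpdateDuration ¶ added in v1.5.0
func RecordResourceUpdateDuration(updaterType, status string, seconds float64)
func RecordRuntimeHookInvokedDurationMilliSeconds ¶ added in v1.5.0
func RecordRuntimeHookInvokedDurationMilliSeconds(hookName, stage string, err error, seconds float64)
func RecordRuntimeHookReconcilerInvokedDurationMilliSeconds ¶ added in v1.5.0
func RecordRuntimeHookReconcilerInvokedDurationMilliSeconds(level, resourceType string, err error, seconds float64)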
func ResetCPUBurstCollector ¶ added in v1.1.1
func ResetCPUBurstCollector()
func ResetContainerCPI ¶ added in v1.1.0
func ResetContainerCPI()
func ResetContainerCoreSchedCookie ¶ added in v1.4.0
func ResetContainerPSI ¶ added in v1.1.0
func ResetContainerPSI()
func ResetContainerResourceLimits ¶ added in v1.1.1
func ResetContainerResourceLimits()
func ResetContainerResourceRequests ¶ added in v1.1.1
func ResetContainerResourceRequests()
func ResetPodPSI ¶ added in v1.1.0
func ResetPodPSI()
func SinceInSeconds ¶ added in v1.5.0
func SinceInSeconds(start time.Time) float64
Types ¶
Click to show internal directories.
Click to hide internal directories.