Documentation ¶
Overview ¶
Package metrics holds prometheus metrics objects and related utility functions. It does not abstract away the prometheus client but the caller rarely needs to refer to prometheus directly.
Index ¶
- Constants
- Variables
- func BoolToFloat64(v bool) float64
- func DumpMetrics() ([]*models.Metric, error)
- func Errno2Outcome(errno unix.Errno) string
- func Error2Outcome(err error) string
- func FlushLoggingMetrics()
- func GetCounterValue(m prometheus.Counter) float64
- func GetGaugeValue(m prometheus.Gauge) float64
- func InitOperatorMetrics()
- func LabelOutcome2Code(outcome string) int
- func Metric[S any](ctor func() S) cell.Cell
- func Register(c prometheus.Collector) error
- func RegisterList(list []prometheus.Collector) error
- func Reinitialize()
- func Unregister(c prometheus.Collector) bool
- func UpdateMapCapacity(groupName string, capacity uint32)
- type APIEventTSHelper
- type GaugeWithThreshold
- type LegacyMetrics
- type LoggingHook
- type Registry
- func (r *Registry) DumpMetrics() ([]*models.Metric, error)
- func (r *Registry) MustRegister(c ...prometheus.Collector)
- func (r *Registry) Register(c prometheus.Collector) error
- func (r *Registry) RegisterList(list []prometheus.Collector) error
- func (r *Registry) Reinitialize()
- func (r *Registry) Unregister(c prometheus.Collector) bool
- type RegistryConfig
- type RegistryParams
- type ResponderWrapper
Constants ¶
const ( // ErrorTimeout is the value used to notify timeout errors. ErrorTimeout = "timeout" // ErrorProxy is the value used to notify errors on Proxy. ErrorProxy = "proxy" // L7DNS is the value used to report DNS label on metrics L7DNS = "dns" // SubsystemBPF is the subsystem to scope metrics related to the bpf syscalls. SubsystemBPF = "bpf" // SubsystemDatapath is the subsystem to scope metrics related to management of // the datapath. It is prepended to metric names and separated with a '_'. SubsystemDatapath = "datapath" // SubsystemAgent is the subsystem to scope metrics related to the cilium agent itself. SubsystemAgent = "agent" // SubsystemIPCache is the subsystem to scope metrics related to the ipcache. SubsystemIPCache = "ipcache" // SubsystemK8s is the subsystem to scope metrics related to Kubernetes SubsystemK8s = "k8s" // SubsystemK8sClient is the subsystem to scope metrics related to the kubernetes client. SubsystemK8sClient = "k8s_client" // SubsystemWorkQueue is the subsystem to scope metrics related to the workqueue. SubsystemWorkQueue = "k8s_workqueue" // SubsystemKVStore is the subsystem to scope metrics related to the kvstore. SubsystemKVStore = "kvstore" // SubsystemFQDN is the subsystem to scope metrics related to the FQDN proxy. SubsystemFQDN = "fqdn" // SubsystemNodes is the subsystem to scope metrics related to the node manager. SubsystemNodes = "nodes" // SubsystemTriggers is the subsystem to scope metrics related to the trigger package. SubsystemTriggers = "triggers" // SubsystemAPILimiter is the subsystem to scope metrics related to the API limiter package. 
SubsystemAPILimiter = "api_limiter" // CiliumAgentNamespace is used to scope metrics from the Cilium Agent CiliumAgentNamespace = "cilium" // CiliumClusterMeshAPIServerNamespace is used to scope metrics from the // Cilium Cluster Mesh API Server CiliumClusterMeshAPIServerNamespace = "cilium_clustermesh_apiserver" // CiliumKVStoreMeshNamespace is used to scope metrics from // Cilium KVStoreMesh CiliumKVStoreMeshNamespace = "cilium_kvstoremesh" // CiliumOperatorNamespace is used to scope metrics from the Cilium Operator CiliumOperatorNamespace = "cilium_operator" // LabelError indicates the type of error (string) LabelError = "error" // LabelOutcome indicates whether the outcome of the operation was successful or not LabelOutcome = "outcome" // LabelAttempts is the number of attempts it took to complete the operation LabelAttempts = "attempts" // LabelValueTrue is the string value for true metric label values. LabelValueTrue = "true" // LabelValueFalse is the string value for false metric label values. LabelValueFalse = "false" // LabelValueOutcomeSuccess is used as a successful outcome of an operation LabelValueOutcomeSuccess = "success" // LabelValueOutcomeFail is used as an unsuccessful outcome of an operation LabelValueOutcomeFail = "fail" // LabelValueOutcomeFailure is used as an unsuccessful outcome of an operation. // NOTE: This should only be used for existing metrics, new metrics should use LabelValueOutcomeFail. 
LabelValueOutcomeFailure = "failure" // LabelDropReason is used to describe reason for dropping a packets/bytes LabelDropReason = "reason" // LabelEventSourceAPI marks event-related metrics that come from the API LabelEventSourceAPI = "api" // LabelEventSourceK8s marks event-related metrics that come from k8s LabelEventSourceK8s = "k8s" // LabelEventSourceFQDN marks event-related metrics that come from pkg/fqdn LabelEventSourceFQDN = "fqdn" // LabelEventSourceContainerd marks event-related metrics that come from docker LabelEventSourceContainerd = "docker" // LabelDatapathArea marks which area the metrics are related to (eg, which BPF map) LabelDatapathArea = "area" // LabelDatapathName marks a unique identifier for this metric. // The name should be defined once for a given type of error. LabelDatapathName = "name" // LabelDatapathFamily marks which protocol family (IPv4, IPV6) the metric is related to. LabelDatapathFamily = "family" // LabelProtocol marks the L4 protocol (TCP, ANY) for the metric. LabelProtocol = "protocol" // LabelSignalType marks the signal name LabelSignalType = "signal" // LabelSignalData marks the signal data LabelSignalData = "data" // LabelStatus the label from completed task LabelStatus = "status" // LabelPolicyEnforcement is the label used to see the enforcement status LabelPolicyEnforcement = "enforcement" // LabelPolicySource is the label used to see the enforcement status LabelPolicySource = "source" LabelSource = "source" // LabelScope is the label used to defined multiples scopes in the same // metric. For example, one counter may measure a metric over the scope of // the entire event (scope=global), or just part of an event // (scope=slow_path) LabelScope = "scope" // LabelProtocolL7 is the label used when working with layer 7 protocols. 
LabelProtocolL7 = "protocol_l7" // LabelBuildState is the state a build queue entry is in LabelBuildState = "state" // LabelBuildQueueName is the name of the build queue LabelBuildQueueName = "name" // LabelAction is the label used to defined what kind of action was performed in a metric LabelAction = "action" // LabelSubsystem is the label used to refer to any of the child process // started by cilium (Envoy, monitor, etc..) LabelSubsystem = "subsystem" // LabelKind is the kind of a label LabelKind = "kind" // LabelEventSource is the source of a label for event metrics // i.e. k8s, containerd, api. LabelEventSource = "source" // LabelPath is the label for the API path LabelPath = "path" // LabelMethod is the label for the HTTP method LabelMethod = "method" // LabelAPIReturnCode is the HTTP code returned for that API path LabelAPIReturnCode = "return_code" // LabelOperation is the label for BPF maps operations LabelOperation = "operation" // LabelMapName is the label for the BPF map name LabelMapName = "map_name" LabelMapGroup = "map_group" // LabelVersion is the label for the version number LabelVersion = "version" // LabelVersionRevision is the label for the version revision LabelVersionRevision = "revision" // LabelArch is the label for the platform architecture (e.g. 
linux/amd64) LabelArch = "arch" // LabelDirection is the label for traffic direction LabelDirection = "direction" // LabelSourceCluster is the label for source cluster name LabelSourceCluster = "source_cluster" // LabelSourceNodeName is the label for source node name LabelSourceNodeName = "source_node_name" // LabelTargetCluster is the label for target cluster name LabelTargetCluster = "target_cluster" // LabelTargetNodeIP is the label for target node IP LabelTargetNodeIP = "target_node_ip" // LabelTargetNodeName is the label for target node name LabelTargetNodeName = "target_node_name" // LabelTargetNodeType is the label for target node type (local_node, remote_intra_cluster, vs remote_inter_cluster) LabelTargetNodeType = "target_node_type" LabelLocationLocalNode = "local_node" LabelLocationRemoteIntraCluster = "remote_intra_cluster" LabelLocationRemoteInterCluster = "remote_inter_cluster" // Rule label is a label for a L7 rule name. LabelL7Rule = "rule" // LabelL7ProxyType is the label for denoting a L7 proxy type. LabelL7ProxyType = "proxy_type" // LabelType is the label for type in general (e.g. endpoint, node) LabelType = "type" LabelPeerEndpoint = "endpoint" LabelPeerNode = "node" LabelTrafficHTTP = "http" LabelTrafficICMP = "icmp" LabelAddressType = "address_type" LabelAddressTypePrimary = "primary" LabelAddressTypeSecondary = "secondary" )
const DefaultMapCapacity = 65536
In general, most bpf maps are allocated to occupy a 16-bit key size. To reduce the number of metrics that need to be emitted for map capacity, we assume a default map size of 2^16 entries for all maps, which can be assumed unless specified otherwise.
Variables ¶
var ( NoOpMetric prometheus.Metric = &mockMetric{} NoOpCollector prometheus.Collector = &collector{} NoOpCounter metricpkg.Counter = &counter{NoOpMetric, NoOpCollector} NoOpCounterVec metricpkg.Vec[metricpkg.Counter] = &counterVec{NoOpCollector} NoOpObserver metricpkg.Observer = &observer{} NoOpHistogram metricpkg.Histogram = &histogram{NoOpCollector} NoOpObserverVec metricpkg.Vec[metricpkg.Observer] = &observerVec{NoOpCollector} NoOpGauge metricpkg.Gauge = &gauge{NoOpMetric, NoOpCollector} NoOpGaugeVec metricpkg.Vec[metricpkg.Gauge] = &gaugeVec{NoOpCollector} NoOpGaugeDeletableVec metricpkg.DeletableVec[metricpkg.Gauge] = &gaugeDeletableVec{gaugeVec{NoOpCollector}} )
var ( // LabelValuesBool is metric label value set for boolean type. LabelValuesBool = metric.NewValues(LabelValueTrue, LabelValueFalse) // Namespace is used to scope metrics from cilium. It is prepended to metric // names and separated with a '_' Namespace = CiliumAgentNamespace BPFMapPressure = true // BootstrapTimes is the durations of cilium-agent bootstrap sequence. BootstrapTimes = NoOpObserverVec // APIInteractions is the total time taken to process an API call made // to the cilium-agent APIInteractions = NoOpObserverVec // NodeConnectivityStatus is the connectivity status between local node to // other node intra or inter cluster. NodeConnectivityStatus = NoOpGaugeDeletableVec // NodeConnectivityLatency is the connectivity latency between local node to // other node intra or inter cluster. NodeConnectivityLatency = NoOpGaugeDeletableVec // Endpoint is a function used to collect this metric. // It must be thread-safe. Endpoint metric.GaugeFunc // EndpointMaxIfindex is the maximum observed interface index for existing endpoints EndpointMaxIfindex = NoOpGauge // EndpointRegenerationTotal is a count of the number of times any endpoint // has been regenerated and success/fail outcome EndpointRegenerationTotal = NoOpCounterVec // EndpointStateCount is the total count of the endpoints in various states. EndpointStateCount = NoOpGaugeVec // EndpointRegenerationTimeStats is the total time taken to regenerate // endpoints, labeled by span name and status ("success" or "failure") EndpointRegenerationTimeStats = NoOpObserverVec // EndpointPropagationDelay is the delay between creation of local CiliumEndpoint // and update for that CiliumEndpoint received through CiliumEndpointSlice. // Measure of local CEP roundtrip time with CiliumEndpointSlice feature enabled. 
EndpointPropagationDelay = NoOpObserverVec // Policy // Policy is the number of policies loaded into the agent Policy = NoOpGauge // PolicyRegenerationCount is the total number of successful policy // regenerations. // Deprecated: Use EndpointRegenerationTotal. PolicyRegenerationCount = NoOpCounter // PolicyRegenerationTimeStats is the total time taken to generate policies. // Deprecated: Use EndpointRegenerationTimeStats. PolicyRegenerationTimeStats = NoOpObserverVec // PolicyRevision is the current policy revision number for this agent PolicyRevision = NoOpGauge // PolicyChangeTotal is a count of policy changes by outcome ("success" or // "failure") PolicyChangeTotal = NoOpCounterVec // PolicyEndpointStatus is the number of endpoints with policy labeled by enforcement type PolicyEndpointStatus = NoOpGaugeVec // PolicyImplementationDelay is a distribution of times taken from adding a // policy (and incrementing the policy revision) to seeing it in the datapath // per Endpoint. This reflects the actual delay perceived by traffic flowing // through the datapath. The longest times will roughly correlate with the // time taken to fully deploy an endpoint. PolicyImplementationDelay = NoOpObserverVec // CIDRGroupsReferenced is the number of CNPs and CCNPs referencing at least one CiliumCIDRGroup. // CNPs with empty or non-existing CIDRGroupRefs are not considered. CIDRGroupsReferenced = NoOpGauge // CIDRGroupTranslationTimeStats is the time taken to translate the policy field `FromCIDRGroupRef` // after the referenced CIDRGroups have been updated or deleted. CIDRGroupTranslationTimeStats = NoOpHistogram // Identity is the number of identities currently in use on the node by type Identity = NoOpGaugeVec // EventTS is the time in seconds since epoch that we last received an // event that was handled by Cilium. This metric tracks the source of the // event which can be one of K8s or Cilium's API. 
EventTS = NoOpGaugeVec // EventLagK8s is the lag calculation for k8s Pod events. EventLagK8s = NoOpGauge // ProxyRedirects is the number of redirects labeled by protocol ProxyRedirects = NoOpGaugeVec // ProxyPolicyL7Total is a count of all l7 requests handled by proxy ProxyPolicyL7Total = NoOpCounterVec // ProxyUpstreamTime is how long the upstream server took to reply labeled // by error, protocol and span time ProxyUpstreamTime = NoOpObserverVec // ProxyDatapathUpdateTimeout is a count of all the timeouts encountered while // updating the datapath due to an FQDN IP update ProxyDatapathUpdateTimeout = NoOpCounter // ConntrackGCRuns is the number of times that the conntrack GC // process was run. ConntrackGCRuns = NoOpCounterVec // ConntrackGCKeyFallbacks number of times that the conntrack key fallback was invalid. ConntrackGCKeyFallbacks = NoOpCounterVec // ConntrackGCSize the number of entries in the conntrack table ConntrackGCSize = NoOpGaugeVec // NatGCSize the number of entries in the nat table NatGCSize = NoOpGaugeVec // ConntrackGCDuration the duration of the conntrack GC process in milliseconds. ConntrackGCDuration = NoOpObserverVec // ConntrackDumpReset marks the count for conntrack dump resets ConntrackDumpResets = NoOpCounterVec // SignalsHandled is the number of signals received. SignalsHandled = NoOpCounterVec // ServicesEventsCount counts the number of services ServicesEventsCount = NoOpCounterVec // ServiceImplementationDelay the execution duration of the service handler in milliseconds. // The metric reflects the time it took to program the service excluding the event queue latency. ServiceImplementationDelay = NoOpObserverVec // ErrorsWarnings is the number of errors and warnings in cilium-agent instances ErrorsWarnings = NoOpCounterVec // ControllerRuns is the number of times that a controller process runs. 
ControllerRuns = NoOpCounterVec // ControllerRunsDuration the duration of the controller process in seconds ControllerRunsDuration = NoOpObserverVec // subprocess, labeled by Subsystem SubprocessStart = NoOpCounterVec // KubernetesEventProcessed is the number of Kubernetes events // processed labeled by scope, action and execution result KubernetesEventProcessed = NoOpCounterVec // KubernetesEventReceived is the number of Kubernetes events received // labeled by scope, action, valid data and equalness. KubernetesEventReceived = NoOpCounterVec // KubernetesAPIInteractions is the total time taken to process an API call made // to the kube-apiserver KubernetesAPIInteractions = NoOpObserverVec // KubernetesAPIRateLimiterLatency is the client side rate limiter latency metric KubernetesAPIRateLimiterLatency = NoOpObserverVec // KubernetesAPICallsTotal is the counter for all API calls made to // kube-apiserver. KubernetesAPICallsTotal = NoOpCounterVec // KubernetesCNPStatusCompletion is the number of seconds it takes to // complete a CNP status update KubernetesCNPStatusCompletion = NoOpObserverVec // TerminatingEndpointsEvents is the number of terminating endpoint events received from kubernetes. TerminatingEndpointsEvents = NoOpCounter // IPAMEvent is the number of IPAM events received labeled by action and // datapath family type IPAMEvent = NoOpCounterVec // IPAMCapacity tracks the total number of IPs that could be allocated. To // get the current number of available IPs, it would be this metric // subtracted by IPAMEvent{allocated}. 
IPAMCapacity = NoOpGaugeVec // KVStoreOperationsDuration records the duration of kvstore operations KVStoreOperationsDuration = NoOpObserverVec // KVStoreEventsQueueDuration records the duration in seconds of time // received event was blocked before it could be queued KVStoreEventsQueueDuration = NoOpObserverVec // KVStoreQuorumErrors records the number of kvstore quorum errors KVStoreQuorumErrors = NoOpCounterVec // FQDNGarbageCollectorCleanedTotal is the number of domains cleaned by the // GC job. FQDNGarbageCollectorCleanedTotal = NoOpCounter // FQDNActiveNames is the number of domains inside the DNS cache that have // not expired (by TTL), per endpoint. FQDNActiveNames = NoOpGaugeVec // FQDNActiveIPs is the number of IPs inside the DNS cache associated with // a domain that has not expired (by TTL) and are currently active, per // endpoint. FQDNActiveIPs = NoOpGaugeVec // FQDNAliveZombieConnections is the number IPs associated with domains // that have expired (by TTL) yet still associated with an active // connection (aka zombie), per endpoint. FQDNAliveZombieConnections = NoOpGaugeVec // FQDNSemaphoreRejectedTotal is the total number of DNS requests rejected // by the DNS proxy because too many requests were in flight, as enforced by // the admission semaphore. FQDNSemaphoreRejectedTotal = NoOpCounter // IPCacheErrorsTotal is the total number of IPCache events handled in // the IPCache subsystem that resulted in errors. IPCacheErrorsTotal = NoOpCounterVec // IPCacheEventsTotal is the total number of IPCache events handled in // the IPCache subsystem. IPCacheEventsTotal = NoOpCounterVec // BPFSyscallDuration is the metric for bpf syscalls duration. BPFSyscallDuration = NoOpObserverVec // BPFMapOps is the metric to measure the number of operations done to a // bpf map. BPFMapOps = NoOpCounterVec // BPFMapCapacity is the max capacity of bpf maps, labelled by map group classification. 
BPFMapCapacity = NoOpGaugeVec // TriggerPolicyUpdateTotal is the metric to count total number of // policy update triggers TriggerPolicyUpdateTotal = NoOpCounterVec // TriggerPolicyUpdateFolds is the current level folding that is // happening when running policy update triggers TriggerPolicyUpdateFolds = NoOpGauge // TriggerPolicyUpdateCallDuration measures the latency and call // duration of policy update triggers TriggerPolicyUpdateCallDuration = NoOpObserverVec // VersionMetric labelled by Cilium version VersionMetric = NoOpGaugeVec // APILimiterWaitHistoryDuration is a histogram that measures the // individual wait durations of API limiters APILimiterWaitHistoryDuration = NoOpObserverVec // APILimiterWaitDuration is the gauge of the current mean, min, and // max wait duration APILimiterWaitDuration = NoOpGaugeVec // APILimiterProcessingDuration is the gauge of the mean and estimated // processing duration APILimiterProcessingDuration = NoOpGaugeVec // APILimiterRequestsInFlight is the gauge of the current and max // requests in flight APILimiterRequestsInFlight = NoOpGaugeVec // APILimiterRateLimit is the gauge of the current rate limiting // configuration including limit and burst APILimiterRateLimit = NoOpGaugeVec // APILimiterAdjustmentFactor is the gauge representing the latest // adjustment factor that was applied APILimiterAdjustmentFactor = NoOpGaugeVec // APILimiterProcessedRequests is the counter of the number of // processed (successful and failed) requests APILimiterProcessedRequests = NoOpCounterVec // WorkQueueDepth is the depth of the workqueue // // We set actual metrics here instead of NoOp for the workqueue metrics // because these metrics will be registered with workqueue.SetProvider // by init function in watcher.go. Otherwise, we will register NoOps. 
// WorkQueueDepth = metric.NewGaugeVec(metric.GaugeOpts{ ConfigName: Namespace + "_" + SubsystemWorkQueue + "_depth", Namespace: Namespace, Subsystem: SubsystemWorkQueue, Name: "depth", Help: "Current depth of workqueue.", }, []string{"name"}) // WorkQueueAddsTotal is the total number of adds to the workqueue WorkQueueAddsTotal = metric.NewCounterVec(metric.CounterOpts{ ConfigName: Namespace + "_" + SubsystemWorkQueue + "_adds_total", Namespace: Namespace, Subsystem: SubsystemWorkQueue, Name: "adds_total", Help: "Total number of adds handled by workqueue.", }, []string{"name"}) // WorkQueueLatency is the latency of how long an item stays in the workqueue WorkQueueLatency = metric.NewHistogramVec(metric.HistogramOpts{ ConfigName: Namespace + "_" + SubsystemWorkQueue + "_queue_duration_seconds", Namespace: Namespace, Subsystem: SubsystemWorkQueue, Name: "queue_duration_seconds", Help: "How long in seconds an item stays in workqueue before being requested.", Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), }, []string{"name"}) // WorkQueueDuration is the duration of how long processing an item for the workqueue WorkQueueDuration = metric.NewHistogramVec(metric.HistogramOpts{ ConfigName: Namespace + "_" + SubsystemWorkQueue + "_work_duration_seconds", Namespace: Namespace, Subsystem: SubsystemWorkQueue, Name: "work_duration_seconds", Help: "How long in seconds processing an item from workqueue takes.", Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), }, []string{"name"}) // WorkQueueUnfinishedWork is how many seconds of work has been done that is in progress WorkQueueUnfinishedWork = metric.NewGaugeVec(metric.GaugeOpts{ ConfigName: Namespace + "_" + SubsystemWorkQueue + "_unfinished_work_seconds", Namespace: Namespace, Subsystem: SubsystemWorkQueue, Name: "unfinished_work_seconds", Help: "How many seconds of work has been done that " + "is in progress and hasn't been observed by work_duration. Large " + "values indicate stuck threads. 
One can deduce the number of stuck " + "threads by observing the rate at which this increases.", }, []string{"name"}) // WorkQueueLongestRunningProcessor is the longest running processor in the workqueue WorkQueueLongestRunningProcessor = metric.NewGaugeVec(metric.GaugeOpts{ ConfigName: Namespace + "_" + SubsystemWorkQueue + "_longest_running_processor_seconds", Namespace: Namespace, Subsystem: SubsystemWorkQueue, Name: "longest_running_processor_seconds", Help: "How many seconds has the longest running " + "processor for workqueue been running.", }, []string{"name"}) // WorkQueueRetries is the number of retries handled by the workqueue WorkQueueRetries = metric.NewCounterVec(metric.CounterOpts{ ConfigName: Namespace + "_" + SubsystemWorkQueue + "_retries_total", Namespace: Namespace, Subsystem: SubsystemWorkQueue, Name: "retries_total", Help: "Total number of retries handled by workqueue.", }, []string{"name"}) )
var Cell = cell.Module("metrics", "Metrics", cell.Provide(NewRegistry), Metric(NewLegacyMetrics), cell.Config(defaultRegistryConfig), cell.Invoke(func(_ *Registry) { FlushLoggingMetrics() }), )
Functions ¶
func BoolToFloat64 ¶
func DumpMetrics ¶
DumpMetrics gets the current Cilium metrics and dumps all into a models.Metrics structure. If metrics cannot be retrieved, returns an error.
func Errno2Outcome ¶
Errno2Outcome converts a unix.Errno to LabelOutcome
func Error2Outcome ¶
Error2Outcome converts an error to LabelOutcome
func FlushLoggingMetrics ¶ added in v1.14.11
func FlushLoggingMetrics()
FlushLoggingMetrics will cause all logging hook metrics accumulated prior to the errors_warnings metrics being registered with the Prometheus collector to be incremented to their respective errors_warnings metrics tuple.
func GetCounterValue ¶
func GetCounterValue(m prometheus.Counter) float64
GetCounterValue returns the current value stored for the counter
func GetGaugeValue ¶
func GetGaugeValue(m prometheus.Gauge) float64
GetGaugeValue returns the current value stored for the gauge. This function is useful in tests.
func InitOperatorMetrics ¶ added in v1.14.11
func InitOperatorMetrics()
InitOperatorMetrics is used to init legacy metrics necessary during operator init.
func LabelOutcome2Code ¶ added in v1.15.4
LabelOutcome2Code converts a label outcome to a code
func Metric ¶ added in v1.16.0
Metric constructs a new metric cell.
This cell type provides `S` to the hive as returned by `ctor`. It also makes each individual field value available via the `hive-metrics` value group. Infrastructure components such as a registry, inspection tool, or documentation generator can collect all metrics in the hive via this value group.
The `ctor` constructor must return a struct or pointer to a struct of type `S`. The returned struct must only contain public fields. All field types should implement the `github.com/cilium/cilium/pkg/metrics/metric.WithMetadata` and `github.com/prometheus/client_golang/prometheus.Collector` interfaces.
func RegisterList ¶
func RegisterList(list []prometheus.Collector) error
RegisterList registers a list of collectors. If registration of one collector fails, no collector is registered.
func Reinitialize ¶
func Reinitialize()
func UpdateMapCapacity ¶ added in v1.15.0
Types ¶
type APIEventTSHelper ¶
type APIEventTSHelper struct { Next http.Handler TSGauge metric.Vec[metric.Gauge] Histogram metric.Vec[metric.Observer] }
APIEventTSHelper is intended to be a global middleware to track metrics around API calls. It records the timestamp of an API call in the provided gauge.
func (*APIEventTSHelper) ServeHTTP ¶
func (m *APIEventTSHelper) ServeHTTP(r http.ResponseWriter, req *http.Request)
ServeHTTP implements the http.Handler interface. It records the timestamp this API call began at, then chains to the next handler.
type GaugeWithThreshold ¶
type GaugeWithThreshold struct {
// contains filtered or unexported fields
}
GaugeWithThreshold is a prometheus gauge that registers itself with prometheus if over a threshold value and unregisters when under.
func NewBPFMapPressureGauge ¶
func NewBPFMapPressureGauge(mapname string, threshold float64) *GaugeWithThreshold
NewBPFMapPressureGauge creates a new GaugeWithThreshold for the cilium_bpf_map_pressure metric with the map name as constant label.
func NewGaugeWithThreshold ¶
func NewGaugeWithThreshold(name string, subsystem string, desc string, labels map[string]string, threshold float64) *GaugeWithThreshold
NewGaugeWithThreshold creates a new GaugeWithThreshold.
func (*GaugeWithThreshold) Set ¶
func (gwt *GaugeWithThreshold) Set(value float64)
Set the value of the GaugeWithThreshold.
type LegacyMetrics ¶
type LegacyMetrics struct { BootstrapTimes metric.Vec[metric.Observer] APIInteractions metric.Vec[metric.Observer] NodeConnectivityStatus metric.DeletableVec[metric.Gauge] NodeConnectivityLatency metric.DeletableVec[metric.Gauge] Endpoint metric.GaugeFunc EndpointMaxIfindex metric.Gauge EndpointRegenerationTotal metric.Vec[metric.Counter] EndpointStateCount metric.Vec[metric.Gauge] EndpointRegenerationTimeStats metric.Vec[metric.Observer] EndpointPropagationDelay metric.Vec[metric.Observer] Policy metric.Gauge PolicyRegenerationCount metric.Counter PolicyRegenerationTimeStats metric.Vec[metric.Observer] PolicyRevision metric.Gauge PolicyChangeTotal metric.Vec[metric.Counter] PolicyEndpointStatus metric.Vec[metric.Gauge] PolicyImplementationDelay metric.Vec[metric.Observer] CIDRGroupsReferenced metric.Gauge CIDRGroupTranslationTimeStats metric.Histogram Identity metric.Vec[metric.Gauge] EventTS metric.Vec[metric.Gauge] EventLagK8s metric.Gauge ProxyRedirects metric.Vec[metric.Gauge] ProxyPolicyL7Total metric.Vec[metric.Counter] ProxyUpstreamTime metric.Vec[metric.Observer] ProxyDatapathUpdateTimeout metric.Counter ConntrackGCRuns metric.Vec[metric.Counter] ConntrackGCKeyFallbacks metric.Vec[metric.Counter] ConntrackGCSize metric.Vec[metric.Gauge] NatGCSize metric.Vec[metric.Gauge] ConntrackGCDuration metric.Vec[metric.Observer] ConntrackDumpResets metric.Vec[metric.Counter] SignalsHandled metric.Vec[metric.Counter] ServicesEventsCount metric.Vec[metric.Counter] ServiceImplementationDelay metric.Vec[metric.Observer] ErrorsWarnings metric.Vec[metric.Counter] ControllerRuns metric.Vec[metric.Counter] ControllerRunsDuration metric.Vec[metric.Observer] SubprocessStart metric.Vec[metric.Counter] KubernetesEventProcessed metric.Vec[metric.Counter] KubernetesEventReceived metric.Vec[metric.Counter] KubernetesAPIInteractions metric.Vec[metric.Observer] KubernetesAPIRateLimiterLatency metric.Vec[metric.Observer] KubernetesAPICallsTotal metric.Vec[metric.Counter] 
KubernetesCNPStatusCompletion metric.Vec[metric.Observer] TerminatingEndpointsEvents metric.Counter IPAMEvent metric.Vec[metric.Counter] IPAMCapacity metric.Vec[metric.Gauge] KVStoreOperationsDuration metric.Vec[metric.Observer] KVStoreEventsQueueDuration metric.Vec[metric.Observer] KVStoreQuorumErrors metric.Vec[metric.Counter] FQDNGarbageCollectorCleanedTotal metric.Counter FQDNActiveNames metric.Vec[metric.Gauge] FQDNActiveIPs metric.Vec[metric.Gauge] FQDNAliveZombieConnections metric.Vec[metric.Gauge] FQDNSemaphoreRejectedTotal metric.Counter IPCacheErrorsTotal metric.Vec[metric.Counter] IPCacheEventsTotal metric.Vec[metric.Counter] BPFSyscallDuration metric.Vec[metric.Observer] BPFMapOps metric.Vec[metric.Counter] BPFMapCapacity metric.Vec[metric.Gauge] TriggerPolicyUpdateTotal metric.Vec[metric.Counter] TriggerPolicyUpdateFolds metric.Gauge TriggerPolicyUpdateCallDuration metric.Vec[metric.Observer] VersionMetric metric.Vec[metric.Gauge] APILimiterWaitHistoryDuration metric.Vec[metric.Observer] APILimiterWaitDuration metric.Vec[metric.Gauge] APILimiterProcessingDuration metric.Vec[metric.Gauge] APILimiterRequestsInFlight metric.Vec[metric.Gauge] APILimiterRateLimit metric.Vec[metric.Gauge] APILimiterAdjustmentFactor metric.Vec[metric.Gauge] APILimiterProcessedRequests metric.Vec[metric.Counter] WorkQueueDepth metric.Vec[metric.Gauge] WorkQueueAddsTotal metric.Vec[metric.Counter] WorkQueueLatency metric.Vec[metric.Observer] WorkQueueDuration metric.Vec[metric.Observer] WorkQueueUnfinishedWork metric.Vec[metric.Gauge] WorkQueueLongestRunningProcessor metric.Vec[metric.Gauge] WorkQueueRetries metric.Vec[metric.Counter] }
func NewLegacyMetrics ¶
func NewLegacyMetrics() *LegacyMetrics
type LoggingHook ¶
type LoggingHook struct {
// contains filtered or unexported fields
}
LoggingHook is a hook for logrus which counts error and warning messages as a Prometheus metric.
func NewLoggingHook ¶
func NewLoggingHook() *LoggingHook
NewLoggingHook returns a new instance of LoggingHook.
func (*LoggingHook) Fire ¶
func (h *LoggingHook) Fire(entry *logrus.Entry) error
Fire is the main method, which is called every time the logger has an error or warning message.
func (*LoggingHook) Levels ¶
func (h *LoggingHook) Levels() []logrus.Level
Levels returns the list of logging levels on which the hook is triggered.
type Registry ¶
type Registry struct {
// contains filtered or unexported fields
}
Registry is a cell around a prometheus registry. This registry starts an HTTP server as part of its lifecycle on which all enabled metrics will be available. A reference to this registry can also be used to dynamically register or unregister `prometheus.Collector`s.
func NewRegistry ¶
func NewRegistry(params RegistryParams) *Registry
func (*Registry) DumpMetrics ¶
DumpMetrics gets the current Cilium metrics and dumps all into a models.Metrics structure. If metrics cannot be retrieved, returns an error.
func (*Registry) MustRegister ¶
func (r *Registry) MustRegister(c ...prometheus.Collector)
MustRegister adds the collector to the registry, exposing this metric to prometheus scrapes. It will panic on error.
func (*Registry) Register ¶
func (r *Registry) Register(c prometheus.Collector) error
Register registers a collector
func (*Registry) RegisterList ¶
func (r *Registry) RegisterList(list []prometheus.Collector) error
RegisterList registers a list of collectors. If registration of one collector fails, no collector is registered.
func (*Registry) Reinitialize ¶
func (r *Registry) Reinitialize()
Reinitialize creates a new internal registry and re-registers metrics to it.
func (*Registry) Unregister ¶
func (r *Registry) Unregister(c prometheus.Collector) bool
Unregister unregisters a collector
type RegistryConfig ¶
type RegistryConfig struct { // PrometheusServeAddr IP:Port on which to serve prometheus metrics (pass ":Port" to bind on all interfaces, "" is off) PrometheusServeAddr string // This is a list of metrics to be enabled or disabled, format is `+`/`-` + `{metric name}` Metrics []string }
func (RegistryConfig) Flags ¶
func (rc RegistryConfig) Flags(flags *pflag.FlagSet)
type RegistryParams ¶
type RegistryParams struct { cell.In Logger logrus.FieldLogger Shutdowner hive.Shutdowner Lifecycle cell.Lifecycle AutoMetrics []metricpkg.WithMetadata `group:"hive-metrics"` Config RegistryConfig DaemonConfig *option.DaemonConfig }
RegistryParams are the parameters needed to construct a Registry
type ResponderWrapper ¶
type ResponderWrapper struct { http.ResponseWriter // contains filtered or unexported fields }
func (*ResponderWrapper) WriteHeader ¶
func (rw *ResponderWrapper) WriteHeader(code int)