metrics

package
v0.0.0-...-ec72bf4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 13, 2024 License: Apache-2.0 Imports: 17 Imported by: 0

Documentation

Overview

Package metrics holds prometheus metrics objects and related utility functions. It does not abstract away the prometheus client but the caller rarely needs to refer to prometheus directly.

Index

Constants

View Source
// Constants for metric error values, subsystem names, the metric namespace,
// and label names and label values.
const (
	// ErrorTimeout is the value used to notify timeout errors.
	ErrorTimeout = "timeout"

	// ErrorProxy is the value used to notify errors on Proxy.
	ErrorProxy = "proxy"

	// L7DNS is the value used to report the DNS label on metrics.
	L7DNS = "dns"

	// SubsystemBPF is the subsystem to scope metrics related to the bpf syscalls.
	SubsystemBPF = "bpf"

	// SubsystemDatapath is the subsystem to scope metrics related to management of
	// the datapath. It is prepended to metric names and separated with a '_'.
	SubsystemDatapath = "datapath"

	// SubsystemAgent is the subsystem to scope metrics related to the cce agent itself.
	SubsystemAgent = "agent"

	// SubsystemK8s is the subsystem to scope metrics related to Kubernetes.
	SubsystemK8s = "k8s"

	// SubsystemK8sClient is the subsystem to scope metrics related to the kubernetes client.
	SubsystemK8sClient = "k8s_client"

	// SubsystemKVStore is the subsystem to scope metrics related to the kvstore.
	SubsystemKVStore = "kvstore"

	// SubsystemFQDN is the subsystem to scope metrics related to the FQDN proxy.
	SubsystemFQDN = "fqdn"

	// SubsystemNodes is the subsystem to scope metrics related to the node manager.
	SubsystemNodes = "nodes"

	// SubsystemTriggers is the subsystem to scope metrics related to the trigger package.
	SubsystemTriggers = "triggers"

	// SubsystemAPILimiter is the subsystem to scope metrics related to the API limiter package.
	SubsystemAPILimiter = "api_limiter"

	// SubsystemNodeNeigh is the subsystem to scope metrics related to management of node neighbor.
	SubsystemNodeNeigh = "node_neigh"

	// Namespace is used to scope metrics from cce. It is prepended to metric
	// names and separated with a '_'.
	Namespace = "cce"

	// LabelError indicates the type of error (string).
	LabelError = "error"

	// LabelOutcome indicates whether the outcome of the operation was successful or not.
	LabelOutcome = "outcome"

	// LabelAttempts is the number of attempts it took to complete the operation.
	LabelAttempts = "attempts"

	// LabelValueOutcomeSuccess is used as a successful outcome of an operation.
	LabelValueOutcomeSuccess = "success"

	// LabelValueOutcomeFail is used as an unsuccessful outcome of an operation.
	LabelValueOutcomeFail = "fail"

	// LabelEventSourceAPI marks event-related metrics that come from the API.
	LabelEventSourceAPI = "api"

	// LabelEventSourceK8s marks event-related metrics that come from k8s.
	LabelEventSourceK8s = "k8s"

	// LabelEventSourceFQDN marks event-related metrics that come from pkg/fqdn.
	LabelEventSourceFQDN = "fqdn"

	// LabelEventSourceContainerd marks event-related metrics that come from docker.
	// NOTE(review): the identifier says Containerd but the value is "docker" —
	// confirm this mismatch is intentional.
	LabelEventSourceContainerd = "docker"

	// LabelDatapathArea marks which area the metrics are related to (e.g., which BPF map).
	LabelDatapathArea = "area"

	// LabelDatapathName marks a unique identifier for this metric.
	// The name should be defined once for a given type of error.
	LabelDatapathName = "name"

	// LabelDatapathFamily marks which protocol family (IPv4, IPv6) the metric is related to.
	LabelDatapathFamily = "family"

	// LabelProtocol marks the L4 protocol (TCP, ANY) for the metric.
	LabelProtocol = "protocol"

	// LabelSignalType marks the signal name.
	LabelSignalType = "signal"

	// LabelSignalData marks the signal data.
	LabelSignalData = "data"

	// LabelStatus is the label for the status of a completed task.
	LabelStatus = "status"

	// LabelPolicyEnforcement is the label used to see the enforcement status.
	LabelPolicyEnforcement = "enforcement"

	// LabelPolicySource is presumably the label for the source of a policy; the
	// previous comment duplicated LabelPolicyEnforcement's — TODO confirm.
	// It has the same value as LabelEventSource.
	LabelPolicySource = "source"

	// LabelScope is the label used to define multiple scopes in the same
	// metric. For example, one counter may measure a metric over the scope of
	// the entire event (scope=global), or just part of an event
	// (scope=slow_path).
	LabelScope = "scope"

	// LabelProtocolL7 is the label used when working with layer 7 protocols.
	LabelProtocolL7 = "protocol_l7"

	// LabelBuildState is the state a build queue entry is in.
	LabelBuildState = "state"

	// LabelBuildQueueName is the name of the build queue. It has the same
	// value as LabelDatapathName.
	LabelBuildQueueName = "name"

	// LabelAction is the label used to define what kind of action was performed in a metric.
	LabelAction = "action"

	// LabelSubsystem is the label used to refer to any of the child processes
	// started by cce (Envoy, monitor, etc.).
	LabelSubsystem = "subsystem"

	// LabelKind is the kind of a label.
	LabelKind = "kind"

	// LabelEventSource is the source of a label for event metrics,
	// i.e. k8s, containerd, api.
	LabelEventSource = "source"

	// LabelPath is the label for the API path.
	LabelPath = "path"
	// LabelMethod is the label for the HTTP method.
	LabelMethod = "method"

	// LabelAPIReturnCode is the HTTP code returned for that API path.
	LabelAPIReturnCode = "return_code"

	// LabelOperation is the label for BPF maps operations.
	LabelOperation = "operation"

	// LabelMapName is the label for the BPF map name.
	LabelMapName = "map_name"

	// LabelVersion is the label for the version number.
	LabelVersion = "version"

	// LabelDirection is the label for traffic direction.
	LabelDirection = "direction"

	// LabelSourceCluster is the label for source cluster name.
	LabelSourceCluster = "source_cluster"

	// LabelSourceNodeName is the label for source node name.
	LabelSourceNodeName = "source_node_name"

	// LabelTargetCluster is the label for target cluster name.
	LabelTargetCluster = "target_cluster"

	// LabelTargetNodeIP is the label for target node IP.
	LabelTargetNodeIP = "target_node_ip"

	// LabelTargetNodeName is the label for target node name.
	LabelTargetNodeName = "target_node_name"

	// LabelTargetNodeType is the label for target node type (local_node, remote_intra_cluster, vs remote_inter_cluster).
	LabelTargetNodeType = "target_node_type"

	// LabelLocationLocalNode, LabelLocationRemoteIntraCluster and
	// LabelLocationRemoteInterCluster are the node type values listed in the
	// LabelTargetNodeType comment.
	LabelLocationLocalNode          = "local_node"
	LabelLocationRemoteIntraCluster = "remote_intra_cluster"
	LabelLocationRemoteInterCluster = "remote_inter_cluster"

	// LabelType is the label for type in general (e.g. endpoint, node).
	// LabelPeerEndpoint and LabelPeerNode are presumably the values used with
	// it — TODO confirm against callers.
	LabelType         = "type"
	LabelPeerEndpoint = "endpoint"
	LabelPeerNode     = "node"

	// LabelTrafficHTTP and LabelTrafficICMP name traffic kinds.
	// NOTE(review): which label they are used as values for is not visible
	// here — confirm against callers.
	LabelTrafficHTTP = "http"
	LabelTrafficICMP = "icmp"

	// LabelAddressType is the label for an address type; LabelAddressTypePrimary
	// and LabelAddressTypeSecondary are presumably its values — TODO confirm.
	LabelAddressType          = "address_type"
	LabelAddressTypePrimary   = "primary"
	LabelAddressTypeSecondary = "secondary"

	// LabelEventMethod is the label for the method of an event. It has the
	// same value as LabelMethod. LabelEventMethodAdd, LabelEventMethodUpdate and
	// LabelEventMethodDelete are presumably its values — TODO confirm.
	LabelEventMethod       = "method"
	LabelEventMethodAdd    = "add"
	LabelEventMethodUpdate = "update"
	LabelEventMethodDelete = "delete"

	// LabelErrorReason is the label for the reason of an error (inferred from
	// the name — TODO confirm).
	LabelErrorReason = "reason"
)

Variables

View Source
// NoOp* are placeholder values, one per prometheus metric kind used by this
// package. The package-level metric variables are initialised to them.
// NOTE(review): the backing types (metric, collector, counter, ...) are
// unexported and not visible here; presumably every method is a no-op, as the
// names suggest — confirm against their definitions.
var (
	// NoOpMetric and NoOpCollector are the base placeholders that the
	// composite values below are built from.
	NoOpMetric    prometheus.Metric    = &metric{}
	NoOpCollector prometheus.Collector = &collector{}

	// Placeholders for counters, observers and gauges, plus their
	// vector (labelled) variants.
	NoOpCounter     prometheus.Counter     = &counter{NoOpMetric, NoOpCollector}
	NoOpCounterVec  CounterVec             = &counterVec{NoOpCollector}
	NoOpObserver    prometheus.Observer    = &observer{}
	NoOpObserverVec prometheus.ObserverVec = &observerVec{NoOpCollector}
	NoOpGauge       prometheus.Gauge       = &gauge{NoOpMetric, NoOpCollector}
	NoOpGaugeVec    GaugeVec               = &gaugeVec{NoOpCollector}
)
View Source
// Package-level metric handles. Each one is initialised to the NoOp value of
// its kind.
var (

	// APIInteractions is the total time taken to process an API call made
	// to the cce-agent.
	APIInteractions = NoOpObserverVec

	// NodeConnectivityStatus is the connectivity status between the local node
	// and other nodes, intra or inter cluster.
	NodeConnectivityStatus = NoOpGaugeVec

	// NodeConnectivityLatency is the connectivity latency between the local node
	// and other nodes, intra or inter cluster.
	NodeConnectivityLatency = NoOpGaugeVec

	// EventTS is the timestamp of k8s resource events.
	EventTS = NoOpGaugeVec

	// EventLagK8s is the lag calculation for k8s Pod events.
	EventLagK8s = NoOpGauge

	// SignalsHandled is the number of signals received.
	SignalsHandled = NoOpCounterVec

	// ServicesCount is the number of services.
	ServicesCount = NoOpCounterVec

	// ErrorsWarnings is the number of errors and warnings in cce-agent instances.
	ErrorsWarnings = NoOpCounterVec

	// ControllerRuns is the number of times that a controller process runs.
	ControllerRuns = NoOpCounterVec

	// ControllerRunsDuration is the duration of the controller process in seconds.
	ControllerRunsDuration = NoOpObserverVec

	// SubprocessStart counts subprocess starts, labeled by Subsystem.
	// NOTE(review): the original comment was truncated to "subprocess, labeled
	// by Subsystem"; confirm the intended wording.
	SubprocessStart = NoOpCounterVec

	// KubernetesEventProcessed is the number of Kubernetes events
	// processed labeled by scope, action and execution result.
	KubernetesEventProcessed = NoOpCounterVec

	// KubernetesEventReceived is the number of Kubernetes events received
	// labeled by scope, action, valid data and equalness.
	KubernetesEventReceived = NoOpCounterVec

	// KubernetesAPIInteractions is the total time taken to process an API call made
	// to the kube-apiserver.
	KubernetesAPIInteractions = NoOpObserverVec

	// KubernetesAPICallsTotal is the counter for all API calls made to
	// kube-apiserver.
	KubernetesAPICallsTotal = NoOpCounterVec

	// KubernetesCNPStatusCompletion is the number of seconds it takes to
	// complete a CNP status update.
	KubernetesCNPStatusCompletion = NoOpObserverVec

	// IpamEvent is the number of IPAM events received labeled by action and
	// datapath family type.
	IpamEvent = NoOpCounterVec

	// VersionMetric is a gauge labelled by CCE version.
	VersionMetric = NoOpGaugeVec

	// APILimiterProcessHistoryDuration is a histogram that measures the
	// individual wait durations of API limiters.
	APILimiterProcessHistoryDuration = NoOpObserverVec

	// APILimiterRequestsInFlight is the gauge of the current and max
	// requests in flight.
	APILimiterRequestsInFlight = NoOpGaugeVec

	// APILimiterRateLimit is the gauge of the current rate limiting
	// configuration including limit and burst.
	APILimiterRateLimit = NoOpGaugeVec

	// APILimiterAdjustmentFactor is the gauge representing the latest
	// adjustment factor that was applied.
	APILimiterAdjustmentFactor = NoOpGaugeVec

	// APILimiterProcessedRequests is the counter of the number of
	// processed (successful and failed) requests.
	APILimiterProcessedRequests = NoOpCounterVec

	// CloudAPIRequestDurationMillisesconds observes the duration of cloud API
	// requests, presumably in milliseconds as the name suggests — TODO confirm.
	// NOTE(review): "Millisesconds" is misspelled, but the identifier is
	// exported, so renaming it would break callers.
	CloudAPIRequestDurationMillisesconds = NoOpObserverVec

	// WorkQueueLens is the gauge representing the length of the work queue.
	WorkQueueLens = NoOpGaugeVec

	// WorkQueueEventCount is the counter of the number of events processed.
	WorkQueueEventCount = NoOpCounterVec

	// ControllerHandlerDurationMilliseconds is the histogram of the duration
	// of controller handlers, presumably in milliseconds as the name suggests
	// — TODO confirm.
	ControllerHandlerDurationMilliseconds = NoOpObserverVec

	// IPAMErrorCounter is the counter of IPAM errors, e.g. nodes that have no
	// available subnet to create a new ENI.
	// NOTE(review): the original comment named this NoAvailableSubnetNodeCount;
	// confirm which error kinds are actually counted.
	IPAMErrorCounter = NoOpCounterVec

	// SubnetIPsGuage is the gauge of available IPs in a subnet and IPs borrowed
	// by ENIs. NOTE(review): "Guage" is misspelled, but the identifier is
	// exported, so renaming it would break callers.
	SubnetIPsGuage = NoOpGaugeVec
)

Functions

func BoolToFloat64

func BoolToFloat64(v bool) float64

func DefaultMetrics

func DefaultMetrics() map[string]struct{}

func DumpMetrics

func DumpMetrics() ([]*models.Metric, error)

DumpMetrics gets the current CCE metrics and dumps all into a models.Metrics structure. If metrics cannot be retrieved, it returns an error.

func Enable

func Enable(addr string) <-chan error

Enable begins serving prometheus metrics on the address passed in. Addresses of the form ":8080" will bind the port on all interfaces.

func Errno2Outcome

func Errno2Outcome(errno unix.Errno) string

Errno2Outcome converts a unix.Errno to LabelOutcome

func Error2Outcome

func Error2Outcome(err error) string

Error2Outcome converts an error to LabelOutcome

func GetCounterValue

func GetCounterValue(m prometheus.Counter) float64

GetCounterValue returns the current value stored for the counter

func GetGaugeValue

func GetGaugeValue(m prometheus.Gauge) float64

GetGaugeValue returns the current value stored for the gauge. This function is useful in tests.

func MustRegister

func MustRegister(c ...prometheus.Collector)

MustRegister adds the collector to the registry, exposing this metric to prometheus scrapes. It will panic on error.

func Register

func Register(c prometheus.Collector) error

Register registers a collector

func RegisterList

func RegisterList(list []prometheus.Collector) error

RegisterList registers a list of collectors. If registration of one collector fails, no collector is registered.

func Unregister

func Unregister(c prometheus.Collector) bool

Unregister unregisters a collector

Types

type APIEventTSHelper

// APIEventTSHelper is intended to be a global middleware to track metrics
// around API calls. It records the timestamp of an API call in the provided
// gauge.
type APIEventTSHelper struct {
	// Next is the handler that ServeHTTP chains to.
	Next http.Handler
	// TSGauge is the gauge in which the API call timestamp is recorded.
	TSGauge GaugeVec
	// Histogram is an observer vector for API calls.
	// NOTE(review): what is observed (presumably call duration) is not
	// visible here — confirm against ServeHTTP.
	Histogram prometheus.ObserverVec
}

APIEventTSHelper is intended to be a global middleware to track metrics around API calls. It records the timestamp of an API call in the provided gauge.

func (*APIEventTSHelper) ServeHTTP

func (m *APIEventTSHelper) ServeHTTP(r http.ResponseWriter, req *http.Request)

ServeHTTP implements the http.Handler interface. It records the timestamp this API call began at, then chains to the next handler.

type Configuration

// Configuration records, as one boolean per metric, which metrics are
// considered enabled. It is produced by CreateConfiguration.
// NOTE(review): the fields from VersionMetric onwards omit the "Enabled"
// suffix used by the earlier fields; presumably they carry the same meaning —
// confirm against CreateConfiguration.
type Configuration struct {
	APIInteractionsEnabled         bool
	NodeConnectivityStatusEnabled  bool
	NodeConnectivityLatencyEnabled bool

	EventTSEnabled           bool
	EventLagK8sEnabled       bool
	EventTSContainerdEnabled bool
	EventTSAPIEnabled        bool

	NoOpObserverVecEnabled bool
	NoOpCounterVecEnabled  bool

	SignalsHandledEnabled                bool
	ServicesCountEnabled                 bool
	ErrorsWarningsEnabled                bool
	ControllerRunsEnabled                bool
	ControllerRunsDurationEnabled        bool
	SubprocessStartEnabled               bool
	KubernetesEventProcessedEnabled      bool
	KubernetesEventReceivedEnabled       bool
	KubernetesTimeBetweenEventsEnabled   bool
	KubernetesAPIInteractionsEnabled     bool
	KubernetesAPICallsEnabled            bool
	KubernetesCNPStatusCompletionEnabled bool
	IpamEventEnabled                     bool
	SubnetIPsGuageEnabled                bool

	VersionMetric                        bool
	APILimiterProcessHistoryDuration     bool
	APILimiterRequestsInFlight           bool
	APILimiterRateLimit                  bool
	APILimiterAdjustmentFactor           bool
	APILimiterProcessedRequests          bool
	CloudAPIRequestDurationMillisesconds bool

	WorkQueueLens                         bool
	WorkQueueEventCount                   bool
	ControllerHandlerDurationMilliseconds bool
}

func CreateConfiguration

func CreateConfiguration(metricsEnabled []string) (Configuration, []prometheus.Collector)

CreateConfiguration returns a Configuration with all metrics that are considered enabled from the given slice of metricsEnabled as well as a slice of prometheus.Collectors that must be registered in the prometheus default register.

type CounterVec

// CounterVec is the interface for a labelled family of prometheus counters
// that is also a prometheus.Collector. NoOpCounterVec is declared with this
// type.
type CounterVec interface {
	// WithLabelValues returns the counter for the given label values.
	WithLabelValues(lvls ...string) prometheus.Counter
	// GetMetricWithLabelValues is like WithLabelValues but also returns an
	// error.
	GetMetricWithLabelValues(lvs ...string) (prometheus.Counter, error)
	// With returns the counter for the given label name/value map.
	With(labels prometheus.Labels) prometheus.Counter
	prometheus.Collector
}

type GaugeVec

// GaugeVec is the interface for a labelled family of prometheus gauges that is
// also a prometheus.Collector. NoOpGaugeVec is declared with this type.
type GaugeVec interface {
	// WithLabelValues returns the gauge for the given label values.
	WithLabelValues(lvls ...string) prometheus.Gauge
	prometheus.Collector
}

type GaugeWithThreshold

// GaugeWithThreshold is a prometheus gauge that registers itself with
// prometheus if over a threshold value and unregisters when under.
type GaugeWithThreshold struct {
	// contains filtered or unexported fields
}

GaugeWithThreshold is a prometheus gauge that registers itself with prometheus if over a threshold value and unregisters when under.

func NewBPFMapPressureGauge

func NewBPFMapPressureGauge(mapname string, threshold float64) *GaugeWithThreshold

NewBPFMapPressureGauge creates a new GaugeWithThreshold for the cce_bpf_map_pressure metric with the map name as constant label.

func NewGaugeWithThreshold

func NewGaugeWithThreshold(name string, subsystem string, desc string, labels map[string]string, threshold float64) *GaugeWithThreshold

NewGaugeWithThreshold creates a new GaugeWithThreshold.

func (*GaugeWithThreshold) Set

func (gwt *GaugeWithThreshold) Set(value float64)

Set the value of the GaugeWithThreshold.

type LoggingHook

// LoggingHook is a hook for logrus which counts error and warning messages as
// a Prometheus metric.
type LoggingHook struct {
	// contains filtered or unexported fields
}

LoggingHook is a hook for logrus which counts error and warning messages as a Prometheus metric.

func NewLoggingHook

func NewLoggingHook(component string) *LoggingHook

NewLoggingHook returns a new instance of LoggingHook for the given CCE component.

func (*LoggingHook) Fire

func (h *LoggingHook) Fire(entry *logrus.Entry) error

Fire is the main method, which is called every time the logger has an error or warning message.

func (*LoggingHook) Levels

func (h *LoggingHook) Levels() []logrus.Level

Levels returns the list of logging levels on which the hook is triggered.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL