metrics

package
v0.1.10 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 5, 2024 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation

Index

Constants

View Source
// MetricNamePrefix is the string concatenated in front of the metric names
// that are built from it in this package (for example
// MetricNamePrefix + "scheduling_count").
const MetricNamePrefix = "lunettes_"

Variables

View Source
// Metrics in this group all build their names as MetricNamePrefix + suffix.
var (
	// SchedulingResultCount is a counter vector named
	// MetricNamePrefix + "scheduling_count", partitioned by the "result" label.
	// NOTE(review): the Help text is misspelled ("shceduling"); it is a runtime
	// string and is intentionally left untouched here.
	SchedulingResultCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "scheduling_count",
			Help: "shceduling count",
		},

		[]string{"result"},
	)

	// ContainerStartingResultCount is a counter vector named
	// MetricNamePrefix + "container_starting_count", partitioned by the
	// "result" label.
	ContainerStartingResultCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "container_starting_count",
			Help: "container starting result count",
		},

		[]string{"result"},
	)

	// DebugMethodDurationMilliSeconds records how long methods take.
	// It is a summary vector partitioned by the "method" label; Objectives
	// lists the tracked quantiles (0.5, 0.9, 0.95, 0.99), each mapped to its
	// allowed absolute error.
	DebugMethodDurationMilliSeconds = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       MetricNamePrefix + "debug_method_duration_milliseconds",
			Help:       "how long a method taken",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.001, 0.99: 0.001},
		},
		[]string{"method"},
	)

	// QueryMethodDurationMilliSeconds records how long query methods take.
	// It is a summary vector partitioned by the "method" label and uses the
	// same quantile objectives as DebugMethodDurationMilliSeconds.
	QueryMethodDurationMilliSeconds = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       MetricNamePrefix + "query_method_duration_milliseconds",
			Help:       "how long a query method taken",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.001, 0.99: 0.001},
		},
		[]string{"method"},
	)

	// ConsumerDurationMilliSeconds is an unlabeled summary named
	// MetricNamePrefix + "consumer_duration_milliseconds"; per its Help text
	// it tracks how long the consumer takes to consume one event.
	ConsumerDurationMilliSeconds = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Name:       MetricNamePrefix + "consumer_duration_milliseconds",
			Help:       "how long consumer consume one event",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.001, 0.99: 0.001},
		},
	)

	// TraceCreatedCount is exposed as lunettes_trace_created_count.
	// It is a counter vector partitioned by the "trace_type" label.
	TraceCreatedCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_created_count",
			Help: "how many trace record have been scheduled so far",
		},
		[]string{"trace_type"},
	)

	// TraceNewBeforeFinishCount is an unlabeled counter named
	// MetricNamePrefix + "trace_new_before_finish_count"; per its Help text it
	// counts traces added before an older one has finished.
	TraceNewBeforeFinishCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_new_before_finish_count",
			Help: "how many trace added before older one have finishi",
		},
	)

	// SpanCreatedCount is a counter vector of created spans, partitioned by
	// the "servive" and "operation" labels.
	// NOTE(review): "servive" looks like a typo for "service"; renaming it
	// would change the exposed label name, so confirm with consumers first.
	SpanCreatedCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "span_created_count",
			Help: "how many spans have been created so far",
		},
		[]string{"servive", "operation"},
	)

	// TraceTimeoutCount is a counter vector named
	// MetricNamePrefix + "trace_timeout_count", partitioned by the
	// "trace_type" label.
	TraceTimeoutCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_timeout_count",
			Help: "how many timeout trace record have been scheduled so far",
		},
		[]string{"trace_type"},
	)

	// TraceTimeoutCorrectToReadyCount is an unlabeled counter named
	// MetricNamePrefix + "trace_timeout_corrected_count"; per its Help text it
	// counts timed-out traces that were corrected to normal.
	TraceTimeoutCorrectToReadyCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_timeout_corrected_count",
			Help: "how many timeout traces but corrected to normal",
		},
	)

	// PodLifecycleDurationMilliSeconds is a gauge vector named
	// MetricNamePrefix + "pod_phase_duration_milliseconds", partitioned by the
	// "phase" label.
	PodLifecycleDurationMilliSeconds = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: MetricNamePrefix + "pod_phase_duration_milliseconds",
			Help: "how long a trace from start to finish",
		},
		[]string{"phase"},
	)

	// LRUCacheCounter is a counter vector named
	// MetricNamePrefix + "lru_cache_fetch_count", partitioned by the "name"
	// and "type" labels; per its Help text it counts element fetches from an
	// LRU cache.
	LRUCacheCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "lru_cache_fetch_count",
			Help: "how many times of get element form lru cache",
		},
		[]string{"name", "type"},
	)
)
View Source
// SLO-related metrics. Their names are written out literally rather than
// being built from MetricNamePrefix.
var (
	// StartupLatencyBuckets lists the histogram bucket upper bounds shared by
	// the pod startup latency histograms below, whose names and Help texts
	// state seconds as the unit (0.5 up to 86400).
	StartupLatencyBuckets = []float64{0.5, 1, 2, 4, 6, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360,
		480, 600, 900, 1200, 1800, 2700, 3600, 7200, 14400, 43200, 86400}

	// PodStartupLatencyExcludingShceduling is a prometheus metric for monitoring pod startup latency.
	// Per its Help text the observed latencies are in seconds and exclude
	// scheduling time.
	// NOTE(review): "scheduing" in the metric name (and "Shceduling" in the
	// identifier) look like typos for "scheduling"; both are externally
	// visible, so they are left unchanged.
	PodStartupLatencyExcludingShceduling = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_latency_excluding_scheduing_second",
			Help:    "Pod startup latencies in seconds, without scheduling times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "ownerref", "milestone", "is_job"},
	)

	// PodStartupLatency is a prometheus metric for monitoring pod startup latency including scheduling times.
	// It is a histogram vector using StartupLatencyBuckets.
	PodStartupLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_latency_second",
			Help:    "Pod startup latencies in seconds, with image pull times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "ownerref", "milestone", "is_job", "scheduling_strategy", "cores"},
	)

	// PodStartupSLOLatency is a prometheus metric for monitoring pod startup latency including scheduling times.
	// Compared with PodStartupLatency it carries a "slo_time" label instead of
	// "scheduling_strategy".
	PodStartupSLOLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_slo_latency_second",
			Help:    "Pod startup slo latencies in seconds, with image pull times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "ownerref", "milestone", "is_job", "slo_time", "cores"},
	)

	// PodStartupK8sSLOLatency is a prometheus metric for monitoring pod startup latency including scheduling times.
	// It uses a reduced label set: "cluster", "namespace", "slo_time", "cores".
	PodStartupK8sSLOLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_k8s_slo_latency_second",
			Help:    "Pod startup slo latencies in seconds, with image pull times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "slo_time", "cores"},
	)

	// PodStartupResultExcludingScheduling counts pod startup results without
	// scheduling; per its Help text the result is succeed, failed or timeout.
	PodStartupResultExcludingScheduling = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_result_excluding_scheduling_count",
			Help: "Pod startup result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "ownerref", "result", "is_job", "delivery_status"},
	)

	// PodStartupResult is a prometheus metric for monitoring pod startup result
	// (succeed, failed or timeout per its Help text).
	PodStartupResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_result_count",
			Help: "Pod startup result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "ownerref", "result", "scheduling_strategy", "cores", "is_job", "node_ip", "delivery_status", "podslo"},
	)

	// PodCreateTotal is a prometheus metric for monitoring pod create total count
	// partitioned by "cluster", "namespace", "cores" and "is_job".
	PodCreateTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_create_total_count",
			Help: "Pod create total",
		},
		[]string{"cluster", "namespace", "cores", "is_job"},
	)

	// PodStartupSLOResult is a prometheus metric for monitoring pod startup result
	// against an SLO; its labels include "slo_time", "slo_reason" and
	// "slotime_adjusted".
	PodStartupSLOResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_slo_result_count",
			Help: "Pod startup slo result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "ownerref", "result", "slo_time", "cores", "is_job", "priority", "delivery_status", "slo_reason", "slotime_adjusted"},
	)
	// PodStartupK8sSLOResult is a prometheus metric for monitoring pod startup result
	// with a reduced label set compared with PodStartupSLOResult.
	PodStartupK8sSLOResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_k8s_slo_result_count",
			Help: "Pod startup slo result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "result", "slo_time", "cores", "delivery_status", "slo_reason"},
	)

	// PodCreateAPIResult records the result returned by the pod create API,
	// partitioned by "cluster", "namespace" and "resultCode".
	PodCreateAPIResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_create_api_result_count",
			Help: "pod create api return result",
		},
		[]string{"cluster", "namespace", "resultCode"},
	)

	// EventConsumedCount is a prometheus metric for monitoring speed of event consumed.
	// It is an unlabeled counter.
	EventConsumedCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "lunettes_events_consumed_create_slo",
			Help: "events processed by consumer",
		},
	)

	// MethodDurationMilliSeconds is a gauge vector named
	// "lunettes_method_duration_milliseconds", partitioned by the "phase"
	// label.
	MethodDurationMilliSeconds = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "lunettes_method_duration_milliseconds",
			Help: "how long an method operation to completed",
		},
		[]string{"phase"},
	)

	// PodDeleteResult counts pod delete results, partitioned by "cluster",
	// "namespace", "node_ip" and "result".
	PodDeleteResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_result_count",
			Help: "Pod delete result, succeed or failed or reason",
		},
		[]string{"cluster", "namespace", "node_ip", "result"},
	)

	// PodDeleteResultInDay counts pod delete results under the metric name
	// "slo_pod_delete_result_count_day".
	// NOTE(review): presumably a per-day aggregation; the windowing logic is
	// not visible here — confirm against the code that updates it.
	PodDeleteResultInDay = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_result_count_day",
			Help: "Pod delete result, succeed or failed or reason",
		},
		[]string{"cluster", "namespace", "result"},
	)

	// PodDeleteResultInWeek counts pod delete results under the metric name
	// "slo_pod_delete_result_count_week".
	// NOTE(review): presumably a per-week aggregation; the windowing logic is
	// not visible here — confirm against the code that updates it.
	PodDeleteResultInWeek = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_result_count_week",
			Help: "Pod delete result, succeed or failed or reason",
		},
		[]string{"cluster", "namespace", "result"},
	)

	// PodDeleteApiCode counts pod delete API response codes, partitioned by
	// "cluster", "namespace" and "apicode".
	PodDeleteApiCode = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_api_code",
			Help: "Pod delete api response code",
		},
		[]string{"cluster", "namespace", "apicode"},
	)
	// PodDeleteLatency is a histogram vector of pod delete latency,
	// partitioned by "cluster", "namespace" and "phase". Its buckets repeat
	// the values of StartupLatencyBuckets and extend them with 2 to 7
	// multiples of 86400.
	// NOTE(review): the unit is not stated in the name or Help; presumably
	// seconds (86400 = one day) — confirm.
	PodDeleteLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_delete_latency",
			Help:    "pod delete latency",
			Buckets: []float64{0.5, 1, 2, 4, 6, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600, 7200, 14400, 43200, 86400, 86400 * 2, 86400 * 3, 86400 * 4, 86400 * 5, 86400 * 6, 86400 * 7},
		},
		[]string{"cluster", "namespace", "phase"},
	)

	// PodDeleteLatencyQuantiles is a summary vector of pod delete latency in
	// seconds, partitioned by "pod_type". It tracks the 0.5, 0.9 and 0.99
	// quantiles and sets MaxAge to one hour.
	PodDeleteLatencyQuantiles = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       "slo_pod_delete_latency_quantiles_in_seconds",
			Help:       "pod delete latency in seconds with quantiles",
			MaxAge:     time.Hour,
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		},
		[]string{"pod_type"},
	)

	// PodUpgradeResultCounter counts pod upgrade results, partitioned by
	// "cluster", "namespace", "node_ip" and "result".
	// NOTE(review): Help is an empty string; consider giving it a description.
	PodUpgradeResultCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_upgrade_result_count",
			Help: "",
		},
		[]string{"cluster", "namespace", "node_ip", "result"},
	)

	// SloAnalysisResultGauge slo analysis result of the configmap
	// exposed as a gauge vector partitioned by "is_custom", "result",
	// "description" and "type".
	SloAnalysisResultGauge = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "slo_analysis_result",
			Help: "slo analysis result of the configmap.",
		},
		[]string{"is_custom", "result", "description", "type"},
	)
)
View Source
// Metrics of the spans module. Names are built from spansPrefix, which is
// declared elsewhere in the package.
var (
	// SpansProcessedPods is a counter vector with an empty label set; per its
	// Help text it counts pods processed by the spans module.
	SpansProcessedPods = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: spansPrefix + "processed_pods_count",
			Help: "how many pods have been processed by spans module",
		},
		[]string{},
	)

	// SpansInMemPodsCount is a gauge vector with an empty label set; per its
	// Help text it reports how many pods are currently in the spans module.
	SpansInMemPodsCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: spansPrefix + "in_mem_pods_count",
			Help: "how many pods currently in spans module",
		},
		[]string{},
	)

	// BaseLabels is a set (map to true) holding the same five label names
	// that SpansConsumingStatistic is created with below.
	BaseLabels = map[string]bool{
		"cluster": true, "namespace": true, "resource": true, "type": true, "action_type": true,
	}
	// SpanConsumingLabels starts out as an empty set.
	// NOTE(review): presumably filled with extra label names when the
	// histogram is redefined — confirm against DefineSpanStatistic.
	SpanConsumingLabels     = map[string]bool{}
	// SpansConsumingStatistic is a histogram vector of span time consumption,
	// partitioned by "cluster", "namespace", "resource", "type" and
	// "action_type".
	// NOTE(review): Buckets is an empty slice; client_golang documents that it
	// then falls back to prometheus.DefBuckets, which look sized for
	// seconds-scale latencies while this metric name says millisecond —
	// confirm the intended buckets.
	SpansConsumingStatistic = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    spansPrefix + "span_consuming_millisecond_statistic",
			Help:    "Time consuming statistic of span.",
			Buckets: []float64{},
		},
		[]string{"cluster", "namespace", "resource", "type", "action_type"},
	)
)
View Source
var (
	// TraceProcessingLatencyBuckets lists the bucket upper bounds used by
	// TraceProcessingLatency, ranging from 0.5 to 86400.
	TraceProcessingLatencyBuckets = []float64{
		0.5, 1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
		30, 45, 60, 120, 180, 240, 300, 360, 480, 600,
		900, 1200, 1800, 2700, 3600, 7200, 14400, 43200, 86400,
	}

	// TraceProcessingLatency is the delivery latency of each trace: a
	// histogram vector named "trace_processing_latency_seconds", partitioned
	// by the "type" label.
	TraceProcessingLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "trace_processing_latency_seconds",
			Help:    "time used to process one trace",
			Buckets: TraceProcessingLatencyBuckets,
		},
		[]string{"type"},
	)
)

Functions

func ClearRequestResourceMetric

func ClearRequestResourceMetric()

ClearRequestResourceMetric periodically clears the metric data of request resource info.

func DefineSpanStatistic

func DefineSpanStatistic(labels []string)

DefineSpanStatistic redefines SpansConsumingStatistic if the extra properties change.

func IncreaseFailedTraceCount

func IncreaseFailedTraceCount(traceType, namespace, reasonName string)

IncreaseFailedTraceCount increases the counter for failed traces.

func IncreaseSucceedTraceCount

func IncreaseSucceedTraceCount(traceType, namespace string)

IncreaseSucceedTraceCount increases the counter for succeeded traces.

func ObserveQueryMethodDuration

func ObserveQueryMethodDuration(method string, begin time.Time)

func ObserveTraceDuration

func ObserveTraceDuration(traceType, namespace string, v float64)

ObserveTraceDuration observes trace durations.

func UpdateServiceOperationDurationMetrics

func UpdateServiceOperationDurationMetrics(service, operation, namespace string, d float64)

UpdateServiceOperationDurationMetrics updates ServiceOperationDurationMilliSeconds.

func UpdateServiceOperationResultMetrics

func UpdateServiceOperationResultMetrics(service, operation, namespace string, err bool)

UpdateServiceOperationResultMetrics update ServiceOperationDurationMilliSeconds

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL