metrics

package

v0.1.10 Latest Latest Go to latest Published: Sep 5, 2024 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation ¶

Index ¶

Constants
Variables
func ClearRequestResourceMetric()
func DefineSpanStatistic(labels []string)
func IncreaseFailedTraceCount(traceType, namespace, reasonName string)
func IncreaseSucceedTraceCount(traceType, namespace string)
func ObserveQueryMethodDuration(method string, begin time.Time)
func ObserveTraceDuration(traceType, namespace string, v float64)
func UpdateServiceOperationDurationMetrics(service, operation, namespace string, d float64)
func UpdateServiceOperationResultMetrics(service, operation, namespace string, err bool)

Constants ¶

const (
	// MetricNamePrefix is the common prefix that metric names in this package
	// are built from when they are declared as MetricNamePrefix + "<suffix>".
	MetricNamePrefix = "lunettes_"
)

Variables ¶

View Source

// Collectors whose names are built from MetricNamePrefix.
var (
	// SchedulingResultCount counts scheduling outcomes, labeled by result.
	// NOTE(review): the Help text is spelled "shceduling"; it is a runtime
	// string, so it is left as-is here.
	SchedulingResultCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "scheduling_count",
			Help: "shceduling count",
		},

		[]string{"result"},
	)

	// ContainerStartingResultCount counts container starting outcomes,
	// labeled by result.
	ContainerStartingResultCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "container_starting_count",
			Help: "container starting result count",
		},

		[]string{"result"},
	)

	// DebugMethodDurationMilliSeconds is a summary of how long methods take,
	// labeled by method. It tracks the 0.5, 0.9, 0.95 and 0.99 quantiles; each
	// Objectives value is the error tolerance configured for that quantile.
	DebugMethodDurationMilliSeconds = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       MetricNamePrefix + "debug_method_duration_milliseconds",
			Help:       "how long a method taken",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.001, 0.99: 0.001},
		},
		[]string{"method"},
	)

	// QueryMethodDurationMilliSeconds is a summary of how long query methods
	// take, labeled by method, with the same quantile objectives as
	// DebugMethodDurationMilliSeconds.
	QueryMethodDurationMilliSeconds = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       MetricNamePrefix + "query_method_duration_milliseconds",
			Help:       "how long a query method taken",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.001, 0.99: 0.001},
		},
		[]string{"method"},
	)

	// ConsumerDurationMilliSeconds is an unlabeled summary of how long the
	// consumer takes to consume one event.
	ConsumerDurationMilliSeconds = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Name:       MetricNamePrefix + "consumer_duration_milliseconds",
			Help:       "how long consumer consume one event",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.001, 0.99: 0.001},
		},
	)

	// TraceCreatedCount counts trace records, labeled by trace_type. It is
	// exposed as lunettes_trace_created_count.
	TraceCreatedCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_created_count",
			Help: "how many trace record have been scheduled so far",
		},
		[]string{"trace_type"},
	)

	// TraceNewBeforeFinishCount counts traces that were added before an older
	// one had finished (per its Help text).
	TraceNewBeforeFinishCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_new_before_finish_count",
			Help: "how many trace added before older one have finishi",
		},
	)

	// SpanCreatedCount counts created spans.
	// NOTE(review): the first label is spelled "servive" (presumably meant to
	// be "service"); renaming it would change the exposed series, so confirm
	// with dashboard/alert owners before touching it.
	SpanCreatedCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "span_created_count",
			Help: "how many spans have been created so far",
		},
		[]string{"servive", "operation"},
	)

	// TraceTimeoutCount counts timed-out trace records, labeled by trace_type.
	TraceTimeoutCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_timeout_count",
			Help: "how many timeout trace record have been scheduled so far",
		},
		[]string{"trace_type"},
	)

	// TraceTimeoutCorrectToReadyCount counts traces that timed out but were
	// later corrected to normal (per its Help text).
	TraceTimeoutCorrectToReadyCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "trace_timeout_corrected_count",
			Help: "how many timeout traces but corrected to normal",
		},
	)

	// PodLifecycleDurationMilliSeconds is a gauge labeled by phase and exposed
	// as lunettes_pod_phase_duration_milliseconds.
	// NOTE(review): the Help text talks about a trace's start-to-finish time
	// while the name talks about pod phases — confirm which one is intended.
	PodLifecycleDurationMilliSeconds = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: MetricNamePrefix + "pod_phase_duration_milliseconds",
			Help: "how long a trace from start to finish",
		},
		[]string{"phase"},
	)

	// LRUCacheCounter counts fetches from an LRU cache, labeled by name and
	// type.
	LRUCacheCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: MetricNamePrefix + "lru_cache_fetch_count",
			Help: "how many times of get element form lru cache",
		},
		[]string{"name", "type"},
	)
)

View Source

// Pod SLO collectors (startup, create, delete, upgrade) and related metrics.
var (
	// StartupLatencyBuckets holds the bucket upper bounds shared by the pod
	// startup latency histograms declared below; those histograms state their
	// unit as seconds in their Help text.
	StartupLatencyBuckets = []float64{0.5, 1, 2, 4, 6, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360,
		480, 600, 900, 1200, 1800, 2700, 3600, 7200, 14400, 43200, 86400}

	// PodStartupLatencyExcludingShceduling is a prometheus metric for monitoring pod startup latency
	// in seconds, without scheduling times.
	// NOTE(review): both the identifier ("Shceduling") and the metric name
	// ("scheduing") are misspelled; they are public names and left unchanged.
	PodStartupLatencyExcludingShceduling = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_latency_excluding_scheduing_second",
			Help:    "Pod startup latencies in seconds, without scheduling times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "ownerref", "milestone", "is_job"},
	)

	// PodStartupLatency is a prometheus metric for monitoring pod startup latency including scheduling times.
	PodStartupLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_latency_second",
			Help:    "Pod startup latencies in seconds, with image pull times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "ownerref", "milestone", "is_job", "scheduling_strategy", "cores"},
	)

	// PodStartupSLOLatency is a prometheus histogram of pod startup SLO
	// latencies in seconds; unlike PodStartupLatency it carries a slo_time
	// label instead of scheduling_strategy.
	PodStartupSLOLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_slo_latency_second",
			Help:    "Pod startup slo latencies in seconds, with image pull times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "ownerref", "milestone", "is_job", "slo_time", "cores"},
	)

	// PodStartupK8sSLOLatency is a prometheus histogram of pod startup SLO
	// latencies in seconds, labeled only by cluster, namespace, slo_time and
	// cores.
	PodStartupK8sSLOLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_startup_k8s_slo_latency_second",
			Help:    "Pod startup slo latencies in seconds, with image pull times",
			Buckets: StartupLatencyBuckets,
		},
		[]string{"cluster", "namespace", "slo_time", "cores"},
	)

	// PodStartupResultExcludingScheduling counts pod startup results
	// (succeed, failed or timeout) without scheduling.
	PodStartupResultExcludingScheduling = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_result_excluding_scheduling_count",
			Help: "Pod startup result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "ownerref", "result", "is_job", "delivery_status"},
	)

	// PodStartupResult is a prometheus metric for monitoring pod startup result
	PodStartupResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_result_count",
			Help: "Pod startup result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "ownerref", "result", "scheduling_strategy", "cores", "is_job", "node_ip", "delivery_status", "podslo"},
	)

	// PodCreateTotal is a prometheus metric for monitoring pod create total count
	PodCreateTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_create_total_count",
			Help: "Pod create total",
		},
		[]string{"cluster", "namespace", "cores", "is_job"},
	)

	// PodStartupSLOResult is a prometheus counter of pod startup SLO results
	// (succeed, failed or timeout), with SLO-specific labels such as slo_time,
	// priority, slo_reason and slotime_adjusted.
	PodStartupSLOResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_slo_result_count",
			Help: "Pod startup slo result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "ownerref", "result", "slo_time", "cores", "is_job", "priority", "delivery_status", "slo_reason", "slotime_adjusted"},
	)
	// PodStartupK8sSLOResult is a prometheus counter of pod startup SLO
	// results (succeed, failed or timeout) with a reduced label set compared
	// to PodStartupSLOResult.
	PodStartupK8sSLOResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_startup_k8s_slo_result_count",
			Help: "Pod startup slo result, succeed or failed or timeout",
		},
		[]string{"cluster", "namespace", "result", "slo_time", "cores", "delivery_status", "slo_reason"},
	)

	// PodCreateAPIResult records the result returned by the pod create API,
	// labeled by cluster, namespace and resultCode.
	PodCreateAPIResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_create_api_result_count",
			Help: "pod create api return result",
		},
		[]string{"cluster", "namespace", "resultCode"},
	)

	// EventConsumedCount is a prometheus metric for monitoring speed of event consumed
	EventConsumedCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "lunettes_events_consumed_create_slo",
			Help: "events processed by consumer",
		},
	)

	// MethodDurationMilliSeconds is a gauge of how long a method operation
	// takes to complete. Its only label is named "phase".
	MethodDurationMilliSeconds = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "lunettes_method_duration_milliseconds",
			Help: "how long an method operation to completed",
		},
		[]string{"phase"},
	)

	// PodDeleteResult counts pod delete results (succeed, failed, or a
	// reason), labeled by cluster, namespace, node_ip and result.
	PodDeleteResult = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_result_count",
			Help: "Pod delete result, succeed or failed or reason",
		},
		[]string{"cluster", "namespace", "node_ip", "result"},
	)

	// PodDeleteResultInDay counts pod delete results without the node_ip
	// label; exposed as slo_pod_delete_result_count_day.
	// NOTE(review): presumably a per-day view of PodDeleteResult — how the
	// daily window is enforced is not visible here; confirm against callers.
	PodDeleteResultInDay = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_result_count_day",
			Help: "Pod delete result, succeed or failed or reason",
		},
		[]string{"cluster", "namespace", "result"},
	)

	// PodDeleteResultInWeek counts pod delete results without the node_ip
	// label; exposed as slo_pod_delete_result_count_week.
	// NOTE(review): presumably a per-week view of PodDeleteResult — confirm
	// against callers.
	PodDeleteResultInWeek = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_result_count_week",
			Help: "Pod delete result, succeed or failed or reason",
		},
		[]string{"cluster", "namespace", "result"},
	)

	// PodDeleteApiCode counts pod delete API response codes, labeled by
	// cluster, namespace and apicode.
	PodDeleteApiCode = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_delete_api_code",
			Help: "Pod delete api response code",
		},
		[]string{"cluster", "namespace", "apicode"},
	)
	// PodDeleteLatency is a histogram of pod delete latency, labeled by
	// cluster, namespace and phase. Its buckets repeat the values of
	// StartupLatencyBuckets and extend them up to 86400 * 7.
	// NOTE(review): the unit is not stated in the name or Help text —
	// presumably seconds (which would make the top bucket seven days); confirm.
	PodDeleteLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "slo_pod_delete_latency",
			Help:    "pod delete latency",
			Buckets: []float64{0.5, 1, 2, 4, 6, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600, 7200, 14400, 43200, 86400, 86400 * 2, 86400 * 3, 86400 * 4, 86400 * 5, 86400 * 6, 86400 * 7},
		},
		[]string{"cluster", "namespace", "phase"},
	)

	// PodDeleteLatencyQuantiles is a summary of pod delete latency in seconds,
	// labeled by pod_type. It tracks the 0.5, 0.9 and 0.99 quantiles and sets
	// MaxAge to one hour.
	PodDeleteLatencyQuantiles = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       "slo_pod_delete_latency_quantiles_in_seconds",
			Help:       "pod delete latency in seconds with quantiles",
			MaxAge:     time.Hour,
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		},
		[]string{"pod_type"},
	)

	// PodUpgradeResultCounter counts pod upgrade results, labeled by cluster,
	// namespace, node_ip and result. Its Help text is empty.
	PodUpgradeResultCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slo_pod_upgrade_result_count",
			Help: "",
		},
		[]string{"cluster", "namespace", "node_ip", "result"},
	)

	// SloAnalysisResultGauge slo analysis result of the configmap
	SloAnalysisResultGauge = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "slo_analysis_result",
			Help: "slo analysis result of the configmap.",
		},
		[]string{"is_custom", "result", "description", "type"},
	)
)

View Source

// Collectors for the spans module. Their names are built from spansPrefix,
// which is declared outside this listing.
var (
	// SpansProcessedPods counts pods processed by the spans module. It is a
	// CounterVec declared with an empty label list.
	SpansProcessedPods = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: spansPrefix + "processed_pods_count",
			Help: "how many pods have been processed by spans module",
		},
		[]string{},
	)

	// SpansInMemPodsCount reports how many pods are currently held in the
	// spans module. It is a GaugeVec declared with an empty label list.
	SpansInMemPodsCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: spansPrefix + "in_mem_pods_count",
			Help: "how many pods currently in spans module",
		},
		[]string{},
	)

	// BaseLabels is a set of label names; its keys are the same five names
	// that SpansConsumingStatistic is declared with below.
	BaseLabels = map[string]bool{
		"cluster": true, "namespace": true, "resource": true, "type": true, "action_type": true,
	}
	// SpanConsumingLabels starts out as an empty set.
	// NOTE(review): presumably filled with the extra labels passed to
	// DefineSpanStatistic — confirm against that function's body.
	SpanConsumingLabels     = map[string]bool{}
	// SpansConsumingStatistic is a histogram of span time consumption (named
	// in milliseconds), initially labeled with the BaseLabels names.
	// NOTE(review): Buckets is an empty slice here; the Prometheus Go client
	// is documented to fall back to its default buckets when none are given —
	// confirm that is intended for a millisecond metric.
	SpansConsumingStatistic = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    spansPrefix + "span_consuming_millisecond_statistic",
			Help:    "Time consuming statistic of span.",
			Buckets: []float64{},
		},
		[]string{"cluster", "namespace", "resource", "type", "action_type"},
	)
)

View Source

// Trace processing latency histogram and its buckets.
var (
	// TraceProcessingLatencyBuckets holds the bucket upper bounds used by
	// TraceProcessingLatency, ranging from 0.5 to 86400.
	TraceProcessingLatencyBuckets = []float64{0.5, 1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 30, 45, 60, 120, 180, 240, 300, 360,
		480, 600, 900, 1200, 1800, 2700, 3600, 7200, 14400, 43200, 86400}

	// TraceProcessingLatency is a histogram of the delivery latency of each
	// trace (the time used to process one trace), labeled by type and exposed
	// as trace_processing_latency_seconds.
	TraceProcessingLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "trace_processing_latency_seconds",
			Help:    "time used to process one trace",
			Buckets: TraceProcessingLatencyBuckets,
		},

		[]string{"type"},
	)
)

Functions ¶

func ClearRequestResourceMetric ¶

func ClearRequestResourceMetric()

ClearRequestResourceMetric clears the metric data of request resource info periodically.

func DefineSpanStatistic ¶

func DefineSpanStatistic(labels []string)

DefineSpanStatistic redefines SpansConsumingStatistic if the extra properties change.

func IncreaseFailedTraceCount ¶

func IncreaseFailedTraceCount(traceType, namespace, reasonName string)

IncreaseFailedTraceCount increases the counter for failed traces.

func IncreaseSucceedTraceCount ¶

func IncreaseSucceedTraceCount(traceType, namespace string)

IncreaseSucceedTraceCount increases the counter for succeeded traces.

func ObserveQueryMethodDuration ¶

func ObserveQueryMethodDuration(method string, begin time.Time)

func ObserveTraceDuration ¶

func ObserveTraceDuration(traceType, namespace string, v float64)

ObserveTraceDuration records an observation of a trace's duration.

func UpdateServiceOperationDurationMetrics ¶

func UpdateServiceOperationDurationMetrics(service, operation, namespace string, d float64)

UpdateServiceOperationDurationMetrics updates ServiceOperationDurationMilliSeconds.

func UpdateServiceOperationResultMetrics ¶

func UpdateServiceOperationResultMetrics(service, operation, namespace string, err bool)

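UpdateServiceOperationResultMetrics updates the result metrics for a service operation; err reports whether the operation failed. (The original text named ServiceOperationDurationMilliSeconds here, which appears to be copied from UpdateServiceOperationDurationMetrics.)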

Types ¶

This section is empty.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL