metrics

package
v0.5.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 9, 2024 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation

Index

Constants

View Source
const (
	AdmissionResultSuccess      AdmissionResult = "success"
	AdmissionResultInadmissible AdmissionResult = "inadmissible"

	PendingStatusActive       = "active"
	PendingStatusInadmissible = "inadmissible"

	// CQStatusPending means the ClusterQueue is accepted but not yet active,
	// this can be because of a missing ResourceFlavor referenced by the ClusterQueue.
	// In this state, the ClusterQueue can't admit new workloads and its quota can't be borrowed
	// by other active ClusterQueues in the cohort.
	CQStatusPending ClusterQueueStatus = "pending"
	// CQStatusActive means the ClusterQueue can admit new workloads and its quota
	// can be borrowed by other ClusterQueues in the cohort.
	CQStatusActive ClusterQueueStatus = "active"
	// CQStatusTerminating means the clusterQueue is in pending deletion.
	CQStatusTerminating ClusterQueueStatus = "terminating"
)

Variables

View Source
var (
	CQStatuses = []ClusterQueueStatus{CQStatusPending, CQStatusActive, CQStatusTerminating}

	PendingWorkloads = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "pending_workloads",
			Help: `The number of pending workloads, per 'cluster_queue' and 'status'.
'status' can have the following values:
- "active" means that the workloads are in the admission queue.
- "inadmissible" means there was a failed admission attempt for these workloads and they won't be retried until cluster conditions, which could make this workload admissible, change`,
		}, []string{"cluster_queue", "status"},
	)

	AdmittedWorkloadsTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: constants.KueueName,
			Name:      "admitted_workloads_total",
			Help:      "The total number of admitted workloads per 'cluster_queue'",
		}, []string{"cluster_queue"},
	)

	ReservingActiveWorkloads = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "reserving_active_workloads",
			Help:      "The number of Workloads that are reserving quota, per 'cluster_queue'",
		}, []string{"cluster_queue"},
	)

	AdmittedActiveWorkloads = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "admitted_active_workloads",
			Help:      "The number of admitted Workloads that are active (unsuspended and not finished), per 'cluster_queue'",
		}, []string{"cluster_queue"},
	)

	ClusterQueueByStatus = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "cluster_queue_status",
			Help: `Reports 'cluster_queue' with its 'status' (with possible values 'pending', 'active' or 'terminated').
For a ClusterQueue, the metric only reports a value of 1 for one of the statuses.`,
		}, []string{"cluster_queue", "status"},
	)

	// Optional cluster queue metrics
	ClusterQueueResourceReservations = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "cluster_queue_resource_reservation",
			Help:      `Reports the cluster_queue's total resource reservation within all the flavors`,
		}, []string{"cohort", "cluster_queue", "flavor", "resource"},
	)

	ClusterQueueResourceUsage = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "cluster_queue_resource_usage",
			Help:      `Reports the cluster_queue's total resource usage within all the flavors`,
		}, []string{"cohort", "cluster_queue", "flavor", "resource"},
	)

	ClusterQueueResourceNominalQuota = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "cluster_queue_nominal_quota",
			Help:      `Reports the cluster_queue's resource nominal quota within all the flavors`,
		}, []string{"cohort", "cluster_queue", "flavor", "resource"},
	)

	ClusterQueueResourceBorrowingLimit = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Subsystem: constants.KueueName,
			Name:      "cluster_queue_borrowing_limit",
			Help:      `Reports the cluster_queue's resource borrowing limit within all the flavors`,
		}, []string{"cohort", "cluster_queue", "flavor", "resource"},
	)
)

Functions

func AdmissionAttempt

func AdmissionAttempt(result AdmissionResult, duration time.Duration)

func AdmittedWorkload

func AdmittedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)

func ClearCacheMetrics

func ClearCacheMetrics(cqName string)

func ClearClusterQueueResourceMetrics added in v0.5.0

func ClearClusterQueueResourceMetrics(cqName string)

func ClearClusterQueueResourceQuotas added in v0.5.0

func ClearClusterQueueResourceQuotas(cqName, flavor, resource string)

func ClearClusterQueueResourceReservations added in v0.5.0

func ClearClusterQueueResourceReservations(cqName, flavor, resource string)

func ClearClusterQueueResourceUsage added in v0.5.0

func ClearClusterQueueResourceUsage(cqName, flavor, resource string)

func ClearQueueSystemMetrics

func ClearQueueSystemMetrics(cqName string)

func Register

func Register()

func ReportClusterQueueQuotas added in v0.5.0

func ReportClusterQueueQuotas(cohort, queue, flavor, resource string, nominal, borrowing float64)

func ReportClusterQueueResourceReservations added in v0.5.0

func ReportClusterQueueResourceReservations(cohort, queue, flavor, resource string, usage float64)

func ReportClusterQueueResourceUsage added in v0.5.0

func ReportClusterQueueResourceUsage(cohort, queue, flavor, resource string, usage float64)

func ReportClusterQueueStatus

func ReportClusterQueueStatus(cqName string, cqStatus ClusterQueueStatus)

func ReportPendingWorkloads

func ReportPendingWorkloads(cqName string, active, inadmissible int)

Types

type AdmissionResult

type AdmissionResult string

type ClusterQueueStatus

type ClusterQueueStatus string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL