Documentation ¶
Index ¶
- Constants
- Variables
- func AdmissionAttempt(result AdmissionResult, duration time.Duration)
- func AdmissionChecksWaitTime(cqName kueue.ClusterQueueReference, waitTime time.Duration)
- func AdmittedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
- func ClearCacheMetrics(cqName string)
- func ClearClusterQueueMetrics(cqName string)
- func ClearClusterQueueResourceMetrics(cqName string)
- func ClearClusterQueueResourceQuotas(cqName, flavor, resource string)
- func ClearClusterQueueResourceReservations(cqName, flavor, resource string)
- func ClearClusterQueueResourceUsage(cqName, flavor, resource string)
- func QuotaReservedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
- func Register()
- func ReportClusterQueueQuotas(cohort, queue, flavor, resource string, nominal, borrowing, lending float64)
- func ReportClusterQueueResourceReservations(cohort, queue, flavor, resource string, usage float64)
- func ReportClusterQueueResourceUsage(cohort, queue, flavor, resource string, usage float64)
- func ReportClusterQueueStatus(cqName string, cqStatus ClusterQueueStatus)
- func ReportClusterQueueWeightedShare(cq string, weightedShare int64)
- func ReportEvictedWorkloads(cqName, reason string)
- func ReportPendingWorkloads(cqName string, active, inadmissible int)
- func ReportPreemption(preemptingCqName, preemptingReason, targetCqName string)
- type AdmissionResult
- type ClusterQueueStatus
Constants ¶
View Source
const ( AdmissionResultSuccess AdmissionResult = "success" AdmissionResultInadmissible AdmissionResult = "inadmissible" PendingStatusActive = "active" PendingStatusInadmissible = "inadmissible" // CQStatusPending means the ClusterQueue is accepted but not yet active, // this can be because of: // - a missing ResourceFlavor referenced by the ClusterQueue // - a missing or inactive AdmissionCheck referenced by the ClusterQueue // - the ClusterQueue is stopped // In this state, the ClusterQueue can't admit new workloads and its quota can't be borrowed // by other active ClusterQueues in the cohort. CQStatusPending ClusterQueueStatus = "pending" // CQStatusActive means the ClusterQueue can admit new workloads and its quota // can be borrowed by other ClusterQueues in the cohort. CQStatusActive ClusterQueueStatus = "active" // CQStatusTerminating means the clusterQueue is in pending deletion. CQStatusTerminating ClusterQueueStatus = "terminating" )
Variables ¶
View Source
var ( CQStatuses = []ClusterQueueStatus{CQStatusPending, CQStatusActive, CQStatusTerminating} AdmissionAttemptsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "admission_attempts_total", Help: `The total number of attempts to admit workloads. Each admission attempt might try to admit more than one workload. The label 'result' can have the following values: - 'success' means that at least one workload was admitted. - 'inadmissible' means that no workload was admitted.`, }, []string{"result"}, ) AdmissionCyclePreemptionSkips = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "admission_cycle_preemption_skips", Help: "The number of Workloads in the ClusterQueue that got preemption candidates " + "but had to be skipped because other ClusterQueues needed the same resources in the same cycle", }, []string{"cluster_queue"}, ) PendingWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "pending_workloads", Help: `The number of pending workloads, per 'cluster_queue' and 'status'. 'status' can have the following values: - "active" means that the workloads are in the admission queue. 
- "inadmissible" means there was a failed admission attempt for these workloads and they won't be retried until cluster conditions, which could make this workload admissible, change`, }, []string{"cluster_queue", "status"}, ) QuotaReservedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "quota_reserved_workloads_total", Help: "The total number of quota reserved workloads per 'cluster_queue'", }, []string{"cluster_queue"}, ) AdmittedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "admitted_workloads_total", Help: "The total number of admitted workloads per 'cluster_queue'", }, []string{"cluster_queue"}, ) EvictedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "evicted_workloads_total", Help: `The number of evicted workloads per 'cluster_queue', The label 'reason' can have the following values: - "Preempted" means that the workload was evicted in order to free resources for a workload with a higher priority or reclamation of nominal quota. - "PodsReadyTimeout" means that the eviction took place due to a PodsReady timeout. - "AdmissionCheck" means that the workload was evicted because at least one admission check transitioned to False. - "ClusterQueueStopped" means that the workload was evicted because the ClusterQueue is stopped. - "Deactivated" means that the workload was evicted because spec.active is set to false`, }, []string{"cluster_queue", "reason"}, ) PreemptedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "preempted_workloads_total", Help: `The number of preempted workloads per 'preempting_cluster_queue', The label 'reason' can have the following values: - "InClusterQueue" means that the workload was preempted by a workload in the same ClusterQueue. 
- "InCohortReclamation" means that the workload was preempted by a workload in the same cohort due to reclamation of nominal quota. - "InCohortFairSharing" means that the workload was preempted by a workload in the same cohort due to fair sharing. - "InCohortReclaimWhileBorrowing" means that the workload was preempted by a workload in the same cohort due to reclamation of nominal quota while borrowing.`, }, []string{"preempting_cluster_queue", "reason"}, ) ReservingActiveWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "reserving_active_workloads", Help: "The number of Workloads that are reserving quota, per 'cluster_queue'", }, []string{"cluster_queue"}, ) AdmittedActiveWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "admitted_active_workloads", Help: "The number of admitted Workloads that are active (unsuspended and not finished), per 'cluster_queue'", }, []string{"cluster_queue"}, ) ClusterQueueByStatus = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_status", Help: `Reports 'cluster_queue' with its 'status' (with possible values 'pending', 'active' or 'terminating'). 
For a ClusterQueue, the metric only reports a value of 1 for one of the statuses.`, }, []string{"cluster_queue", "status"}, ) ClusterQueueResourceReservations = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_resource_reservation", Help: `Reports the cluster_queue's total resource reservation within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueResourceUsage = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_resource_usage", Help: `Reports the cluster_queue's total resource usage within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueResourceNominalQuota = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_nominal_quota", Help: `Reports the cluster_queue's resource nominal quota within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueResourceBorrowingLimit = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_borrowing_limit", Help: `Reports the cluster_queue's resource borrowing limit within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueResourceLendingLimit = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_lending_limit", Help: `Reports the cluster_queue's resource lending limit within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueWeightedShare = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_weighted_share", Help: `Reports a value representing the maximum of the ratios of usage above nominal quota to the lendable resources in the cohort, among all the resources provided by the ClusterQueue, and divided by the weight. 
If zero, it means that the usage of the ClusterQueue is below the nominal quota. If the ClusterQueue has a weight of zero, this will return 9223372036854775807, the maximum possible share value.`, }, []string{"cluster_queue"}, ) )
Functions ¶
func AdmissionAttempt ¶
func AdmissionAttempt(result AdmissionResult, duration time.Duration)
func AdmissionChecksWaitTime ¶ added in v0.7.0
func AdmissionChecksWaitTime(cqName kueue.ClusterQueueReference, waitTime time.Duration)
func AdmittedWorkload ¶
func AdmittedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
func ClearCacheMetrics ¶
func ClearCacheMetrics(cqName string)
func ClearClusterQueueMetrics ¶ added in v0.8.1
func ClearClusterQueueMetrics(cqName string)
func ClearClusterQueueResourceMetrics ¶ added in v0.5.0
func ClearClusterQueueResourceMetrics(cqName string)
func ClearClusterQueueResourceQuotas ¶ added in v0.5.0
func ClearClusterQueueResourceQuotas(cqName, flavor, resource string)
func ClearClusterQueueResourceReservations ¶ added in v0.5.0
func ClearClusterQueueResourceReservations(cqName, flavor, resource string)
func ClearClusterQueueResourceUsage ¶ added in v0.5.0
func ClearClusterQueueResourceUsage(cqName, flavor, resource string)
func QuotaReservedWorkload ¶ added in v0.7.0
func QuotaReservedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
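func ReportClusterQueueQuotas ¶ added in v0.5.0
func ReportClusterQueueQuotas(cohort, queue, flavor, resource string, nominal, borrowing, lending float64)
func ReportClusterQueueResourceReservations ¶ added in v0.5.0
func ReportClusterQueueResourceReservations(cohort, queue, flavor, resource string, usage float64)
func ReportClusterQueueResourceUsage ¶ added in v0.5.0
func ReportClusterQueueResourceUsage(cohort, queue, flavor, resource string, usage float64)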
func ReportClusterQueueStatus ¶
func ReportClusterQueueStatus(cqName string, cqStatus ClusterQueueStatus)
func ReportClusterQueueWeightedShare ¶ added in v0.7.0
func ReportClusterQueueWeightedShare(cq string, weightedShare int64)
func ReportEvictedWorkloads ¶ added in v0.7.0
func ReportEvictedWorkloads(cqName, reason string)
func ReportPendingWorkloads ¶
func ReportPendingWorkloads(cqName string, active, inadmissible int)
func ReportPreemption ¶ added in v0.8.0
func ReportPreemption(preemptingCqName, preemptingReason, targetCqName string)
Types ¶
type AdmissionResult ¶
type AdmissionResult string
type ClusterQueueStatus ¶
type ClusterQueueStatus string
Click to show internal directories.
Click to hide internal directories.