Documentation ¶
Index ¶
- Constants
- Variables
- func AdmissionAttempt(result AdmissionResult, duration time.Duration)
- func AdmissionChecksWaitTime(cqName kueue.ClusterQueueReference, waitTime time.Duration)
- func AdmittedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
- func ClearCacheMetrics(cqName string)
- func ClearClusterQueueMetrics(cqName string)
- func ClearClusterQueueResourceMetrics(cqName string)
- func ClearClusterQueueResourceQuotas(cqName, flavor, resource string)
- func ClearClusterQueueResourceReservations(cqName, flavor, resource string)
- func ClearClusterQueueResourceUsage(cqName, flavor, resource string)
- func ClearLocalQueueCacheMetrics(lq LocalQueueReference)
- func ClearLocalQueueMetrics(lq LocalQueueReference)
- func ClearLocalQueueResourceMetrics(lq LocalQueueReference)
- func LocalQueueAdmissionChecksWaitTime(lq LocalQueueReference, waitTime time.Duration)
- func LocalQueueAdmittedWorkload(lq LocalQueueReference, waitTime time.Duration)
- func LocalQueueQuotaReservedWorkload(lq LocalQueueReference, waitTime time.Duration)
- func QuotaReservedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
- func Register()
- func RegisterLQMetrics()
- func ReportClusterQueueQuotas(cohort, queue, flavor, resource string, nominal, borrowing, lending float64)
- func ReportClusterQueueResourceReservations(cohort, queue, flavor, resource string, usage float64)
- func ReportClusterQueueResourceUsage(cohort, queue, flavor, resource string, usage float64)
- func ReportClusterQueueStatus(cqName string, cqStatus ClusterQueueStatus)
- func ReportClusterQueueWeightedShare(cq string, weightedShare int64)
- func ReportEvictedWorkloads(cqName, reason string)
- func ReportLocalQueueEvictedWorkloads(lq LocalQueueReference, reason string)
- func ReportLocalQueuePendingWorkloads(lq LocalQueueReference, active, inadmissible int)
- func ReportLocalQueueResourceReservations(lq LocalQueueReference, flavor, resource string, usage float64)
- func ReportLocalQueueResourceUsage(lq LocalQueueReference, flavor, resource string, usage float64)
- func ReportLocalQueueStatus(lq LocalQueueReference, conditionStatus metav1.ConditionStatus)
- func ReportPendingWorkloads(cqName string, active, inadmissible int)
- func ReportPreemption(preemptingCqName, preemptingReason, targetCqName string)
- type AdmissionResult
- type ClusterQueueStatus
- type LocalQueueReference
Constants ¶
View Source
const ( AdmissionResultSuccess AdmissionResult = "success" AdmissionResultInadmissible AdmissionResult = "inadmissible" PendingStatusActive = "active" PendingStatusInadmissible = "inadmissible" // CQStatusPending means the ClusterQueue is accepted but not yet active, // this can be because of: // - a missing ResourceFlavor referenced by the ClusterQueue // - a missing or inactive AdmissionCheck referenced by the ClusterQueue // - the ClusterQueue is stopped // In this state, the ClusterQueue can't admit new workloads and its quota can't be borrowed // by other active ClusterQueues in the cohort. CQStatusPending ClusterQueueStatus = "pending" // CQStatusActive means the ClusterQueue can admit new workloads and its quota // can be borrowed by other ClusterQueues in the cohort. CQStatusActive ClusterQueueStatus = "active" // CQStatusTerminating means the clusterQueue is in pending deletion. CQStatusTerminating ClusterQueueStatus = "terminating" )
Variables ¶
View Source
var ( CQStatuses = []ClusterQueueStatus{CQStatusPending, CQStatusActive, CQStatusTerminating} AdmissionAttemptsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "admission_attempts_total", Help: `The total number of attempts to admit workloads. Each admission attempt might try to admit more than one workload. The label 'result' can have the following values: - 'success' means that at least one workload was admitted. - 'inadmissible' means that no workload was admitted.`, }, []string{"result"}, ) AdmissionCyclePreemptionSkips = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "admission_cycle_preemption_skips", Help: "The number of Workloads in the ClusterQueue that got preemption candidates " + "but had to be skipped because other ClusterQueues needed the same resources in the same cycle", }, []string{"cluster_queue"}, ) PendingWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "pending_workloads", Help: `The number of pending workloads, per 'cluster_queue' and 'status'. 'status' can have the following values: - "active" means that the workloads are in the admission queue. - "inadmissible" means there was a failed admission attempt for these workloads and they won't be retried until cluster conditions, which could make this workload admissible, change`, }, []string{"cluster_queue", "status"}, ) LocalQueuePendingWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "local_queue_pending_workloads", Help: `The number of pending workloads, per 'local_queue' and 'status'. 'status' can have the following values: - "active" means that the workloads are in the admission queue. 
- "inadmissible" means there was a failed admission attempt for these workloads and they won't be retried until cluster conditions, which could make this workload admissible, change`, }, []string{"name", "namespace", "status"}, ) QuotaReservedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "quota_reserved_workloads_total", Help: "The total number of quota reserved workloads per 'cluster_queue'", }, []string{"cluster_queue"}, ) LocalQueueQuotaReservedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "local_queue_quota_reserved_workloads_total", Help: "The total number of quota reserved workloads per 'local_queue'", }, []string{"name", "namespace"}, ) AdmittedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "admitted_workloads_total", Help: "The total number of admitted workloads per 'cluster_queue'", }, []string{"cluster_queue"}, ) LocalQueueAdmittedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "local_queue_admitted_workloads_total", Help: "The total number of admitted workloads per 'local_queue'", }, []string{"name", "namespace"}, ) EvictedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "evicted_workloads_total", Help: `The number of evicted workloads per 'cluster_queue', The label 'reason' can have the following values: - "Preempted" means that the workload was evicted in order to free resources for a workload with a higher priority or reclamation of nominal quota. - "PodsReadyTimeout" means that the eviction took place due to a PodsReady timeout. - "AdmissionCheck" means that the workload was evicted because at least one admission check transitioned to False. - "ClusterQueueStopped" means that the workload was evicted because the ClusterQueue is stopped. 
- "Deactivated" means that the workload was evicted because spec.active is set to false`, }, []string{"cluster_queue", "reason"}, ) LocalQueueEvictedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "local_queue_evicted_workloads_total", Help: `The number of evicted workloads per 'local_queue', The label 'reason' can have the following values: - "Preempted" means that the workload was evicted in order to free resources for a workload with a higher priority or reclamation of nominal quota. - "PodsReadyTimeout" means that the eviction took place due to a PodsReady timeout. - "AdmissionCheck" means that the workload was evicted because at least one admission check transitioned to False. - "ClusterQueueStopped" means that the workload was evicted because the ClusterQueue is stopped. - "Deactivated" means that the workload was evicted because spec.active is set to false`, }, []string{"name", "namespace", "reason"}, ) PreemptedWorkloadsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "preempted_workloads_total", Help: `The number of preempted workloads per 'preempting_cluster_queue', The label 'reason' can have the following values: - "InClusterQueue" means that the workload was preempted by a workload in the same ClusterQueue. - "InCohortReclamation" means that the workload was preempted by a workload in the same cohort due to reclamation of nominal quota. - "InCohortFairSharing" means that the workload was preempted by a workload in the same cohort due to fair sharing. 
- "InCohortReclaimWhileBorrowing" means that the workload was preempted by a workload in the same cohort due to reclamation of nominal quota while borrowing.`, }, []string{"preempting_cluster_queue", "reason"}, ) ReservingActiveWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "reserving_active_workloads", Help: "The number of Workloads that are reserving quota, per 'cluster_queue'", }, []string{"cluster_queue"}, ) LocalQueueReservingActiveWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "local_queue_reserving_active_workloads", Help: "The number of Workloads that are reserving quota, per 'localQueue'", }, []string{"name", "namespace"}, ) AdmittedActiveWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "admitted_active_workloads", Help: "The number of admitted Workloads that are active (unsuspended and not finished), per 'cluster_queue'", }, []string{"cluster_queue"}, ) LocalQueueAdmittedActiveWorkloads = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "local_queue_admitted_active_workloads", Help: "The number of admitted Workloads that are active (unsuspended and not finished), per 'localQueue'", }, []string{"name", "namespace"}, ) ClusterQueueByStatus = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_status", Help: `Reports 'cluster_queue' with its 'status' (with possible values 'pending', 'active' or 'terminating'). For a ClusterQueue, the metric only reports a value of 1 for one of the statuses.`, }, []string{"cluster_queue", "status"}, ) LocalQueueByStatus = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "local_queue_status", Help: `Reports 'localQueue' with its 'active' status (with possible values 'True', 'False', or 'Unknown'). 
For a LocalQueue, the metric only reports a value of 1 for one of the statuses.`, }, []string{"name", "namespace", "active"}, ) ClusterQueueResourceReservations = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_resource_reservation", Help: `Reports the cluster_queue's total resource reservation within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueResourceUsage = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_resource_usage", Help: `Reports the cluster_queue's total resource usage within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) LocalQueueResourceReservations = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "local_queue_resource_reservation", Help: `Reports the localQueue's total resource reservation within all the flavors`, }, []string{"name", "namespace", "flavor", "resource"}, ) LocalQueueResourceUsage = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "local_queue_resource_usage", Help: `Reports the localQueue's total resource usage within all the flavors`, }, []string{"name", "namespace", "flavor", "resource"}, ) ClusterQueueResourceNominalQuota = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_nominal_quota", Help: `Reports the cluster_queue's resource nominal quota within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueResourceBorrowingLimit = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_borrowing_limit", Help: `Reports the cluster_queue's resource borrowing limit within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueResourceLendingLimit = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: 
constants.KueueName, Name: "cluster_queue_lending_limit", Help: `Reports the cluster_queue's resource lending limit within all the flavors`, }, []string{"cohort", "cluster_queue", "flavor", "resource"}, ) ClusterQueueWeightedShare = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: constants.KueueName, Name: "cluster_queue_weighted_share", Help: `Reports a value representing the maximum of the ratios of usage above nominal quota to the lendable resources in the cohort, among all the resources provided by the ClusterQueue, and divided by the weight. If zero, it means that the usage of the ClusterQueue is below the nominal quota. If the ClusterQueue has a weight of zero, this will return 9223372036854775807, the maximum possible share value.`, }, []string{"cluster_queue"}, ) )
View Source
var (
ConditionStatusValues = []metav1.ConditionStatus{metav1.ConditionTrue, metav1.ConditionFalse, metav1.ConditionUnknown}
)
Functions ¶
func AdmissionAttempt ¶
func AdmissionAttempt(result AdmissionResult, duration time.Duration)
func AdmissionChecksWaitTime ¶ added in v0.7.0
func AdmissionChecksWaitTime(cqName kueue.ClusterQueueReference, waitTime time.Duration)
func AdmittedWorkload ¶
func AdmittedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
func ClearCacheMetrics ¶
func ClearCacheMetrics(cqName string)
func ClearClusterQueueMetrics ¶ added in v0.8.1
func ClearClusterQueueMetrics(cqName string)
func ClearClusterQueueResourceMetrics ¶ added in v0.5.0
func ClearClusterQueueResourceMetrics(cqName string)
func ClearClusterQueueResourceQuotas ¶ added in v0.5.0
func ClearClusterQueueResourceQuotas(cqName, flavor, resource string)
func ClearClusterQueueResourceReservations ¶ added in v0.5.0
func ClearClusterQueueResourceReservations(cqName, flavor, resource string)
func ClearClusterQueueResourceUsage ¶ added in v0.5.0
func ClearClusterQueueResourceUsage(cqName, flavor, resource string)
func ClearLocalQueueCacheMetrics ¶ added in v0.10.0
func ClearLocalQueueCacheMetrics(lq LocalQueueReference)
func ClearLocalQueueMetrics ¶ added in v0.10.0
func ClearLocalQueueMetrics(lq LocalQueueReference)
func ClearLocalQueueResourceMetrics ¶ added in v0.10.0
func ClearLocalQueueResourceMetrics(lq LocalQueueReference)
func LocalQueueAdmissionChecksWaitTime ¶ added in v0.10.0
func LocalQueueAdmissionChecksWaitTime(lq LocalQueueReference, waitTime time.Duration)
func LocalQueueAdmittedWorkload ¶ added in v0.10.0
func LocalQueueAdmittedWorkload(lq LocalQueueReference, waitTime time.Duration)
func LocalQueueQuotaReservedWorkload ¶ added in v0.10.0
func LocalQueueQuotaReservedWorkload(lq LocalQueueReference, waitTime time.Duration)
func QuotaReservedWorkload ¶ added in v0.7.0
func QuotaReservedWorkload(cqName kueue.ClusterQueueReference, waitTime time.Duration)
func Register ¶
func Register()
func RegisterLQMetrics ¶ added in v0.10.0
func RegisterLQMetrics()
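func ReportClusterQueueQuotas ¶ added in v0.5.0
func ReportClusterQueueQuotas(cohort, queue, flavor, resource string, nominal, borrowing, lending float64)
func ReportClusterQueueResourceReservations ¶ added in v0.5.0
func ReportClusterQueueResourceReservations(cohort, queue, flavor, resource string, usage float64)
func ReportClusterQueueResourceUsage ¶ added in v0.5.0
func ReportClusterQueueResourceUsage(cohort, queue, flavor, resource string, usage float64)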
func ReportClusterQueueStatus ¶
func ReportClusterQueueStatus(cqName string, cqStatus ClusterQueueStatus)
func ReportClusterQueueWeightedShare ¶ added in v0.7.0
func ReportClusterQueueWeightedShare(cq string, weightedShare int64)
func ReportEvictedWorkloads ¶ added in v0.7.0
func ReportEvictedWorkloads(cqName, reason string)
func ReportLocalQueueEvictedWorkloads ¶ added in v0.10.0
func ReportLocalQueueEvictedWorkloads(lq LocalQueueReference, reason string)
func ReportLocalQueuePendingWorkloads ¶ added in v0.10.0
func ReportLocalQueuePendingWorkloads(lq LocalQueueReference, active, inadmissible int)
func ReportLocalQueueResourceReservations ¶ added in v0.10.0
func ReportLocalQueueResourceReservations(lq LocalQueueReference, flavor, resource string, usage float64)
func ReportLocalQueueResourceUsage ¶ added in v0.10.0
func ReportLocalQueueResourceUsage(lq LocalQueueReference, flavor, resource string, usage float64)
func ReportLocalQueueStatus ¶ added in v0.10.0
func ReportLocalQueueStatus(lq LocalQueueReference, conditionStatus metav1.ConditionStatus)
func ReportPendingWorkloads ¶
func ReportPendingWorkloads(cqName string, active, inadmissible int)
func ReportPreemption ¶ added in v0.8.0
func ReportPreemption(preemptingCqName, preemptingReason, targetCqName string)
Types ¶
type AdmissionResult ¶
type AdmissionResult string
type ClusterQueueStatus ¶
type ClusterQueueStatus string
type LocalQueueReference ¶ added in v0.10.0
func LQRefFromLocalQueueKey ¶ added in v0.10.0
func LQRefFromLocalQueueKey(lqKey string) LocalQueueReference
func LQRefFromWorkload ¶ added in v0.10.0
func LQRefFromWorkload(wl *kueue.Workload) LocalQueueReference
Click to show internal directories.
Click to hide internal directories.