metrics

package
v0.15.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 29, 2024 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Index

Constants

View Source
// MetricPrefix is the prefix prepended to the metric names of the descriptors
// in this package that are built as MetricPrefix+"<name>".
const MetricPrefix = "armada_"

Variables

View Source
// ClusterAvailableCapacityDesc describes the MetricPrefix-prefixed
// cluster_available_capacity metric, whose help text is "Cluster capacity
// available for Armada jobs". Its variable labels are cluster, pool,
// resourceType and nodeType.
var ClusterAvailableCapacityDesc = prometheus.NewDesc(
	MetricPrefix+"cluster_available_capacity",
	"Cluster capacity available for Armada jobs",
	[]string{"cluster", "pool", "resourceType", "nodeType"},
	nil,
)
View Source
// ClusterCapacityDesc describes the MetricPrefix-prefixed cluster_capacity
// metric, whose help text is "Cluster capacity". Its variable labels are
// cluster, pool, resourceType and nodeType.
var ClusterCapacityDesc = prometheus.NewDesc(
	MetricPrefix+"cluster_capacity",
	"Cluster capacity",
	[]string{"cluster", "pool", "resourceType", "nodeType"},
	nil,
)
View Source
// ClusterCordonedStatusDesc describes the MetricPrefix-prefixed
// cluster_cordoned_status metric, whose help text is "Cluster cordoned
// status". Its variable labels are cluster, reason and setByUser.
var ClusterCordonedStatusDesc = prometheus.NewDesc(
	MetricPrefix+"cluster_cordoned_status",
	"Cluster cordoned status",
	[]string{"cluster", "reason", "setByUser"},
	nil,
)
View Source
// CountQueueResourcesDesc describes the MetricPrefix-prefixed
// queue_resource_queued_count metric, whose help text is "Count of queued
// jobs requiring resource". Its variable labels are pool, priorityClass,
// queueName, queue and resourceType.
var CountQueueResourcesDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_queued_count",
	"Count of queued jobs requiring resource",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// JobRunDurationDesc describes the MetricPrefix-prefixed job_run_time_seconds
// metric, whose help text is "Run time for Armada jobs". Its variable labels
// are pool, priorityClass, queueName and queue.
var JobRunDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_run_time_seconds",
	"Run time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// MaxJobRunDurationDesc describes the MetricPrefix-prefixed
// job_run_time_seconds_max metric, whose help text is "Max run time for
// Armada jobs". Its variable labels are pool, priorityClass, queueName and
// queue.
var MaxJobRunDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_run_time_seconds_max",
	"Max run time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// MaxQueueAllocatedDesc describes the MetricPrefix-prefixed
// queue_resource_allocated_max metric, whose help text is "Max resource
// allocated by a running job". Its variable labels are pool, priorityClass,
// queueName, queue and resourceType.
var MaxQueueAllocatedDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_allocated_max",
	"Max resource allocated by a running job",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// MaxQueueDurationDesc describes the MetricPrefix-prefixed
// job_queued_seconds_max metric, whose help text is "Max queue time for
// Armada jobs". Its variable labels are pool, priorityClass, queueName and
// queue.
var MaxQueueDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_queued_seconds_max",
	"Max queue time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// MaxQueueResourcesDesc describes the MetricPrefix-prefixed
// queue_resource_queued_max metric, whose help text is "Max resource required
// by queued job". Its variable labels are pool, priorityClass, queueName,
// queue and resourceType.
var MaxQueueResourcesDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_queued_max",
	"Max resource required by queued job",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// MedianJobRunDurationDesc describes the MetricPrefix-prefixed
// job_run_time_seconds_median metric, whose help text is "Median run time for
// Armada jobs". Its variable labels are pool, priorityClass, queueName and
// queue.
var MedianJobRunDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_run_time_seconds_median",
	"Median run time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// MedianQueueAllocatedDesc describes the MetricPrefix-prefixed
// queue_resource_allocated_median metric, whose help text is "Median resource
// allocated by a running job". Its variable labels are pool, priorityClass,
// queueName, queue and resourceType.
var MedianQueueAllocatedDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_allocated_median",
	"Median resource allocated by a running job",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// MedianQueueDurationDesc describes the MetricPrefix-prefixed
// job_queued_seconds_median metric, whose help text is "Median queue time for
// Armada jobs". Its variable labels are pool, priorityClass, queueName and
// queue.
var MedianQueueDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_queued_seconds_median",
	"Median queue time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// MedianQueueResourcesDesc describes the MetricPrefix-prefixed
// queue_resource_queued_median metric, whose help text is "Median resource
// required by queued jobs". Its variable labels are pool, priorityClass,
// queueName, queue and resourceType.
var MedianQueueResourcesDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_queued_median",
	"Median resource required by queued jobs",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// MinJobRunDurationDesc describes the MetricPrefix-prefixed
// job_run_time_seconds_min metric, whose help text is "Min run time for
// Armada jobs". Its variable labels are pool, priorityClass, queueName and
// queue.
var MinJobRunDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_run_time_seconds_min",
	"Min run time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// MinQueueAllocatedDesc describes the MetricPrefix-prefixed
// queue_resource_allocated_min metric, whose help text is "Min resource
// allocated by a running job". Its variable labels are pool, priorityClass,
// queueName, queue and resourceType.
var MinQueueAllocatedDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_allocated_min",
	"Min resource allocated by a running job",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// MinQueueDurationDesc describes the MetricPrefix-prefixed
// job_queued_seconds_min metric, whose help text is "Min queue time for
// Armada jobs". Its variable labels are pool, priorityClass, queueName and
// queue.
var MinQueueDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_queued_seconds_min",
	"Min queue time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// MinQueueResourcesDesc describes the MetricPrefix-prefixed
// queue_resource_queued_min metric, whose help text is "Min resource required
// by queued job". Its variable labels are pool, priorityClass, queueName,
// queue and resourceType.
var MinQueueResourcesDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_queued_min",
	"Min resource required by queued job",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// QueueAllocatedDesc describes the MetricPrefix-prefixed
// queue_resource_allocated metric, whose help text is "Resource allocated to
// running jobs of a queue". Its variable labels are cluster, pool,
// priorityClass, queueName, queue, resourceType and nodeType.
var QueueAllocatedDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_allocated",
	"Resource allocated to running jobs of a queue",
	[]string{"cluster", "pool", "priorityClass", "queueName", "queue", "resourceType", "nodeType"},
	nil,
)
View Source
// QueueDistinctSchedulingKeysDesc describes the MetricPrefix-prefixed
// queue_distinct_scheduling_keys metric, whose help text is "Number of
// distinct scheduling keys requested by a queue". Its variable labels are
// queueName and queue.
// NOTE(review): both labels presumably carry the same queue name — confirm.
var QueueDistinctSchedulingKeysDesc = prometheus.NewDesc(
	MetricPrefix+"queue_distinct_scheduling_keys",
	"Number of distinct scheduling keys requested by a queue",
	[]string{"queueName", "queue"},
	nil,
)
View Source
// QueueDurationDesc describes the MetricPrefix-prefixed job_queued_seconds
// metric, whose help text is "Queued time for Armada jobs". Its variable
// labels are pool, priorityClass, queueName and queue.
var QueueDurationDesc = prometheus.NewDesc(
	MetricPrefix+"job_queued_seconds",
	"Queued time for Armada jobs",
	[]string{"pool", "priorityClass", "queueName", "queue"},
	nil,
)
View Source
// QueueLabelDesc is a descriptor built from the package-level
// queueLabelMetricName, queueLabelMetricDescription and
// queueLabelDefaultLabels values, which are declared elsewhere in the package.
// NOTE(review): the metric name, help text and label set are not visible
// here — check those declarations before relying on them.
var QueueLabelDesc = prometheus.NewDesc(
	queueLabelMetricName,
	queueLabelMetricDescription,
	queueLabelDefaultLabels,
	nil,
)

QueueLabelDesc is declared so that it can be added to AllDescs, which makes Describe() work properly.

The actual descriptor for this metric is generated dynamically, because its labels are dynamic.
View Source
// QueueLeasedPodCountDesc describes the MetricPrefix-prefixed
// queue_leased_pod_count metric, whose help text is "Number of leased pods".
// Its variable labels are cluster, pool, queueName, queue, phase and nodeType.
var QueueLeasedPodCountDesc = prometheus.NewDesc(
	MetricPrefix+"queue_leased_pod_count",
	"Number of leased pods",
	[]string{"cluster", "pool", "queueName", "queue", "phase", "nodeType"},
	nil,
)
View Source
// QueuePriorityDesc describes the MetricPrefix-prefixed queue_priority metric,
// whose help text is "Queue priority factor". Its variable labels are
// queueName and queue.
// NOTE(review): both labels presumably carry the same queue name — confirm.
var QueuePriorityDesc = prometheus.NewDesc(
	MetricPrefix+"queue_priority",
	"Queue priority factor",
	[]string{"queueName", "queue"},
	nil,
)
View Source
// QueueResourcesDesc describes the MetricPrefix-prefixed queue_resource_queued
// metric, whose help text is "Resource required by queued jobs". Its variable
// labels are pool, priorityClass, queueName, queue and resourceType.
var QueueResourcesDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_queued",
	"Resource required by queued jobs",
	[]string{"pool", "priorityClass", "queueName", "queue", "resourceType"},
	nil,
)
View Source
// QueueSizeDesc describes the MetricPrefix-prefixed queue_size metric, whose
// help text is "Number of jobs in a queue". Its variable labels are queueName
// and queue.
// NOTE(review): both labels presumably carry the same queue name — confirm.
var QueueSizeDesc = prometheus.NewDesc(
	MetricPrefix+"queue_size",
	"Number of jobs in a queue",
	[]string{"queueName", "queue"},
	nil,
)
View Source
// QueueUsedDesc describes the MetricPrefix-prefixed queue_resource_used
// metric, whose help text is "Resource actually being used by running jobs of
// a queue". Its variable labels are cluster, pool, queueName, queue,
// resourceType and nodeType.
var QueueUsedDesc = prometheus.NewDesc(
	MetricPrefix+"queue_resource_used",
	"Resource actually being used by running jobs of a queue",
	[]string{"cluster", "pool", "queueName", "queue", "resourceType", "nodeType"},
	nil,
)

Functions

func CollectQueueMetrics

func CollectQueueMetrics(queueCounts map[string]int, queueDistinctSchedulingKeyCounts map[string]int, metricsProvider QueueMetricProvider) []prometheus.Metric

func Describe

func Describe(out chan<- *prometheus.Desc)

func NewClusterAvailableCapacity added in v0.3.66

func NewClusterAvailableCapacity(value float64, cluster string, pool string, resource string, nodeType string) prometheus.Metric

func NewClusterCordonedStatus added in v0.15.2

func NewClusterCordonedStatus(value float64, cluster string, reason string, setByUser string) prometheus.Metric

func NewClusterTotalCapacity added in v0.3.66

func NewClusterTotalCapacity(value float64, cluster string, pool string, resource string, nodeType string) prometheus.Metric

func NewCountQueueResources

func NewCountQueueResources(value uint64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewJobRunRunDuration

func NewJobRunRunDuration(count uint64, sum float64, buckets map[float64]uint64, pool string, priorityClass string, queue string) prometheus.Metric

func NewMaxJobRunDuration

func NewMaxJobRunDuration(value float64, pool string, priorityClass string, queue string) prometheus.Metric

func NewMaxQueueAllocated

func NewMaxQueueAllocated(value float64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewMaxQueueDuration

func NewMaxQueueDuration(value float64, pool string, priorityClass string, queue string) prometheus.Metric

func NewMaxQueueResources

func NewMaxQueueResources(value float64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewMedianJobRunDuration

func NewMedianJobRunDuration(value float64, pool string, priorityClass string, queue string) prometheus.Metric

func NewMedianQueueAllocated

func NewMedianQueueAllocated(value float64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewMedianQueueDuration

func NewMedianQueueDuration(value float64, pool string, priorityClass string, queue string) prometheus.Metric

func NewMedianQueueResources

func NewMedianQueueResources(value float64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewMinJobRunDuration

func NewMinJobRunDuration(value float64, pool string, priorityClass string, queue string) prometheus.Metric

func NewMinQueueAllocated

func NewMinQueueAllocated(value float64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewMinQueueDuration

func NewMinQueueDuration(value float64, pool string, priorityClass string, queue string) prometheus.Metric

func NewMinQueueResources

func NewMinQueueResources(value float64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewQueueAllocated added in v0.3.66

func NewQueueAllocated(value float64, queue string, cluster string, pool string, priorityClass string, resource string, nodeType string) prometheus.Metric

func NewQueueDistinctSchedulingKeyMetric added in v0.4.52

func NewQueueDistinctSchedulingKeyMetric(value int, queue string) prometheus.Metric

func NewQueueDuration

func NewQueueDuration(count uint64, sum float64, buckets map[float64]uint64, pool string, priorityClass string, queue string) prometheus.Metric

func NewQueueLabelsMetric added in v0.10.0

func NewQueueLabelsMetric(queue string, labels map[string]string) prometheus.Metric

func NewQueueLeasedPodCount added in v0.3.66

func NewQueueLeasedPodCount(value float64, cluster string, pool string, queue string, phase string, nodeType string) prometheus.Metric

func NewQueuePriorityMetric added in v0.8.7

func NewQueuePriorityMetric(value float64, queue string) prometheus.Metric

func NewQueueResources

func NewQueueResources(value float64, pool string, priorityClass string, queue string, resource string) prometheus.Metric

func NewQueueSizeMetric

func NewQueueSizeMetric(value int, queue string) prometheus.Metric

func NewQueueUsed added in v0.3.66

func NewQueueUsed(value float64, queue string, cluster string, pool string, resource string, nodeType string) prometheus.Metric

Types

type FloatMetrics

type FloatMetrics struct {
	// contains filtered or unexported fields
}

func (*FloatMetrics) GetBuckets

func (d *FloatMetrics) GetBuckets() map[float64]uint64

func (*FloatMetrics) GetCount

func (d *FloatMetrics) GetCount() uint64

func (*FloatMetrics) GetMax

func (d *FloatMetrics) GetMax() float64

func (*FloatMetrics) GetMedian

func (d *FloatMetrics) GetMedian() float64

func (*FloatMetrics) GetMin

func (d *FloatMetrics) GetMin() float64

func (*FloatMetrics) GetSum

func (d *FloatMetrics) GetSum() float64

type FloatMetricsRecorder

type FloatMetricsRecorder struct {
	// contains filtered or unexported fields
}

func NewDefaultJobDurationMetricsRecorder

func NewDefaultJobDurationMetricsRecorder() *FloatMetricsRecorder

func NewFloatMetricsRecorder

func NewFloatMetricsRecorder(buckets ...float64) *FloatMetricsRecorder

func (*FloatMetricsRecorder) GetMetrics

func (d *FloatMetricsRecorder) GetMetrics() *FloatMetrics

func (*FloatMetricsRecorder) Record

func (d *FloatMetricsRecorder) Record(value float64)

type HttpMetricsProvider added in v0.3.91

type HttpMetricsProvider struct {
	// contains filtered or unexported fields
}

HttpMetricsProvider is a metrics provider that scrapes metrics from a URL.

func NewHttpMetricsProvider added in v0.3.91

func NewHttpMetricsProvider(url string, client *http.Client) *HttpMetricsProvider

func (*HttpMetricsProvider) Collect added in v0.3.91

func (srv *HttpMetricsProvider) Collect(ctx context.Context, _ *logrus.Entry) (map[string]float64, error)

type JobMetricsRecorder

type JobMetricsRecorder struct {
	// contains filtered or unexported fields
}

func NewJobMetricsRecorder

func NewJobMetricsRecorder() *JobMetricsRecorder

func (*JobMetricsRecorder) Metrics

func (r *JobMetricsRecorder) Metrics() []*QueueMetrics

func (*JobMetricsRecorder) RecordJobRuntime

func (r *JobMetricsRecorder) RecordJobRuntime(pool string, priorityClass string, jobRuntime time.Duration)

func (*JobMetricsRecorder) RecordResources

func (r *JobMetricsRecorder) RecordResources(pool string, priorityClass string, resources armadaresource.ComputeResourcesFloat)

type ManualMetricsProvider added in v0.3.91

type ManualMetricsProvider struct {
	// contains filtered or unexported fields
}

func (*ManualMetricsProvider) Collect added in v0.3.91

func (srv *ManualMetricsProvider) Collect(_ context.Context, _ *logrus.Entry) (map[string]float64, error)

func (*ManualMetricsProvider) WithCollectionDelay added in v0.3.91

func (srv *ManualMetricsProvider) WithCollectionDelay(d time.Duration) *ManualMetricsProvider

func (*ManualMetricsProvider) WithMetrics added in v0.3.91

func (srv *ManualMetricsProvider) WithMetrics(metrics map[string]float64) *ManualMetricsProvider

type MetricsProvider added in v0.3.91

// MetricsProvider is the interface for sources that collect metrics and
// return them as a map[string]float64.
type MetricsProvider interface {
	// Collect takes a context and a logrus entry and returns the collected
	// values, or an error.
	// NOTE(review): map keys are presumably metric names — confirm against
	// the implementations.
	Collect(context.Context, *logrus.Entry) (map[string]float64, error)
}

type QueueMetricProvider

// QueueMetricProvider supplies per-queue metrics and the list of queues.
type QueueMetricProvider interface {
	// GetQueuedJobMetrics returns a slice of QueueMetrics for the queue
	// identified by queueName.
	// NOTE(review): presumably covers that queue's queued jobs — confirm.
	GetQueuedJobMetrics(queueName string) []*QueueMetrics
	// GetRunningJobMetrics returns a slice of QueueMetrics for the queue
	// identified by queueName.
	// NOTE(review): presumably covers that queue's running jobs — confirm.
	GetRunningJobMetrics(queueName string) []*QueueMetrics
	// GetAllQueues returns a slice of api.Queue pointers.
	GetAllQueues() []*api.Queue
}

type QueueMetrics

// QueueMetrics groups resource and duration metrics under a pool and
// priority class.
type QueueMetrics struct {
	// Pool is the pool name.
	// NOTE(review): presumably the pool these metrics were recorded for — confirm.
	Pool string
	// PriorityClass is the priority class name.
	// NOTE(review): presumably the class these metrics were recorded for — confirm.
	PriorityClass string
	// Resources holds a FloatMetrics value per string key.
	Resources ResourceMetrics
	// Durations holds the duration metrics as a FloatMetrics pointer.
	// NOTE(review): unit not visible here — confirm.
	Durations *FloatMetrics
}

type QueueMetricsRecorder

type QueueMetricsRecorder struct {
	Pool          string
	PriorityClass string
	// contains filtered or unexported fields
}

type ResourceMetrics

// ResourceMetrics maps a string key to the FloatMetrics recorded under it.
// NOTE(review): keys are presumably resource type names — confirm.
type ResourceMetrics map[string]*FloatMetrics

type ResourceMetricsRecorder

type ResourceMetricsRecorder struct {
	// contains filtered or unexported fields
}

func NewResourceMetricsRecorder

func NewResourceMetricsRecorder() *ResourceMetricsRecorder

func (*ResourceMetricsRecorder) GetMetrics

func (d *ResourceMetricsRecorder) GetMetrics() ResourceMetrics

func (*ResourceMetricsRecorder) Record

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL