Documentation ¶
Index ¶
- type AggregateType
- type ApplicationConfiguration
- type ClientConfiguration
- type CustomUsageMetric
- type CustomUsageMetrics
- type EtcdClusterHealthMonitoringConfiguration
- type EtcdConfiguration
- type ExecutorConfiguration
- type IngressConfiguration
- type KubernetesConfiguration
- type MetricConfiguration
- type PodDefaults
- type StateChecksConfiguration
- type TaskConfiguration
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AggregateType ¶ added in v0.3.47
// AggregateType is a string-backed enumeration; it is the type of
// CustomUsageMetric.AggregateType.
type AggregateType string

// Known AggregateType values.
//
// Both constants carry an explicit AggregateType. A constant spec that has its
// own value but no type is an untyped constant, so writing `Mean = "Mean"`
// would make Mean an untyped string constant instead of an AggregateType.
const (
	Sum  AggregateType = "Sum"
	Mean AggregateType = "Mean"
)
type ApplicationConfiguration ¶
// ApplicationConfiguration holds the executor's application-level settings;
// it is the type of ExecutorConfiguration.Application.
type ApplicationConfiguration struct {
	ClusterId, Pool string

	// NOTE(review): by their names these presumably cap the number of
	// concurrent submit, update and delete operations — confirm against callers.
	SubmitConcurrencyLimit, UpdateConcurrencyLimit, DeleteConcurrencyLimit int

	JobLeaseRequestTimeout time.Duration

	// MaxLeasedJobs caps how many jobs the executor should hold in the Leased
	// state at any one time, i.e. jobs that have not been submitted to Kubernetes.
	// It is mainly used to work out how many new jobs to request from the scheduler.
	MaxLeasedJobs int
}
type ClientConfiguration ¶
// ClientConfiguration holds client settings for the executor; it is the type
// of ExecutorConfiguration.Client.
type ClientConfiguration struct {
	// MaxMessageSizeBytes is a message size limit expressed in bytes.
	// NOTE(review): which client/transport enforces it is not visible here — confirm.
	MaxMessageSizeBytes int
}
type CustomUsageMetric ¶ added in v0.3.47
type CustomUsageMetric struct { Name string PrometheusMetricName string PrometheusPodNameLabel string AggregateType AggregateType Multiplier float64 }
type CustomUsageMetrics ¶ added in v0.3.47
type CustomUsageMetrics struct { Namespace string EndpointSelectorLabelName string EndpointSelectorLabelValue string Metrics []CustomUsageMetric }
type EtcdClusterHealthMonitoringConfiguration ¶ added in v0.3.91
// EtcdClusterHealthMonitoringConfiguration contains settings associated with
// monitoring the health of an etcd cluster.
type EtcdClusterHealthMonitoringConfiguration struct {
	// Name of the etcd cluster. It appears in the metrics exported by Armada.
	Name string `validate:"gt=0"`

	// MetricUrls lists the metric URLs of the etcd replicas that make up this cluster.
	MetricUrls []string `validate:"gt=0"`

	// The cluster is unhealthy if, for any of its replicas,
	//   etcd_mvcc_db_total_size_in_use_in_bytes / etcd_server_quota_backend_bytes
	// exceeds FractionOfStorageInUseLimit.
	FractionOfStorageInUseLimit float64 `validate:"gt=0,lte=1"`

	// The cluster is unhealthy if, for any of its replicas,
	//   etcd_mvcc_db_total_size_in_bytes / etcd_server_quota_backend_bytes
	// exceeds FractionOfStorageLimit.
	FractionOfStorageLimit float64 `validate:"gt=0,lte=1"`

	// ReplicaTimeout is how long the executor may fail to collect metrics from
	// a replica before that replica is considered unavailable.
	ReplicaTimeout time.Duration `validate:"gt=0"`

	// The cluster is unhealthy when fewer than MinimumReplicasAvailable
	// replicas are available.
	MinimumReplicasAvailable int `validate:"gt=0"`

	// ScrapeInterval is the interval at which metrics are scraped from each etcd replica.
	ScrapeInterval time.Duration `validate:"gt=0"`

	// The time taken to scrape metrics is exported as a Prometheus histogram
	// with exponential buckets; these three settings control the size and
	// number of those buckets.
	ScrapeDelayBucketsStart  float64 `validate:"gt=0"`
	ScrapeDelayBucketsFactor float64 `validate:"gt=1"`
	ScrapeDelayBucketsCount  int     `validate:"gt=0"`
}
type EtcdConfiguration ¶
type EtcdConfiguration struct { // Etcd health monitoring configuration. // If provided, the executor monitors etcd health and stops requesting jobs while any etcd cluster is unhealthy. EtcdClustersHealthMonitoring []EtcdClusterHealthMonitoringConfiguration }
type ExecutorConfiguration ¶
type ExecutorConfiguration struct { HttpPort uint16 // If non-nil, net/http/pprof endpoints are exposed on localhost on this port. PprofPort *uint16 Metric MetricConfiguration Application ApplicationConfiguration ExecutorApiConnection client.ApiConnectionDetails Client ClientConfiguration GRPC keepalive.ClientParameters Kubernetes KubernetesConfiguration Task TaskConfiguration }
type IngressConfiguration ¶
type KubernetesConfiguration ¶
type KubernetesConfiguration struct { // Wether to impersonate users when creating Kubernetes objects. ImpersonateUsers bool // Max number of Kubernetes API queries per second // and max number of concurrent Kubernetes API queries. QPS float32 Burst int Etcd EtcdConfiguration NodePoolLabel string NodeTypeLabel string NodeIdLabel string TrackedNodeLabels []string AvoidNodeLabelsOnRetry []string ToleratedTaints []string MinimumPodAge time.Duration StuckTerminatingPodExpiry time.Duration FailedPodExpiry time.Duration MaxTerminatedPods int PodDefaults *PodDefaults StateChecks StateChecksConfiguration PendingPodChecks *podchecks.Checks FatalPodSubmissionErrors []string // Minimum amount of resources marked as allocated to non-Armada pods on each node. // I.e., if the total resources allocated to non-Armada pods on some node drops below this value, // the executor adds a fictional allocation to make up the difference, such that the total is at least this. // Hence, specifying can ensure that, e.g., if a deamonset pod restarts, those resources are not considered for scheduling. MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode armadaresource.ComputeResources // When adding a fictional allocation to ensure resources allocated to non-Armada pods is at least // MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode, those resources are marked allocated at this priority. MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNodePriority int32 PodKillTimeout time.Duration }
type MetricConfiguration ¶
type MetricConfiguration struct { Port uint16 ExposeQueueUsageMetrics bool CustomUsageMetrics []CustomUsageMetrics }
type PodDefaults ¶
type PodDefaults struct { SchedulerName string Ingress *IngressConfiguration }
type StateChecksConfiguration ¶ added in v0.3.77
// StateChecksConfiguration holds the deadlines after which a pod is considered
// missing; it is the type of KubernetesConfiguration.StateChecks.
type StateChecksConfiguration struct {
	// DeadlineForSubmittedPodConsideredMissing is how long to wait, after a
	// pod has been submitted to Kubernetes, for it to appear in the Kubernetes
	// informer state. A pod that has not appeared by then is considered missing.
	DeadlineForSubmittedPodConsideredMissing time.Duration

	// DeadlineForActivePodConsideredMissing applies to runs the executor
	// considers Active, which it does once it has seen the pod appear on the
	// cluster. If such a run ends up with no pod backing it, this is how long
	// to wait before the pod is considered missing.
	//
	// The most likely cause is a bug in the executor's processing of the
	// Kubernetes state, but without this deadline runs could stay stuck as
	// Active indefinitely with no backing pod.
	DeadlineForActivePodConsideredMissing time.Duration
}
type TaskConfiguration ¶
// TaskConfiguration is a set of time.Duration intervals; it is the type of
// ExecutorConfiguration.Task.
// NOTE(review): by their names each field presumably sets how often one of the
// executor's periodic tasks runs — confirm against the task scheduling code.
type TaskConfiguration struct {
	UtilisationReportingInterval          time.Duration
	MissingJobEventReconciliationInterval time.Duration
	JobLeaseRenewalInterval               time.Duration
	AllocateSpareClusterCapacityInterval  time.Duration
	PodIssueHandlingInterval              time.Duration
	PodDeletionInterval                   time.Duration
	QueueUsageDataRefreshInterval         time.Duration
	UtilisationEventProcessingInterval    time.Duration
	UtilisationEventReportingInterval     time.Duration
	ResourceCleanupInterval               time.Duration
	StateProcessorInterval                time.Duration
}
Click to show internal directories.
Click to hide internal directories.