configuration

package

v0.14.0 Latest Latest Go to latest Published: Oct 9, 2024 License: Apache-2.0 Imports: 7 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/armadaproject/armada

Links

Open Source Insights

Documentation ¶

Index ¶

type AggregateType
type ApplicationConfiguration
type ClientConfiguration
type CustomUsageMetric
type CustomUsageMetrics
type EtcdClusterHealthMonitoringConfiguration
type EtcdConfiguration
type ExecutorConfiguration
- func (c ExecutorConfiguration) Validate() error
type IngressConfiguration
type KubernetesConfiguration
type MetricConfiguration
type PodDefaults
type StateChecksConfiguration
type TaskConfiguration

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type AggregateType ¶ added in v0.3.47

// AggregateType names the aggregation selected for a custom usage metric.
// It is the type of the CustomUsageMetric.AggregateType field.
type AggregateType string

// Supported AggregateType values.
//
// Both constants are declared with an explicit AggregateType type. A constant
// spec of the form `Mean = "Mean"` (no type) yields an untyped string constant,
// not an AggregateType, so every entry in the group spells out its type.
const (
	Sum  AggregateType = "Sum"
	Mean AggregateType = "Mean"
)

type ApplicationConfiguration ¶

// ApplicationConfiguration is the type of the ExecutorConfiguration.Application field.
// It carries the cluster and pool identifiers, three concurrency limits
// (submit, update, delete), the job lease request timeout and MaxLeasedJobs.
// NOTE(review): the exact semantics of the concurrency limits and of the
// timeout are not visible here; confirm against the code that reads them.
type ApplicationConfiguration struct {
	ClusterId              string
	Pool                   string
	SubmitConcurrencyLimit int
	UpdateConcurrencyLimit int
	DeleteConcurrencyLimit int
	JobLeaseRequestTimeout time.Duration
	// MaxLeasedJobs is the maximum number of jobs the executor should have in Leased state at any one time (i.e. jobs not submitted to kubernetes).
	// It is largely used to calculate how many new jobs to request from the scheduler.
	MaxLeasedJobs int
}

type ClientConfiguration ¶

// ClientConfiguration is the type of the ExecutorConfiguration.Client field.
type ClientConfiguration struct {
	// NOTE(review): presumably the maximum message size, in bytes, used by the
	// executor's API client; confirm against the code that reads it.
	MaxMessageSizeBytes int
}

type CustomUsageMetric ¶ added in v0.3.47

// CustomUsageMetric is the element type of CustomUsageMetrics.Metrics.
// It pairs a Name with a Prometheus metric name, a Prometheus pod-name label,
// an AggregateType and a Multiplier.
// NOTE(review): the field names suggest the Prometheus metric is read per pod,
// aggregated according to AggregateType and scaled by Multiplier; confirm
// against the code that consumes this type.
type CustomUsageMetric struct {
	Name                   string
	PrometheusMetricName   string
	PrometheusPodNameLabel string
	AggregateType          AggregateType
	Multiplier             float64
}

type CustomUsageMetrics ¶ added in v0.3.47

// CustomUsageMetrics is the element type of MetricConfiguration.CustomUsageMetrics.
// It groups a list of CustomUsageMetric entries with a Namespace and an
// endpoint selector label name/value pair.
// NOTE(review): presumably the namespace and label select the endpoints the
// metrics are scraped from; confirm against the code that consumes this type.
type CustomUsageMetrics struct {
	Namespace                  string
	EndpointSelectorLabelName  string
	EndpointSelectorLabelValue string
	Metrics                    []CustomUsageMetric
}

type EtcdClusterHealthMonitoringConfiguration ¶ added in v0.3.91

// EtcdClusterHealthMonitoringConfiguration contains settings associated with
// monitoring the health of an etcd cluster.
// It is the element type of EtcdConfiguration.EtcdClustersHealthMonitoring.
// Every field carries a `validate` struct tag stating its accepted range.
type EtcdClusterHealthMonitoringConfiguration struct {
	// Etcd cluster name. Used in metrics exported by Armada.
	Name string `validate:"gt=0"`
	// Metric URLs of the etcd replicas making up this cluster.
	MetricUrls []string `validate:"gt=0"`
	// The cluster is considered unhealthy when for any replica in the cluster:
	// etcd_mvcc_db_total_size_in_use_in_bytes / etcd_server_quota_backend_bytes
	// > FractionOfStorageInUseLimit.
	FractionOfStorageInUseLimit float64 `validate:"gt=0,lte=1"`
	// The cluster is considered unhealthy when for any replica in the cluster:
	// etcd_mvcc_db_total_size_in_bytes / etcd_server_quota_backend_bytes
	// > FractionOfStorageLimit.
	FractionOfStorageLimit float64 `validate:"gt=0,lte=1"`
	// A replica is considered unavailable if the executor has failed to collect metrics from it for this amount of time.
	// The cluster is considered unhealthy if there are fewer than MinimumReplicasAvailable replicas available.
	ReplicaTimeout           time.Duration `validate:"gt=0"`
	MinimumReplicasAvailable int           `validate:"gt=0"`
	// Interval with which to scrape metrics from each etcd replica.
	ScrapeInterval time.Duration `validate:"gt=0"`
	// The time it takes to scrape metrics is exported as a prometheus histogram with exponential buckets.
	// These settings control the size and number of such buckets.
	ScrapeDelayBucketsStart  float64 `validate:"gt=0"`
	ScrapeDelayBucketsFactor float64 `validate:"gt=1"`
	ScrapeDelayBucketsCount  int     `validate:"gt=0"`
}

EtcdClusterHealthMonitoringConfiguration contains settings associated with monitoring the health of an etcd cluster.

type EtcdConfiguration ¶

// EtcdConfiguration is the type of the KubernetesConfiguration.Etcd field.
// It holds the health monitoring settings for zero or more etcd clusters.
type EtcdConfiguration struct {
	// Etcd health monitoring configuration.
	// If provided, the executor monitors etcd health and stops requesting jobs while any etcd cluster is unhealthy.
	EtcdClustersHealthMonitoring []EtcdClusterHealthMonitoringConfiguration
}

type ExecutorConfiguration ¶

// ExecutorConfiguration aggregates the other configuration types declared in
// this package (MetricConfiguration, ApplicationConfiguration,
// ClientConfiguration, KubernetesConfiguration and TaskConfiguration) together
// with an HTTP port, optional profiling settings, the executor API connection
// details and gRPC keepalive client parameters.
// It has a Validate method returning an error.
type ExecutorConfiguration struct {
	HttpPort uint16
	// If non-nil, net/http/pprof endpoints are exposed on localhost on this port.
	Profiling             *profilingconfig.ProfilingConfig
	Metric                MetricConfiguration
	Application           ApplicationConfiguration
	ExecutorApiConnection client.ApiConnectionDetails
	Client                ClientConfiguration
	GRPC                  keepalive.ClientParameters

	Kubernetes KubernetesConfiguration
	Task       TaskConfiguration
}

func (ExecutorConfiguration) Validate ¶ added in v0.12.1

func (c ExecutorConfiguration) Validate() error

type IngressConfiguration ¶

// IngressConfiguration is the type pointed to by the PodDefaults.Ingress field.
// It holds a hostname suffix, a certificate name suffix and a map of annotations.
// NOTE(review): presumably these are applied to ingresses created for pods;
// confirm against the code that consumes this type.
type IngressConfiguration struct {
	HostnameSuffix string
	CertNameSuffix string
	Annotations    map[string]string
}

type KubernetesConfiguration ¶

// KubernetesConfiguration is the type of the ExecutorConfiguration.Kubernetes field.
// It collects the Kubernetes-related settings: user impersonation, API rate
// limits, etcd health monitoring, node label names, tolerated taints, pod
// age/expiry durations, pod defaults, state checks, pending pod checks and
// the resources reserved for non-Armada pods.
// NOTE(review): fields without their own comment are described from their
// names only; confirm their semantics against the code that reads them.
type KubernetesConfiguration struct {
	// Whether to impersonate users when creating Kubernetes objects.
	ImpersonateUsers bool
	// Max number of Kubernetes API queries per second
	// and max number of concurrent Kubernetes API queries.
	QPS                       float32
	Burst                     int
	Etcd                      EtcdConfiguration
	NodePoolLabel             string
	NodeTypeLabel             string
	NodeIdLabel               string
	TrackedNodeLabels         []string
	AvoidNodeLabelsOnRetry    []string
	ToleratedTaints           []string
	MinimumPodAge             time.Duration
	StuckTerminatingPodExpiry time.Duration
	FailedPodExpiry           time.Duration
	MaxTerminatedPods         int
	PodDefaults               *PodDefaults
	StateChecks               StateChecksConfiguration
	PendingPodChecks          *podchecks.Checks
	FatalPodSubmissionErrors  []string
	// Minimum amount of resources marked as allocated to non-Armada pods on each node.
	// I.e., if the total resources allocated to non-Armada pods on some node drops below this value,
	// the executor adds a fictional allocation to make up the difference, such that the total is at least this.
	// Hence, specifying this can ensure that, e.g., if a daemonset pod restarts, those resources are not considered for scheduling.
	MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode armadaresource.ComputeResources
	// When adding a fictional allocation to ensure resources allocated to non-Armada pods is at least
	// MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode, those resources are marked allocated at this priority.
	MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNodePriority int32
	PodKillTimeout                                                time.Duration
}

type MetricConfiguration ¶

// MetricConfiguration is the type of the ExecutorConfiguration.Metric field.
// It holds a port, a flag controlling queue usage metrics and a list of
// CustomUsageMetrics groups.
// NOTE(review): presumably Port is the port metrics are served on; confirm
// against the code that reads it.
type MetricConfiguration struct {
	Port                    uint16
	ExposeQueueUsageMetrics bool
	CustomUsageMetrics      []CustomUsageMetrics
}

type PodDefaults ¶

// PodDefaults is the type pointed to by the KubernetesConfiguration.PodDefaults field.
// It holds a scheduler name and an optional (pointer) IngressConfiguration.
// NOTE(review): presumably these are defaults applied to pods the executor
// creates; confirm against the code that consumes this type.
type PodDefaults struct {
	SchedulerName string
	Ingress       *IngressConfiguration
}

type StateChecksConfiguration ¶ added in v0.3.77

// StateChecksConfiguration is the type of the KubernetesConfiguration.StateChecks field.
// It holds the two deadlines after which a pod is considered missing.
type StateChecksConfiguration struct {
	// Once a pod is submitted to kubernetes, this is how long we'll wait for it to appear in the kubernetes informer state.
	// If the pod hasn't appeared after this duration, it is considered missing.
	DeadlineForSubmittedPodConsideredMissing time.Duration
	// Once the executor has seen a pod appear on the cluster, it considers that run Active.
	// If we get into a state where there is no longer a pod backing that Active run, this is how long we'll wait before we consider the pod missing.
	// The most likely cause of this is actually a bug in the executor's processing of the kubernetes state.
	// However, without it we can have runs get indefinitely stuck as Active with no backing pod.
	DeadlineForActivePodConsideredMissing time.Duration
}

type TaskConfiguration ¶

// TaskConfiguration is the type of the ExecutorConfiguration.Task field.
// Every field is a time.Duration whose name ends in "Interval".
// NOTE(review): presumably each one is the period of the correspondingly
// named background task in the executor; confirm against the code that
// schedules those tasks.
type TaskConfiguration struct {
	UtilisationReportingInterval          time.Duration
	MissingJobEventReconciliationInterval time.Duration
	JobLeaseRenewalInterval               time.Duration
	AllocateSpareClusterCapacityInterval  time.Duration
	PodIssueHandlingInterval              time.Duration
	PodDeletionInterval                   time.Duration
	QueueUsageDataRefreshInterval         time.Duration
	UtilisationEventProcessingInterval    time.Duration
	UtilisationEventReportingInterval     time.Duration
	ResourceCleanupInterval               time.Duration
	StateProcessorInterval                time.Duration
}

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
podchecks

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL