configuration

package

Version: v0.3.40 (latest) | Published: Nov 22, 2022 | License: Apache-2.0 | Imports: 6 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/G-Research/armada

Documentation ¶

Index ¶

type ArmadaConfig
type DatabaseRetentionPolicy
type EventApiConfig
type EventRetentionPolicy
type EventsConfig
type JetstreamConfig
type LeaseSettings
type MetricsConfig
type NatsConfig
type NewSchedulerConfig
type PostgresConfig
type PreemptionConfig
type PriorityClass
type PulsarConfig
type QueueManagementConfig
type SchedulingConfig
- func (c *SchedulingConfig) GetResourceScarcity(pool string) map[string]float64

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type ArmadaConfig ¶

// ArmadaConfig groups the port, CORS, auth, gRPC, Redis, event, scheduling,
// retention, Pulsar, Postgres, and metrics settings into a single struct.
// Three separate Redis option sets are configured: Redis, EventsRedis, and
// EventsApiRedis.
// NOTE(review): presumably the root configuration object of the Armada
// server — confirm against the code that loads it.
type ArmadaConfig struct {
	Auth authconfig.AuthConfig

	// NOTE(review): MetricsPort here and Metrics.Port (MetricsConfig) both
	// exist — confirm which one is authoritative.
	GrpcPort    uint16
	HttpPort    uint16
	MetricsPort uint16

	CorsAllowedOrigins []string

	Grpc grpcconfig.GrpcConfig

	PriorityHalfTime      time.Duration
	CancelJobsBatchSize   int
	Redis                 redis.UniversalOptions
	Events                EventsConfig
	EventsNats            NatsConfig
	EventsJetstream       JetstreamConfig
	EventsRedis           redis.UniversalOptions
	EventsApiRedis        redis.UniversalOptions
	DefaultToLegacyEvents bool

	Scheduling        SchedulingConfig
	NewScheduler      NewSchedulerConfig
	QueueManagement   QueueManagementConfig
	DatabaseRetention DatabaseRetentionPolicy
	EventRetention    EventRetentionPolicy
	Pulsar            PulsarConfig
	Postgres          PostgresConfig // Used for Pulsar submit API deduplication
	EventApi          EventApiConfig
	Metrics           MetricsConfig
}

type DatabaseRetentionPolicy ¶ added in v0.2.10

// DatabaseRetentionPolicy holds a single setting, JobRetentionDuration.
// NOTE(review): presumably how long jobs are kept in the database before
// being cleaned up — confirm against the code that reads it.
type DatabaseRetentionPolicy struct {
	JobRetentionDuration time.Duration
}

type EventApiConfig ¶ added in v0.3.13

// EventApiConfig holds the settings of the event API: an on/off switch,
// query concurrency, jobset cache size, an update topic name, and its own
// Postgres settings.
// NOTE(review): units and semantics of QueryConcurrency and JobsetCacheSize
// are not visible here — presumably counts of concurrent queries and cached
// entries; confirm.
type EventApiConfig struct {
	Enabled          bool
	QueryConcurrency int
	JobsetCacheSize  int
	UpdateTopic      string
	Postgres         PostgresConfig
}

type EventRetentionPolicy ¶ added in v0.0.11

// EventRetentionPolicy pairs an enable flag with a retention duration.
// NOTE(review): presumably events expire after RetentionDuration only when
// ExpiryEnabled is true — confirm against the code that reads it.
type EventRetentionPolicy struct {
	ExpiryEnabled     bool
	RetentionDuration time.Duration
}

type EventsConfig ¶ added in v0.2.12

// EventsConfig configures the event processors: the queue groups they use
// and how events are batched (batch size, time between batches, timeout).
type EventsConfig struct {
	StoreQueue     string // Queue group for event storage processors
	JobStatusQueue string // Queue group for running job status processor

	ProcessorBatchSize             int           // Maximum event batch size
	ProcessorMaxTimeBetweenBatches time.Duration // Maximum time between batches
	ProcessorTimeout               time.Duration // Timeout for reporting event or stopping batcher before erroring out
}

type JetstreamConfig ¶ added in v0.2.11

// JetstreamConfig describes a Jetstream stream and how to connect to it:
// server list, stream name, replica count, subject, maximum age, connection
// timeout, and whether the stream is held in memory.
// NOTE(review): MaxAgeDays is presumably expressed in days, as the name
// suggests — confirm.
type JetstreamConfig struct {
	Servers     []string
	StreamName  string
	Replicas    int
	Subject     string
	MaxAgeDays  int
	ConnTimeout time.Duration
	InMemory    bool // Whether stream should be stored in memory (as opposed to on disk)
}

type LeaseSettings ¶ added in v0.1.0

// LeaseSettings holds two durations related to lease expiry.
// NOTE(review): presumably a lease expires ExpireAfter after it was last
// renewed and expiry is checked every ExpiryLoopInterval — confirm.
type LeaseSettings struct {
	ExpireAfter        time.Duration
	ExpiryLoopInterval time.Duration
}

type MetricsConfig ¶ added in v0.1.23

// MetricsConfig holds a port and a refresh interval for metrics.
// NOTE(review): ArmadaConfig also declares a separate MetricsPort field —
// confirm how the two relate.
type MetricsConfig struct {
	Port            uint16
	RefreshInterval time.Duration
}

type NatsConfig ¶ added in v0.1.6

// NatsConfig holds the connection settings for publishing to a stan (NATS
// Streaming) server: server list, cluster ID, subject, and publish timeout.
type NatsConfig struct {
	Servers   []string
	ClusterID string
	Subject   string
	Timeout   time.Duration // Timeout for receiving a reply back from the stan server for PublishAsync
}

type NewSchedulerConfig ¶ added in v0.3.32

// NewSchedulerConfig stores config for the new Pulsar-based scheduler.
// This scheduler will eventually replace the current scheduler.
type NewSchedulerConfig struct {
	Enabled bool
}

NewSchedulerConfig stores config for the new Pulsar-based scheduler. This scheduler will eventually replace the current scheduler.

type PostgresConfig ¶ added in v0.3.0

// PostgresConfig holds connection-pool limits and connection parameters for
// a Postgres database.
// NOTE(review): MaxOpenConns, MaxIdleConns, and ConnMaxLifetime presumably
// feed the identically named database/sql pool setters, and Connection is
// presumably a set of key/value connection-string parameters — confirm.
type PostgresConfig struct {
	MaxOpenConns    int
	MaxIdleConns    int
	ConnMaxLifetime time.Duration
	Connection      map[string]string
}

type PreemptionConfig ¶ added in v0.3.30

// PreemptionConfig controls priority-class validation, defaulting, and
// preemption, and defines the known priority classes.
//
// TODO: Remove. Move PriorityClasses and DefaultPriorityClass into SchedulingConfig.
type PreemptionConfig struct {
	// If true, Armada will:
	// 1. Validate that submitted pods specify no or a valid priority class.
	// 2. Assign a default priority class to submitted pods that do not specify a priority class.
	// 3. Assign jobs to executors that may preempt currently running jobs.
	Enabled bool
	// Map from priority class names to priority classes.
	// Must be consistent with Kubernetes priority classes.
	// I.e., priority classes defined here must be defined in all executor clusters and should map to the same priority.
	PriorityClasses map[string]PriorityClass
	// Priority class assigned to pods that do not specify one.
	// Must be an entry in PriorityClasses above.
	DefaultPriorityClass string
}

TODO: Remove. Move PriorityClasses and DefaultPriorityClass into SchedulingConfig.

type PriorityClass ¶ added in v0.3.37

// PriorityClass pairs a numeric priority with per-queue resource fraction
// limits for jobs of that priority.
type PriorityClass struct {
	// NOTE(review): presumably the same value as the corresponding Kubernetes
	// priority class — confirm.
	Priority int32
	// Max fraction of resources assigned to jobs of this priority or lower.
	// Must be non-increasing with higher priority.
	//
	// For example, the following are valid configurations
	// (priority: max fraction).
	// A:
	// - 2: 10%
	// - 1: 100%
	//
	// B:
	// - 9: 10%
	// - 5: 50%
	// - 3: 80%
	//
	// NOTE(review): map keys are presumably resource names (e.g. "cpu") — confirm.
	MaximalResourceFractionPerQueue map[string]float64
}

type PulsarConfig ¶ added in v0.3.0

// PulsarConfig holds the Pulsar client settings: connection and TLS options,
// authentication, topic and subscription names, compression, deduplication,
// event printing, message size limit, and polling timeouts.
type PulsarConfig struct {
	// Flag controlling if Pulsar is enabled or not.
	Enabled bool
	// Pulsar URL
	URL string
	// Path to the trusted TLS certificate file (must exist)
	TLSTrustCertsFilePath string
	// Whether the Pulsar client accepts untrusted TLS certificates from the broker
	TLSAllowInsecureConnection bool
	// Whether the Pulsar client validates that the hostname in the broker's TLS cert matches the actual hostname.
	TLSValidateHostname bool
	// Max number of connections to a single broker that will be kept in the pool. (Default: 1 connection)
	MaxConnectionsPerBroker int
	// Whether Pulsar authentication is enabled
	AuthenticationEnabled bool
	// Authentication type. For now only "JWT" auth is valid
	AuthenticationType string
	// Path to the JWT token (must exist). This must be set if AuthenticationType is "JWT"
	JwtTokenPath                string
	JobsetEventsTopic           string
	RedisFromPulsarSubscription string
	// Compression to use.  Valid values are "None", "LZ4", "Zlib", "Zstd".  Default is "None"
	CompressionType string
	// Compression Level to use.  Valid values are "Default", "Better", "Faster".  Default is "Default"
	CompressionLevel string
	// Used to construct an executorconfig.IngressConfiguration,
	// which is used when converting Armada-specific IngressConfig and ServiceConfig objects into k8s objects.
	HostnameSuffix string
	CertNameSuffix string
	Annotations    map[string]string
	// Settings for deduplication, which relies on a postgres server.
	DedupTable string
	// Log all pulsar events
	EventsPrinterSubscription string
	EventsPrinter             bool
	// Maximum allowed message size in bytes
	MaxAllowedMessageSize uint
	// Timeout when polling pulsar for messages
	ReceiveTimeout time.Duration
	// Backoff from polling when Pulsar returns an error
	BackoffTime time.Duration
}

type QueueManagementConfig ¶ added in v0.1.20

// QueueManagementConfig holds queue-level defaults: whether queues are
// auto-created, the default priority factor, and the default queued-jobs
// limit.
// NOTE(review): presumably the defaults apply to auto-created queues, and
// the meaning of a zero DefaultQueuedJobsLimit is not visible here — confirm.
type QueueManagementConfig struct {
	AutoCreateQueues       bool
	DefaultPriorityFactor  float64
	DefaultQueuedJobsLimit int
}

type SchedulingConfig ¶ added in v0.0.2

// SchedulingConfig holds the scheduler settings: preemption, per-call and
// per-queue resource limits, report retention, leases, default job limits
// and tolerations, resource scarcity weights, indexing hints, termination
// grace period bounds, and gang-scheduling annotations.
// Several fields apply only to the old or only to the new scheduler, as
// noted on the individual fields.
type SchedulingConfig struct {
	Preemption                                PreemptionConfig
	UseProbabilisticSchedulingForAllResources bool
	// Number of jobs to load from the database at a time.
	QueueLeaseBatchSize uint
	// Minimum resources to schedule per request from an executor.
	// Applies to the old scheduler.
	MinimumResourceToSchedule common.ComputeResourcesFloat
	// Maximum total size in bytes of all jobs returned in a single lease jobs call.
	// Applies to the old scheduler. But is not necessary since we now stream job leases.
	MaximumLeasePayloadSizeBytes int
	// Fraction of total resources across clusters that can be assigned in a single lease jobs call.
	// Applies to both the old and new scheduler.
	MaximalClusterFractionToSchedule map[string]float64
	// Fraction of resources that can be assigned to any single queue,
	// within a single lease jobs call.
	// Applies to both the old and new scheduler.
	MaximalResourceFractionToSchedulePerQueue map[string]float64
	// Fraction of resources that can be assigned to any single queue.
	// Applies to both the old and new scheduler.
	MaximalResourceFractionPerQueue map[string]float64
	// Max number of jobs to schedule per lease jobs call.
	MaximumJobsToSchedule uint
	// Probability of using the new scheduler.
	// Set to 0 to disable the new scheduler and to 1 to disable the old scheduler.
	ProbabilityOfUsingNewScheduler float64
	// The scheduler stores reports about scheduling decisions for each queue.
	// These can be queried by users. To limit memory usage, old reports are deleted
	// to keep the number of stored reports within this limit.
	MaxQueueReportsToStore int
	// The scheduler stores reports about scheduling decisions for each job.
	// These can be queried by users. To limit memory usage, old reports are deleted
	// to keep the number of stored reports within this limit.
	MaxJobReportsToStore int
	Lease                LeaseSettings
	DefaultJobLimits     common.ComputeResources
	// Set of tolerations added to all submitted pods.
	DefaultJobTolerations []v1.Toleration
	// Set of tolerations added to all submitted pods of a given priority class.
	DefaultJobTolerationsByPriorityClass map[string][]v1.Toleration
	// Maximum number of times a job is retried before considered failed.
	MaxRetries uint
	// Weights used when computing fair share.
	// Overrides dynamic scarcity calculation if provided.
	// Applies to both the new and old scheduler.
	ResourceScarcity map[string]float64
	// Applies only to the old scheduler.
	PoolResourceScarcity map[string]map[string]float64
	MaxPodSpecSizeBytes  uint
	MinJobResources      v1.ResourceList
	// Resources, e.g., "cpu", "memory", and "nvidia.com/gpu",
	// for which the scheduler creates indexes for efficient lookup.
	// Applies only to the new scheduler.
	IndexedResources []string
	// Node labels that the scheduler creates indexes for efficient lookup of.
	// Should include node labels frequently used for scheduling.
	// Since the scheduler can efficiently sort out nodes for which these labels
	// are not set correctly when looking for a node a pod can be scheduled on.
	//
	// If not set, no labels are indexed.
	//
	// Applies only to the new scheduler.
	IndexedNodeLabels []string
	// Taint keys that the scheduler creates indexes for efficient lookup of.
	// Should include taints frequently used for scheduling.
	// Since the scheduler can efficiently sort out nodes for which these taints
	// are not set correctly when looking for a node a pod can be scheduled on.
	//
	// If not set, all taints are indexed.
	//
	// Applies only to the new scheduler.
	IndexedTaints []string
	// Kubernetes pods may specify a termination grace period.
	// When Pods are cancelled/preempted etc., they are first sent a SIGTERM.
	// If a pod has not exited within its termination grace period,
	// it is killed forcefully by Kubernetes sending it a SIGKILL.
	//
	// This is the minimum allowed termination grace period.
	// It should normally be set to a positive value, e.g., 1 second.
	// Since a zero grace period causes Kubernetes to force delete pods,
	// which may cause issues where resources associated with the pod, e.g.,
	// containers, are not cleaned up correctly.
	//
	// The grace period of pods that either
	// - do not set a grace period, or
	// - explicitly set a grace period of 0 seconds,
	// is automatically set to MinTerminationGracePeriod.
	MinTerminationGracePeriod time.Duration
	// Max allowed grace period.
	// Should normally not be set greater than single-digit minutes,
	// since cancellation and preemption may need to wait for this amount of time.
	MaxTerminationGracePeriod time.Duration
	// Jobs with equal value for this annotation make up a gang.
	// All jobs in a gang are guaranteed to be scheduled onto the same cluster at the same time.
	GangIdAnnotation string
	// All jobs in a gang must specify the total number of jobs in the gang via this annotation.
	// The cardinality should be expressed as an integer, e.g., "3".
	GangCardinalityAnnotation string
}

func (*SchedulingConfig) GetResourceScarcity ¶ added in v0.1.25

func (c *SchedulingConfig) GetResourceScarcity(pool string) map[string]float64

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL