Documentation ¶
Index ¶
Constants ¶
View Source
const (
	// GangIdAnnotation maps to a unique id of the gang the job is part of; jobs with equal value make up a gang.
	// All jobs in a gang are guaranteed to be scheduled onto the same cluster at the same time.
	GangIdAnnotation = "armadaproject.io/gangId"
	// GangCardinalityAnnotation gives the total number of jobs in the gang; all jobs in a gang must specify it.
	// The cardinality should be expressed as a positive integer, e.g., "3".
	GangCardinalityAnnotation = "armadaproject.io/gangCardinality"
	// GangNodeUniformityLabelAnnotation optionally constrains the jobs that make up a gang to be scheduled across a set of uniform nodes.
	// Specifically, if provided, all gang jobs are scheduled onto nodes for which the value of the provided label is equal.
	// Used to ensure, e.g., that all gang jobs are scheduled onto the same cluster or rack.
	GangNodeUniformityLabelAnnotation = "armadaproject.io/gangNodeUniformityLabel"

	// NOTE(review): a comment describing GangNumJobsScheduledAnnotation ("is set by the scheduler and
	// indicates how many gang jobs were scheduled") sat here, but no such constant is declared in this
	// block. Confirm whether the constant was removed or lives elsewhere, then drop this note.

	// FailFastAnnotation, if set to true, ensures Armada does not re-schedule jobs that fail to start.
	// Instead, the job the pod is part of fails immediately.
	FailFastAnnotation = "armadaproject.io/failFast"
)
Variables ¶
This section is empty.
Functions ¶
func IsSchedulingAnnotation ¶
Types ¶
type ArmadaConfig ¶
// ArmadaConfig aggregates the authentication, port, profiling, CORS, gRPC,
// Redis, Pulsar, Postgres, query API and job-submission settings declared below.
type ArmadaConfig struct {
	Auth authconfig.AuthConfig

	GrpcPort    uint16
	HttpPort    uint16
	MetricsPort uint16

	// Pointer-typed profiling settings.
	// NOTE(review): presumably a nil value disables profiling — confirm against the code that reads it.
	Profiling *profilingconfig.ProfilingConfig

	CorsAllowedOrigins []string
	GrpcGatewayPath    string

	Grpc                   grpcconfig.GrpcConfig
	SchedulerApiConnection client.ApiConnectionDetails
	EventsApiRedis         redis.UniversalOptions
	Pulsar                 PulsarConfig
	Postgres               PostgresConfig // Needs to point to the lookout db
	QueryApi               QueryApiConfig

	// Period at which the queue cache is refreshed.
	QueueCacheRefreshPeriod time.Duration
	// Config relating to job submission.
	Submission SubmissionConfig
}
type PostgresConfig ¶
TODO: PostgresConfig could probably be defined directly as map[string]string.
type PulsarConfig ¶
// PulsarConfig holds the Pulsar client settings declared below: broker URL, TLS,
// authentication, topic, compression, message limits, timeouts and queue size.
type PulsarConfig struct {
	// Pulsar URL.
	URL string `validate:"required"`
	// Path to the trusted TLS certificate file (must exist).
	TLSTrustCertsFilePath string
	// Whether the Pulsar client accepts untrusted TLS certificates from the broker.
	TLSAllowInsecureConnection bool
	// Whether the Pulsar client validates that the hostname in the broker's TLS cert matches the actual hostname.
	TLSValidateHostname bool
	// Max number of connections to a single broker that will be kept in the pool. (Default: 1 connection)
	MaxConnectionsPerBroker int
	// Whether Pulsar authentication is enabled.
	AuthenticationEnabled bool
	// Authentication type. For now only "JWT" auth is valid.
	AuthenticationType string
	// Path to the JWT token (must exist). This must be set if AuthenticationType is "JWT".
	JwtTokenPath string
	// The Pulsar topic that jobset events will be published to.
	JobsetEventsTopic string
	// Compression to use. Valid values are "None", "LZ4", "Zlib", "Zstd". Default is "None".
	CompressionType pulsar.CompressionType
	// Compression level to use. Valid values are "Default", "Better", "Faster". Default is "Default".
	CompressionLevel pulsar.CompressionLevel
	// Maximum allowed events per message.
	MaxAllowedEventsPerMessage int `validate:"gte=0"`
	// Maximum allowed message size in bytes.
	MaxAllowedMessageSize uint
	// Timeout when sending messages asynchronously.
	SendTimeout time.Duration `validate:"required"`
	// Backoff from polling when Pulsar returns an error.
	BackoffTime time.Duration
	// Number of Pulsar messages that will be queued by the Pulsar consumer.
	ReceiverQueueSize int
}
type QueryApiConfig ¶
// QueryApiConfig holds settings for the query API.
type QueryApiConfig struct {
	// NOTE(review): presumably an upper bound on the number of items handled per query — confirm against usage.
	MaxQueryItems int
}
type SubmissionConfig ¶
// SubmissionConfig contains config relating to job submission: allowed and default
// priority classes, defaults applied to submitted pods, and limits that cause
// submissions to be rejected.
type SubmissionConfig struct {
	// The priorityClassName field on submitted pods must be either empty or in this list.
	// These names should correspond to priority classes defined in schedulingConfig.
	AllowedPriorityClassNames map[string]bool
	// Priority class name assigned to pods that do not specify one.
	// Must be an entry in PriorityClasses above.
	// NOTE(review): no PriorityClasses field is declared in this struct; this presumably refers to
	// AllowedPriorityClassNames or to the priority classes in schedulingConfig — confirm.
	DefaultPriorityClassName string
	// Default job resource limits added to pods.
	DefaultJobLimits armadaresource.ComputeResources
	// Tolerations added to all submitted pods.
	DefaultJobTolerations []v1.Toleration
	// Tolerations added to all submitted pods of a given priority class.
	DefaultJobTolerationsByPriorityClass map[string][]v1.Toleration
	// Tolerations added to all submitted pods requesting a non-zero amount of some resource.
	DefaultJobTolerationsByResourceRequest map[string][]v1.Toleration
	// Pods of size greater than this are rejected at submission.
	MaxPodSpecSizeBytes uint
	// Jobs requesting less than this amount of resources are rejected at submission.
	MinJobResources v1.ResourceList
	// Default value of GangNodeUniformityLabelAnnotation if not set on submitted jobs.
	// TODO(albin): We should add a label to nodes in the nodeDb indicating which cluster it came from.
	// If we do, we can default to that label if the uniformity label is empty.
	DefaultGangNodeUniformityLabel string
	// Minimum allowed termination grace period for pods submitted to Armada.
	// Should normally be set to a positive value, e.g., "10m",
	// since a zero grace period causes Kubernetes to force delete pods, which may cause issues with container resource cleanup.
	//
	// The grace period of pods that either
	// - do not set a grace period, or
	// - explicitly set a grace period of 0 seconds,
	// is automatically set to MinTerminationGracePeriod.
	MinTerminationGracePeriod time.Duration
	// Max allowed grace period.
	// Should normally not be set greater than single-digit minutes,
	// since cancellation and preemption may need to wait for this amount of time.
	MaxTerminationGracePeriod time.Duration
	// Default activeDeadline for all pods that don't explicitly set activeDeadlineSeconds.
	// Is trumped by DefaultActiveDeadlineByResourceRequest.
	DefaultActiveDeadline time.Duration
	// Default activeDeadline for pods with at least one container requesting a given resource.
	// For example, if
	// DefaultActiveDeadlineByResourceRequest: map[string]time.Duration{"gpu": time.Second},
	// then all pods that request a non-zero amount of gpu and don't explicitly set activeDeadlineSeconds
	// will have activeDeadlineSeconds set to 1.
	// Trumps DefaultActiveDeadline.
	DefaultActiveDeadlineByResourceRequest map[string]time.Duration
	// Maximum ratio of limits:requests per resource. Jobs with a higher limits:requests ratio than this will be rejected.
	// Any resource type missing from this map will default to 1.0.
	MaxOversubscriptionByResourceRequest map[string]float64
	// Enforce that init containers request non-integer cpu. This is due to https://github.com/kubernetes/kubernetes/issues/112228
	AssertInitContainersRequestFractionalCpu bool
}
SubmissionConfig contains config relating to job submission.
Click to show internal directories.
Click to hide internal directories.