scheduler

package

Version: v0.3.71 (latest) | Published: Jun 13, 2023 | License: Apache-2.0 | Imports: 71 | Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/armadaproject/armada

Links

Open Source Insights

Documentation ¶

Index ¶

func GangIdAndCardinalityFromAnnotations(annotations map[string]string) (string, int, bool, error)
func GangIdAndCardinalityFromLegacySchedulerJob(job interfaces.LegacySchedulerJob, ...) (string, int, bool, error)
func GroupJobsByAnnotation(annotation string, jobs []*api.Job) map[string][]*api.Job
func JavaStringHash(s string) uint32
func JobsSummary(jobs []interfaces.LegacySchedulerJob) string
func PodRequirementFromJobSchedulingInfo(info *schedulerobjects.JobSchedulingInfo) *schedulerobjects.PodRequirements
func PodRequirementFromLegacySchedulerJob[E interfaces.LegacySchedulerJob](job E, priorityClasses map[string]configuration.PriorityClass) *schedulerobjects.PodRequirements
func PodRequirementsFromJobSchedulingInfos(infos []*schedulerobjects.JobSchedulingInfo) []*schedulerobjects.PodRequirements
func PodRequirementsFromLegacySchedulerJobs[S ~[]E, E interfaces.LegacySchedulerJob](jobs S, priorityClasses map[string]configuration.PriorityClass) []*schedulerobjects.PodRequirements
func PreemptedJobsFromSchedulerResult[T interfaces.LegacySchedulerJob](sr *SchedulerResult) []T
func ResourceListAsWeightedMillis(weights map[string]float64, rl schedulerobjects.ResourceList) int64
func Run(config schedulerconfig.Configuration) error
func ScheduledJobsFromSchedulerResult[T interfaces.LegacySchedulerJob](sr *SchedulerResult) []T
func UpdateUsage[S ~[]E, E interfaces.LegacySchedulerJob](usage map[string]schedulerobjects.QuantityByPriorityAndResourceType, jobs S, ...) map[string]schedulerobjects.QuantityByPriorityAndResourceType
type AddOrSubtract
type CandidateGangIterator
- func NewCandidateGangIterator(sctx *schedulercontext.SchedulingContext, ...) (*CandidateGangIterator, error)
- func (it *CandidateGangIterator) Clear() error
- func (it *CandidateGangIterator) OnlyYieldEvicted()
- func (it *CandidateGangIterator) Peek() (*schedulercontext.GangSchedulingContext, error)
type DefaultPoolAssigner
- func NewPoolAssigner(executorTimeout time.Duration, schedulingConfig configuration.SchedulingConfig, ...) (*DefaultPoolAssigner, error)
- func (p *DefaultPoolAssigner) AssignPool(j *jobdb.Job) (string, error)
- func (p *DefaultPoolAssigner) Refresh(ctx context.Context) error
type Evictor
- func NewFilteredEvictor(jobRepo JobRepository, priorityClasses map[string]configuration.PriorityClass, ...) *Evictor
- func NewOversubscribedEvictor(jobRepo JobRepository, priorityClasses map[string]configuration.PriorityClass, ...) *Evictor
- func NewPreemptibleEvictor(jobRepo JobRepository, priorityClasses map[string]configuration.PriorityClass, ...) *Evictor
- func NewStochasticEvictor(jobRepo JobRepository, priorityClasses map[string]configuration.PriorityClass, ...) *Evictor
- func (evi *Evictor) Evict(ctx context.Context, it nodedb.NodeIterator) (*EvictorResult, error)
type EvictorResult
type ExecutorApi
- func NewExecutorApi(producer pulsar.Producer, jobRepository database.JobRepository, ...) (*ExecutorApi, error)
- func (srv *ExecutorApi) LeaseJobRuns(stream executorapi.ExecutorApi_LeaseJobRunsServer) error
- func (srv *ExecutorApi) ReportEvents(ctx context.Context, list *executorapi.EventList) (*types.Empty, error)
type FairSchedulingAlgo
- func NewFairSchedulingAlgo(config configuration.SchedulingConfig, maxSchedulingDuration time.Duration, ...) (*FairSchedulingAlgo, error)
- func (l *FairSchedulingAlgo) Schedule(ctx context.Context, txn *jobdb.Txn, jobDb *jobdb.JobDb) (*SchedulerResult, error)
type GangScheduler
- func NewGangScheduler(sctx *schedulercontext.SchedulingContext, ...) (*GangScheduler, error)
- func (sch *GangScheduler) Schedule(ctx context.Context, gctx *schedulercontext.GangSchedulingContext) (ok bool, unschedulableReason string, err error)
- func (sch *GangScheduler) SkipUnsuccessfulSchedulingKeyCheck()
type InMemoryJobIterator
- func NewInMemoryJobIterator[S ~[]E, E interfaces.LegacySchedulerJob](jobs S) *InMemoryJobIterator
- func (it *InMemoryJobIterator) Next() (interfaces.LegacySchedulerJob, error)
type InMemoryJobRepository
- func NewInMemoryJobRepository(priorityClasses map[string]configuration.PriorityClass) *InMemoryJobRepository
- func (repo *InMemoryJobRepository) Enqueue(job interfaces.LegacySchedulerJob)
- func (repo *InMemoryJobRepository) EnqueueMany(jobs []interfaces.LegacySchedulerJob)
- func (repo *InMemoryJobRepository) GetExistingJobsByIds(jobIds []string) ([]interfaces.LegacySchedulerJob, error)
- func (repo *InMemoryJobRepository) GetJobIterator(ctx context.Context, queue string) (JobIterator, error)
- func (repo *InMemoryJobRepository) GetQueueJobIds(queue string) ([]string, error)
type JobIterator
type JobQueueIteratorAdapter
- func (it *JobQueueIteratorAdapter) Next() (interfaces.LegacySchedulerJob, error)
type JobRepository
type JobSchedulingContextByExecutor
- func (m JobSchedulingContextByExecutor) String() string
type KubernetesLeaderController
- func NewKubernetesLeaderController(config schedulerconfig.LeaderConfig, client coordinationv1client.LeasesGetter) *KubernetesLeaderController
- func (lc *KubernetesLeaderController) GetToken() LeaderToken
- func (lc *KubernetesLeaderController) Run(ctx context.Context) error
- func (lc *KubernetesLeaderController) ValidateToken(tok LeaderToken) bool
type LeaderController
type LeaderToken
- func InvalidLeaderToken() LeaderToken
- func NewLeaderToken() LeaderToken
type LeaseListener
type MetricsCollector
- func NewMetricsCollector(jobDb *jobdb.JobDb, queueRepository database.QueueRepository, ...) *MetricsCollector
- func (c *MetricsCollector) Collect(metrics chan<- prometheus.Metric)
- func (c *MetricsCollector) Describe(out chan<- *prometheus.Desc)
- func (c *MetricsCollector) Run(ctx context.Context) error
type MultiJobsIterator
- func NewMultiJobsIterator(its ...JobIterator) *MultiJobsIterator
- func (it *MultiJobsIterator) Next() (interfaces.LegacySchedulerJob, error)
type PoolAssigner
type PreemptingQueueScheduler
- func NewPreemptingQueueScheduler(sctx *schedulercontext.SchedulingContext, ...) *PreemptingQueueScheduler
- func (sch *PreemptingQueueScheduler) EnableAssertions()
- func (sch *PreemptingQueueScheduler) Schedule(ctx context.Context) (*SchedulerResult, error)
- func (sch *PreemptingQueueScheduler) SkipUnsuccessfulSchedulingKeyCheck()
type Publisher
type PulsarPublisher
- func NewPulsarPublisher(pulsarClient pulsar.Client, producerOptions pulsar.ProducerOptions, ...) (*PulsarPublisher, error)
- func (p *PulsarPublisher) PublishMarkers(ctx context.Context, groupId uuid.UUID) (uint32, error)
- func (p *PulsarPublisher) PublishMessages(ctx context.Context, events []*armadaevents.EventSequence, ...) error
type QueueCandidateGangIteratorItem
type QueueCandidateGangIteratorPQ
- func (pq QueueCandidateGangIteratorPQ) Len() int
- func (pq QueueCandidateGangIteratorPQ) Less(i, j int) bool
- func (pq *QueueCandidateGangIteratorPQ) Pop() any
- func (pq *QueueCandidateGangIteratorPQ) Push(x any)
- func (pq QueueCandidateGangIteratorPQ) Swap(i, j int)
type QueueScheduler
- func NewQueueScheduler(sctx *schedulercontext.SchedulingContext, ...) (*QueueScheduler, error)
- func (sch *QueueScheduler) Schedule(ctx context.Context) (*SchedulerResult, error)
- func (sch *QueueScheduler) SkipUnsuccessfulSchedulingKeyCheck()
type QueueSchedulingContextByExecutor
- func (m QueueSchedulingContextByExecutor) String() string
type QueuedGangIterator
- func NewQueuedGangIterator(sctx *schedulercontext.SchedulingContext, it JobIterator, maxLookback uint) *QueuedGangIterator
- func (it *QueuedGangIterator) Clear() error
- func (it *QueuedGangIterator) Next() (*schedulercontext.GangSchedulingContext, error)
- func (it *QueuedGangIterator) Peek() (*schedulercontext.GangSchedulingContext, error)
type QueuedJobsIterator
- func NewQueuedJobsIterator(ctx context.Context, queue string, repo JobRepository) (*QueuedJobsIterator, error)
- func (it *QueuedJobsIterator) Next() (interfaces.LegacySchedulerJob, error)
type Scheduler
- func NewScheduler(jobRepository database.JobRepository, ...) (*Scheduler, error)
- func (s *Scheduler) Run(ctx context.Context) error
type SchedulerResult
- func NewSchedulerResult[S ~[]T, T interfaces.LegacySchedulerJob](preemptedJobs S, scheduledJobs S, nodeIdByJobId map[string]string) *SchedulerResult
type SchedulingAlgo
type SchedulingContextByExecutor
- func (m SchedulingContextByExecutor) String() string
type SchedulingContextRepository
- func NewSchedulingContextRepository(maxJobSchedulingContextsPerExecutor uint) (*SchedulingContextRepository, error)
- func (repo *SchedulingContextRepository) AddSchedulingContext(sctx *schedulercontext.SchedulingContext) error
- func (repo *SchedulingContextRepository) GetJobReport(_ context.Context, request *schedulerobjects.JobReportRequest) (*schedulerobjects.JobReport, error)
- func (repo *SchedulingContextRepository) GetMostRecentJobSchedulingContextByExecutor(jobId string) (JobSchedulingContextByExecutor, bool)
- func (repo *SchedulingContextRepository) GetMostRecentPreemptingQueueSchedulingContextByExecutor(queue string) (QueueSchedulingContextByExecutor, bool)
- func (repo *SchedulingContextRepository) GetMostRecentPreemptingSchedulingContextByExecutor() SchedulingContextByExecutor
- func (repo *SchedulingContextRepository) GetMostRecentQueueSchedulingContextByExecutor(queue string) (QueueSchedulingContextByExecutor, bool)
- func (repo *SchedulingContextRepository) GetMostRecentSchedulingContextByExecutor() SchedulingContextByExecutor
- func (repo *SchedulingContextRepository) GetMostRecentSuccessfulQueueSchedulingContextByExecutor(queue string) (QueueSchedulingContextByExecutor, bool)
- func (repo *SchedulingContextRepository) GetMostRecentSuccessfulSchedulingContextByExecutor() SchedulingContextByExecutor
- func (repo *SchedulingContextRepository) GetQueueReport(_ context.Context, request *schedulerobjects.QueueReportRequest) (*schedulerobjects.QueueReport, error)
- func (repo *SchedulingContextRepository) GetSchedulingReport(_ context.Context, request *schedulerobjects.SchedulingReportRequest) (*schedulerobjects.SchedulingReport, error)
- func (repo *SchedulingContextRepository) GetSortedExecutorIds() []string
type StandaloneLeaderController
- func NewStandaloneLeaderController() *StandaloneLeaderController
- func (lc *StandaloneLeaderController) GetToken() LeaderToken
- func (lc *StandaloneLeaderController) Run(ctx context.Context) error
- func (lc *StandaloneLeaderController) ValidateToken(tok LeaderToken) bool
type SubmitChecker
- func NewSubmitChecker(executorTimeout time.Duration, schedulingConfig configuration.SchedulingConfig, ...) *SubmitChecker
- func (srv *SubmitChecker) CheckApiJobs(jobs []*api.Job) (bool, string)
- func (srv *SubmitChecker) CheckPodRequirements(req *schedulerobjects.PodRequirements) (bool, string)
- func (srv *SubmitChecker) Run(ctx context.Context) error
type SubmitScheduleChecker

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func GangIdAndCardinalityFromAnnotations ¶

func GangIdAndCardinalityFromAnnotations(annotations map[string]string) (string, int, bool, error)

GangIdAndCardinalityFromAnnotations returns a tuple (gangId, gangCardinality, isGangJob, error).

func GangIdAndCardinalityFromLegacySchedulerJob ¶ added in v0.3.47

func GangIdAndCardinalityFromLegacySchedulerJob(job interfaces.LegacySchedulerJob, priorityClasses map[string]configuration.PriorityClass) (string, int, bool, error)

GangIdAndCardinalityFromLegacySchedulerJob returns a tuple (gangId, gangCardinality, isGangJob, error).

func GroupJobsByAnnotation ¶ added in v0.3.49

func GroupJobsByAnnotation(annotation string, jobs []*api.Job) map[string][]*api.Job

func JavaStringHash ¶

func JavaStringHash(s string) uint32

JavaStringHash is the default hashing algorithm used by Pulsar, copied from https://github.com/apache/pulsar-client-go/blob/master/pulsar/internal/hash.go

func JobsSummary ¶ added in v0.3.50

func JobsSummary(jobs []interfaces.LegacySchedulerJob) string

JobsSummary returns a string giving an overview of the provided jobs meant for logging. For example: "affected queues [A, B]; resources {A: {cpu: 1}, B: {cpu: 2}}; jobs [jobAId, jobBId]".

func PodRequirementFromJobSchedulingInfo ¶ added in v0.3.47

func PodRequirementFromJobSchedulingInfo(info *schedulerobjects.JobSchedulingInfo) *schedulerobjects.PodRequirements

func PodRequirementFromLegacySchedulerJob ¶ added in v0.3.50

func PodRequirementFromLegacySchedulerJob[E interfaces.LegacySchedulerJob](job E, priorityClasses map[string]configuration.PriorityClass) *schedulerobjects.PodRequirements

func PodRequirementsFromJobSchedulingInfos ¶ added in v0.3.47

func PodRequirementsFromJobSchedulingInfos(infos []*schedulerobjects.JobSchedulingInfo) []*schedulerobjects.PodRequirements

func PodRequirementsFromLegacySchedulerJobs ¶ added in v0.3.47

func PodRequirementsFromLegacySchedulerJobs[S ~[]E, E interfaces.LegacySchedulerJob](jobs S, priorityClasses map[string]configuration.PriorityClass) []*schedulerobjects.PodRequirements

func PreemptedJobsFromSchedulerResult ¶ added in v0.3.54

func PreemptedJobsFromSchedulerResult[T interfaces.LegacySchedulerJob](sr *SchedulerResult) []T

PreemptedJobsFromSchedulerResult returns the slice of preempted jobs in the result, cast to type T.

func ResourceListAsWeightedMillis ¶ added in v0.3.70

func ResourceListAsWeightedMillis(weights map[string]float64, rl schedulerobjects.ResourceList) int64

ResourceListAsWeightedMillis returns the linear combination of the milli values in rl with given weights. This function overflows for values that exceed MaxInt64. E.g., 1Pi is fine but not 10Pi.

func Run ¶

func Run(config schedulerconfig.Configuration) error

Run sets up a Scheduler application and runs it until a SIGTERM is received

func ScheduledJobsFromSchedulerResult ¶ added in v0.3.54

func ScheduledJobsFromSchedulerResult[T interfaces.LegacySchedulerJob](sr *SchedulerResult) []T

ScheduledJobsFromSchedulerResult returns the slice of scheduled jobs in the result, cast to type T.

func UpdateUsage ¶ added in v0.3.50

func UpdateUsage[S ~[]E, E interfaces.LegacySchedulerJob](
	usage map[string]schedulerobjects.QuantityByPriorityAndResourceType,
	jobs S,
	priorityClasses map[string]configuration.PriorityClass,
	addOrSubtract AddOrSubtract,
) map[string]schedulerobjects.QuantityByPriorityAndResourceType

Types ¶

type AddOrSubtract ¶ added in v0.3.50

type AddOrSubtract int

const (
	Add AddOrSubtract = iota
	Subtract
)

type CandidateGangIterator ¶

type CandidateGangIterator struct {
	SchedulingContext *schedulercontext.SchedulingContext
	// contains filtered or unexported fields
}

CandidateGangIterator determines which gang to try scheduling next across queues. Specifically, it yields the next gang in the queue with smallest fraction of its fair share, where the fraction of fair share computation includes the yielded gang.

func NewCandidateGangIterator ¶ added in v0.3.47

func NewCandidateGangIterator(
	sctx *schedulercontext.SchedulingContext,
	iteratorsByQueue map[string]*QueuedGangIterator,
) (*CandidateGangIterator, error)

func (*CandidateGangIterator) Clear ¶ added in v0.3.47

func (it *CandidateGangIterator) Clear() error

Clear removes the first item in the iterator. If it.onlyYieldEvicted is true, any consecutive non-evicted jobs are also removed.

func (*CandidateGangIterator) OnlyYieldEvicted ¶ added in v0.3.66

func (it *CandidateGangIterator) OnlyYieldEvicted()

func (*CandidateGangIterator) Peek ¶ added in v0.3.47

func (it *CandidateGangIterator) Peek() (*schedulercontext.GangSchedulingContext, error)

type DefaultPoolAssigner ¶ added in v0.3.54

type DefaultPoolAssigner struct {
	// contains filtered or unexported fields
}

func NewPoolAssigner ¶ added in v0.3.54

func NewPoolAssigner(executorTimeout time.Duration,
	schedulingConfig configuration.SchedulingConfig,
	executorRepository database.ExecutorRepository,
) (*DefaultPoolAssigner, error)

func (*DefaultPoolAssigner) AssignPool ¶ added in v0.3.54

func (p *DefaultPoolAssigner) AssignPool(j *jobdb.Job) (string, error)

AssignPool returns the pool associated with the job or the empty string if no pool is valid

func (*DefaultPoolAssigner) Refresh ¶ added in v0.3.54

func (p *DefaultPoolAssigner) Refresh(ctx context.Context) error

Refresh updates executor state

type Evictor ¶ added in v0.3.54

type Evictor struct {
	// contains filtered or unexported fields
}

func NewFilteredEvictor ¶ added in v0.3.56

func NewFilteredEvictor(
	jobRepo JobRepository,
	priorityClasses map[string]configuration.PriorityClass,
	nodeIdsToEvict map[string]bool,
	jobIdsToEvict map[string]bool,
) *Evictor

NewFilteredEvictor returns a new evictor that evicts all jobs for which jobIdsToEvict[jobId] is true on nodes for which nodeIdsToEvict[nodeId] is true.

func NewOversubscribedEvictor ¶ added in v0.3.54

func NewOversubscribedEvictor(
	jobRepo JobRepository,
	priorityClasses map[string]configuration.PriorityClass,
	defaultPriorityClass string,
	perNodeEvictionProbability float64,
	random *rand.Rand,
) *Evictor

NewOversubscribedEvictor returns a new evictor that for each node evicts all preemptible jobs of a priority class for which at least one job could not be scheduled with probability perNodeEvictionProbability.

func NewPreemptibleEvictor ¶ added in v0.3.54

func NewPreemptibleEvictor(
	jobRepo JobRepository,
	priorityClasses map[string]configuration.PriorityClass,
	defaultPriorityClass string,
	nodeFilter func(context.Context, *schedulerobjects.Node) bool,
) *Evictor

NewPreemptibleEvictor returns a new evictor that evicts all preemptible jobs on nodes for which nodeFilter returns true.

func NewStochasticEvictor ¶ added in v0.3.54

func NewStochasticEvictor(
	jobRepo JobRepository,
	priorityClasses map[string]configuration.PriorityClass,
	defaultPriorityClass string,
	perNodeEvictionProbability float64,
	random *rand.Rand,
) *Evictor

NewStochasticEvictor returns a new evictor that for each node evicts all preemptible jobs from that node with probability perNodeEvictionProbability.

func (*Evictor) Evict ¶ added in v0.3.54

func (evi *Evictor) Evict(ctx context.Context, it nodedb.NodeIterator) (*EvictorResult, error)

Evict removes jobs from nodes, returning all affected jobs and nodes. Any node for which nodeFilter returns false is skipped. Any job for which jobFilter returns true is evicted (if the node was not skipped). If a job was evicted from a node, postEvictFunc is called with the corresponding job and node.

type EvictorResult ¶ added in v0.3.54

type EvictorResult struct {
	// Map from job id to job, containing all evicted jobs.
	EvictedJobsById map[string]interfaces.LegacySchedulerJob
	// Map from node id to node, containing all nodes on which at least one job was evicted.
	AffectedNodesById map[string]*schedulerobjects.Node
	// For each evicted job, maps the id of the job to the id of the node it was evicted from.
	NodeIdByJobId map[string]string
}

type ExecutorApi ¶

type ExecutorApi struct {
	// contains filtered or unexported fields
}

ExecutorApi is a gRPC service that exposes functionality required by the armada executors

func NewExecutorApi ¶

func NewExecutorApi(producer pulsar.Producer,
	jobRepository database.JobRepository,
	executorRepository database.ExecutorRepository,
	legacyExecutorRepository database.ExecutorRepository,
	allowedPriorities []int32,
	maxJobsPerCall uint,
	nodeIdLabel string,
	priorityClassNameOverride *string,
) (*ExecutorApi, error)

func (*ExecutorApi) LeaseJobRuns ¶

func (srv *ExecutorApi) LeaseJobRuns(stream executorapi.ExecutorApi_LeaseJobRunsServer) error

LeaseJobRuns performs the following actions:

- Stores the request in postgres so that the scheduler can use the job + capacity information in the next scheduling round.
- Determines if any of the job runs in the request are no longer active and should be cancelled.
- Determines if any new job runs should be leased to the executor.

func (*ExecutorApi) ReportEvents ¶

func (srv *ExecutorApi) ReportEvents(ctx context.Context, list *executorapi.EventList) (*types.Empty, error)

ReportEvents publishes all events to pulsar. The events are compacted for more efficient publishing

type FairSchedulingAlgo ¶ added in v0.3.63

type FairSchedulingAlgo struct {
	// contains filtered or unexported fields
}

FairSchedulingAlgo is a SchedulingAlgo based on PreemptingQueueScheduler.

func NewFairSchedulingAlgo ¶ added in v0.3.63

func NewFairSchedulingAlgo(
	config configuration.SchedulingConfig,
	maxSchedulingDuration time.Duration,
	executorRepository database.ExecutorRepository,
	queueRepository database.QueueRepository,
	schedulingContextRepository *SchedulingContextRepository,
) (*FairSchedulingAlgo, error)

func (*FairSchedulingAlgo) Schedule ¶ added in v0.3.63

func (l *FairSchedulingAlgo) Schedule(
	ctx context.Context,
	txn *jobdb.Txn,
	jobDb *jobdb.JobDb,
) (*SchedulerResult, error)

Schedule assigns jobs to nodes in the same way as the old lease call. It iterates over each executor in turn (using lexicographical order) and assigns the jobs using a LegacyScheduler, before moving on to the next executor. It maintains state of which executors it has considered already and may take multiple Schedule() calls to consider all of the executors if scheduling is slow. Newly leased jobs are updated as such in the jobDb using the transaction provided and are also returned to the caller.

type GangScheduler ¶ added in v0.3.63

type GangScheduler struct {
	// contains filtered or unexported fields
}

GangScheduler schedules one gang at a time. GangScheduler is not aware of queues.

func NewGangScheduler ¶ added in v0.3.63

func NewGangScheduler(
	sctx *schedulercontext.SchedulingContext,
	constraints schedulerconstraints.SchedulingConstraints,
	nodeDb *nodedb.NodeDb,
) (*GangScheduler, error)

func (*GangScheduler) Schedule ¶ added in v0.3.63

func (sch *GangScheduler) Schedule(ctx context.Context, gctx *schedulercontext.GangSchedulingContext) (ok bool, unschedulableReason string, err error)

func (*GangScheduler) SkipUnsuccessfulSchedulingKeyCheck ¶ added in v0.3.65

func (sch *GangScheduler) SkipUnsuccessfulSchedulingKeyCheck()

type InMemoryJobIterator ¶ added in v0.3.50

type InMemoryJobIterator struct {
	// contains filtered or unexported fields
}

func NewInMemoryJobIterator ¶ added in v0.3.50

func NewInMemoryJobIterator[S ~[]E, E interfaces.LegacySchedulerJob](jobs S) *InMemoryJobIterator

func (*InMemoryJobIterator) Next ¶ added in v0.3.50

func (it *InMemoryJobIterator) Next() (interfaces.LegacySchedulerJob, error)

type InMemoryJobRepository ¶ added in v0.3.50

type InMemoryJobRepository struct {
	// contains filtered or unexported fields
}

func NewInMemoryJobRepository ¶ added in v0.3.50

func NewInMemoryJobRepository(priorityClasses map[string]configuration.PriorityClass) *InMemoryJobRepository

func (*InMemoryJobRepository) Enqueue ¶ added in v0.3.50

func (repo *InMemoryJobRepository) Enqueue(job interfaces.LegacySchedulerJob)

func (*InMemoryJobRepository) EnqueueMany ¶ added in v0.3.50

func (repo *InMemoryJobRepository) EnqueueMany(jobs []interfaces.LegacySchedulerJob)

func (*InMemoryJobRepository) GetExistingJobsByIds ¶ added in v0.3.50

func (repo *InMemoryJobRepository) GetExistingJobsByIds(jobIds []string) ([]interfaces.LegacySchedulerJob, error)

func (*InMemoryJobRepository) GetJobIterator ¶ added in v0.3.50

func (repo *InMemoryJobRepository) GetJobIterator(ctx context.Context, queue string) (JobIterator, error)

func (*InMemoryJobRepository) GetQueueJobIds ¶ added in v0.3.50

func (repo *InMemoryJobRepository) GetQueueJobIds(queue string) ([]string, error)

type JobIterator ¶

type JobIterator interface {
	Next() (interfaces.LegacySchedulerJob, error)
}

type JobQueueIteratorAdapter ¶ added in v0.3.47

type JobQueueIteratorAdapter struct {
	// contains filtered or unexported fields
}

func (*JobQueueIteratorAdapter) Next ¶ added in v0.3.47

func (it *JobQueueIteratorAdapter) Next() (interfaces.LegacySchedulerJob, error)

type JobRepository ¶

type JobRepository interface {
	GetQueueJobIds(queueName string) ([]string, error)
	GetExistingJobsByIds(ids []string) ([]interfaces.LegacySchedulerJob, error)
}

type JobSchedulingContextByExecutor ¶ added in v0.3.62

type JobSchedulingContextByExecutor map[string]*schedulercontext.JobSchedulingContext

func (JobSchedulingContextByExecutor) String ¶ added in v0.3.62

func (m JobSchedulingContextByExecutor) String() string

type KubernetesLeaderController ¶

type KubernetesLeaderController struct {
	// contains filtered or unexported fields
}

KubernetesLeaderController uses the Kubernetes leader election mechanism to determine who is leader. This allows multiple instances of the scheduler to be run for high availability.

TODO: Move into package in common.

func NewKubernetesLeaderController ¶

func NewKubernetesLeaderController(config schedulerconfig.LeaderConfig, client coordinationv1client.LeasesGetter) *KubernetesLeaderController

func (*KubernetesLeaderController) GetToken ¶

func (lc *KubernetesLeaderController) GetToken() LeaderToken

func (*KubernetesLeaderController) Run ¶

func (lc *KubernetesLeaderController) Run(ctx context.Context) error

Run starts the controller. This is a blocking call that returns when the provided context is cancelled.

func (*KubernetesLeaderController) ValidateToken ¶

func (lc *KubernetesLeaderController) ValidateToken(tok LeaderToken) bool

type LeaderController ¶

type LeaderController interface {
	// GetToken returns a LeaderToken which allows you to determine if you are leader or not
	GetToken() LeaderToken
	// ValidateToken allows a caller to determine whether a previously obtained token is still valid.
	// Returns true if the token is a leader and false otherwise
	ValidateToken(tok LeaderToken) bool
	// Run starts the controller.  This is a blocking call which will return when the provided context is cancelled
	Run(ctx context.Context) error
}

LeaderController is an interface to be implemented by structs that control which scheduler is leader

type LeaderToken ¶

type LeaderToken struct {
	// contains filtered or unexported fields
}

LeaderToken is a token handed out to schedulers which they can use to determine if they are leader

func InvalidLeaderToken ¶

func InvalidLeaderToken() LeaderToken

InvalidLeaderToken returns a LeaderToken indicating this instance is not leader.

func NewLeaderToken ¶

func NewLeaderToken() LeaderToken

NewLeaderToken returns a LeaderToken indicating this instance is the leader.

type LeaseListener ¶

type LeaseListener interface {
	// contains filtered or unexported methods
}

LeaseListener allows clients to listen for lease events.

type MetricsCollector ¶ added in v0.3.54

type MetricsCollector struct {
	// contains filtered or unexported fields
}

MetricsCollector is a Prometheus Collector that handles scheduler metrics. The metrics themselves are calculated asynchronously every refreshPeriod

func NewMetricsCollector ¶ added in v0.3.54

func NewMetricsCollector(
	jobDb *jobdb.JobDb,
	queueRepository database.QueueRepository,
	executorRepository database.ExecutorRepository,
	poolAssigner PoolAssigner,
	refreshPeriod time.Duration,
) *MetricsCollector

func (*MetricsCollector) Collect ¶ added in v0.3.54

func (c *MetricsCollector) Collect(metrics chan<- prometheus.Metric)

Collect returns the current state of all metrics of the collector.

func (*MetricsCollector) Describe ¶ added in v0.3.54

func (c *MetricsCollector) Describe(out chan<- *prometheus.Desc)

Describe returns all descriptions of the collector.

func (*MetricsCollector) Run ¶ added in v0.3.54

func (c *MetricsCollector) Run(ctx context.Context) error

Run enters a loop which updates the metrics every refreshPeriod until the supplied context is cancelled.

type MultiJobsIterator ¶ added in v0.3.47

type MultiJobsIterator struct {
	// contains filtered or unexported fields
}

MultiJobsIterator chains several JobIterators together, emptying them in the order provided.

func NewMultiJobsIterator ¶ added in v0.3.47

func NewMultiJobsIterator(its ...JobIterator) *MultiJobsIterator

func (*MultiJobsIterator) Next ¶ added in v0.3.47

func (it *MultiJobsIterator) Next() (interfaces.LegacySchedulerJob, error)

type PoolAssigner ¶ added in v0.3.54

type PoolAssigner interface {
	Refresh(ctx context.Context) error
	AssignPool(j *jobdb.Job) (string, error)
}

PoolAssigner allows jobs to be assigned to a pool. Note that this is intended only for use with metrics calculation.

type PreemptingQueueScheduler ¶ added in v0.3.63

type PreemptingQueueScheduler struct {
	// contains filtered or unexported fields
}

PreemptingQueueScheduler is a scheduler that makes a unified decision on which jobs to preempt and schedule. Uses QueueScheduler as a building block.

func NewPreemptingQueueScheduler ¶ added in v0.3.63

func NewPreemptingQueueScheduler(
	sctx *schedulercontext.SchedulingContext,
	constraints schedulerconstraints.SchedulingConstraints,
	nodeEvictionProbability float64,
	nodeOversubscriptionEvictionProbability float64,
	jobRepo JobRepository,
	nodeDb *nodedb.NodeDb,
	initialNodeIdByJobId map[string]string,
	initialJobIdsByGangId map[string]map[string]bool,
	initialGangIdByJobId map[string]string,
) *PreemptingQueueScheduler

func (*PreemptingQueueScheduler) EnableAssertions ¶ added in v0.3.63

func (sch *PreemptingQueueScheduler) EnableAssertions()

func (*PreemptingQueueScheduler) Schedule ¶ added in v0.3.63

func (sch *PreemptingQueueScheduler) Schedule(ctx context.Context) (*SchedulerResult, error)

Schedule preempts jobs belonging to queues with total allocation above their fair share, and schedules new jobs belonging to queues with total allocation less than their fair share.

func (*PreemptingQueueScheduler) SkipUnsuccessfulSchedulingKeyCheck ¶ added in v0.3.65

func (sch *PreemptingQueueScheduler) SkipUnsuccessfulSchedulingKeyCheck()

type Publisher ¶

type Publisher interface {
	// PublishMessages will publish the supplied messages. A LeaderToken is provided and the
	// implementor may decide whether to publish based on the status of this token
	PublishMessages(ctx context.Context, events []*armadaevents.EventSequence, shouldPublish func() bool) error

	// PublishMarkers publishes a single marker message for each Pulsar partition.  Each marker
	// message contains the supplied group id, which allows all marker messages for a given call
	// to be identified.  The uint32 returned is the number of messages published
	PublishMarkers(ctx context.Context, groupId uuid.UUID) (uint32, error)
}

Publisher is an interface to be implemented by structs that handle publishing messages to pulsar

type PulsarPublisher ¶

type PulsarPublisher struct {
	// contains filtered or unexported fields
}

PulsarPublisher is the default implementation of Publisher

func NewPulsarPublisher ¶

func NewPulsarPublisher(
	pulsarClient pulsar.Client,
	producerOptions pulsar.ProducerOptions,
	pulsarSendTimeout time.Duration,
) (*PulsarPublisher, error)

func (*PulsarPublisher) PublishMarkers ¶

func (p *PulsarPublisher) PublishMarkers(ctx context.Context, groupId uuid.UUID) (uint32, error)

PublishMarkers sends one pulsar message (containing an armadaevents.PartitionMarker) to each partition of the producer's Pulsar topic.

func (*PulsarPublisher) PublishMessages ¶

func (p *PulsarPublisher) PublishMessages(ctx context.Context, events []*armadaevents.EventSequence, shouldPublish func() bool) error

PublishMessages publishes all event sequences to pulsar. Event sequences for a given jobset will be combined into single event sequences up to maxMessageBatchSize.

type QueueCandidateGangIteratorItem ¶ added in v0.3.47

type QueueCandidateGangIteratorItem struct {
	// contains filtered or unexported fields
}

type QueueCandidateGangIteratorPQ ¶ added in v0.3.47

type QueueCandidateGangIteratorPQ []*QueueCandidateGangIteratorItem

Priority queue used by CandidateGangIterator to determine from which queue to schedule the next job.

func (QueueCandidateGangIteratorPQ) Len ¶ added in v0.3.47

func (pq QueueCandidateGangIteratorPQ) Len() int

func (QueueCandidateGangIteratorPQ) Less ¶ added in v0.3.47

func (pq QueueCandidateGangIteratorPQ) Less(i, j int) bool

func (*QueueCandidateGangIteratorPQ) Pop ¶ added in v0.3.47

func (pq *QueueCandidateGangIteratorPQ) Pop() any

func (*QueueCandidateGangIteratorPQ) Push ¶ added in v0.3.47

func (pq *QueueCandidateGangIteratorPQ) Push(x any)

func (QueueCandidateGangIteratorPQ) Swap ¶ added in v0.3.47

func (pq QueueCandidateGangIteratorPQ) Swap(i, j int)

type QueueScheduler ¶ added in v0.3.63

type QueueScheduler struct {
	// contains filtered or unexported fields
}

QueueScheduler is responsible for choosing the order in which to attempt scheduling queued gangs. Relies on GangScheduler for scheduling once a gang is chosen.

func NewQueueScheduler ¶ added in v0.3.63

func NewQueueScheduler(
	sctx *schedulercontext.SchedulingContext,
	constraints schedulerconstraints.SchedulingConstraints,
	nodeDb *nodedb.NodeDb,
	jobIteratorByQueue map[string]JobIterator,
) (*QueueScheduler, error)

func (*QueueScheduler) Schedule ¶ added in v0.3.63

func (sch *QueueScheduler) Schedule(ctx context.Context) (*SchedulerResult, error)

func (*QueueScheduler) SkipUnsuccessfulSchedulingKeyCheck ¶ added in v0.3.65

func (sch *QueueScheduler) SkipUnsuccessfulSchedulingKeyCheck()

type QueueSchedulingContextByExecutor ¶ added in v0.3.62

type QueueSchedulingContextByExecutor map[string]*schedulercontext.QueueSchedulingContext

func (QueueSchedulingContextByExecutor) String ¶ added in v0.3.62

func (m QueueSchedulingContextByExecutor) String() string

type QueuedGangIterator ¶

type QueuedGangIterator struct {
	// contains filtered or unexported fields
}

QueuedGangIterator is an iterator over queued gangs. Each gang is yielded once its final member is received from the underlying iterator. Jobs without gangIdAnnotation are considered gangs of cardinality 1.

func NewQueuedGangIterator ¶

func NewQueuedGangIterator(sctx *schedulercontext.SchedulingContext, it JobIterator, maxLookback uint) *QueuedGangIterator

func (*QueuedGangIterator) Clear ¶ added in v0.3.47

func (it *QueuedGangIterator) Clear() error

func (*QueuedGangIterator) Next ¶

func (it *QueuedGangIterator) Next() (*schedulercontext.GangSchedulingContext, error)

func (*QueuedGangIterator) Peek ¶ added in v0.3.47

func (it *QueuedGangIterator) Peek() (*schedulercontext.GangSchedulingContext, error)

type QueuedJobsIterator ¶

type QueuedJobsIterator struct {
	// contains filtered or unexported fields
}

QueuedJobsIterator is an iterator over all jobs in a queue. It lazily and asynchronously loads jobs in batches from Redis.

func NewQueuedJobsIterator ¶

func NewQueuedJobsIterator(ctx context.Context, queue string, repo JobRepository) (*QueuedJobsIterator, error)

func (*QueuedJobsIterator) Next ¶

func (it *QueuedJobsIterator) Next() (interfaces.LegacySchedulerJob, error)

type Scheduler ¶

type Scheduler struct {
	// contains filtered or unexported fields
}

Scheduler is the main Armada scheduler. It periodically performs the following cycle: 1. Update state from postgres (via the jobRepository). 2. Determine if leader and exit if not. 3. Generate any necessary events resulting from the state update. 4. Expire any jobs assigned to clusters that have timed out. 5. Schedule jobs. 6. Publish any Armada events resulting from the scheduling cycle.

func NewScheduler ¶

func NewScheduler(
	jobRepository database.JobRepository,
	executorRepository database.ExecutorRepository,
	schedulingAlgo SchedulingAlgo,
	leaderController LeaderController,
	publisher Publisher,
	stringInterner *stringinterner.StringInterner,
	submitChecker SubmitScheduleChecker,
	cyclePeriod time.Duration,
	schedulePeriod time.Duration,
	executorTimeout time.Duration,
	maxAttemptedRuns uint,
	nodeIdLabel string,
) (*Scheduler, error)

func (*Scheduler) Run ¶

func (s *Scheduler) Run(ctx context.Context) error

Run enters the scheduling loop, which will continue until ctx is cancelled.

type SchedulerResult ¶ added in v0.3.54

type SchedulerResult struct {
	// Running jobs that should be preempted.
	PreemptedJobs []interfaces.LegacySchedulerJob
	// Queued jobs that should be scheduled.
	ScheduledJobs []interfaces.LegacySchedulerJob
	// For each preempted job, maps the job id to the id of the node on which the job was running.
	// For each scheduled job, maps the job id to the id of the node on which the job should be scheduled.
	NodeIdByJobId map[string]string
}

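SchedulerResult is returned by the Schedule methods in this package (PreemptingQueueScheduler.Schedule, QueueScheduler.Schedule, and implementations of SchedulingAlgo.Schedule).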

func NewSchedulerResult ¶ added in v0.3.54

func NewSchedulerResult[S ~[]T, T interfaces.LegacySchedulerJob](
	preemptedJobs S,
	scheduledJobs S,
	nodeIdByJobId map[string]string,
) *SchedulerResult

type SchedulingAlgo ¶

type SchedulingAlgo interface {
	// Schedule should assign jobs to nodes.
	// Any jobs that are scheduled should be marked as such in the JobDb using the transaction provided.
	Schedule(ctx context.Context, txn *jobdb.Txn, jobDb *jobdb.JobDb) (*SchedulerResult, error)
}

SchedulingAlgo is the interface between the Pulsar-backed scheduler and the algorithm deciding which jobs to schedule and preempt.

type SchedulingContextByExecutor ¶ added in v0.3.62

type SchedulingContextByExecutor map[string]*schedulercontext.SchedulingContext

func (SchedulingContextByExecutor) String ¶ added in v0.3.62

func (m SchedulingContextByExecutor) String() string

type SchedulingContextRepository ¶ added in v0.3.62

type SchedulingContextRepository struct {
	// contains filtered or unexported fields
}

SchedulingContextRepository stores scheduling contexts associated with recent scheduling attempts. On adding a context, a map is cloned, then mutated, and then swapped for the previous map using atomic pointers. Hence, reads concurrent with writes are safe and don't need locking. A mutex protects against concurrent writes.

func NewSchedulingContextRepository ¶ added in v0.3.62

func NewSchedulingContextRepository(maxJobSchedulingContextsPerExecutor uint) (*SchedulingContextRepository, error)

func (*SchedulingContextRepository) AddSchedulingContext ¶ added in v0.3.62

func (repo *SchedulingContextRepository) AddSchedulingContext(sctx *schedulercontext.SchedulingContext) error

AddSchedulingContext adds a scheduling context to the repo. It also extracts the queue and job scheduling contexts it contains and stores those separately.

It's safe to call this method concurrently with itself and with methods getting contexts from the repo. It's not safe to mutate contexts once they've been provided to this method.

Job contexts are stored first, then queue contexts, and finally the scheduling context itself. This avoids having a stored scheduling (queue) context referring to a queue (job) context that isn't stored yet.

func (*SchedulingContextRepository) GetJobReport ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetJobReport(_ context.Context, request *schedulerobjects.JobReportRequest) (*schedulerobjects.JobReport, error)

GetJobReport is a gRPC endpoint for querying job reports. TODO: Further separate this from internal contexts.

func (*SchedulingContextRepository) GetMostRecentJobSchedulingContextByExecutor ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetMostRecentJobSchedulingContextByExecutor(jobId string) (JobSchedulingContextByExecutor, bool)

func (*SchedulingContextRepository) GetMostRecentPreemptingQueueSchedulingContextByExecutor ¶ added in v0.3.71

func (repo *SchedulingContextRepository) GetMostRecentPreemptingQueueSchedulingContextByExecutor(queue string) (QueueSchedulingContextByExecutor, bool)

func (*SchedulingContextRepository) GetMostRecentPreemptingSchedulingContextByExecutor ¶ added in v0.3.71

func (repo *SchedulingContextRepository) GetMostRecentPreemptingSchedulingContextByExecutor() SchedulingContextByExecutor

func (*SchedulingContextRepository) GetMostRecentQueueSchedulingContextByExecutor ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetMostRecentQueueSchedulingContextByExecutor(queue string) (QueueSchedulingContextByExecutor, bool)

func (*SchedulingContextRepository) GetMostRecentSchedulingContextByExecutor ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetMostRecentSchedulingContextByExecutor() SchedulingContextByExecutor

func (*SchedulingContextRepository) GetMostRecentSuccessfulQueueSchedulingContextByExecutor ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetMostRecentSuccessfulQueueSchedulingContextByExecutor(queue string) (QueueSchedulingContextByExecutor, bool)

func (*SchedulingContextRepository) GetMostRecentSuccessfulSchedulingContextByExecutor ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetMostRecentSuccessfulSchedulingContextByExecutor() SchedulingContextByExecutor

func (*SchedulingContextRepository) GetQueueReport ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetQueueReport(_ context.Context, request *schedulerobjects.QueueReportRequest) (*schedulerobjects.QueueReport, error)

GetQueueReport is a gRPC endpoint for querying queue reports. TODO: Further separate this from internal contexts.

func (*SchedulingContextRepository) GetSchedulingReport ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetSchedulingReport(_ context.Context, request *schedulerobjects.SchedulingReportRequest) (*schedulerobjects.SchedulingReport, error)

GetSchedulingReport is a gRPC endpoint for querying scheduler reports. TODO: Further separate this from internal contexts.

func (*SchedulingContextRepository) GetSortedExecutorIds ¶ added in v0.3.62

func (repo *SchedulingContextRepository) GetSortedExecutorIds() []string

type StandaloneLeaderController ¶

type StandaloneLeaderController struct {
	// contains filtered or unexported fields
}

StandaloneLeaderController returns a token that always indicates you are leader. This can be used when only a single instance of the scheduler is needed.

func NewStandaloneLeaderController ¶

func NewStandaloneLeaderController() *StandaloneLeaderController

func (*StandaloneLeaderController) GetToken ¶

func (lc *StandaloneLeaderController) GetToken() LeaderToken

func (*StandaloneLeaderController) Run ¶ added in v0.3.47

func (lc *StandaloneLeaderController) Run(ctx context.Context) error

func (*StandaloneLeaderController) ValidateToken ¶

func (lc *StandaloneLeaderController) ValidateToken(tok LeaderToken) bool

type SubmitChecker ¶

type SubmitChecker struct {
	// contains filtered or unexported fields
}

func NewSubmitChecker ¶

func NewSubmitChecker(
	executorTimeout time.Duration,
	schedulingConfig configuration.SchedulingConfig,
	executorRepository database.ExecutorRepository,
) *SubmitChecker

func (*SubmitChecker) CheckApiJobs ¶

func (srv *SubmitChecker) CheckApiJobs(jobs []*api.Job) (bool, string)

func (*SubmitChecker) CheckPodRequirements ¶ added in v0.3.63

func (srv *SubmitChecker) CheckPodRequirements(req *schedulerobjects.PodRequirements) (bool, string)

func (*SubmitChecker) Run ¶ added in v0.3.49

func (srv *SubmitChecker) Run(ctx context.Context) error

type SubmitScheduleChecker ¶ added in v0.3.63

type SubmitScheduleChecker interface {
	CheckPodRequirements(podRequirement *schedulerobjects.PodRequirements) (bool, string)
	CheckApiJobs(jobs []*api.Job) (bool, string)
}

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
adapters
configuration
constraints
context
database
interfaces
jobdb
kubernetesobjects
affinity
mocks	Package schedulermocks is a generated GoMock package.
nodedb
schedulerobjects
testfixtures

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL