Documentation ¶
Index ¶
- Constants
- func ExtractEssentialJobMetadata(jobRun *executorapi.JobRunLease) (*job.RunMeta, error)
- type ClusterAllocationService
- type ClusterAllocator
- type IssueHandler
- type JobLeaseRequester
- type JobLeaseService
- func (jobLeaseService *JobLeaseService) RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error)
- func (jobLeaseService *JobLeaseService) ReportDone(jobIds []string) error
- func (jobLeaseService *JobLeaseService) RequestJobLeases(availableResource *armadaresource.ComputeResources, nodes []api.NodeInfo, ...) ([]*api.Job, error)
- func (jobLeaseService *JobLeaseService) ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error
- type JobManager
- type JobRequester
- type LeaseRequest
- type LeaseRequester
- type LeaseResponse
- type LeaseService
- type LegacyClusterAllocationService
- type ResourceCleanupService
Constants ¶
View Source
const ( UnableToSchedule podIssueType = iota StuckStartingUp StuckTerminating ExternallyDeleted ErrorDuringIssueHandling )
Variables ¶
This section is empty.
Functions ¶
func ExtractEssentialJobMetadata ¶ added in v0.3.57
func ExtractEssentialJobMetadata(jobRun *executorapi.JobRunLease) (*job.RunMeta, error)
Types ¶
type ClusterAllocationService ¶
type ClusterAllocationService struct {
// contains filtered or unexported fields
}
func NewClusterAllocationService ¶
func NewClusterAllocationService( clusterId executorContext.ClusterIdentity, eventReporter reporter.EventReporter, jobRunStateManager job.RunStateStore, submitter job.Submitter, etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor, ) *ClusterAllocationService
func (*ClusterAllocationService) AllocateSpareClusterCapacity ¶
func (allocationService *ClusterAllocationService) AllocateSpareClusterCapacity()
type ClusterAllocator ¶ added in v0.3.47
type ClusterAllocator interface {
AllocateSpareClusterCapacity()
}
type IssueHandler ¶ added in v0.3.77
type IssueHandler struct {
// contains filtered or unexported fields
}
func NewIssueHandler ¶ added in v0.3.77
func NewIssueHandler( jobRunState job.RunStateStore, clusterContext executorContext.ClusterContext, eventReporter reporter.EventReporter, stateChecksConfig configuration.StateChecksConfiguration, pendingPodChecker podchecks.PodChecker, stuckTerminatingPodExpiry time.Duration, ) *IssueHandler
func (*IssueHandler) HandlePodIssues ¶ added in v0.3.77
func (p *IssueHandler) HandlePodIssues()
type JobLeaseRequester ¶ added in v0.3.47
type JobLeaseRequester struct {
// contains filtered or unexported fields
}
func NewJobLeaseRequester ¶ added in v0.3.47
func NewJobLeaseRequester( executorApiClient executorapi.ExecutorApiClient, clusterIdentity clusterContext.ClusterIdentity, minimumJobSize armadaresource.ComputeResources, ) *JobLeaseRequester
func (*JobLeaseRequester) LeaseJobRuns ¶ added in v0.3.47
func (requester *JobLeaseRequester) LeaseJobRuns(ctx context.Context, request *LeaseRequest) (*LeaseResponse, error)
type JobLeaseService ¶
type JobLeaseService struct {
// contains filtered or unexported fields
}
func NewJobLeaseService ¶
func NewJobLeaseService( clusterContext context2.ClusterContext, queueClient api.AggregatedQueueClient, minimumJobSize armadaresource.ComputeResources, avoidNodeLabelsOnRetry []string, jobLeaseRequestTimeout time.Duration, ) *JobLeaseService
func (*JobLeaseService) RenewJobLeases ¶
func (jobLeaseService *JobLeaseService) RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error)
func (*JobLeaseService) ReportDone ¶
func (jobLeaseService *JobLeaseService) ReportDone(jobIds []string) error
func (*JobLeaseService) RequestJobLeases ¶
func (jobLeaseService *JobLeaseService) RequestJobLeases( availableResource *armadaresource.ComputeResources, nodes []api.NodeInfo, leasedResourceByQueue map[string]armadaresource.ComputeResources, leasedResourceByQueueAndPriority map[string]map[int32]armadaresource.ComputeResources, ) ([]*api.Job, error)
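func (*JobLeaseService) ReturnLease ¶
func (jobLeaseService *JobLeaseService) ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error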
type JobManager ¶
type JobManager struct {
// contains filtered or unexported fields
}
func NewJobManager ¶
func NewJobManager( clusterIdentity context2.ClusterIdentity, jobContext job.JobContext, eventReporter reporter.EventReporter, jobLeaseService LeaseService, ) *JobManager
func (*JobManager) ManageJobLeases ¶
func (m *JobManager) ManageJobLeases()
type JobRequester ¶ added in v0.3.57
type JobRequester struct {
// contains filtered or unexported fields
}
func NewJobRequester ¶ added in v0.3.57
func NewJobRequester( clusterId executorContext.ClusterIdentity, eventReporter reporter.EventReporter, leaseRequester LeaseRequester, jobRunStateStore job.RunStateStore, utilisationService utilisation.UtilisationService, podDefaults *configuration.PodDefaults, ) *JobRequester
func (*JobRequester) RequestJobsRuns ¶ added in v0.3.57
func (r *JobRequester) RequestJobsRuns()
type LeaseRequest ¶ added in v0.3.57
type LeaseRequest struct { AvailableResource armadaresource.ComputeResources Nodes []*api.NodeInfo UnassignedJobRunIds []armadaevents.Uuid }
type LeaseRequester ¶ added in v0.3.47
type LeaseRequester interface {
LeaseJobRuns(ctx context.Context, request *LeaseRequest) (*LeaseResponse, error)
}
type LeaseResponse ¶ added in v0.3.57
type LeaseResponse struct { LeasedRuns []*executorapi.JobRunLease RunIdsToCancel []*armadaevents.Uuid RunIdsToPreempt []*armadaevents.Uuid }
type LeaseService ¶
type LeaseService interface { ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error RequestJobLeases( availableResource *armadaresource.ComputeResources, nodes []api.NodeInfo, leasedResourceByQueue map[string]armadaresource.ComputeResources, leasedResourceByQueueAndPriority map[string]map[int32]armadaresource.ComputeResources, ) ([]*api.Job, error) RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error) ReportDone(jobIds []string) error }
type LegacyClusterAllocationService ¶ added in v0.3.47
type LegacyClusterAllocationService struct {
// contains filtered or unexported fields
}
func NewLegacyClusterAllocationService ¶ added in v0.3.47
func NewLegacyClusterAllocationService( clusterContext executorContext.ClusterContext, eventReporter reporter.EventReporter, leaseService LeaseService, utilisationService utilisation.UtilisationService, submitter job.Submitter, etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor, ) *LegacyClusterAllocationService
func (*LegacyClusterAllocationService) AllocateSpareClusterCapacity ¶ added in v0.3.47
func (allocationService *LegacyClusterAllocationService) AllocateSpareClusterCapacity()
type ResourceCleanupService ¶
type ResourceCleanupService struct {
// contains filtered or unexported fields
}
func NewResourceCleanupService ¶
func NewResourceCleanupService( clusterContext clusterContext.ClusterContext, kubernetesConfiguration configuration.KubernetesConfiguration, ) *ResourceCleanupService
func (*ResourceCleanupService) CleanupResources ¶
func (r *ResourceCleanupService) CleanupResources()
CleanupResources
- This function finds and deletes old resources. It does this in two ways:
- - By deleting all expired terminated pods
- - By deleting non-expired terminated pods when the MaxTerminatedPods limit is exceeded
Source Files ¶
Click to show internal directories.
Click to hide internal directories.