service

package
v0.3.79-rc-9a1ab98 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 5, 2023 License: Apache-2.0 Imports: 37 Imported by: 0

Documentation

Index

Constants

View Source
const (
	UnableToSchedule podIssueType = iota
	StuckStartingUp
	StuckTerminating
	ExternallyDeleted
	ErrorDuringIssueHandling
)

Variables

This section is empty.

Functions

func ExtractEssentialJobMetadata added in v0.3.57

func ExtractEssentialJobMetadata(jobRun *executorapi.JobRunLease) (*job.RunMeta, error)

Types

type ClusterAllocationService

type ClusterAllocationService struct {
	// contains filtered or unexported fields
}

func NewClusterAllocationService

func NewClusterAllocationService(
	clusterId executorContext.ClusterIdentity,
	eventReporter reporter.EventReporter,
	jobRunStateManager job.RunStateStore,
	submitter job.Submitter,
	etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor,
) *ClusterAllocationService

func (*ClusterAllocationService) AllocateSpareClusterCapacity

func (allocationService *ClusterAllocationService) AllocateSpareClusterCapacity()

type ClusterAllocator added in v0.3.47

type ClusterAllocator interface {
	AllocateSpareClusterCapacity()
}

type IssueHandler added in v0.3.77

type IssueHandler struct {
	// contains filtered or unexported fields
}

func NewIssueHandler added in v0.3.77

func NewIssueHandler(
	jobRunState job.RunStateStore,
	clusterContext executorContext.ClusterContext,
	eventReporter reporter.EventReporter,
	stateChecksConfig configuration.StateChecksConfiguration,
	pendingPodChecker podchecks.PodChecker,
	stuckTerminatingPodExpiry time.Duration,
) *IssueHandler

func (*IssueHandler) HandlePodIssues added in v0.3.77

func (p *IssueHandler) HandlePodIssues()

type JobLeaseRequester added in v0.3.47

type JobLeaseRequester struct {
	// contains filtered or unexported fields
}

func NewJobLeaseRequester added in v0.3.47

func NewJobLeaseRequester(
	executorApiClient executorapi.ExecutorApiClient,
	clusterIdentity clusterContext.ClusterIdentity,
	minimumJobSize armadaresource.ComputeResources,
) *JobLeaseRequester

func (*JobLeaseRequester) LeaseJobRuns added in v0.3.47

func (requester *JobLeaseRequester) LeaseJobRuns(ctx context.Context, request *LeaseRequest) (*LeaseResponse, error)

type JobLeaseService

type JobLeaseService struct {
	// contains filtered or unexported fields
}

func NewJobLeaseService

func NewJobLeaseService(
	clusterContext context2.ClusterContext,
	queueClient api.AggregatedQueueClient,
	minimumJobSize armadaresource.ComputeResources,
	avoidNodeLabelsOnRetry []string,
	jobLeaseRequestTimeout time.Duration,
) *JobLeaseService

func (*JobLeaseService) RenewJobLeases

func (jobLeaseService *JobLeaseService) RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error)

func (*JobLeaseService) ReportDone

func (jobLeaseService *JobLeaseService) ReportDone(jobIds []string) error

func (*JobLeaseService) RequestJobLeases

func (jobLeaseService *JobLeaseService) RequestJobLeases(
	availableResource *armadaresource.ComputeResources,
	nodes []api.NodeInfo,
	leasedResourceByQueue map[string]armadaresource.ComputeResources,
	leasedResourceByQueueAndPriority map[string]map[int32]armadaresource.ComputeResources,
) ([]*api.Job, error)

func (*JobLeaseService) ReturnLease

func (jobLeaseService *JobLeaseService) ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error

type JobManager

type JobManager struct {
	// contains filtered or unexported fields
}

func NewJobManager

func NewJobManager(
	clusterIdentity context2.ClusterIdentity,
	jobContext job.JobContext,
	eventReporter reporter.EventReporter,
	jobLeaseService LeaseService,
) *JobManager

func (*JobManager) ManageJobLeases

func (m *JobManager) ManageJobLeases()

type JobRequester added in v0.3.57

type JobRequester struct {
	// contains filtered or unexported fields
}

func NewJobRequester added in v0.3.57

func NewJobRequester(
	clusterId executorContext.ClusterIdentity,
	eventReporter reporter.EventReporter,
	leaseRequester LeaseRequester,
	jobRunStateStore job.RunStateStore,
	utilisationService utilisation.UtilisationService,
	podDefaults *configuration.PodDefaults,
) *JobRequester

func (*JobRequester) RequestJobsRuns added in v0.3.57

func (r *JobRequester) RequestJobsRuns()

type LeaseRequest added in v0.3.57

type LeaseRequest struct {
	AvailableResource   armadaresource.ComputeResources
	Nodes               []*api.NodeInfo
	UnassignedJobRunIds []armadaevents.Uuid
}

type LeaseRequester added in v0.3.47

type LeaseRequester interface {
	LeaseJobRuns(ctx context.Context, request *LeaseRequest) (*LeaseResponse, error)
}

type LeaseResponse added in v0.3.57

type LeaseResponse struct {
	LeasedRuns      []*executorapi.JobRunLease
	RunIdsToCancel  []*armadaevents.Uuid
	RunIdsToPreempt []*armadaevents.Uuid
}

type LeaseService

type LeaseService interface {
	ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error
	RequestJobLeases(
		availableResource *armadaresource.ComputeResources,
		nodes []api.NodeInfo,
		leasedResourceByQueue map[string]armadaresource.ComputeResources,
		leasedResourceByQueueAndPriority map[string]map[int32]armadaresource.ComputeResources,
	) ([]*api.Job, error)
	RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error)
	ReportDone(jobIds []string) error
}

type LegacyClusterAllocationService added in v0.3.47

type LegacyClusterAllocationService struct {
	// contains filtered or unexported fields
}

func NewLegacyClusterAllocationService added in v0.3.47

func NewLegacyClusterAllocationService(
	clusterContext executorContext.ClusterContext,
	eventReporter reporter.EventReporter,
	leaseService LeaseService,
	utilisationService utilisation.UtilisationService,
	submitter job.Submitter,
	etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor,
) *LegacyClusterAllocationService

func (*LegacyClusterAllocationService) AllocateSpareClusterCapacity added in v0.3.47

func (allocationService *LegacyClusterAllocationService) AllocateSpareClusterCapacity()

type ResourceCleanupService

type ResourceCleanupService struct {
	// contains filtered or unexported fields
}

func NewResourceCleanupService

func NewResourceCleanupService(
	clusterContext clusterContext.ClusterContext,
	kubernetesConfiguration configuration.KubernetesConfiguration,
) *ResourceCleanupService

func (*ResourceCleanupService) CleanupResources

func (r *ResourceCleanupService) CleanupResources()

CleanupResources

  • This function finds and deletes old resources. It does this in two ways:
  • - By deleting all expired terminated pods
  • - By deleting non-expired terminated pods when the MaxTerminatedPods limit is exceeded

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL