agentrm

package
v0.38.0-rc2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 28, 2024 License: Apache-2.0 Imports: 54 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BestFit

func BestFit(req *sproto.AllocateRequest, agent *agentState) float64

BestFit returns a float affinity score between 0 and 1 for the affinity between the task and the agent. This method attempts to allocate tasks to the agent that is both most utilized and offers the fewest slots. This method should be used when the cluster is dominated by multi-slot applications.

func MakeFitFunction

func MakeFitFunction(fittingPolicy string) func(
	*sproto.AllocateRequest, *agentState) float64

MakeFitFunction returns the corresponding fitting function.

func Single

func Single[K comparable, V any](m map[K]V) (kr K, vr V, ok bool)

Single asserts there's a single element in the map and takes it.

func WorstFit

func WorstFit(req *sproto.AllocateRequest, agent *agentState) float64

WorstFit returns a float affinity score between 0 and 1 for the affinity between the task and the agent. This method attempts to allocate tasks to the agent that is least utilized. This method should be used when the cluster is dominated by single-slot applications.

Types

type HardConstraint

type HardConstraint func(req *sproto.AllocateRequest, agent *agentState) bool

HardConstraint returns true if the task can be assigned to the agent and false otherwise.

type ResourceManager

type ResourceManager struct {
	// contains filtered or unexported fields
}

A ResourceManager manages many resource pools and routes requests for resources to them.

func New

func New(
	db *db.PgDB,
	e *echo.Echo,
	config *config.ResourceManagerWithPoolsConfig,
	opts *aproto.MasterSetAgentOptions,
	cert *tls.Certificate,
) (*ResourceManager, error)

New returns a new ResourceManager, which manages communicating with and scheduling on Determined agents.

func (*ResourceManager) Allocate

Allocate implements rm.ResourceManager.

func (*ResourceManager) CheckMaxSlotsExceeded

func (a *ResourceManager) CheckMaxSlotsExceeded(v *sproto.ValidateResourcesRequest) (bool, error)

CheckMaxSlotsExceeded checks if the job exceeded the maximum number of slots.

func (*ResourceManager) CreateNamespace

func (a *ResourceManager) CreateNamespace(string, string, bool) error

CreateNamespace is not supported.

func (*ResourceManager) DefaultNamespace

func (a *ResourceManager) DefaultNamespace(string) (*string, error)

DefaultNamespace is not supported.

func (*ResourceManager) DeleteJob

DeleteJob implements rm.ResourceManager.

func (*ResourceManager) DeleteNamespace

func (a *ResourceManager) DeleteNamespace(namespaceName string) error

DeleteNamespace is not supported.

func (*ResourceManager) DisableAgent

DisableAgent implements rm.ResourceManager.

func (*ResourceManager) DisableSlot

DisableSlot implements rm.ResourceManager.

func (*ResourceManager) EnableAgent

EnableAgent implements rm.ResourceManager.

func (*ResourceManager) EnableSlot

EnableSlot implements rm.ResourceManager.

func (*ResourceManager) GetAgent

GetAgent implements rm.ResourceManager.

func (*ResourceManager) GetAgents

func (a *ResourceManager) GetAgents() (*apiv1.GetAgentsResponse, error)

GetAgents implements rm.ResourceManager.

func (*ResourceManager) GetAllocationSummaries

func (a *ResourceManager) GetAllocationSummaries() (map[model.AllocationID]sproto.AllocationSummary, error)

GetAllocationSummaries implements rm.ResourceManager.

func (*ResourceManager) GetDefaultAuxResourcePool

func (a *ResourceManager) GetDefaultAuxResourcePool() (rm.ResourcePoolName, error)

GetDefaultAuxResourcePool implements rm.ResourceManager.

func (*ResourceManager) GetDefaultComputeResourcePool

func (a *ResourceManager) GetDefaultComputeResourcePool() (rm.ResourcePoolName, error)

GetDefaultComputeResourcePool implements rm.ResourceManager.

func (*ResourceManager) GetExternalJobs

func (*ResourceManager) GetExternalJobs(rm.ResourcePoolName) ([]*jobv1.Job, error)

GetExternalJobs implements rm.ResourceManager.

func (*ResourceManager) GetJobQ

func (a *ResourceManager) GetJobQ(rpName rm.ResourcePoolName) (map[model.JobID]*sproto.RMJobInfo, error)

GetJobQ implements rm.ResourceManager.

func (*ResourceManager) GetJobQueueStatsRequest

func (a *ResourceManager) GetJobQueueStatsRequest(
	msg *apiv1.GetJobQueueStatsRequest,
) (*apiv1.GetJobQueueStatsResponse, error)

GetJobQueueStatsRequest implements rm.ResourceManager.

func (*ResourceManager) GetNamespaceResourceQuota

func (a *ResourceManager) GetNamespaceResourceQuota(string, string) (*float64, error)

GetNamespaceResourceQuota is not supported.

func (*ResourceManager) GetResourcePools

func (a *ResourceManager) GetResourcePools() (*apiv1.GetResourcePoolsResponse, error)

GetResourcePools implements rm.ResourceManager.

func (*ResourceManager) GetSlot

GetSlot implements rm.ResourceManager.

func (*ResourceManager) GetSlots

GetSlots implements rm.ResourceManager.

func (*ResourceManager) HealthCheck

func (a *ResourceManager) HealthCheck() []model.ResourceManagerHealth

HealthCheck always returns healthy for agentrm.

func (*ResourceManager) IsReattachableOnlyAfterStarted

func (*ResourceManager) IsReattachableOnlyAfterStarted() bool

IsReattachableOnlyAfterStarted implements rm.ResourceManager.

func (*ResourceManager) NotifyContainerRunning

func (*ResourceManager) NotifyContainerRunning(sproto.NotifyContainerRunning) error

NotifyContainerRunning implements rm.ResourceManager.

func (*ResourceManager) RecoverJobPosition

func (a *ResourceManager) RecoverJobPosition(msg sproto.RecoverJobPosition)

RecoverJobPosition implements rm.ResourceManager.

func (*ResourceManager) Release

func (a *ResourceManager) Release(msg sproto.ResourcesReleased)

Release implements rm.ResourceManager.

func (*ResourceManager) RemoveEmptyNamespace

func (a *ResourceManager) RemoveEmptyNamespace(string, string) error

RemoveEmptyNamespace is not supported.

func (*ResourceManager) ResolveResourcePool

func (a *ResourceManager) ResolveResourcePool(name rm.ResourcePoolName, workspaceID int, slots int) (
	rm.ResourcePoolName, error,
)

ResolveResourcePool implements rm.ResourceManager.

func (*ResourceManager) SetGroupMaxSlots

func (a *ResourceManager) SetGroupMaxSlots(msg sproto.SetGroupMaxSlots)

SetGroupMaxSlots implements rm.ResourceManager.

func (*ResourceManager) SetGroupPriority

func (a *ResourceManager) SetGroupPriority(msg sproto.SetGroupPriority) error

SetGroupPriority implements rm.ResourceManager.

func (*ResourceManager) SetGroupWeight

func (a *ResourceManager) SetGroupWeight(msg sproto.SetGroupWeight) error

SetGroupWeight implements rm.ResourceManager.

func (*ResourceManager) SetResourceQuota

func (a *ResourceManager) SetResourceQuota(int, string, string) error

SetResourceQuota is not supported.

func (*ResourceManager) SmallerValueIsHigherPriority

func (a *ResourceManager) SmallerValueIsHigherPriority() (bool, error)

SmallerValueIsHigherPriority returns true if smaller priority values indicate a higher priority level.

func (*ResourceManager) TaskContainerDefaults

func (a *ResourceManager) TaskContainerDefaults(
	resourcePoolName rm.ResourcePoolName,
	defaultConfig model.TaskContainerDefaultsConfig,
) (model.TaskContainerDefaultsConfig, error)

TaskContainerDefaults implements rm.ResourceManager.

func (*ResourceManager) ValidateResourcePool

func (a *ResourceManager) ValidateResourcePool(name rm.ResourcePoolName) error

ValidateResourcePool implements rm.ResourceManager.

func (*ResourceManager) ValidateResources

func (a *ResourceManager) ValidateResources(
	msg sproto.ValidateResourcesRequest,
) ([]command.LaunchWarning, error)

ValidateResources implements rm.ResourceManager.

func (*ResourceManager) VerifyNamespaceExists

func (a *ResourceManager) VerifyNamespaceExists(string, string) error

VerifyNamespaceExists is not supported.

type Scheduler

type Scheduler interface {
	Schedule(rp *resourcePool) ([]*sproto.AllocateRequest, []model.AllocationID)
	JobQInfo(rp *resourcePool) map[model.JobID]*sproto.RMJobInfo
}

Scheduler schedules tasks on agents. Its Schedule method is called to determine which pending requests can be fulfilled and which scheduled tasks can be terminated. Schedule is expected to be called every time there is a change to the cluster status, for example, new agents being connected, devices being disabled, etc. Schedule should avoid unnecessarily shuffling tasks between agents to avoid the overhead of restarting a preempted task.

func MakeScheduler

func MakeScheduler(conf *config.SchedulerConfig) (Scheduler, error)

MakeScheduler returns the corresponding scheduler implementation.

func NewFairShareScheduler

func NewFairShareScheduler() Scheduler

NewFairShareScheduler creates a new scheduler that schedules tasks according to the max-min fairness of groups. For groups that are above their fair share, the scheduler requests them to terminate their idle tasks until they have achieved their fair share.

func NewPriorityScheduler

func NewPriorityScheduler(config *config.SchedulerConfig) Scheduler

NewPriorityScheduler creates a new scheduler that schedules tasks via priority.

type SoftConstraint

type SoftConstraint func(req *sproto.AllocateRequest, agent *agentState) float64

SoftConstraint returns a score from 0 (lowest) to 1 (highest) representing how optimal the state of the cluster would be if the task were assigned to the agent.

Directories

Path Synopsis
aws
gcp

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL