Documentation ¶
Index ¶
- Constants
- type ChangePosition
- type ChangePriority
- type KillTaskPod
- type PodsInfo
- type PreemptTaskPod
- type ResourceManager
- func (k *ResourceManager) Allocate(msg sproto.AllocateRequest) (*sproto.ResourcesSubscription, error)
- func (ResourceManager) DeleteJob(sproto.DeleteJob) (sproto.DeleteJobResponse, error)
- func (k *ResourceManager) DisableAgent(req *apiv1.DisableAgentRequest) (resp *apiv1.DisableAgentResponse, err error)
- func (k ResourceManager) DisableSlot(req *apiv1.DisableSlotRequest) (resp *apiv1.DisableSlotResponse, err error)
- func (k *ResourceManager) EnableAgent(req *apiv1.EnableAgentRequest) (resp *apiv1.EnableAgentResponse, err error)
- func (k ResourceManager) EnableSlot(req *apiv1.EnableSlotRequest) (resp *apiv1.EnableSlotResponse, err error)
- func (ResourceManager) ExternalPreemptionPending(sproto.PendingPreemption) error
- func (ResourceManager) GetAgent(*apiv1.GetAgentRequest) (*apiv1.GetAgentResponse, error)
- func (k *ResourceManager) GetAgents(msg *apiv1.GetAgentsRequest) (*apiv1.GetAgentsResponse, error)
- func (k *ResourceManager) GetAllocationSummaries(msg sproto.GetAllocationSummaries) (map[model.AllocationID]sproto.AllocationSummary, error)
- func (k *ResourceManager) GetAllocationSummary(msg sproto.GetAllocationSummary) (*sproto.AllocationSummary, error)
- func (k *ResourceManager) GetDefaultAuxResourcePool(sproto.GetDefaultAuxResourcePoolRequest) (sproto.GetDefaultAuxResourcePoolResponse, error)
- func (k *ResourceManager) GetDefaultComputeResourcePool(sproto.GetDefaultComputeResourcePoolRequest) (sproto.GetDefaultComputeResourcePoolResponse, error)
- func (ResourceManager) GetExternalJobs(sproto.GetExternalJobs) ([]*jobv1.Job, error)
- func (k *ResourceManager) GetJobQ(msg sproto.GetJobQ) (map[model.JobID]*sproto.RMJobInfo, error)
- func (k *ResourceManager) GetJobQueueStatsRequest(*apiv1.GetJobQueueStatsRequest) (*apiv1.GetJobQueueStatsResponse, error)
- func (k *ResourceManager) GetResourcePools(*apiv1.GetResourcePoolsRequest) (*apiv1.GetResourcePoolsResponse, error)
- func (ResourceManager) GetSlot(*apiv1.GetSlotRequest) (*apiv1.GetSlotResponse, error)
- func (ResourceManager) GetSlots(*apiv1.GetSlotsRequest) (*apiv1.GetSlotsResponse, error)
- func (k ResourceManager) IsReattachableOnlyAfterStarted() bool
- func (k *ResourceManager) MoveJob(msg sproto.MoveJob) error
- func (k ResourceManager) NotifyContainerRunning(msg sproto.NotifyContainerRunning) error
- func (k *ResourceManager) RecoverJobPosition(msg sproto.RecoverJobPosition)
- func (k *ResourceManager) Release(msg sproto.ResourcesReleased)
- func (k ResourceManager) ResolveResourcePool(name string, workspaceID int, slots int) (string, error)
- func (k *ResourceManager) SetAllocationName(msg sproto.SetAllocationName)
- func (k *ResourceManager) SetGroupMaxSlots(msg sproto.SetGroupMaxSlots)
- func (k *ResourceManager) SetGroupPriority(msg sproto.SetGroupPriority) error
- func (k *ResourceManager) SetGroupWeight(msg sproto.SetGroupWeight) error
- func (k ResourceManager) TaskContainerDefaults(pool string, fallbackConfig model.TaskContainerDefaultsConfig) (result model.TaskContainerDefaultsConfig, err error)
- func (k *ResourceManager) ValidateCommandResources(msg sproto.ValidateCommandResourcesRequest) (sproto.ValidateCommandResourcesResponse, error)
- func (k ResourceManager) ValidateResourcePool(name string) error
- func (k ResourceManager) ValidateResourcePoolAvailability(name string, slots int) ([]command.LaunchWarning, error)
- func (k ResourceManager) ValidateResources(name string, slots int, command bool) error
- type StartTaskPod
- type SummarizeResources
Constants ¶
const ResourceTypeNvidia = "nvidia.com/gpu"
ResourceTypeNvidia describes the GPU resource type.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ChangePosition ¶
ChangePosition notifies the pods actor of a position change and tells it to resubmit the specified pod.
type ChangePriority ¶
ChangePriority notifies the pods actor of a priority change and tells it to resubmit the specified pod.
type KillTaskPod ¶
KillTaskPod notifies the pods actor to kill a pod.
type PreemptTaskPod ¶
type PreemptTaskPod struct {
PodName string
}
PreemptTaskPod notifies the pods actor to preempt a pod.
type ResourceManager ¶
type ResourceManager struct {
// contains filtered or unexported fields
}
ResourceManager is a resource manager that manages k8s resources.
func New ¶
func New( system *actor.System, db *db.PgDB, rmConfigs *config.ResourceConfig, taskContainerDefaults *model.TaskContainerDefaultsConfig, opts *aproto.MasterSetAgentOptions, cert *tls.Certificate, ) *ResourceManager
New returns a new ResourceManager, which communicates with and submits work to a Kubernetes apiserver.
func (*ResourceManager) Allocate ¶
func (k *ResourceManager) Allocate(msg sproto.AllocateRequest) (*sproto.ResourcesSubscription, error)
Allocate implements rm.ResourceManager.
func (ResourceManager) DeleteJob ¶
func (ResourceManager) DeleteJob(sproto.DeleteJob) (sproto.DeleteJobResponse, error)
DeleteJob implements rm.ResourceManager.
func (*ResourceManager) DisableAgent ¶
func (k *ResourceManager) DisableAgent( req *apiv1.DisableAgentRequest, ) (resp *apiv1.DisableAgentResponse, err error)
DisableAgent prevents scheduling on a node and has the option to kill running jobs.
func (ResourceManager) DisableSlot ¶
func (k ResourceManager) DisableSlot( req *apiv1.DisableSlotRequest, ) (resp *apiv1.DisableSlotResponse, err error)
DisableSlot implements 'det slot disable...' functionality.
func (*ResourceManager) EnableAgent ¶
func (k *ResourceManager) EnableAgent( req *apiv1.EnableAgentRequest, ) (resp *apiv1.EnableAgentResponse, err error)
EnableAgent allows scheduling on a node that has been disabled.
func (ResourceManager) EnableSlot ¶
func (k ResourceManager) EnableSlot( req *apiv1.EnableSlotRequest, ) (resp *apiv1.EnableSlotResponse, err error)
EnableSlot implements 'det slot enable...' functionality.
func (ResourceManager) ExternalPreemptionPending ¶
func (ResourceManager) ExternalPreemptionPending(sproto.PendingPreemption) error
ExternalPreemptionPending implements rm.ResourceManager.
func (ResourceManager) GetAgent ¶
func (ResourceManager) GetAgent(*apiv1.GetAgentRequest) (*apiv1.GetAgentResponse, error)
GetAgent implements rm.ResourceManager.
func (*ResourceManager) GetAgents ¶
func (k *ResourceManager) GetAgents(msg *apiv1.GetAgentsRequest) (*apiv1.GetAgentsResponse, error)
GetAgents implements rm.ResourceManager.
func (*ResourceManager) GetAllocationSummaries ¶
func (k *ResourceManager) GetAllocationSummaries( msg sproto.GetAllocationSummaries, ) (map[model.AllocationID]sproto.AllocationSummary, error)
GetAllocationSummaries implements rm.ResourceManager.
func (*ResourceManager) GetAllocationSummary ¶
func (k *ResourceManager) GetAllocationSummary(msg sproto.GetAllocationSummary) (*sproto.AllocationSummary, error)
GetAllocationSummary implements rm.ResourceManager.
func (*ResourceManager) GetDefaultAuxResourcePool ¶
func (k *ResourceManager) GetDefaultAuxResourcePool( sproto.GetDefaultAuxResourcePoolRequest, ) (sproto.GetDefaultAuxResourcePoolResponse, error)
GetDefaultAuxResourcePool implements rm.ResourceManager.
func (*ResourceManager) GetDefaultComputeResourcePool ¶
func (k *ResourceManager) GetDefaultComputeResourcePool( sproto.GetDefaultComputeResourcePoolRequest, ) (sproto.GetDefaultComputeResourcePoolResponse, error)
GetDefaultComputeResourcePool implements rm.ResourceManager.
func (ResourceManager) GetExternalJobs ¶
func (ResourceManager) GetExternalJobs(sproto.GetExternalJobs) ([]*jobv1.Job, error)
GetExternalJobs implements rm.ResourceManager.
func (*ResourceManager) GetJobQueueStatsRequest ¶
func (k *ResourceManager) GetJobQueueStatsRequest( *apiv1.GetJobQueueStatsRequest, ) (*apiv1.GetJobQueueStatsResponse, error)
GetJobQueueStatsRequest implements rm.ResourceManager.
func (*ResourceManager) GetResourcePools ¶
func (k *ResourceManager) GetResourcePools(*apiv1.GetResourcePoolsRequest) (*apiv1.GetResourcePoolsResponse, error)
GetResourcePools implements rm.ResourceManager.
func (ResourceManager) GetSlot ¶
func (ResourceManager) GetSlot(*apiv1.GetSlotRequest) (*apiv1.GetSlotResponse, error)
GetSlot implements rm.ResourceManager. TODO(DET-9919): Implement GetSlot for Kubernetes RM.
func (ResourceManager) GetSlots ¶
func (ResourceManager) GetSlots(*apiv1.GetSlotsRequest) (*apiv1.GetSlotsResponse, error)
GetSlots implements rm.ResourceManager. TODO(DET-9919): Implement GetSlots for Kubernetes RM.
func (ResourceManager) IsReattachableOnlyAfterStarted ¶
func (k ResourceManager) IsReattachableOnlyAfterStarted() bool
IsReattachableOnlyAfterStarted always returns false for the k8s resource manager.
func (*ResourceManager) MoveJob ¶
func (k *ResourceManager) MoveJob(msg sproto.MoveJob) error
MoveJob implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.
func (ResourceManager) NotifyContainerRunning ¶
func (k ResourceManager) NotifyContainerRunning( msg sproto.NotifyContainerRunning, ) error
NotifyContainerRunning receives a notification from the container to let the master know that the container is running.
func (*ResourceManager) RecoverJobPosition ¶
func (k *ResourceManager) RecoverJobPosition(msg sproto.RecoverJobPosition)
RecoverJobPosition implements rm.ResourceManager.
func (*ResourceManager) Release ¶
func (k *ResourceManager) Release(msg sproto.ResourcesReleased)
Release implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.
func (ResourceManager) ResolveResourcePool ¶
func (k ResourceManager) ResolveResourcePool( name string, workspaceID int, slots int, ) (string, error)
ResolveResourcePool resolves the resource pool completely.
func (*ResourceManager) SetAllocationName ¶
func (k *ResourceManager) SetAllocationName(msg sproto.SetAllocationName)
SetAllocationName implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.
func (*ResourceManager) SetGroupMaxSlots ¶
func (k *ResourceManager) SetGroupMaxSlots(msg sproto.SetGroupMaxSlots)
SetGroupMaxSlots implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.
func (*ResourceManager) SetGroupPriority ¶
func (k *ResourceManager) SetGroupPriority(msg sproto.SetGroupPriority) error
SetGroupPriority implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.
func (*ResourceManager) SetGroupWeight ¶
func (k *ResourceManager) SetGroupWeight(msg sproto.SetGroupWeight) error
SetGroupWeight implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.
func (ResourceManager) TaskContainerDefaults ¶
func (k ResourceManager) TaskContainerDefaults( pool string, fallbackConfig model.TaskContainerDefaultsConfig, ) (result model.TaskContainerDefaultsConfig, err error)
TaskContainerDefaults returns TaskContainerDefaults for the specified pool.
func (*ResourceManager) ValidateCommandResources ¶
func (k *ResourceManager) ValidateCommandResources( msg sproto.ValidateCommandResourcesRequest, ) (sproto.ValidateCommandResourcesResponse, error)
ValidateCommandResources implements rm.ResourceManager.
func (ResourceManager) ValidateResourcePool ¶
func (k ResourceManager) ValidateResourcePool(name string) error
ValidateResourcePool validates that the named resource pool exists.
func (ResourceManager) ValidateResourcePoolAvailability ¶
func (k ResourceManager) ValidateResourcePoolAvailability( name string, slots int, ) ([]command.LaunchWarning, error)
ValidateResourcePoolAvailability checks the available resources for a given pool. This is a no-op for k8s.
func (ResourceManager) ValidateResources ¶
func (k ResourceManager) ValidateResources( name string, slots int, command bool, ) error
ValidateResources ensures enough resources are available in the resource pool. This is a no-op for k8s.
type StartTaskPod ¶
type StartTaskPod struct {
Req *sproto.AllocateRequest
AllocationID model.AllocationID
Spec tasks.TaskSpec
Slots int
Rank int
ResourcePool string
Namespace string
LogContext logger.Context
}
StartTaskPod notifies the pods actor to start a pod with the task spec.
type SummarizeResources ¶
type SummarizeResources struct {
PoolName string
}
SummarizeResources summarizes pod resources.