Documentation ¶
Index ¶
- Constants
- func Initialize(s *actor.System, e *echo.Echo, c *actor.Ref, namespace string, ...) *actor.Ref
- type ChangePosition
- type ChangePriority
- type KillTaskPod
- type PodsInfo
- type PreemptTaskPod
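- type ResourceManager
- func New(system *actor.System, db *db.PgDB, echo *echo.Echo, config *config.ResourceConfig, ...) ResourceManager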
- func (k ResourceManager) DisableSlot(m actor.Messenger, req *apiv1.DisableSlotRequest) (resp *apiv1.DisableSlotResponse, err error)
- func (k ResourceManager) EnableSlot(m actor.Messenger, req *apiv1.EnableSlotRequest) (resp *apiv1.EnableSlotResponse, err error)
- func (k ResourceManager) GetResourcePoolRef(ctx actor.Messenger, name string) (*actor.Ref, error)
- func (k ResourceManager) IsReattachEnabled(ctx actor.Messenger) bool
- func (k ResourceManager) IsReattachEnabledForRP(ctx actor.Messenger, rp string) bool
- func (k ResourceManager) IsReattachableOnlyAfterStarted(ctx actor.Messenger) bool
- func (k ResourceManager) NotifyContainerRunning(ctx actor.Messenger, msg sproto.NotifyContainerRunning) error
- func (k ResourceManager) ResolveResourcePool(ctx actor.Messenger, name string, slots int) (string, error)
- func (k ResourceManager) TaskContainerDefaults(ctx actor.Messenger, pool string, ...) (result model.TaskContainerDefaultsConfig, err error)
- func (k ResourceManager) ValidateResourcePool(ctx actor.Messenger, name string) error
- func (k ResourceManager) ValidateResourcePoolAvailability(ctx actor.Messenger, name string, slots int) ([]command.LaunchWarning, error)
- func (k ResourceManager) ValidateResources(ctx actor.Messenger, name string, slots int, command bool) error
- type SchedulerTick
- type StartTaskPod
- type SummarizeResources
Constants ¶
const (
	// ActionCoolDown is the rate limit for job submission.
	ActionCoolDown = 500 * time.Millisecond
)
const ResourceTypeNvidia = "nvidia.com/gpu"
ResourceTypeNvidia describes the GPU resource type.
Variables ¶
This section is empty.
Functions ¶
func Initialize ¶
func Initialize( s *actor.System, e *echo.Echo, c *actor.Ref, namespace string, namespaceToPoolName map[string]string, masterServiceName string, masterTLSConfig model.TLSClientConfig, loggingConfig model.LoggingConfig, leaveKubernetesResources bool, scheduler string, slotType device.Type, slotResourceRequests config.PodSlotResourceRequests, fluentConfig config.FluentConfig, credsDir string, masterIP string, masterPort int32, ) *actor.Ref
Initialize creates a new global pods actor.
Types ¶
type ChangePosition ¶
ChangePosition notifies the pods actor of a position change and tells it to resubmit the specified pod.
type ChangePriority ¶
ChangePriority notifies the pods actor of a priority change and tells it to resubmit the specified pod.
type KillTaskPod ¶
KillTaskPod notifies the pods actor to kill a pod.
type PreemptTaskPod ¶
type PreemptTaskPod struct {
PodName string
}
PreemptTaskPod notifies the pods actor to preempt a pod.
type ResourceManager ¶
type ResourceManager struct {
*actorrm.ResourceManager
}
ResourceManager is a resource manager that manages k8s resources.
func New ¶
func New( system *actor.System, db *db.PgDB, echo *echo.Echo, config *config.ResourceConfig, opts *aproto.MasterSetAgentOptions, cert *tls.Certificate, ) ResourceManager
New returns a new ResourceManager, which communicates with and submits work to a Kubernetes apiserver.
func (ResourceManager) DisableSlot ¶
func (k ResourceManager) DisableSlot( m actor.Messenger, req *apiv1.DisableSlotRequest, ) (resp *apiv1.DisableSlotResponse, err error)
DisableSlot implements 'det slot disable...' functionality.
func (ResourceManager) EnableSlot ¶
func (k ResourceManager) EnableSlot( m actor.Messenger, req *apiv1.EnableSlotRequest, ) (resp *apiv1.EnableSlotResponse, err error)
EnableSlot implements 'det slot enable...' functionality.
func (ResourceManager) GetResourcePoolRef ¶
func (k ResourceManager) GetResourcePoolRef( ctx actor.Messenger, name string, ) (*actor.Ref, error)
GetResourcePoolRef gets an actor ref to a resource pool by name.
func (ResourceManager) IsReattachEnabled ¶
func (k ResourceManager) IsReattachEnabled(ctx actor.Messenger) bool
IsReattachEnabled reports whether any resource pool (RP) is configured to support reattachment.
func (ResourceManager) IsReattachEnabledForRP ¶
func (k ResourceManager) IsReattachEnabledForRP(ctx actor.Messenger, rp string) bool
IsReattachEnabledForRP returns true if the specified RP has AgentReattachEnabled.
func (ResourceManager) IsReattachableOnlyAfterStarted ¶
func (k ResourceManager) IsReattachableOnlyAfterStarted(ctx actor.Messenger) bool
IsReattachableOnlyAfterStarted always returns false for the k8s resource manager.
func (ResourceManager) NotifyContainerRunning ¶
func (k ResourceManager) NotifyContainerRunning( ctx actor.Messenger, msg sproto.NotifyContainerRunning, ) error
NotifyContainerRunning receives a notification from the container to let the master know that the container is running.
func (ResourceManager) ResolveResourcePool ¶
func (k ResourceManager) ResolveResourcePool( ctx actor.Messenger, name string, slots int, ) (string, error)
ResolveResourcePool resolves the resource pool completely.
func (ResourceManager) TaskContainerDefaults ¶
func (k ResourceManager) TaskContainerDefaults( ctx actor.Messenger, pool string, fallbackConfig model.TaskContainerDefaultsConfig, ) (result model.TaskContainerDefaultsConfig, err error)
TaskContainerDefaults returns TaskContainerDefaults for the specified pool.
func (ResourceManager) ValidateResourcePool ¶
func (k ResourceManager) ValidateResourcePool(ctx actor.Messenger, name string) error
ValidateResourcePool validates that the named resource pool exists.
func (ResourceManager) ValidateResourcePoolAvailability ¶
func (k ResourceManager) ValidateResourcePoolAvailability( ctx actor.Messenger, name string, slots int, ) ([]command.LaunchWarning, error)
ValidateResourcePoolAvailability checks the available resources for a given pool. This is a no-op for k8s.
func (ResourceManager) ValidateResources ¶
func (k ResourceManager) ValidateResources( ctx actor.Messenger, name string, slots int, command bool, ) error
ValidateResources ensures enough resources are available in the resource pool. This is a no-op for k8s.
type SchedulerTick ¶
type SchedulerTick struct{}
SchedulerTick notifies the Resource Manager to submit pending jobs.
type StartTaskPod ¶
type StartTaskPod struct {
	TaskActor    *actor.Ref
	Spec         tasks.TaskSpec
	Slots        int
	Rank         int
	ResourcePool string
	Namespace    string
	LogContext   logger.Context
}
StartTaskPod notifies the pods actor to start a pod with the task spec.
type SummarizeResources ¶
type SummarizeResources struct {
PoolName string
}
SummarizeResources summarizes pod resources.