Documentation ¶
Index ¶
- Constants
- type ResourceManager
- func (k *ResourceManager) Allocate(msg sproto.AllocateRequest) (*sproto.ResourcesSubscription, error)
- func (k *ResourceManager) CreateNamespace(namespaceName string, clusterName string, fanout bool) error
- func (k *ResourceManager) DefaultNamespace(clusterName string) (*string, error)
- func (ResourceManager) DeleteJob(sproto.DeleteJob) (sproto.DeleteJobResponse, error)
- func (k *ResourceManager) DeleteNamespace(namespace string) error
- func (k *ResourceManager) DisableAgent(req *apiv1.DisableAgentRequest) (resp *apiv1.DisableAgentResponse, err error)
- func (k ResourceManager) DisableSlot(req *apiv1.DisableSlotRequest) (resp *apiv1.DisableSlotResponse, err error)
- func (k *ResourceManager) EnableAgent(req *apiv1.EnableAgentRequest) (resp *apiv1.EnableAgentResponse, err error)
- func (k ResourceManager) EnableSlot(req *apiv1.EnableSlotRequest) (resp *apiv1.EnableSlotResponse, err error)
- func (k *ResourceManager) GetAgent(msg *apiv1.GetAgentRequest) (*apiv1.GetAgentResponse, error)
- func (k *ResourceManager) GetAgents() (*apiv1.GetAgentsResponse, error)
- func (k *ResourceManager) GetAllocationSummaries() (map[model.AllocationID]sproto.AllocationSummary, error)
- func (k *ResourceManager) GetDefaultAuxResourcePool() (rm.ResourcePoolName, error)
- func (k *ResourceManager) GetDefaultComputeResourcePool() (rm.ResourcePoolName, error)
- func (ResourceManager) GetExternalJobs(rm.ResourcePoolName) ([]*jobv1.Job, error)
- func (k *ResourceManager) GetJobQ(rpName rm.ResourcePoolName) (map[model.JobID]*sproto.RMJobInfo, error)
- func (k *ResourceManager) GetJobQueueStatsRequest(msg *apiv1.GetJobQueueStatsRequest) (*apiv1.GetJobQueueStatsResponse, error)
- func (k *ResourceManager) GetNamespaceResourceQuota(namespaceName string, clusterName string) (*float64, error)
- func (k *ResourceManager) GetResourcePools() (*apiv1.GetResourcePoolsResponse, error)
- func (k *ResourceManager) GetSlot(msg *apiv1.GetSlotRequest) (*apiv1.GetSlotResponse, error)
- func (k *ResourceManager) GetSlots(msg *apiv1.GetSlotsRequest) (*apiv1.GetSlotsResponse, error)
- func (k *ResourceManager) HealthCheck() []model.ResourceManagerHealth
- func (k ResourceManager) IsReattachableOnlyAfterStarted() bool
- func (k ResourceManager) NotifyContainerRunning(msg sproto.NotifyContainerRunning) error
- func (k *ResourceManager) RecoverJobPosition(msg sproto.RecoverJobPosition)
- func (k *ResourceManager) Release(msg sproto.ResourcesReleased)
- func (k *ResourceManager) RemoveEmptyNamespace(namespaceName string, clusterName string) error
- func (k ResourceManager) ResolveResourcePool(name rm.ResourcePoolName, workspaceID int, slots int) (rm.ResourcePoolName, error)
- func (k *ResourceManager) SetGroupMaxSlots(msg sproto.SetGroupMaxSlots)
- func (k *ResourceManager) SetGroupPriority(msg sproto.SetGroupPriority) error
- func (k *ResourceManager) SetGroupWeight(msg sproto.SetGroupWeight) error
- func (k *ResourceManager) SetResourceQuota(quota int, namespace, clusterName string) error
- func (k *ResourceManager) SmallerValueIsHigherPriority() (bool, error)
- func (k ResourceManager) TaskContainerDefaults(resourcePoolName rm.ResourcePoolName, ...) (model.TaskContainerDefaultsConfig, error)
- func (k ResourceManager) ValidateResourcePool(name rm.ResourcePoolName) error
- func (k *ResourceManager) ValidateResources(msg sproto.ValidateResourcesRequest) ([]command.LaunchWarning, error)
- func (k *ResourceManager) VerifyNamespaceExists(namespaceName string, clusterName string) error
Constants ¶
const (
	// ReleaseNamespaceEnvVar is the name of the environment variable within a pod running the
	// master service containing the namespace in which determined was deployed.
	ReleaseNamespaceEnvVar = "DET_RELEASE_NAMESPACE"
	// ResourceTypeNvidia describes the GPU resource type.
	ResourceTypeNvidia = "nvidia.com/gpu"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ResourceManager ¶
type ResourceManager struct {
// contains filtered or unexported fields
}
ResourceManager is a resource manager that manages k8s resources.
func New ¶
func New( db *db.PgDB, rmConfigs *config.ResourceManagerWithPoolsConfig, taskContainerDefaults *model.TaskContainerDefaultsConfig, opts *aproto.MasterSetAgentOptions, cert *tls.Certificate, ) (*ResourceManager, error)
New returns a new ResourceManager, which communicates with and submits work to a Kubernetes apiserver.
func (*ResourceManager) Allocate ¶
func (k *ResourceManager) Allocate(msg sproto.AllocateRequest) (*sproto.ResourcesSubscription, error)
Allocate implements rm.ResourceManager.
func (*ResourceManager) CreateNamespace ¶
func (k *ResourceManager) CreateNamespace(namespaceName string, clusterName string, fanout bool, ) error
CreateNamespace implements rm.ResourceManager.
func (*ResourceManager) DefaultNamespace ¶
func (k *ResourceManager) DefaultNamespace(clusterName string) (*string, error)
DefaultNamespace implements rm.ResourceManager.
func (ResourceManager) DeleteJob ¶
func (ResourceManager) DeleteJob(sproto.DeleteJob) (sproto.DeleteJobResponse, error)
DeleteJob implements rm.ResourceManager.
func (*ResourceManager) DeleteNamespace ¶
func (k *ResourceManager) DeleteNamespace(namespace string) error
DeleteNamespace implements rm.ResourceManager.
func (*ResourceManager) DisableAgent ¶
func (k *ResourceManager) DisableAgent( req *apiv1.DisableAgentRequest, ) (resp *apiv1.DisableAgentResponse, err error)
DisableAgent prevents scheduling on a node and can optionally kill running jobs.
func (ResourceManager) DisableSlot ¶
func (k ResourceManager) DisableSlot( req *apiv1.DisableSlotRequest, ) (resp *apiv1.DisableSlotResponse, err error)
DisableSlot implements 'det slot disable...' functionality.
func (*ResourceManager) EnableAgent ¶
func (k *ResourceManager) EnableAgent( req *apiv1.EnableAgentRequest, ) (resp *apiv1.EnableAgentResponse, err error)
EnableAgent allows scheduling on a node that has been disabled.
func (ResourceManager) EnableSlot ¶
func (k ResourceManager) EnableSlot( req *apiv1.EnableSlotRequest, ) (resp *apiv1.EnableSlotResponse, err error)
EnableSlot implements 'det slot enable...' functionality.
func (*ResourceManager) GetAgent ¶
func (k *ResourceManager) GetAgent(msg *apiv1.GetAgentRequest) (*apiv1.GetAgentResponse, error)
GetAgent implements rm.ResourceManager.
func (*ResourceManager) GetAgents ¶
func (k *ResourceManager) GetAgents() (*apiv1.GetAgentsResponse, error)
GetAgents implements rm.ResourceManager.
func (*ResourceManager) GetAllocationSummaries ¶
func (k *ResourceManager) GetAllocationSummaries() (map[model.AllocationID]sproto.AllocationSummary, error)
GetAllocationSummaries implements rm.ResourceManager.
func (*ResourceManager) GetDefaultAuxResourcePool ¶
func (k *ResourceManager) GetDefaultAuxResourcePool() (rm.ResourcePoolName, error)
GetDefaultAuxResourcePool implements rm.ResourceManager.
func (*ResourceManager) GetDefaultComputeResourcePool ¶
func (k *ResourceManager) GetDefaultComputeResourcePool() (rm.ResourcePoolName, error)
GetDefaultComputeResourcePool implements rm.ResourceManager.
func (ResourceManager) GetExternalJobs ¶
func (ResourceManager) GetExternalJobs(rm.ResourcePoolName) ([]*jobv1.Job, error)
GetExternalJobs implements rm.ResourceManager.
func (*ResourceManager) GetJobQ ¶
func (k *ResourceManager) GetJobQ(rpName rm.ResourcePoolName) (map[model.JobID]*sproto.RMJobInfo, error)
GetJobQ implements rm.ResourceManager.
func (*ResourceManager) GetJobQueueStatsRequest ¶
func (k *ResourceManager) GetJobQueueStatsRequest( msg *apiv1.GetJobQueueStatsRequest, ) (*apiv1.GetJobQueueStatsResponse, error)
GetJobQueueStatsRequest implements rm.ResourceManager.
func (*ResourceManager) GetNamespaceResourceQuota ¶
func (k *ResourceManager) GetNamespaceResourceQuota(namespaceName string, clusterName string) (*float64, error)
GetNamespaceResourceQuota gets the resource quota for the specified namespace.
func (*ResourceManager) GetResourcePools ¶
func (k *ResourceManager) GetResourcePools() (*apiv1.GetResourcePoolsResponse, error)
GetResourcePools implements rm.ResourceManager.
func (*ResourceManager) GetSlot ¶
func (k *ResourceManager) GetSlot(msg *apiv1.GetSlotRequest) (*apiv1.GetSlotResponse, error)
GetSlot implements rm.ResourceManager.
func (*ResourceManager) GetSlots ¶
func (k *ResourceManager) GetSlots(msg *apiv1.GetSlotsRequest) (*apiv1.GetSlotsResponse, error)
GetSlots implements rm.ResourceManager.
func (*ResourceManager) HealthCheck ¶
func (k *ResourceManager) HealthCheck() []model.ResourceManagerHealth
HealthCheck tries to call the KubeAPI.
func (ResourceManager) IsReattachableOnlyAfterStarted ¶
func (k ResourceManager) IsReattachableOnlyAfterStarted() bool
IsReattachableOnlyAfterStarted always returns false for the k8s resource manager.
func (ResourceManager) NotifyContainerRunning ¶
func (k ResourceManager) NotifyContainerRunning( msg sproto.NotifyContainerRunning, ) error
NotifyContainerRunning receives a notification from the container to let the master know that the container is running.
func (*ResourceManager) RecoverJobPosition ¶
func (k *ResourceManager) RecoverJobPosition(msg sproto.RecoverJobPosition)
RecoverJobPosition implements rm.ResourceManager.
func (*ResourceManager) Release ¶
func (k *ResourceManager) Release(msg sproto.ResourcesReleased)
Release implements rm.ResourceManager.
func (*ResourceManager) RemoveEmptyNamespace ¶
func (k *ResourceManager) RemoveEmptyNamespace(namespaceName string, clusterName string, ) error
RemoveEmptyNamespace removes a namespace from our interfaces in the cluster if it is no longer used by any workspace.
func (ResourceManager) ResolveResourcePool ¶
func (k ResourceManager) ResolveResourcePool( name rm.ResourcePoolName, workspaceID int, slots int, ) (rm.ResourcePoolName, error)
ResolveResourcePool resolves the resource pool completely.
func (*ResourceManager) SetGroupMaxSlots ¶
func (k *ResourceManager) SetGroupMaxSlots(msg sproto.SetGroupMaxSlots)
SetGroupMaxSlots implements rm.ResourceManager.
func (*ResourceManager) SetGroupPriority ¶
func (k *ResourceManager) SetGroupPriority(msg sproto.SetGroupPriority) error
SetGroupPriority implements rm.ResourceManager.
func (*ResourceManager) SetGroupWeight ¶
func (k *ResourceManager) SetGroupWeight(msg sproto.SetGroupWeight) error
SetGroupWeight implements rm.ResourceManager.
func (*ResourceManager) SetResourceQuota ¶
func (k *ResourceManager) SetResourceQuota(quota int, namespace, clusterName string) error
SetResourceQuota implements rm.ResourceManager.
func (*ResourceManager) SmallerValueIsHigherPriority ¶
func (k *ResourceManager) SmallerValueIsHigherPriority() (bool, error)
SmallerValueIsHigherPriority returns true if smaller priority values indicate a higher priority level.
func (ResourceManager) TaskContainerDefaults ¶
func (k ResourceManager) TaskContainerDefaults( resourcePoolName rm.ResourcePoolName, defaultConfig model.TaskContainerDefaultsConfig, ) (model.TaskContainerDefaultsConfig, error)
TaskContainerDefaults returns TaskContainerDefaults for the specified pool.
func (ResourceManager) ValidateResourcePool ¶
func (k ResourceManager) ValidateResourcePool(name rm.ResourcePoolName) error
ValidateResourcePool validates that the named resource pool exists.
func (*ResourceManager) ValidateResources ¶
func (k *ResourceManager) ValidateResources( msg sproto.ValidateResourcesRequest, ) ([]command.LaunchWarning, error)
ValidateResources implements rm.ResourceManager.
func (*ResourceManager) VerifyNamespaceExists ¶
func (k *ResourceManager) VerifyNamespaceExists(namespaceName string, clusterName string) error
VerifyNamespaceExists implements rm.ResourceManager.