api

package
v1.1.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 8, 2021 License: Apache-2.0 Imports: 17 Imported by: 28

Documentation

Index

Constants

View Source
const (
	// NamespaceWeightKey is the key in ResourceQuota.spec.hard that indicates the weight of this namespace.
	NamespaceWeightKey = "volcano.sh/namespace.weight"
	// DefaultNamespaceWeight is the default weight of a namespace.
	DefaultNamespaceWeight = 1
)
View Source
const (
	// NodePodNumberExceeded means the pods on a node exceed the allocatable pod number.
	NodePodNumberExceeded = "node(s) pod number exceeded"
	// NodeResourceFitFailed means a node could not fit the request of a pod.
	NodeResourceFitFailed = "node(s) resource fit failed"

	// AllNodeUnavailableMsg is the default error message.
	AllNodeUnavailableMsg = "all nodes are unavailable"
)
View Source
const (

	// VolcanoGPUResource is the extended GPU resource.
	VolcanoGPUResource = "volcano.sh/gpu-memory"
	// VolcanoGPUNumber is the virtual GPU card number.
	VolcanoGPUNumber = "volcano.sh/gpu-number"

	// PredicateTime is the key of the predicate time.
	PredicateTime = "volcano.sh/predicate-time"
	// GPUIndex is the key of the GPU index.
	GPUIndex = "volcano.sh/gpu-index"
)
View Source
const (
	// GPUResourceName needs to follow the resource name used by the NVIDIA device plugin; see
	// https://github.com/NVIDIA/k8s-device-plugin/blob/66a35b71ac4b5cbfb04714678b548bd77e5ba719/server.go#L20
	GPUResourceName = "nvidia.com/gpu"
)
View Source
const (
	// PodGroupVersionV1Beta1 represents the v1beta1 PodGroupVersion.
	PodGroupVersionV1Beta1 string = "v1beta1"
)

These are the valid phases of PodGroups.

Variables

This section is empty.

Functions

func AddGPUIndexPatch added in v1.0.0

func AddGPUIndexPatch(id int) string

AddGPUIndexPatch returns the patch adding GPU index

func AllocatedStatus

func AllocatedStatus(status TaskStatus) bool

AllocatedStatus checks whether the given task status counts as an allocated status.

func GetGPUIndex added in v1.0.0

func GetGPUIndex(pod *v1.Pod) int

GetGPUIndex returns the ID of the GPU

func GetGPUResourceOfPod added in v1.0.0

func GetGPUResourceOfPod(pod *v1.Pod) uint

GetGPUResourceOfPod returns the GPU resource required by the pod.

func JobTerminated

func JobTerminated(job *JobInfo) bool

JobTerminated checks whether job was terminated.

func MergeErrors

func MergeErrors(errs ...error) error

MergeErrors is used to merge multiple errors into single error

func RemoveGPUIndexPatch added in v1.0.0

func RemoveGPUIndexPatch() string

RemoveGPUIndexPatch returns the patch removing GPU index

Types

type BatchNodeOrderFn

type BatchNodeOrderFn func(*TaskInfo, []*NodeInfo) (map[string]float64, error)

BatchNodeOrderFn is the func declaration used to get priority score for ALL nodes for a particular task.

type BestNodeFn added in v0.4.1

type BestNodeFn func(*TaskInfo, map[float64][]*NodeInfo) *NodeInfo

BestNodeFn is the func declaration used to return the nodeScores to plugins.

type ClusterInfo

// ClusterInfo is a snapshot of the cluster taken by the cache.
type ClusterInfo struct {
	// Jobs holds the JobInfo entries keyed by JobID.
	Jobs          map[JobID]*JobInfo
	// Nodes holds the NodeInfo entries keyed by string
	// (presumably the node name; confirm against the cache code).
	Nodes         map[string]*NodeInfo
	// Queues holds the QueueInfo entries keyed by QueueID.
	Queues        map[QueueID]*QueueInfo
	// NamespaceInfo holds the NamespaceInfo entries keyed by NamespaceName.
	NamespaceInfo map[NamespaceName]*NamespaceInfo
}

ClusterInfo is a snapshot of the cluster taken by the cache.

func (ClusterInfo) String

func (ci ClusterInfo) String() string

type CompareFn

type CompareFn func(interface{}, interface{}) int

CompareFn is the func declaration used by sort or priority queue.

type EvictableFn

type EvictableFn func(*TaskInfo, []*TaskInfo) []*TaskInfo

EvictableFn is the func declaration used to evict tasks.

type FitError

// FitError describes the reason why a task could not fit a node.
type FitError struct {
	// NodeName is the name of the node the error refers to.
	NodeName string
	// Reasons lists the reasons why the task could not fit that node.
	Reasons  []string
	// contains filtered or unexported fields
}

FitError describes the reason why a task could not fit a node.

func NewFitError

func NewFitError(task *TaskInfo, node *NodeInfo, message ...string) *FitError

NewFitError returns a FitError for the given task and node, built from the given messages.

func (*FitError) Error

func (f *FitError) Error() string

Error returns the final error message

type FitErrors

// FitErrors is a set of FitError values collected across many nodes.
type FitErrors struct {
	// contains filtered or unexported fields
}

FitErrors is a set of FitError values collected across many nodes.

func NewFitErrors

func NewFitErrors() *FitErrors

NewFitErrors returns a new FitErrors.

func (*FitErrors) Error

func (f *FitErrors) Error() string

Error returns the final error message

func (*FitErrors) SetError

func (f *FitErrors) SetError(err string)

SetError sets the common error message in FitErrors.

func (*FitErrors) SetNodeError

func (f *FitErrors) SetNodeError(nodeName string, err error)

SetNodeError sets the error for the given node in FitErrors.

type GPUDevice added in v1.0.0

// GPUDevice includes the GPU ID, its memory, and the pods that are sharing it.
type GPUDevice struct {
	// ID is the GPU ID.
	ID int
	// PodMap holds the pods that are sharing this GPU, keyed by string
	// (key format is not shown here; confirm against the implementation).
	PodMap map[string]*v1.Pod
	// Memory is the memory per card.
	Memory uint
}

GPUDevice includes the GPU ID, its memory, and the pods that are sharing it.

func NewGPUDevice added in v1.0.0

func NewGPUDevice(id int, mem uint) *GPUDevice

NewGPUDevice creates a device

type JobID

type JobID types.UID

JobID is the type of JobInfo's ID.

type JobInfo

// JobInfo holds all information about a Job.
type JobInfo struct {
	// UID is the ID of the job.
	UID JobID

	Name      string
	Namespace string

	// Queue is a QueueID (presumably the queue this job belongs to; confirm).
	Queue QueueID

	Priority int32

	MinAvailable int32

	JobFitErrors   string
	// NodesFitErrors holds FitErrors keyed by TaskID.
	NodesFitErrors map[TaskID]*FitErrors

	// All tasks of the Job.
	// TaskStatusIndex groups the tasks by TaskStatus; Tasks holds them all.
	TaskStatusIndex map[TaskStatus]tasksMap
	Tasks           tasksMap

	Allocated    *Resource
	TotalRequest *Resource

	CreationTimestamp metav1.Time
	PodGroup          *PodGroup

	ScheduleStartTimestamp metav1.Time
}

JobInfo holds all information about a Job.

func NewJobInfo

func NewJobInfo(uid JobID, tasks ...*TaskInfo) *JobInfo

NewJobInfo creates a new jobInfo for set of tasks

func (*JobInfo) AddTaskInfo

func (ji *JobInfo) AddTaskInfo(ti *TaskInfo)

AddTaskInfo is used to add a task to a job

func (*JobInfo) Clone

func (ji *JobInfo) Clone() *JobInfo

Clone is used to clone a jobInfo object

func (*JobInfo) DeleteTaskInfo

func (ji *JobInfo) DeleteTaskInfo(ti *TaskInfo) error

DeleteTaskInfo is used to delete a task from a job

func (*JobInfo) FitError

func (ji *JobInfo) FitError() string

FitError returns detailed information on why a job's task failed to fit on each available node

func (*JobInfo) Pipelined

func (ji *JobInfo) Pipelined() bool

Pipelined returns whether the number of ready and pipelined task is enough

func (*JobInfo) Ready

func (ji *JobInfo) Ready() bool

Ready returns whether job is ready for run

func (*JobInfo) ReadyTaskNum

func (ji *JobInfo) ReadyTaskNum() int32

ReadyTaskNum returns the number of tasks that are ready or that are best-effort.

func (*JobInfo) SetPodGroup

func (ji *JobInfo) SetPodGroup(pg *PodGroup)

SetPodGroup sets podGroup details to a job

func (JobInfo) String

func (ji JobInfo) String() string

String returns a jobInfo object in string format

func (*JobInfo) UnsetPodGroup

func (ji *JobInfo) UnsetPodGroup()

UnsetPodGroup removes podGroup details from a job

func (*JobInfo) UpdateTaskStatus

func (ji *JobInfo) UpdateTaskStatus(task *TaskInfo, status TaskStatus) error

UpdateTaskStatus is used to update task's status in a job. If error occurs both task and job are guaranteed to be in the original state.

func (*JobInfo) ValidTaskNum

func (ji *JobInfo) ValidTaskNum() int32

ValidTaskNum returns the number of tasks that are valid.

func (*JobInfo) WaitingTaskNum

func (ji *JobInfo) WaitingTaskNum() int32

WaitingTaskNum returns the number of tasks that are pipelined.

type LessFn

type LessFn func(interface{}, interface{}) bool

LessFn is the func declaration used by sort or priority queue.

type NamespaceCollection

// NamespaceCollection records all details about a namespace.
type NamespaceCollection struct {
	// Name is the name passed to NewNamespaceCollection (presumably; confirm).
	Name string
	// contains filtered or unexported fields
}

NamespaceCollection records all details about a namespace.

func NewNamespaceCollection

func NewNamespaceCollection(name string) *NamespaceCollection

NewNamespaceCollection creates new NamespaceCollection object to record all information about a namespace

func (*NamespaceCollection) Delete

func (n *NamespaceCollection) Delete(quota *v1.ResourceQuota)

Delete removes the registered information according to the quota object.

func (*NamespaceCollection) Snapshot

func (n *NamespaceCollection) Snapshot() *NamespaceInfo

Snapshot clones a NamespaceInfo, without the heap, from the NamespaceCollection.

func (*NamespaceCollection) Update

func (n *NamespaceCollection) Update(quota *v1.ResourceQuota)

Update modifies the registered information according to the quota object.

type NamespaceInfo

// NamespaceInfo records information about a namespace.
type NamespaceInfo struct {
	// Name is the name of this namespace.
	Name NamespaceName
	// Weight is the highest weight among the namespace's ResourceQuotas.
	Weight int64
}

NamespaceInfo records information about a namespace.

func (*NamespaceInfo) GetWeight

func (n *NamespaceInfo) GetWeight() int64

GetWeight returns the weight of a namespace; any invalid case yields the default value.

type NamespaceName

type NamespaceName string

NamespaceName is name of namespace

type NodeInfo

// NodeInfo is node-level aggregated information.
type NodeInfo struct {
	Name string
	// Node is the Kubernetes node object.
	Node *v1.Node

	// The state of the node.
	State NodeState

	// The releasing resource on that node.
	Releasing *Resource
	// The pipelined resource on that node.
	Pipelined *Resource
	// The idle resource on that node.
	Idle *Resource
	// The used resource on that node, including running and terminating
	// pods.
	Used *Resource

	Allocatable *Resource
	Capability  *Resource

	// Tasks holds the TaskInfo entries keyed by TaskID.
	Tasks map[TaskID]*TaskInfo

	// Used to store custom information.
	Others     map[string]interface{}
	// GPUDevices holds the GPUDevice entries keyed by int
	// (presumably the GPU ID; confirm against NewNodeInfo/SetNode).
	GPUDevices map[int]*GPUDevice
}

NodeInfo is node level aggregated information.

func NewNodeInfo

func NewNodeInfo(node *v1.Node) *NodeInfo

NewNodeInfo is used to create new nodeInfo object

func (*NodeInfo) AddGPUResource added in v1.0.0

func (ni *NodeInfo) AddGPUResource(pod *v1.Pod)

AddGPUResource adds the pod to GPU pool if it is assigned

func (*NodeInfo) AddTask

func (ni *NodeInfo) AddTask(task *TaskInfo) error

AddTask is used to add a task in nodeInfo object

If error occurs both task and node are guaranteed to be in the original state.

func (*NodeInfo) Clone

func (ni *NodeInfo) Clone() *NodeInfo

Clone is used to clone a nodeInfo object.

func (*NodeInfo) FutureIdle

func (ni *NodeInfo) FutureIdle() *Resource

FutureIdle returns resources that will be idle in the future:

That is current idle resources plus released resources minus pipelined resources.

func (*NodeInfo) GetDevicesIdleGPUMemory added in v1.0.0

func (ni *NodeInfo) GetDevicesIdleGPUMemory() map[int]uint

GetDevicesIdleGPUMemory returns all the idle GPU memory by gpu card.

func (*NodeInfo) Pods

func (ni *NodeInfo) Pods() (pods []*v1.Pod)

Pods returns all pods running in that node

func (*NodeInfo) Ready

func (ni *NodeInfo) Ready() bool

Ready returns whether node is ready for scheduling

func (*NodeInfo) RemoveTask

func (ni *NodeInfo) RemoveTask(ti *TaskInfo) error

RemoveTask is used to remove a task from a nodeInfo object.

If error occurs both task and node are guaranteed to be in the original state.

func (*NodeInfo) SetNode

func (ni *NodeInfo) SetNode(node *v1.Node)

SetNode sets kubernetes node object to nodeInfo object

func (NodeInfo) String

func (ni NodeInfo) String() string

String returns nodeInfo details in string format

func (*NodeInfo) SubGPUResource added in v1.0.0

func (ni *NodeInfo) SubGPUResource(pod *v1.Pod)

SubGPUResource frees the GPU held by the pod.

func (*NodeInfo) UpdateTask

func (ni *NodeInfo) UpdateTask(ti *TaskInfo) error

UpdateTask is used to update a task in nodeInfo object.

If error occurs both task and node are guaranteed to be in the original state.

type NodeMapFn

type NodeMapFn func(*TaskInfo, *NodeInfo) (float64, error)

NodeMapFn is the func declaration used to get priority score for a node for a particular task.

type NodeOrderFn

type NodeOrderFn func(*TaskInfo, *NodeInfo) (float64, error)

NodeOrderFn is the func declaration used to get priority score for a node for a particular task.

type NodeOrderMapFn

type NodeOrderMapFn func(*TaskInfo, *NodeInfo) (map[string]float64, float64, error)

NodeOrderMapFn is the func declaration used to get priority score of all plugins for a node for a particular task.

type NodeOrderReduceFn

type NodeOrderReduceFn func(*TaskInfo, map[string]schedulerapi.HostPriorityList) (map[string]float64, error)

NodeOrderReduceFn is the func declaration used to reduce priority score of all nodes for a plugin for a particular task.

type NodePhase

type NodePhase int

NodePhase defines the phase of node

// NodePhase values are produced by 1 << iota, so Ready is 1 and NotReady is 2.
const (
	// Ready means the node is ready for scheduling.
	Ready NodePhase = 1 << iota
	// NotReady means the node is not ready for scheduling.
	NotReady
)

func (NodePhase) String

func (np NodePhase) String() string

type NodeReduceFn

type NodeReduceFn func(*TaskInfo, schedulerapi.HostPriorityList) error

NodeReduceFn is the func declaration used to reduce priority score for a node for a particular task.

type NodeResourceMap

type NodeResourceMap map[string]*Resource

NodeResourceMap stores resource in a node

type NodeState

// NodeState defines the current state of a node.
type NodeState struct {
	// Phase is the phase of the node (see NodePhase).
	Phase  NodePhase
	// Reason is a free-form string (presumably explaining the phase; confirm).
	Reason string
}

NodeState defines the current state of node.

type PodGroup

// PodGroup is a collection of Pods; it is used for batch workloads.
// It embeds scheduling.PodGroup and adds a Version field.
type PodGroup struct {
	scheduling.PodGroup

	// Version represents the version of the PodGroup.
	Version string
}

PodGroup is a collection of Pod; used for batch workload.

type PodGroupPhase

type PodGroupPhase string

PodGroupPhase is the phase of a pod group at the current time.

type PredicateFn

type PredicateFn func(*TaskInfo, *NodeInfo) error

PredicateFn is the func declaration used to predicate node for task.

type QueueID

type QueueID types.UID

QueueID is UID type, serves as unique ID for each queue

type QueueInfo

// QueueInfo holds all details about a queue.
type QueueInfo struct {
	UID  QueueID
	Name string

	Weight int32

	// Weights is a list of slash-separated float numbers.
	// Each of them is a weight corresponding to a
	// hierarchy level.
	Weights string
	// Hierarchy is a list of node names along the
	// path from the root to the node itself.
	Hierarchy string

	Queue *scheduling.Queue
}

QueueInfo holds all details about a queue.

func NewQueueInfo

func NewQueueInfo(queue *scheduling.Queue) *QueueInfo

NewQueueInfo creates new queueInfo object

func (*QueueInfo) Clone

func (q *QueueInfo) Clone() *QueueInfo

Clone is used to clone queueInfo object

func (*QueueInfo) Reclaimable

func (q *QueueInfo) Reclaimable() bool

Reclaimable returns whether the queue is reclaimable.

type ReservedNodesFn added in v1.1.0

type ReservedNodesFn func()

ReservedNodesFn is the func declaration used to select the reserved nodes

type Resource

// Resource defines all the resource types.
type Resource struct {
	MilliCPU float64
	Memory   float64

	// ScalarResources holds the quantities of the remaining resources,
	// keyed by v1.ResourceName.
	ScalarResources map[v1.ResourceName]float64

	// MaxTaskNum is only used by predicates; it should NOT
	// be accounted for in other operators, e.g. Add.
	MaxTaskNum int
}

Resource defines all the resource types.

func EmptyResource

func EmptyResource() *Resource

EmptyResource creates and returns an empty resource object.

func GetPodResourceRequest

func GetPodResourceRequest(pod *v1.Pod) *Resource

GetPodResourceRequest returns all the resource required for that pod

func GetPodResourceWithoutInitContainers

func GetPodResourceWithoutInitContainers(pod *v1.Pod) *Resource

GetPodResourceWithoutInitContainers returns Pod's resource request, it does not contain init containers' resource request.

func NewResource

func NewResource(rl v1.ResourceList) *Resource

NewResource creates a new resource object from a resource list.

func (*Resource) Add

func (r *Resource) Add(rr *Resource) *Resource

Add is used to add the two resources

func (*Resource) AddScalar

func (r *Resource) AddScalar(name v1.ResourceName, quantity float64)

AddScalar adds a resource by a scalar value of this resource.

func (*Resource) Clone

func (r *Resource) Clone() *Resource

Clone is used to clone a resource type

func (*Resource) Diff

func (r *Resource) Diff(rr *Resource) (*Resource, *Resource)

Diff calculates the difference between two resources.

func (*Resource) FitDelta

func (r *Resource) FitDelta(rr *Resource) *Resource

FitDelta computes the delta between a resource object representing available resources and an operand representing resources being requested. Any field that is less than 0 after the operation represents an insufficient resource.

func (*Resource) Get

func (r *Resource) Get(rn v1.ResourceName) float64

Get returns the resource value for that particular resource type

func (*Resource) IsEmpty

func (r *Resource) IsEmpty() bool

IsEmpty returns a bool after checking whether any of the resources is less than the minimum possible value.

func (*Resource) IsZero

func (r *Resource) IsZero(rn v1.ResourceName) bool

IsZero checks whether the given resource is less than the minimum possible value.

func (*Resource) Less

func (r *Resource) Less(rr *Resource) bool

Less checks whether a resource is less than other

func (*Resource) LessEqual

func (r *Resource) LessEqual(rr *Resource) bool

LessEqual checks whether a resource is less than or equal to another resource.

func (*Resource) LessEqualStrict

func (r *Resource) LessEqualStrict(rr *Resource) bool

LessEqualStrict checks whether a resource is less than or equal to another resource.

func (*Resource) MinDimensionResource added in v1.1.1

func (r *Resource) MinDimensionResource(rr *Resource) *Resource

MinDimensionResource resets every dimension of r that is greater than the corresponding dimension of rr to rr's value. For example, if r is <cpu 2000.00, memory 4047845376.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> and rr is <cpu 3000.00, memory 1000.00>, the resulting r is <cpu 2000.00, memory 1000.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00>.

func (*Resource) Multi

func (r *Resource) Multi(ratio float64) *Resource

Multi multiplies the resource by the provided ratio.

func (*Resource) ResourceNames

func (r *Resource) ResourceNames() []v1.ResourceName

ResourceNames returns all resource types

func (*Resource) Scale added in v1.1.0

func (r *Resource) Scale(scale float64) *Resource

Scale updates resource to the provided scale

func (*Resource) SetMaxResource

func (r *Resource) SetMaxResource(rr *Resource)

SetMaxResource compares r with rr and takes the max value for each resource.

func (*Resource) SetScalar

func (r *Resource) SetScalar(name v1.ResourceName, quantity float64)

SetScalar sets a resource by a scalar value of this resource.

func (*Resource) String

func (r *Resource) String() string

String returns resource details in string format

func (*Resource) Sub

func (r *Resource) Sub(rr *Resource) *Resource

Sub subtracts two Resource objects.

type TargetJobFn added in v1.1.0

type TargetJobFn func([]*JobInfo) *JobInfo

TargetJobFn is the func declaration used to select the target job that satisfies some conditions.

type TaskID

type TaskID types.UID

TaskID is UID type for Task

func PodKey

func PodKey(pod *v1.Pod) TaskID

PodKey returns the string key of a pod.

type TaskInfo

// TaskInfo holds all information about a task.
type TaskInfo struct {
	// UID is the ID of the task; Job is the ID of a job
	// (presumably the job that owns the task; confirm).
	UID TaskID
	Job JobID

	Name      string
	Namespace string

	// Resreq is the resource that is used while the task is running.
	Resreq *Resource
	// InitResreq is the resource that is used to launch the task.
	InitResreq *Resource

	NodeName    string
	Status      TaskStatus
	Priority    int32
	VolumeReady bool

	// Pod is the Kubernetes pod object for the task.
	Pod *v1.Pod
}

TaskInfo holds all information about a task.

func NewTaskInfo

func NewTaskInfo(pod *v1.Pod) *TaskInfo

NewTaskInfo creates new taskInfo object for a Pod

func (*TaskInfo) Clone

func (ti *TaskInfo) Clone() *TaskInfo

Clone is used for cloning a task

func (TaskInfo) String

func (ti TaskInfo) String() string

String returns the taskInfo details in a string

type TaskStatus

type TaskStatus int

TaskStatus defines the status of a task/pod.

// TaskStatus values are produced by 1 << iota, so each status occupies its
// own bit: Pending is 1, Allocated is 2, Pipelined is 4, and so on up to
// Unknown, which is 512.
const (
	// Pending means the task is pending in the apiserver.
	Pending TaskStatus = 1 << iota

	// Allocated means the scheduler has assigned a host to it.
	Allocated

	// Pipelined means the scheduler has assigned a host to wait for releasing resource.
	Pipelined

	// Binding means the scheduler has sent a Bind request to the apiserver.
	Binding

	// Bound means the task/Pod is bound to a host.
	Bound

	// Running means a task is running on the host.
	Running

	// Releasing means a task/pod is deleted.
	Releasing

	// Succeeded means that all containers in the pod have voluntarily terminated
	// with a container exit code of 0, and the system is not going to restart any of these containers.
	Succeeded

	// Failed means that all containers in the pod have terminated, and at least one container has
	// terminated in a failure (exited with a non-zero exit code or was stopped by the system).
	Failed

	// Unknown means the status of task/pod is unknown to the scheduler.
	Unknown
)

func (TaskStatus) String

func (ts TaskStatus) String() string

type ValidateExFn

type ValidateExFn func(interface{}) *ValidateResult

ValidateExFn is the func declaration used to validate the result.

type ValidateFn

type ValidateFn func(interface{}) bool

ValidateFn is the func declaration used to check object's status.

type ValidateResult

// ValidateResult is a struct that can be used to determine the result of a
// validation; it is the return type of ValidateExFn.
type ValidateResult struct {
	// Pass is a bool (presumably true when validation succeeded; confirm).
	Pass    bool
	Reason  string
	Message string
}

ValidateResult is a struct that can be used to determine the result.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL