Documentation ¶
Index ¶
- Constants
- Variables
- func AllocatedStatus(status TaskStatus) bool
- func BuildPodgroup(name, ns string, minMember int32, minResource v1.ResourceList) scheduling.PodGroup
- func BuildResourceList(cpu string, memory string, scalarResources ...ScalarResource) v1.ResourceList
- func BuildResourceListWithGPU(cpu string, memory string, GPU string, scalarResources ...ScalarResource) v1.ResourceList
- func CompletedStatus(status TaskStatus) bool
- func GenerateNodeResNumaSets(nodes map[string]*NodeInfo) map[string]ResNumaSets
- func GenerateNumaNodes(nodes map[string]*NodeInfo) map[string][]int
- func GetMinResource() float64
- func GetPodPreemptable(pod *v1.Pod) bool
- func GetPodResourceNumaInfo(ti *TaskInfo) map[int]v1.ResourceList
- func GetPodRevocableZone(pod *v1.Pod) string
- func IsCountQuota(name v1.ResourceName) bool
- func IsIgnoredScalarResource(name v1.ResourceName) bool
- func JobTerminated(job *JobInfo) bool
- func MergeErrors(errs ...error) error
- func ParseResourceList(m map[string]string) (v1.ResourceList, error)
- func PreemptableStatus(status TaskStatus) bool
- func ResFloat642Quantity(resName v1.ResourceName, quantity float64) resource.Quantity
- func ResQuantity2Float64(resName v1.ResourceName, quantity resource.Quantity) float64
- func WrapInsufficientResourceReason(resources []string) string
- type AllocatableFn
- type AllocateFailError
- type BatchNodeOrderFn
- type BestNodeFn
- type CSINodeStatusInfo
- type ClusterID
- type ClusterInfo
- type CompareFn
- type Devices
- type DimensionDefaultValue
- type DisruptionBudget
- type EvictableFn
- type FitError
- type FitErrors
- type JobEnqueuedFn
- type JobID
- type JobInfo
- func (ji *JobInfo) AddTaskInfo(ti *TaskInfo)
- func (ji *JobInfo) CheckTaskPipelined() bool
- func (ji *JobInfo) CheckTaskReady() bool
- func (ji *JobInfo) CheckTaskStarving() bool
- func (ji *JobInfo) CheckTaskValid() bool
- func (ji *JobInfo) Clone() *JobInfo
- func (ji *JobInfo) DeductSchGatedResources(res *Resource) *Resource
- func (ji *JobInfo) DeleteTaskInfo(ti *TaskInfo) error
- func (ji *JobInfo) FitError() string
- func (ji *JobInfo) FitFailedRoles() map[string]struct{}
- func (ji *JobInfo) GetElasticResources() *Resource
- func (ji *JobInfo) GetMinResources() *Resource
- func (ji *JobInfo) GetSchGatedPodResources() *Resource
- func (ji *JobInfo) HasPendingTasks() bool
- func (ji *JobInfo) IsPending() bool
- func (ji *JobInfo) IsPipelined() bool
- func (ji *JobInfo) IsReady() bool
- func (ji *JobInfo) IsStarving() bool
- func (ji *JobInfo) NeedContinueAllocating() bool
- func (ji *JobInfo) ParseMinMemberInfo(pg *PodGroup)
- func (ji *JobInfo) PendingBestEffortTaskNum() int32
- func (ji *JobInfo) ReadyTaskNum() int32
- func (ji *JobInfo) SetPodGroup(pg *PodGroup)
- func (ji JobInfo) String() string
- func (ji *JobInfo) TaskHasFitErrors(task *TaskInfo) bool
- func (ji *JobInfo) TaskSchedulingReason(tid TaskID) (reason, msg, nominatedNodeName string)
- func (ji *JobInfo) UnsetPodGroup()
- func (ji *JobInfo) UpdateTaskStatus(task *TaskInfo, status TaskStatus) error
- func (ji *JobInfo) ValidTaskNum() int32
- func (ji *JobInfo) WaitingTaskNum() int32
- type LessFn
- type NamespaceCollection
- type NamespaceInfo
- type NamespaceName
- type NodeInfo
- func (ni *NodeInfo) AddTask(task *TaskInfo) error
- func (ni *NodeInfo) Clone() *NodeInfo
- func (ni *NodeInfo) CloneImageSummary() map[string]*k8sframework.ImageStateSummary
- func (ni *NodeInfo) CloneOthers() map[string]interface{}
- func (ni *NodeInfo) FutureIdle() *Resource
- func (ni *NodeInfo) GetNodeAllocatable() *Resource
- func (ni *NodeInfo) Pods() (pods []*v1.Pod)
- func (ni *NodeInfo) Ready() bool
- func (ni *NodeInfo) RefreshNumaSchedulerInfoByCrd()
- func (ni *NodeInfo) RemoveTask(ti *TaskInfo) error
- func (ni *NodeInfo) SetNode(node *v1.Node)
- func (ni NodeInfo) String() string
- func (ni *NodeInfo) UpdateTask(ti *TaskInfo) error
- type NodeMapFn
- type NodeOrderFn
- type NodeOrderMapFn
- type NodeOrderReduceFn
- type NodePhase
- type NodeReduceFn
- type NodeResourceMap
- type NodeState
- type NodeUsage
- type NumaChgFlag
- type NumatopoInfo
- func (info *NumatopoInfo) AddTask(ti *TaskInfo)
- func (info *NumatopoInfo) Allocate(resSets ResNumaSets)
- func (info *NumatopoInfo) Compare(newInfo *NumatopoInfo) bool
- func (info *NumatopoInfo) DeepCopy() *NumatopoInfo
- func (info *NumatopoInfo) Release(resSets ResNumaSets)
- func (info *NumatopoInfo) RemoveTask(ti *TaskInfo)
- type PodGroup
- type PodGroupPhase
- type PodResourceDecision
- type PrePredicateFn
- type PredicateFn
- type QueueID
- type QueueInfo
- type ResNumaSets
- type ReservedNodesFn
- type Resource
- func (r *Resource) Add(rr *Resource) *Resource
- func (r *Resource) AddScalar(name v1.ResourceName, quantity float64)
- func (r *Resource) Clone() *Resource
- func (r *Resource) Diff(rr *Resource, defaultValue DimensionDefaultValue) (*Resource, *Resource)
- func (r *Resource) Equal(rr *Resource, defaultValue DimensionDefaultValue) bool
- func (r *Resource) FitDelta(rr *Resource) *Resource
- func (r *Resource) Get(rn v1.ResourceName) float64
- func (r *Resource) IsEmpty() bool
- func (r *Resource) IsZero(rn v1.ResourceName) bool
- func (r *Resource) Less(rr *Resource, defaultValue DimensionDefaultValue) bool
- func (r *Resource) LessEqual(rr *Resource, defaultValue DimensionDefaultValue) bool
- func (r *Resource) LessEqualPartly(rr *Resource, defaultValue DimensionDefaultValue) bool
- func (r *Resource) LessEqualWithDimension(rr *Resource, req *Resource) bool
- func (r *Resource) LessEqualWithResourcesName(rr *Resource, defaultValue DimensionDefaultValue) (bool, []string)
- func (r *Resource) LessPartly(rr *Resource, defaultValue DimensionDefaultValue) bool
- func (r *Resource) MinDimensionResource(rr *Resource, defaultValue DimensionDefaultValue) *Resource
- func (r *Resource) Multi(ratio float64) *Resource
- func (r *Resource) ResourceNames() ResourceNameList
- func (r *Resource) SetMaxResource(rr *Resource)
- func (r *Resource) SetScalar(name v1.ResourceName, quantity float64)
- func (r *Resource) String() string
- func (r *Resource) Sub(rr *Resource) *Resource
- func (r *Resource) SubWithoutAssert(rr *Resource) *Resource
- type ResourceInfo
- type ResourceNameList
- type ScalarResource
- type ScoredNode
- type SiloClusterInfo
- type Status
- type StatusSets
- type TargetJobFn
- type TaskID
- type TaskInfo
- func (ti *TaskInfo) ClearLastTxContext()
- func (ti *TaskInfo) Clone() *TaskInfo
- func (ti *TaskInfo) GenerateLastTxContext()
- func (ti *TaskInfo) GetTransactionContext() TransactionContext
- func (ti *TaskInfo) SetPodResourceDecision() error
- func (ti TaskInfo) String() string
- func (ti *TaskInfo) UnsetPodResourceDecision()
- type TaskStatus
- type TopologyInfo
- type TransactionContext
- type ValidateExFn
- type ValidateFn
- type ValidateResult
- type ValidateWithCandidateFn
- type VictimCompareFn
- type VictimTasksFn
- type VoteFn
Constants ¶
const ( // Success means that plugin ran correctly and found pod schedulable. // NOTE: A nil status is also considered as "Success". Success int = iota // Error is used for internal plugin errors, unexpected input, etc. Error // Unschedulable is used when a plugin finds a pod unschedulable. The scheduler might attempt to // preempt other pods to get this pod scheduled. Use UnschedulableAndUnresolvable to make the // scheduler skip preemption. // The accompanying status message should explain why the pod is unschedulable. Unschedulable // UnschedulableAndUnresolvable is used when a plugin finds a pod unschedulable and // preemption would not change anything. Plugins should return Unschedulable if it is possible // that the pod can get scheduled with preemption. // The accompanying status message should explain why the pod is unschedulable. UnschedulableAndUnresolvable // Wait is used when a Permit plugin finds a pod scheduling should wait. Wait // Skip is used when a Bind plugin chooses to skip binding. Skip )
These are predefined codes used in a Status.
const ( // NodePodNumberExceeded means pods in node exceed the allocatable pod number NodePodNumberExceeded = "node(s) pod number exceeded" // NodeResourceFitFailed means node could not fit the request of pod NodeResourceFitFailed = "node(s) resource fit failed" AllNodeUnavailableMsg = "all nodes are unavailable" )
const ( // PodReasonUnschedulable reason in PodScheduled PodCondition means that the scheduler // can't schedule the pod right now, for example due to insufficient resources in the cluster. // It can also mean that the scheduler skips scheduling the pod which left the pod `Undetermined`, // for example due to unschedulable pod already occurred. PodReasonUnschedulable = "Unschedulable" // PodReasonSchedulable reason in PodScheduled PodCondition means that the scheduler // can schedule the pod right now, but not bind yet PodReasonSchedulable = "Schedulable" // PodReasonSchedulerError reason in PodScheduled PodCondition means that the scheduler // tried to schedule the pod, but went error when scheduling // for example bind pod return error. PodReasonSchedulerError = "SchedulerError" )
These are reasons for a pod's transition to a condition.
const ( // VolcanoGPUResource extended gpu resource VolcanoGPUResource = "volcano.sh/gpu-memory" // VolcanoGPUNumber virtual GPU card number VolcanoGPUNumber = "volcano.sh/gpu-number" // PredicateTime is the key of predicate time PredicateTime = "volcano.sh/predicate-time" // GPUIndex is the key of gpu index GPUIndex = "volcano.sh/gpu-index" // UnhealthyGPUIDs list of unhealthy gpu ids UnhealthyGPUIDs = "volcano.sh/gpu-unhealthy-ids" // OversubscriptionNode is the key of node oversubscription OversubscriptionNode = "volcano.sh/oversubscription" // OversubscriptionCPU is the key of cpu oversubscription OversubscriptionCPU = "volcano.sh/oversubscription-cpu" // OversubscriptionMemory is the key of memory oversubscription OversubscriptionMemory = "volcano.sh/oversubscription-memory" // OfflineJobEvicting node will not schedule pod due to offline job evicting OfflineJobEvicting = "volcano.sh/offline-job-evicting" )
const (
// GPUResourceName need to follow https://github.com/NVIDIA/k8s-device-plugin/blob/66a35b71ac4b5cbfb04714678b548bd77e5ba719/server.go#L20
GPUResourceName = "nvidia.com/gpu"
)
const (
GPUSharingDevice = "GpuShare"
)
const JobWaitingTime = "sla-waiting-time"
JobWaitingTime is the maximum waiting time that a job may stay Pending under the service level agreement. When a job waits longer than this waiting time, it should be enqueued at once, and the cluster should reserve resources for it.
const ( // PodGroupVersionV1Beta1 represents PodGroupVersion of v1beta1 PodGroupVersionV1Beta1 string = "v1beta1" )
These are the valid versions of PodGroups.
const TaskPriorityAnnotation = "volcano.sh/task-priority"
Variables ¶
var IgnoredDevicesList = ignoredDevicesList{}
var RegisteredDevices = []string{ GPUSharingDevice, vgpu.DeviceName, }
Functions ¶
func AllocatedStatus ¶
func AllocatedStatus(status TaskStatus) bool
AllocatedStatus checks whether the task has an allocated status.
func BuildPodgroup ¶ added in v1.11.0
func BuildPodgroup(name, ns string, minMember int32, minResource v1.ResourceList) scheduling.PodGroup
BuildPodgroup builds podgroup
func BuildResourceList ¶ added in v1.8.2
func BuildResourceList(cpu string, memory string, scalarResources ...ScalarResource) v1.ResourceList
BuildResourceList builds resource list object
func BuildResourceListWithGPU ¶ added in v1.8.2
func BuildResourceListWithGPU(cpu string, memory string, GPU string, scalarResources ...ScalarResource) v1.ResourceList
BuildResourceListWithGPU builds resource list with GPU
func CompletedStatus ¶ added in v1.11.0
func CompletedStatus(status TaskStatus) bool
CompletedStatus checks whether the tasks are completed (regardless of failure or success)
func GenerateNodeResNumaSets ¶ added in v1.4.0
func GenerateNodeResNumaSets(nodes map[string]*NodeInfo) map[string]ResNumaSets
GenerateNodeResNumaSets returns the idle resource sets of all nodes.
func GenerateNumaNodes ¶ added in v1.4.0
GenerateNumaNodes returns the NUMA IDs of all nodes.
func GetMinResource ¶ added in v1.4.0
func GetMinResource() float64
func GetPodPreemptable ¶ added in v1.2.0
GetPodPreemptable returns the volcano.sh/preemptable value for the pod.
func GetPodResourceNumaInfo ¶ added in v1.5.0
func GetPodResourceNumaInfo(ti *TaskInfo) map[int]v1.ResourceList
func GetPodRevocableZone ¶ added in v1.2.0
GetPodRevocableZone returns the volcano.sh/revocable-zone value for the pod/podgroup.
func IsCountQuota ¶ added in v1.6.0
func IsCountQuota(name v1.ResourceName) bool
func IsIgnoredScalarResource ¶ added in v1.8.2
func IsIgnoredScalarResource(name v1.ResourceName) bool
func JobTerminated ¶
JobTerminated checks whether job was terminated.
func MergeErrors ¶
MergeErrors is used to merge multiple errors into single error
func ParseResourceList ¶ added in v1.4.0
func ParseResourceList(m map[string]string) (v1.ResourceList, error)
ParseResourceList parses the given configuration map into an API ResourceList or returns an error.
func PreemptableStatus ¶ added in v1.8.2
func PreemptableStatus(status TaskStatus) bool
PreemptableStatus checks whether the task can be preempted
func ResFloat642Quantity ¶ added in v1.5.0
func ResFloat642Quantity(resName v1.ResourceName, quantity float64) resource.Quantity
ResFloat642Quantity transforms a float64 resource quantity into a resource.Quantity.
func ResQuantity2Float64 ¶ added in v1.5.0
func ResQuantity2Float64(resName v1.ResourceName, quantity resource.Quantity) float64
ResQuantity2Float64 transforms a resource.Quantity into a float64 resource quantity.
func WrapInsufficientResourceReason ¶ added in v1.8.0
WrapInsufficientResourceReason wraps the insufficient resource reason.
Types ¶
type AllocatableFn ¶ added in v1.5.1
AllocatableFn is the func declaration used to check whether the task can be allocated
type AllocateFailError ¶ added in v1.5.0
type AllocateFailError struct {
Reason string
}
func (*AllocateFailError) Error ¶ added in v1.5.0
func (o *AllocateFailError) Error() string
type BatchNodeOrderFn ¶
BatchNodeOrderFn is the func declaration used to get priority score for ALL nodes for a particular task.
type BestNodeFn ¶ added in v0.4.1
BestNodeFn is the func declaration used to return the nodeScores to plugins.
type CSINodeStatusInfo ¶ added in v1.6.1
func (*CSINodeStatusInfo) Clone ¶ added in v1.6.1
func (cs *CSINodeStatusInfo) Clone() *CSINodeStatusInfo
Clone returns a clone of the CSI node status info.
type ClusterInfo ¶
type ClusterInfo struct { Jobs map[JobID]*JobInfo Nodes map[string]*NodeInfo Queues map[QueueID]*QueueInfo NamespaceInfo map[NamespaceName]*NamespaceInfo RevocableNodes map[string]*NodeInfo NodeList []string CSINodesStatus map[string]*CSINodeStatusInfo }
ClusterInfo is a snapshot of cluster by cache.
func (ClusterInfo) String ¶
func (ci ClusterInfo) String() string
type CompareFn ¶
type CompareFn func(interface{}, interface{}) int
CompareFn is the func declaration used by sort or priority queue.
type Devices ¶ added in v1.8.0
type Devices interface { //following two functions used in node_info //AddResource is to add the corresponding device resource of this 'pod' into current scheduler cache AddResource(pod *v1.Pod) //SubResource is to subtract the corresponding device resource of this 'pod' from current scheduler cache SubResource(pod *v1.Pod) //following four functions used in predicate //HasDeviceRequest checks if the 'pod' request this device HasDeviceRequest(pod *v1.Pod) bool // 3: UnschedulableAndUnresolvable // UnschedulableAndUnresolvable is used when a plugin finds a pod unschedulable and // preemption would not change anything. Plugins should return Unschedulable if it is possible // that the pod can get scheduled with preemption. // The accompanying status message should explain why the pod is unschedulable. FilterNode(pod *v1.Pod, policy string) (int, string, error) // ScoreNode will be invoked when using devicescore plugin, devices api can use it to implement multiple // scheduling policies. ScoreNode(pod *v1.Pod, policy string) float64 // Allocate action in predicate Allocate(kubeClient kubernetes.Interface, pod *v1.Pod) error // Release action in predicate Release(kubeClient kubernetes.Interface, pod *v1.Pod) error // GetIgnoredDevices notify vc-scheduler to ignore devices in return list GetIgnoredDevices() []string // GetStatus used for debug and monitor GetStatus() string }
type DimensionDefaultValue ¶ added in v1.4.0
type DimensionDefaultValue int
DimensionDefaultValue is the default value used for a blank (undefined) resource dimension.
const ( // Zero means resource dimension not defined will be treated as zero Zero DimensionDefaultValue = 0 // Infinity means resource dimension not defined will be treated as infinity Infinity DimensionDefaultValue = -1 )
type DisruptionBudget ¶ added in v1.2.0
type DisruptionBudget struct { MinAvailable string }
DisruptionBudget defines a job's minimum available and maximum unavailable pod values.
func NewDisruptionBudget ¶ added in v1.2.0
func NewDisruptionBudget(minAvailable, maxUnavilable string) *DisruptionBudget
NewDisruptionBudget creates a disruption budget for a job.
func (*DisruptionBudget) Clone ¶ added in v1.2.0
func (db *DisruptionBudget) Clone() *DisruptionBudget
Clone returns a clone of the DisruptionBudget.
type EvictableFn ¶
EvictableFn is the func declaration used to evict tasks.
type FitError ¶
type FitError struct { NodeName string Status StatusSets // contains filtered or unexported fields }
FitError describes the reason why a task could not fit on a node.
func NewFitErrWithStatus ¶ added in v1.10.0
NewFitErrWithStatus returns a fit error with code and reason in it
func NewFitError ¶
NewFitError returns a FitError built from the given message, setting the default code to Error.
type FitErrors ¶
type FitErrors struct {
// contains filtered or unexported fields
}
FitErrors is a set of FitError values across many nodes.
func (*FitErrors) GetUnschedulableAndUnresolvableNodes ¶ added in v1.10.0
GetUnschedulableAndUnresolvableNodes returns the set of nodes on which preempting pods would not help.
func (*FitErrors) SetNodeError ¶
SetNodeError sets the node error in FitErrors.
type JobEnqueuedFn ¶ added in v1.4.0
type JobEnqueuedFn func(interface{})
JobEnqueuedFn is the func declaration used to call after job enqueued.
type JobInfo ¶
type JobInfo struct { UID JobID PgUID types.UID Name string Namespace string Queue QueueID Priority int32 MinAvailable int32 WaitingTime *time.Duration JobFitErrors string NodesFitErrors map[TaskID]*FitErrors // All tasks of the Job. TaskStatusIndex map[TaskStatus]tasksMap Tasks tasksMap TaskMinAvailable map[string]int32 // key is value of "volcano.sh/task-spec", value is number TaskMinAvailableTotal int32 Allocated *Resource TotalRequest *Resource CreationTimestamp metav1.Time PodGroup *PodGroup ScheduleStartTimestamp metav1.Time Preemptable bool // RevocableZone support set volcano.sh/revocable-zone annotaion or label for pod/podgroup // we only support empty value or * value for this version and we will support specify revocable zone name for future release // empty value means workload can not use revocable node // * value means workload can use all the revocable node for during node active revocable time. RevocableZone string Budget *DisruptionBudget }
JobInfo will have all info of a Job
func NewJobInfo ¶
NewJobInfo creates a new jobInfo for set of tasks
func (*JobInfo) AddTaskInfo ¶
AddTaskInfo is used to add a task to a job
func (*JobInfo) CheckTaskPipelined ¶ added in v1.6.0
CheckTaskPipelined returns whether each task of the job is pipelined.
func (*JobInfo) CheckTaskReady ¶ added in v1.6.0
CheckTaskReady returns whether each task of the job is ready.
func (*JobInfo) CheckTaskStarving ¶ added in v1.6.0
CheckTaskStarving returns whether the job has at least one task which is starving.
func (*JobInfo) CheckTaskValid ¶ added in v1.6.0
CheckTaskValid returns whether each task of job is valid.
func (*JobInfo) DeductSchGatedResources ¶ added in v1.10.0
DeductSchGatedResources deducts the resources of scheduling-gated pods from Resource res. If the resource is less than the gated resources, it returns zero. Note: the purpose of this function is to deduct the resources of scheduling-gated tasks in a job when calculating inqueued resources, so that it will not block other jobs from being inqueued.
func (*JobInfo) DeleteTaskInfo ¶
DeleteTaskInfo is used to delete a task from a job
func (*JobInfo) FitError ¶
FitError returns detailed information on why a job's task failed to fit on each available node
func (*JobInfo) FitFailedRoles ¶ added in v1.10.0
FitFailedRoles returns the job roles' failed fit records
func (*JobInfo) GetElasticResources ¶ added in v1.6.0
GetElasticResources returns the part of the allocated resources that exceeds the job's minResource.
func (*JobInfo) GetMinResources ¶ added in v1.3.0
GetMinResources returns the min resources of the podgroup.
func (*JobInfo) GetSchGatedPodResources ¶ added in v1.10.0
GetSchGatedPodResources returns the total resources of tasks whose pod is scheduling gated. By definition, if a pod is scheduling gated, its status is Pending.
func (*JobInfo) HasPendingTasks ¶ added in v1.8.0
HasPendingTasks returns whether the job has pending tasks.
func (*JobInfo) IsPipelined ¶ added in v1.9.0
func (*JobInfo) IsStarving ¶ added in v1.9.0
func (*JobInfo) NeedContinueAllocating ¶ added in v1.10.0
NeedContinueAllocating checks whether allocation can continue for the current job after one of its pods has failed predication. There are two cases in which it can continue:
- the job's total allocatable number meets its minAvailable (no task role has an independent minMember setting): some of the pods may not be allocatable while other pods are, and the number of allocatable pods can still satisfy gang scheduling
- each task's allocatable number meets its independent minAvailable: this is for the case where each task role has its own independent minMember. E.g., the current role's pod has a failed predicate result but its allocated number has already met its minMember, so the other roles' pods, which have no failed predicate results, can continue
performance analysis:
As the role that failed predication has been pre-checked when it was popped from the queue, this function will be called at most as many times as there are roles in this job.
func (*JobInfo) ParseMinMemberInfo ¶ added in v1.10.0
ParseMinMemberInfo sets the information about the job's min member: 1. sets the number of each role in TaskMinAvailable; 2. calculates the sum of all roles' min members and sets it to TaskMinAvailableTotal.
func (*JobInfo) PendingBestEffortTaskNum ¶ added in v1.9.0
func (*JobInfo) ReadyTaskNum ¶
ReadyTaskNum returns the number of tasks that are ready or that are best-effort.
func (*JobInfo) SetPodGroup ¶
SetPodGroup sets podGroup details to a job
func (*JobInfo) TaskHasFitErrors ¶ added in v1.10.0
TaskHasFitErrors checks if the task has fit errors and can continue try predicating
func (*JobInfo) TaskSchedulingReason ¶ added in v1.4.0
TaskSchedulingReason gets the detailed reason and message of the given task. It returns the detailed reason and message for tasks based on the last scheduling transaction.
func (*JobInfo) UnsetPodGroup ¶
func (ji *JobInfo) UnsetPodGroup()
UnsetPodGroup removes podGroup details from a job
func (*JobInfo) UpdateTaskStatus ¶
func (ji *JobInfo) UpdateTaskStatus(task *TaskInfo, status TaskStatus) error
UpdateTaskStatus is used to update task's status in a job. If error occurs both task and job are guaranteed to be in the original state.
func (*JobInfo) ValidTaskNum ¶
ValidTaskNum returns the number of tasks that are valid.
func (*JobInfo) WaitingTaskNum ¶
WaitingTaskNum returns the number of tasks that are pipelined.
type LessFn ¶
type LessFn func(interface{}, interface{}) bool
LessFn is the func declaration used by sort or priority queue.
type NamespaceCollection ¶
type NamespaceCollection struct { Name string QuotaStatus map[string]v1.ResourceQuotaStatus }
NamespaceCollection will record all details about namespace
func NewNamespaceCollection ¶
func NewNamespaceCollection(name string) *NamespaceCollection
NewNamespaceCollection creates new NamespaceCollection object to record all information about a namespace
func (*NamespaceCollection) Delete ¶
func (n *NamespaceCollection) Delete(quota *v1.ResourceQuota)
Delete removes the registered information according to the quota object.
func (*NamespaceCollection) Snapshot ¶
func (n *NamespaceCollection) Snapshot() *NamespaceInfo
Snapshot will clone a NamespaceInfo without Heap according to the NamespaceCollection.
func (*NamespaceCollection) Update ¶
func (n *NamespaceCollection) Update(quota *v1.ResourceQuota)
Update modifies the registered information according to the quota object.
type NamespaceInfo ¶
type NamespaceInfo struct { // Name is the name of this namespace Name NamespaceName // QuotaStatus stores the ResourceQuotaStatus of all ResourceQuotas in this namespace QuotaStatus map[string]v1.ResourceQuotaStatus }
NamespaceInfo records information of namespace
type NodeInfo ¶
type NodeInfo struct { Name string Node *v1.Node // The state of node State NodeState // The releasing resource on that node Releasing *Resource // The pipelined resource on that node Pipelined *Resource // The idle resource on that node Idle *Resource // The used resource on that node, including running and terminating // pods Used *Resource Allocatable *Resource Capacity *Resource ResourceUsage *NodeUsage Tasks map[TaskID]*TaskInfo NumaInfo *NumatopoInfo NumaChgFlag NumaChgFlag NumaSchedulerInfo *NumatopoInfo RevocableZone string // Used to store custom information Others map[string]interface{} // enable node resource oversubscription OversubscriptionNode bool // OfflineJobEvicting true means node resource usage too high then dispatched pod can not use oversubscription resource OfflineJobEvicting bool // Resource Oversubscription feature: the Oversubscription Resource reported in annotation OversubscriptionResource *Resource // ImageStates holds the entry of an image if and only if this image is on the node. The entry can be used for // checking an image's existence and advanced usage (e.g., image locality scheduling policy) based on the image // state information. ImageStates map[string]*k8sframework.ImageStateSummary }
NodeInfo is node level aggregated information.
func NewNodeInfo ¶
NewNodeInfo is used to create new nodeInfo object
func (*NodeInfo) AddTask ¶
AddTask is used to add a task in nodeInfo object
If error occurs both task and node are guaranteed to be in the original state.
func (*NodeInfo) CloneImageSummary ¶ added in v1.8.0
func (ni *NodeInfo) CloneImageSummary() map[string]*k8sframework.ImageStateSummary
CloneImageSummary clones the image state summaries.
func (*NodeInfo) CloneOthers ¶ added in v1.8.0
CloneOthers clones the other map resources.
func (*NodeInfo) FutureIdle ¶
FutureIdle returns resources that will be idle in the future:
That is current idle resources plus released resources minus pipelined resources.
func (*NodeInfo) GetNodeAllocatable ¶ added in v1.4.0
GetNodeAllocatable returns the node's Allocatable resource without the OversubscriptionResource.
func (*NodeInfo) RefreshNumaSchedulerInfoByCrd ¶ added in v1.4.0
func (ni *NodeInfo) RefreshNumaSchedulerInfoByCrd()
RefreshNumaSchedulerInfoByCrd is used to update the scheduler NUMA information based on the CRD numatopo.
func (*NodeInfo) RemoveTask ¶
RemoveTask is used to remove a task from the nodeInfo object.
If error occurs both task and node are guaranteed to be in the original state.
func (*NodeInfo) UpdateTask ¶
UpdateTask is used to update a task in nodeInfo object.
If error occurs both task and node are guaranteed to be in the original state.
type NodeMapFn ¶
NodeMapFn is the func declaration used to get priority score for a node for a particular task.
type NodeOrderFn ¶
NodeOrderFn is the func declaration used to get priority score for a node for a particular task.
type NodeOrderMapFn ¶
NodeOrderMapFn is the func declaration used to get priority score of all plugins for a node for a particular task.
type NodeOrderReduceFn ¶
type NodeOrderReduceFn func(*TaskInfo, map[string]k8sframework.NodeScoreList) (map[string]float64, error)
NodeOrderReduceFn is the func declaration used to reduce priority score of all nodes for a plugin for a particular task.
type NodeReduceFn ¶
type NodeReduceFn func(*TaskInfo, k8sframework.NodeScoreList) error
NodeReduceFn is the func declaration used to reduce priority score for a node for a particular task.
type NodeResourceMap ¶
NodeResourceMap stores resource in a node
type NodeUsage ¶ added in v1.6.0
type NodeUsage struct { MetricsTime time.Time CPUUsageAvg map[string]float64 MEMUsageAvg map[string]float64 }
NodeUsage defines the real load usage of node
type NumaChgFlag ¶ added in v1.4.0
type NumaChgFlag int
NumaChgFlag indicates the changed status of the node's numainfo.
const ( // NumaInfoResetFlag indicate reset operate NumaInfoResetFlag NumaChgFlag = 0b00 // NumaInfoMoreFlag indicate the received allocatable resource is getting more NumaInfoMoreFlag NumaChgFlag = 0b11 // NumaInfoLessFlag indicate the received allocatable resource is getting less NumaInfoLessFlag NumaChgFlag = 0b10 // DefaultMaxNodeScore indicates the default max node score DefaultMaxNodeScore = 100 )
type NumatopoInfo ¶ added in v1.4.0
type NumatopoInfo struct { Namespace string Name string Policies map[nodeinfov1alpha1.PolicyName]string NumaResMap map[string]*ResourceInfo CPUDetail topology.CPUDetails ResReserved v1.ResourceList }
NumatopoInfo is the information about topology manager on the node
func (*NumatopoInfo) AddTask ¶ added in v1.5.0
func (info *NumatopoInfo) AddTask(ti *TaskInfo)
AddTask is the function to update the used resource of per numa node
func (*NumatopoInfo) Allocate ¶ added in v1.4.0
func (info *NumatopoInfo) Allocate(resSets ResNumaSets)
Allocate is the function to remove the allocated resource
func (*NumatopoInfo) Compare ¶ added in v1.4.0
func (info *NumatopoInfo) Compare(newInfo *NumatopoInfo) bool
Compare is the function to show the change of the resource on kubelet. Return value: true means the resource on kubelet is getting more or has not changed; false means the resource on kubelet is getting less.
func (*NumatopoInfo) DeepCopy ¶ added in v1.4.0
func (info *NumatopoInfo) DeepCopy() *NumatopoInfo
DeepCopy is used to copy a NumatopoInfo.
func (*NumatopoInfo) Release ¶ added in v1.4.0
func (info *NumatopoInfo) Release(resSets ResNumaSets)
Release is the function to reclaim the allocated resource
func (*NumatopoInfo) RemoveTask ¶ added in v1.5.0
func (info *NumatopoInfo) RemoveTask(ti *TaskInfo)
RemoveTask is the function to update the used resource of per numa node
type PodGroup ¶
type PodGroup struct { scheduling.PodGroup // Version represents the version of PodGroup Version string }
PodGroup is a collection of Pod; used for batch workload.
type PodGroupPhase ¶
type PodGroupPhase string
PodGroupPhase is the phase of a pod group at the current time.
type PodResourceDecision ¶ added in v1.5.0
type PodResourceDecision struct { // NUMAResources is resource list with numa info indexed by numa id. NUMAResources map[int]v1.ResourceList `json:"numa,omitempty"` }
PodResourceDecision is the resource allocation determined by the scheduler, and passed to kubelet through a pod annotation.
type PrePredicateFn ¶ added in v1.7.0
PrePredicateFn is the func declaration used to pre-predicate node for task.
type PredicateFn ¶
PredicateFn is the func declaration used to predicate node for task.
type QueueInfo ¶
type QueueInfo struct { UID QueueID Name string Weight int32 // Weights is a list of slash sperated float numbers. // Each of them is a weight corresponding the // hierarchy level. Weights string // Hierarchy is a list of node name along the // path from the root to the node itself. Hierarchy string Queue *scheduling.Queue }
QueueInfo will have all details about queue
func NewQueueInfo ¶
func NewQueueInfo(queue *scheduling.Queue) *QueueInfo
NewQueueInfo creates new queueInfo object
func (*QueueInfo) Reclaimable ¶
Reclaimable returns whether the queue is reclaimable.
type ResNumaSets ¶ added in v1.4.0
ResNumaSets is the set map of the resource
func (ResNumaSets) Allocate ¶ added in v1.4.0
func (resSets ResNumaSets) Allocate(taskSets ResNumaSets)
Allocate is to remove the allocated resource which is assigned to task
func (ResNumaSets) Clone ¶ added in v1.4.0
func (resSets ResNumaSets) Clone() ResNumaSets
Clone is the copy action
func (ResNumaSets) Release ¶ added in v1.4.0
func (resSets ResNumaSets) Release(taskSets ResNumaSets)
Release is to reclaim the allocated resource which is assigned to task
type ReservedNodesFn ¶ added in v1.1.0
type ReservedNodesFn func()
ReservedNodesFn is the func declaration used to select the reserved nodes
type Resource ¶
type Resource struct { MilliCPU float64 Memory float64 // ScalarResources ScalarResources map[v1.ResourceName]float64 // MaxTaskNum is only used by predicates; it should NOT // be accounted in other operators, e.g. Add. MaxTaskNum int }
Resource struct defines all the resource type
func EmptyResource ¶
func EmptyResource() *Resource
EmptyResource creates an empty resource object and returns it.
func ExceededPart ¶ added in v1.11.0
ExceededPart returns the part of the resources in left that exceeds right.
func GetPodResourceRequest ¶
GetPodResourceRequest returns all the resource required for that pod
func GetPodResourceWithoutInitContainers ¶
GetPodResourceWithoutInitContainers returns Pod's resource request, it does not contain init containers' resource request.
func NewResource ¶
func NewResource(rl v1.ResourceList) *Resource
NewResource creates a new resource object from resource list
func (*Resource) AddScalar ¶
func (r *Resource) AddScalar(name v1.ResourceName, quantity float64)
AddScalar adds a resource by a scalar value of this resource.
func (*Resource) Diff ¶
func (r *Resource) Diff(rr *Resource, defaultValue DimensionDefaultValue) (*Resource, *Resource)
Diff calculates the difference between two resource objects. Note: if `defaultValue` equals `Infinity`, the difference between two values will be `Infinity`, marked as -1.
func (*Resource) Equal ¶ added in v1.4.0
func (r *Resource) Equal(rr *Resource, defaultValue DimensionDefaultValue) bool
Equal returns true only on condition that values in all dimension are equal with each other for r and rr Otherwise returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"
func (*Resource) FitDelta ¶
FitDelta computes the delta between a resource object representing available resources and an operand representing resources being requested. Any field that is less than 0 after the operation represents an insufficient resource.
func (*Resource) Get ¶
func (r *Resource) Get(rn v1.ResourceName) float64
Get returns the resource value for that particular resource type
func (*Resource) IsEmpty ¶
IsEmpty returns false if any kind of resource other than IgnoredResources is not less than min value, otherwise returns true
func (*Resource) IsZero ¶
func (r *Resource) IsZero(rn v1.ResourceName) bool
IsZero returns false if the given kind of resource is not less than min value
func (*Resource) Less ¶
func (r *Resource) Less(rr *Resource, defaultValue DimensionDefaultValue) bool
Less returns true only on condition that all dimensions of resources in r are less than that of rr, Otherwise returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"
func (*Resource) LessEqual ¶
func (r *Resource) LessEqual(rr *Resource, defaultValue DimensionDefaultValue) bool
LessEqual returns true only on condition that all dimensions of resources in r are less than or equal with that of rr, Otherwise returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"
func (*Resource) LessEqualPartly ¶ added in v1.4.0
func (r *Resource) LessEqualPartly(rr *Resource, defaultValue DimensionDefaultValue) bool
LessEqualPartly returns true if there exists any dimension whose resource amount in r is less than or equal with that in rr. Otherwise returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"
func (*Resource) LessEqualWithDimension ¶ added in v1.10.0
LessEqualWithDimension only compare the resource items in req param @param req define the resource item to be compared if req is nil, equals r.LessEqual(rr, zero)
func (*Resource) LessEqualWithResourcesName ¶ added in v1.8.0
func (r *Resource) LessEqualWithResourcesName(rr *Resource, defaultValue DimensionDefaultValue) (bool, []string)
LessEqualWithResourcesName returns true, []string{} only on condition that all dimensions of resources in r are less than or equal with that of rr. Otherwise it returns false and a list of strings showing which resources are insufficient. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'". This function is the same as LessEqual, and it will be merged into LessEqual in the future.
func (*Resource) LessPartly ¶ added in v1.4.0
func (r *Resource) LessPartly(rr *Resource, defaultValue DimensionDefaultValue) bool
LessPartly returns true if there exists any dimension whose resource amount in r is less than that in rr. Otherwise returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"
func (*Resource) MinDimensionResource ¶ added in v1.1.1
func (r *Resource) MinDimensionResource(rr *Resource, defaultValue DimensionDefaultValue) *Resource
MinDimensionResource is used to reset the r resource dimension which is less than rr e.g r resource is <cpu 2000.00, memory 4047845376.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> rr resource is <cpu 3000.00, memory 1000.00> return r resource is <cpu 2000.00, memory 1000.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"
func (*Resource) ResourceNames ¶
func (r *Resource) ResourceNames() ResourceNameList
ResourceNames returns all resource types
func (*Resource) SetMaxResource ¶
SetMaxResource compares with ResourceList and takes max value for each Resource.
func (*Resource) SetScalar ¶
func (r *Resource) SetScalar(name v1.ResourceName, quantity float64)
SetScalar sets a resource by a scalar value of this resource.
func (*Resource) SubWithoutAssert ¶ added in v1.9.0
SubWithoutAssert subtracts two Resource objects without assertion, this function is added because some resource subtraction allows negative results, while others do not.
type ResourceInfo ¶ added in v1.4.0
type ResourceInfo struct { Allocatable cpuset.CPUSet Capacity int AllocatablePerNuma map[int]float64 // key: NUMA ID UsedPerNuma map[int]float64 // key: NUMA ID }
ResourceInfo is the allocatable information for the resource
type ResourceNameList ¶ added in v1.4.0
type ResourceNameList []v1.ResourceName
ResourceNameList struct defines resource name collection
func (ResourceNameList) Contains ¶ added in v1.4.0
func (r ResourceNameList) Contains(rr ResourceNameList) bool
Contains judges whether rr is subset of r
type ScalarResource ¶ added in v1.8.2
type ScoredNode ¶ added in v1.6.0
ScoredNode is the wrapper for node during Scoring.
type SiloClusterInfo ¶ added in v1.4.0
type SiloClusterInfo struct { UID ClusterID Cluster *scheduling.Cluster }
SiloClusterInfo will have all details about cluster
func NewSiloClusterInfo ¶ added in v1.4.0
func NewSiloClusterInfo(cluster *scheduling.Cluster) *SiloClusterInfo
NewSiloClusterInfo creates new SiloClusterInfo object
type Status ¶ added in v1.8.0
func ConvertPredicateStatus ¶ added in v1.10.0
func ConvertPredicateStatus(status *k8sframework.Status) *Status
ConvertPredicateStatus returns the predicate status converted from a k8sframework status.
type StatusSets ¶ added in v1.10.0
type StatusSets []*Status
func (StatusSets) ContainsErrorSkipOrWait ¶ added in v1.10.0
func (s StatusSets) ContainsErrorSkipOrWait() bool
func (StatusSets) ContainsUnschedulable ¶ added in v1.10.0
func (s StatusSets) ContainsUnschedulable() bool
func (StatusSets) ContainsUnschedulableAndUnresolvable ¶ added in v1.10.0
func (s StatusSets) ContainsUnschedulableAndUnresolvable() bool
func (StatusSets) Message ¶ added in v1.10.0
func (s StatusSets) Message() string
Message returns the message generated from StatusSets.
func (StatusSets) Reasons ¶ added in v1.10.0
func (s StatusSets) Reasons() []string
Reasons returns the list of reasons.
type TargetJobFn ¶ added in v1.1.0
TargetJobFn is the func declaration used to select the target job satisfies some conditions
type TaskInfo ¶
type TaskInfo struct { UID TaskID Job JobID Name string Namespace string TaskRole string // value of "volcano.sh/task-spec" // Resreq is the resource that used when task running. Resreq *Resource // InitResreq is the resource that used to launch a task. InitResreq *Resource TransactionContext // LastTransaction holds the context of last scheduling transaction LastTransaction *TransactionContext Priority int32 VolumeReady bool Preemptable bool BestEffort bool HasRestartableInitContainer bool SchGated bool // RevocableZone supports setting volcano.sh/revocable-zone annotation or label for pod/podgroup // we only support empty value or * value for this version and we will support specify revocable zone name for future releases // empty value means workload can not use revocable node // * value means workload can use all the revocable node for during node active revocable time. RevocableZone string NumaInfo *TopologyInfo PodVolumes *volumescheduling.PodVolumes Pod *v1.Pod // CustomBindErrHandler is a custom callback func called when task bind err. CustomBindErrHandler func() error `json:"-"` // CustomBindErrHandlerSucceeded indicates whether CustomBindErrHandler is executed successfully. CustomBindErrHandlerSucceeded bool }
TaskInfo will have all infos about the task
func NewTaskInfo ¶
NewTaskInfo creates new taskInfo object for a Pod
func (*TaskInfo) ClearLastTxContext ¶ added in v1.4.0
func (ti *TaskInfo) ClearLastTxContext()
ClearLastTxContext clears the context of the last transaction for a task.
func (*TaskInfo) GenerateLastTxContext ¶ added in v1.4.0
func (ti *TaskInfo) GenerateLastTxContext()
GenerateLastTxContext generates and sets the context of the last transaction for a task.
func (*TaskInfo) GetTransactionContext ¶ added in v1.4.0
func (ti *TaskInfo) GetTransactionContext() TransactionContext
GetTransactionContext gets the transaction context of a task.
func (*TaskInfo) SetPodResourceDecision ¶ added in v1.5.0
func (*TaskInfo) UnsetPodResourceDecision ¶ added in v1.5.0
func (ti *TaskInfo) UnsetPodResourceDecision()
type TaskStatus ¶
type TaskStatus int
TaskStatus defines the status of a task/pod.
const ( // Pending means the task is pending in the apiserver. Pending TaskStatus = 1 << iota // Allocated means the scheduler assigns a host to it. Allocated // Pipelined means the scheduler assigns a host to wait for releasing resource. Pipelined // Binding means the scheduler send Bind request to apiserver. Binding // Bound means the task/Pod bounds to a host. Bound // Running means a task is running on the host. Running // Releasing means a task/pod is deleted. Releasing // Succeeded means that all containers in the pod have voluntarily terminated // with a container exit code of 0, and the system is not going to restart any of these containers. Succeeded // Failed means that all containers in the pod have terminated, and at least one container has // terminated in a failure (exited with a non-zero exit code or was stopped by the system). Failed // Unknown means the status of task/pod is unknown to the scheduler. Unknown )
func (TaskStatus) String ¶
func (ts TaskStatus) String() string
type TopologyInfo ¶ added in v1.5.0
type TopologyInfo struct { Policy string ResMap map[int]v1.ResourceList // key: numa ID }
func GetPodTopologyInfo ¶ added in v1.5.0
func GetPodTopologyInfo(pod *v1.Pod) *TopologyInfo
GetPodTopologyInfo returns the volcano.sh/numa-topology-policy value for the pod.
func (*TopologyInfo) Clone ¶ added in v1.5.0
func (info *TopologyInfo) Clone() *TopologyInfo
type TransactionContext ¶ added in v1.4.0
type TransactionContext struct { NodeName string EvictionOccurred bool Status TaskStatus }
TransactionContext holds all the fields that needed by scheduling transaction
func (*TransactionContext) Clone ¶ added in v1.4.0
func (ctx *TransactionContext) Clone() *TransactionContext
Clone returns a clone of TransactionContext
type ValidateExFn ¶
type ValidateExFn func(interface{}) *ValidateResult
ValidateExFn is the func declaration used to validate the result.
type ValidateFn ¶
type ValidateFn func(interface{}) bool
ValidateFn is the func declaration used to check object's status.
type ValidateResult ¶
ValidateResult is a struct which can be used to determine the result.
type ValidateWithCandidateFn ¶ added in v1.10.0
type ValidateWithCandidateFn func(interface{}, interface{}) bool
ValidateWithCandidateFn behaves like ValidateFn but take the candidate task into consideration.
type VictimCompareFn ¶ added in v1.11.0
type VictimCompareFn func(interface{}, interface{}, interface{}) int
VictimCompareFn is the func declaration used to sort or prioritize victims.
type VictimTasksFn ¶ added in v1.2.0
VictimTasksFn is the func declaration used to select victim tasks