api

package
v1.8.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 25, 2023 License: Apache-2.0 Imports: 29 Imported by: 24

Documentation

Index

Constants

View Source
const (
	// Success means that plugin ran correctly and found pod schedulable.
	// NOTE: A nil status is also considered as "Success".
	Success int = iota
	// Error is used for internal plugin errors, unexpected input, etc.
	Error
	// Unschedulable is used when a plugin finds a pod unschedulable. The scheduler might attempt to
	// preempt other pods to get this pod scheduled. Use UnschedulableAndUnresolvable to make the
	// scheduler skip preemption.
	// The accompanying status message should explain why the pod is unschedulable.
	Unschedulable
	// UnschedulableAndUnresolvable is used when a plugin finds a pod unschedulable and
	// preemption would not change anything. Plugins should return Unschedulable if it is possible
	// that the pod can get scheduled with preemption.
	// The accompanying status message should explain why the pod is unschedulable.
	UnschedulableAndUnresolvable
	// Wait is used when a Permit plugin finds a pod scheduling should wait.
	Wait
	// Skip is used when a Bind plugin chooses to skip binding.
	Skip
)

These are predefined codes used in a Status.

View Source
const (
	// NodePodNumberExceeded means pods in node exceed the allocatable pod number
	NodePodNumberExceeded = "node(s) pod number exceeded"
	// NodeResourceFitFailed means node could not fit the request of pod
	NodeResourceFitFailed = "node(s) resource fit failed"

	// AllNodeUnavailableMsg is the default error message
	AllNodeUnavailableMsg = "all nodes are unavailable"
)
View Source
const (
	// PodReasonUnschedulable as the reason in a PodScheduled PodCondition means that the scheduler
	// can't schedule the pod right now, for example due to insufficient resources in the cluster.
	// It can also mean that the scheduler skipped scheduling the pod, which left the pod `Undetermined`,
	// for example because an unschedulable pod had already occurred.
	PodReasonUnschedulable = "Unschedulable"
	// PodReasonSchedulable as the reason in a PodScheduled PodCondition means that the scheduler
	// can schedule the pod right now, but has not bound it yet.
	PodReasonSchedulable = "Schedulable"
)

These are reasons for a pod's transition to a condition.

View Source
const (

	// VolcanoGPUResource extended gpu resource
	VolcanoGPUResource = "volcano.sh/gpu-memory"
	// VolcanoGPUNumber virtual GPU card number
	VolcanoGPUNumber = "volcano.sh/gpu-number"

	// PredicateTime is the key of predicate time
	PredicateTime = "volcano.sh/predicate-time"
	// GPUIndex is the key of gpu index
	GPUIndex = "volcano.sh/gpu-index"

	// UnhealthyGPUIDs list of unhealthy gpu ids
	UnhealthyGPUIDs = "volcano.sh/gpu-unhealthy-ids"

	// OversubscriptionNode is the key of node oversubscription
	OversubscriptionNode = "volcano.sh/oversubscription"
	// OversubscriptionCPU is the key of cpu oversubscription
	OversubscriptionCPU = "volcano.sh/oversubscription-cpu"
	// OversubscriptionMemory is the key of memory oversubscription
	OversubscriptionMemory = "volcano.sh/oversubscription-memory"
	// OfflineJobEvicting node will not schedule pod due to offline job evicting
	OfflineJobEvicting = "volcano.sh/offline-job-evicting"
)
View Source
const (
	// GPUResourceName need to follow https://github.com/NVIDIA/k8s-device-plugin/blob/66a35b71ac4b5cbfb04714678b548bd77e5ba719/server.go#L20
	GPUResourceName = "nvidia.com/gpu"
)
View Source
const (
	// GPUSharingDevice is the name of the GPU-sharing device; it is one of the
	// entries of RegisteredDevices.
	GPUSharingDevice = "GpuShare"
)
View Source
const JobWaitingTime = "sla-waiting-time"

JobWaitingTime is the maximum time that a job may stay Pending under the service level agreement. When a job has waited longer than this time, it should be enqueued at once, and the cluster should reserve resources for it.

View Source
const (
	// PodGroupVersionV1Beta1 represents PodGroupVersion of v1beta1
	PodGroupVersionV1Beta1 string = "v1beta1"
)

These are the valid versions of PodGroups.

View Source
const TaskPriorityAnnotation = "volcano.sh/task-priority"

Variables

View Source
var IgnoredDevicesList []string
View Source
var RegisteredDevices = []string{
	GPUSharingDevice, vgpu.DeviceName,
}

Functions

func AllocatedStatus

func AllocatedStatus(status TaskStatus) bool

AllocatedStatus checks whether the given task status is an allocated status.

func GenerateNodeResNumaSets added in v1.4.0

func GenerateNodeResNumaSets(nodes map[string]*NodeInfo) map[string]ResNumaSets

GenerateNodeResNumaSets returns the idle resource sets of all nodes.

func GenerateNumaNodes added in v1.4.0

func GenerateNumaNodes(nodes map[string]*NodeInfo) map[string][]int

GenerateNumaNodes returns the NUMA IDs of all nodes.

func GetMinResource added in v1.4.0

func GetMinResource() float64

func GetPodPreemptable added in v1.2.0

func GetPodPreemptable(pod *v1.Pod) bool

GetPodPreemptable returns the volcano.sh/preemptable value for the pod.

func GetPodResourceNumaInfo added in v1.5.0

func GetPodResourceNumaInfo(ti *TaskInfo) map[int]v1.ResourceList

func GetPodRevocableZone added in v1.2.0

func GetPodRevocableZone(pod *v1.Pod) string

GetPodRevocableZone returns the volcano.sh/revocable-zone value for the pod/podgroup.

func IsCountQuota added in v1.6.0

func IsCountQuota(name v1.ResourceName) bool

func JobTerminated

func JobTerminated(job *JobInfo) bool

JobTerminated checks whether job was terminated.

func MergeErrors

func MergeErrors(errs ...error) error

MergeErrors is used to merge multiple errors into single error

func ParseResourceList added in v1.4.0

func ParseResourceList(m map[string]string) (v1.ResourceList, error)

ParseResourceList parses the given configuration map into an API ResourceList or returns an error.

func ResFloat642Quantity added in v1.5.0

func ResFloat642Quantity(resName v1.ResourceName, quantity float64) resource.Quantity

ResFloat642Quantity converts a float64 quantity of the named resource into a resource.Quantity.

func ResQuantity2Float64 added in v1.5.0

func ResQuantity2Float64(resName v1.ResourceName, quantity resource.Quantity) float64

ResQuantity2Float64 converts a resource.Quantity of the named resource into a float64.

func WrapInsufficientResourceReason added in v1.8.0

func WrapInsufficientResourceReason(resources []string) string

WrapInsufficientResourceReason wrap insufficient resource reason.

Types

type AllocatableFn added in v1.5.1

type AllocatableFn func(*QueueInfo, *TaskInfo) bool

AllocatableFn is the func declaration used to check whether the task can be allocated

type AllocateFailError added in v1.5.0

type AllocateFailError struct {
	Reason string
}

func (*AllocateFailError) Error added in v1.5.0

func (o *AllocateFailError) Error() string

type BatchNodeOrderFn

type BatchNodeOrderFn func(*TaskInfo, []*NodeInfo) (map[string]float64, error)

BatchNodeOrderFn is the func declaration used to get priority score for ALL nodes for a particular task.

type BestNodeFn added in v0.4.1

type BestNodeFn func(*TaskInfo, map[float64][]*NodeInfo) *NodeInfo

BestNodeFn is the func declaration used to return the nodeScores to plugins.

type CSINodeStatusInfo added in v1.6.1

type CSINodeStatusInfo struct {
	CSINodeName  string
	DriverStatus map[string]bool
}

func (*CSINodeStatusInfo) Clone added in v1.6.1

func (cs *CSINodeStatusInfo) Clone() *CSINodeStatusInfo

Clone clones the CSI node status info.

type ClusterID added in v1.4.0

type ClusterID types.UID

ClusterID is a UID type that serves as the unique ID for each cluster.

type ClusterInfo

type ClusterInfo struct {
	Jobs           map[JobID]*JobInfo
	Nodes          map[string]*NodeInfo
	Queues         map[QueueID]*QueueInfo
	NamespaceInfo  map[NamespaceName]*NamespaceInfo
	RevocableNodes map[string]*NodeInfo
	NodeList       []string
	CSINodesStatus map[string]*CSINodeStatusInfo
}

ClusterInfo is a snapshot of cluster by cache.

func (ClusterInfo) String

func (ci ClusterInfo) String() string

type CompareFn

type CompareFn func(interface{}, interface{}) int

CompareFn is the func declaration used by sort or priority queue.

type Devices added in v1.8.0

type Devices interface {
	// The following two functions are used in node_info.
	// AddResource adds the corresponding device resource of this 'pod' to the current scheduler cache.
	AddResource(pod *v1.Pod)
	// SubResource subtracts the corresponding device resource of this 'pod' from the current scheduler cache.
	SubResource(pod *v1.Pod)

	// The following four functions are used in predicate.
	// HasDeviceRequest checks whether the 'pod' requests this device.
	HasDeviceRequest(pod *v1.Pod) bool

	// FilterNode returns an int code, a string and an error for the 'pod'.
	// NOTE(review): the int is presumably one of the predefined status codes
	// (Success, Error, Unschedulable, ...); only the entry for code 3 is
	// described in this comment — confirm the meaning of the other values.
	// 3: UnschedulableAndUnresolvable
	// UnschedulableAndUnresolvable is used when a plugin finds a pod unschedulable and
	// preemption would not change anything. Plugins should return Unschedulable if it is possible
	// that the pod can get scheduled with preemption.
	// The accompanying status message should explain why the pod is unschedulable.
	FilterNode(pod *v1.Pod) (int, string, error)
	// Allocate is the allocate action in predicate.
	Allocate(kubeClient kubernetes.Interface, pod *v1.Pod) error
	// Release is the release action in predicate.
	Release(kubeClient kubernetes.Interface, pod *v1.Pod) error

	// GetIgnoredDevices notifies vc-scheduler to ignore the devices in the returned list.
	GetIgnoredDevices() []string

	// GetStatus is used for debugging and monitoring.
	GetStatus() string
}

type DimensionDefaultValue added in v1.4.0

type DimensionDefaultValue int

DimensionDefaultValue is the default value used for a blank (undefined) resource dimension.

const (
	// Zero means resource dimension not defined will be treated as zero
	Zero DimensionDefaultValue = 0
	// Infinity means resource dimension not defined will be treated as infinity
	Infinity DimensionDefaultValue = -1
)

type DisruptionBudget added in v1.2.0

type DisruptionBudget struct {
	// MinAvailable is the job's minimum available pod value, held as a string.
	MinAvailable  string
	// MaxUnavilable is the job's maximum unavailable pod value, held as a string.
	// NOTE(review): the field name is misspelled ("Unavilable"); it is exported,
	// so renaming it would break callers.
	MaxUnavilable string
}

DisruptionBudget defines a job's minimum available and maximum unavailable pod values.

func NewDisruptionBudget added in v1.2.0

func NewDisruptionBudget(minAvailable, maxUnavilable string) *DisruptionBudget

NewDisruptionBudget create disruption budget for job

func (*DisruptionBudget) Clone added in v1.2.0

func (db *DisruptionBudget) Clone() *DisruptionBudget

Clone return a clone of DisruptionBudget

type EvictableFn

type EvictableFn func(*TaskInfo, []*TaskInfo) ([]*TaskInfo, int)

EvictableFn is the func declaration used to evict tasks.

type FitError

type FitError struct {
	NodeName string
	Reasons  []string
	// contains filtered or unexported fields
}

FitError describes the reason why a task could not fit on a node.

func NewFitError

func NewFitError(task *TaskInfo, node *NodeInfo, message ...string) *FitError

NewFitError return FitError by message

func (*FitError) Error

func (f *FitError) Error() string

Error returns the final error message

type FitErrors

type FitErrors struct {
	// contains filtered or unexported fields
}

FitErrors is set of FitError on many nodes

func NewFitErrors

func NewFitErrors() *FitErrors

NewFitErrors returns a new FitErrors.

func (*FitErrors) Error

func (f *FitErrors) Error() string

Error returns the final error message

func (*FitErrors) SetError

func (f *FitErrors) SetError(err string)

SetError set the common error message in FitErrors

func (*FitErrors) SetNodeError

func (f *FitErrors) SetNodeError(nodeName string, err error)

SetNodeError set the node error in FitErrors

type JobEnqueuedFn added in v1.4.0

type JobEnqueuedFn func(interface{})

JobEnqueuedFn is the func declaration used to call after job enqueued.

type JobID

type JobID types.UID

JobID is the type of JobInfo's ID.

type JobInfo

type JobInfo struct {
	UID JobID

	Name      string
	Namespace string

	Queue QueueID

	Priority int32

	MinAvailable int32

	WaitingTime *time.Duration

	JobFitErrors   string
	NodesFitErrors map[TaskID]*FitErrors

	// All tasks of the Job.
	TaskStatusIndex       map[TaskStatus]tasksMap
	Tasks                 tasksMap
	TaskMinAvailable      map[TaskID]int32
	TaskMinAvailableTotal int32

	Allocated    *Resource
	TotalRequest *Resource

	CreationTimestamp metav1.Time
	PodGroup          *PodGroup

	ScheduleStartTimestamp metav1.Time

	Preemptable bool

	// RevocableZone supports setting the volcano.sh/revocable-zone annotation or label for a pod/podgroup.
	// Only an empty value or the * value is supported in this version; specifying a revocable zone
	// name will be supported in a future release.
	// An empty value means the workload can not use revocable nodes.
	// The * value means the workload can use all revocable nodes during the nodes' active revocable time.
	RevocableZone string
	Budget        *DisruptionBudget
}

JobInfo will have all info of a Job

func NewJobInfo

func NewJobInfo(uid JobID, tasks ...*TaskInfo) *JobInfo

NewJobInfo creates a new jobInfo for set of tasks

func (*JobInfo) AddTaskInfo

func (ji *JobInfo) AddTaskInfo(ti *TaskInfo)

AddTaskInfo is used to add a task to a job

func (*JobInfo) CheckTaskPipelined added in v1.6.0

func (ji *JobInfo) CheckTaskPipelined() bool

CheckTaskPipelined return whether each task of job is pipelined.

func (*JobInfo) CheckTaskReady added in v1.6.0

func (ji *JobInfo) CheckTaskReady() bool

CheckTaskReady return whether each task of job is ready.

func (*JobInfo) CheckTaskStarving added in v1.6.0

func (ji *JobInfo) CheckTaskStarving() bool

CheckTaskStarving return whether job has at least one task which is starving.

func (*JobInfo) CheckTaskValid added in v1.6.0

func (ji *JobInfo) CheckTaskValid() bool

CheckTaskValid returns whether each task of job is valid.

func (*JobInfo) Clone

func (ji *JobInfo) Clone() *JobInfo

Clone is used to clone a jobInfo object

func (*JobInfo) DeleteTaskInfo

func (ji *JobInfo) DeleteTaskInfo(ti *TaskInfo) error

DeleteTaskInfo is used to delete a task from a job

func (*JobInfo) FitError

func (ji *JobInfo) FitError() string

FitError returns detailed information on why a job's task failed to fit on each available node

func (*JobInfo) GetElasticResources added in v1.6.0

func (ji *JobInfo) GetElasticResources() *Resource

func (*JobInfo) GetMinResources added in v1.3.0

func (ji *JobInfo) GetMinResources() *Resource

GetMinResources return the min resources of podgroup.

func (*JobInfo) HasPendingTasks added in v1.8.0

func (ji *JobInfo) HasPendingTasks() bool

HasPendingTasks return whether job has pending tasks

func (*JobInfo) IsPending added in v1.4.0

func (ji *JobInfo) IsPending() bool

IsPending returns whether job is in pending status

func (*JobInfo) Ready

func (ji *JobInfo) Ready() bool

Ready returns whether job is ready for run

func (*JobInfo) ReadyTaskNum

func (ji *JobInfo) ReadyTaskNum() int32

ReadyTaskNum returns the number of tasks that are ready or that are best-effort.

func (*JobInfo) SetPodGroup

func (ji *JobInfo) SetPodGroup(pg *PodGroup)

SetPodGroup sets podGroup details to a job

func (JobInfo) String

func (ji JobInfo) String() string

String returns a jobInfo object in string format

func (*JobInfo) TaskSchedulingReason added in v1.4.0

func (ji *JobInfo) TaskSchedulingReason(tid TaskID) (reason string, msg string)

TaskSchedulingReason gets the detailed reason and message of the given task. It returns the detailed reason and message for tasks based on the last scheduling transaction.

func (*JobInfo) UnsetPodGroup

func (ji *JobInfo) UnsetPodGroup()

UnsetPodGroup removes podGroup details from a job

func (*JobInfo) UpdateTaskStatus

func (ji *JobInfo) UpdateTaskStatus(task *TaskInfo, status TaskStatus) error

UpdateTaskStatus is used to update task's status in a job. If error occurs both task and job are guaranteed to be in the original state.

func (*JobInfo) ValidTaskNum

func (ji *JobInfo) ValidTaskNum() int32

ValidTaskNum returns the number of tasks that are valid.

func (*JobInfo) WaitingTaskNum

func (ji *JobInfo) WaitingTaskNum() int32

WaitingTaskNum returns the number of tasks that are pipelined.

type LessFn

type LessFn func(interface{}, interface{}) bool

LessFn is the func declaration used by sort or priority queue.

type NamespaceCollection

type NamespaceCollection struct {
	Name        string
	QuotaStatus map[string]v1.ResourceQuotaStatus
}

NamespaceCollection will record all details about namespace

func NewNamespaceCollection

func NewNamespaceCollection(name string) *NamespaceCollection

NewNamespaceCollection creates new NamespaceCollection object to record all information about a namespace

func (*NamespaceCollection) Delete

func (n *NamespaceCollection) Delete(quota *v1.ResourceQuota)

Delete removes the registered information according to the quota object.

func (*NamespaceCollection) Snapshot

func (n *NamespaceCollection) Snapshot() *NamespaceInfo

Snapshot clones a NamespaceInfo, without Heap, according to the NamespaceCollection.

func (*NamespaceCollection) Update

func (n *NamespaceCollection) Update(quota *v1.ResourceQuota)

Update modifies the registered information according to the quota object.

type NamespaceInfo

type NamespaceInfo struct {
	// Name is the name of this namespace
	Name NamespaceName
	// QuotaStatus stores the ResourceQuotaStatus of all ResourceQuotas in this namespace
	QuotaStatus map[string]v1.ResourceQuotaStatus
}

NamespaceInfo records information of namespace

type NamespaceName

type NamespaceName string

NamespaceName is name of namespace

type NodeInfo

type NodeInfo struct {
	Name string
	Node *v1.Node

	// The state of node
	State NodeState

	// The releasing resource on that node
	Releasing *Resource
	// The pipelined resource on that node
	Pipelined *Resource
	// The idle resource on that node
	Idle *Resource
	// The used resource on that node, including running and terminating
	// pods
	Used *Resource

	Allocatable   *Resource
	Capacity      *Resource
	ResourceUsage *NodeUsage

	Tasks             map[TaskID]*TaskInfo
	NumaInfo          *NumatopoInfo
	NumaChgFlag       NumaChgFlag
	NumaSchedulerInfo *NumatopoInfo
	RevocableZone     string

	// Used to store custom information
	Others map[string]interface{}

	// enable node resource oversubscription
	OversubscriptionNode bool
	// OfflineJobEvicting true means node resource usage too high then dispatched pod can not use oversubscription resource
	OfflineJobEvicting bool

	// Resource Oversubscription feature: the Oversubscription Resource reported in annotation
	OversubscriptionResource *Resource

	// ImageStates holds the entry of an image if and only if this image is on the node. The entry can be used for
	// checking an image's existence and advanced usage (e.g., image locality scheduling policy) based on the image
	// state information.
	ImageStates map[string]*k8sframework.ImageStateSummary
}

NodeInfo is node level aggregated information.

func NewNodeInfo

func NewNodeInfo(node *v1.Node) *NodeInfo

NewNodeInfo is used to create new nodeInfo object

func (*NodeInfo) AddTask

func (ni *NodeInfo) AddTask(task *TaskInfo) error

AddTask is used to add a task in nodeInfo object

If error occurs both task and node are guaranteed to be in the original state.

func (*NodeInfo) Clone

func (ni *NodeInfo) Clone() *NodeInfo

Clone used to clone nodeInfo Object

func (*NodeInfo) CloneImageSummary added in v1.8.0

func (ni *NodeInfo) CloneImageSummary() map[string]*k8sframework.ImageStateSummary

CloneImageSummary Clone Image State

func (*NodeInfo) CloneOthers added in v1.8.0

func (ni *NodeInfo) CloneOthers() map[string]interface{}

CloneOthers clone other map resources

func (*NodeInfo) FutureIdle

func (ni *NodeInfo) FutureIdle() *Resource

FutureIdle returns resources that will be idle in the future:

That is current idle resources plus released resources minus pipelined resources.

func (*NodeInfo) GetNodeAllocatable added in v1.4.0

func (ni *NodeInfo) GetNodeAllocatable() *Resource

GetNodeAllocatable returns the node's Allocatable resource without the OversubscriptionResource.

func (*NodeInfo) Pods

func (ni *NodeInfo) Pods() (pods []*v1.Pod)

Pods returns all pods running in that node

func (*NodeInfo) Ready

func (ni *NodeInfo) Ready() bool

Ready returns whether node is ready for scheduling

func (*NodeInfo) RefreshNumaSchedulerInfoByCrd added in v1.4.0

func (ni *NodeInfo) RefreshNumaSchedulerInfoByCrd()

RefreshNumaSchedulerInfoByCrd is used to update the scheduler's NUMA information based on the numatopo CRD.

func (*NodeInfo) RemoveTask

func (ni *NodeInfo) RemoveTask(ti *TaskInfo) error

RemoveTask used to remove a task from nodeInfo object.

If error occurs both task and node are guaranteed to be in the original state.

func (*NodeInfo) SetNode

func (ni *NodeInfo) SetNode(node *v1.Node)

SetNode sets kubernetes node object to nodeInfo object

func (NodeInfo) String

func (ni NodeInfo) String() string

String returns nodeInfo details in string format

func (*NodeInfo) UpdateTask

func (ni *NodeInfo) UpdateTask(ti *TaskInfo) error

UpdateTask is used to update a task in nodeInfo object.

If error occurs both task and node are guaranteed to be in the original state.

type NodeMapFn

type NodeMapFn func(*TaskInfo, *NodeInfo) (float64, error)

NodeMapFn is the func declaration used to get priority score for a node for a particular task.

type NodeOrderFn

type NodeOrderFn func(*TaskInfo, *NodeInfo) (float64, error)

NodeOrderFn is the func declaration used to get priority score for a node for a particular task.

type NodeOrderMapFn

type NodeOrderMapFn func(*TaskInfo, *NodeInfo) (map[string]float64, float64, error)

NodeOrderMapFn is the func declaration used to get priority score of all plugins for a node for a particular task.

type NodeOrderReduceFn

type NodeOrderReduceFn func(*TaskInfo, map[string]k8sframework.NodeScoreList) (map[string]float64, error)

NodeOrderReduceFn is the func declaration used to reduce priority score of all nodes for a plugin for a particular task.

type NodePhase

type NodePhase int

NodePhase defines the phase of node

const (
	// Ready means the node is ready for scheduling
	Ready NodePhase = 1 << iota
	// NotReady means the node is not ready for scheduling
	NotReady
)

func (NodePhase) String

func (np NodePhase) String() string

type NodeReduceFn

type NodeReduceFn func(*TaskInfo, k8sframework.NodeScoreList) error

NodeReduceFn is the func declaration used to reduce priority score for a node for a particular task.

type NodeResourceMap

type NodeResourceMap map[string]*Resource

NodeResourceMap stores resource in a node

type NodeState

type NodeState struct {
	Phase  NodePhase
	Reason string
}

NodeState defines the current state of node.

type NodeUsage added in v1.6.0

type NodeUsage struct {
	CPUUsageAvg map[string]float64
	MEMUsageAvg map[string]float64
}

NodeUsage defines the real load usage of node

func (*NodeUsage) DeepCopy added in v1.6.0

func (nu *NodeUsage) DeepCopy() *NodeUsage

type NumaChgFlag added in v1.4.0

type NumaChgFlag int

NumaChgFlag indicate node numainfo changed status

const (
	// NumaInfoResetFlag indicates a reset operation.
	NumaInfoResetFlag NumaChgFlag = 0b00
	// NumaInfoMoreFlag indicates that the received allocatable resource is getting more.
	NumaInfoMoreFlag NumaChgFlag = 0b11
	// NumaInfoLessFlag indicates that the received allocatable resource is getting less.
	NumaInfoLessFlag NumaChgFlag = 0b10
	// DefaultMaxNodeScore indicates the default max node score.
	// Unlike the flags above it is declared without the NumaChgFlag type.
	DefaultMaxNodeScore = 100
)

type NumatopoInfo added in v1.4.0

type NumatopoInfo struct {
	Namespace   string
	Name        string
	Policies    map[nodeinfov1alpha1.PolicyName]string
	NumaResMap  map[string]*ResourceInfo
	CPUDetail   topology.CPUDetails
	ResReserved v1.ResourceList
}

NumatopoInfo is the information about topology manager on the node

func (*NumatopoInfo) AddTask added in v1.5.0

func (info *NumatopoInfo) AddTask(ti *TaskInfo)

AddTask is the function to update the used resource of per numa node

func (*NumatopoInfo) Allocate added in v1.4.0

func (info *NumatopoInfo) Allocate(resSets ResNumaSets)

Allocate is the function to remove the allocated resource

func (*NumatopoInfo) Compare added in v1.4.0

func (info *NumatopoInfo) Compare(newInfo *NumatopoInfo) bool

Compare reports how the resource on kubelet has changed. Return value: true if the resource on kubelet is getting more or has not changed; false if the resource on kubelet is getting less.

func (*NumatopoInfo) DeepCopy added in v1.4.0

func (info *NumatopoInfo) DeepCopy() *NumatopoInfo

DeepCopy used to copy NumatopoInfo

func (*NumatopoInfo) Release added in v1.4.0

func (info *NumatopoInfo) Release(resSets ResNumaSets)

Release is the function to reclaim the allocated resource

func (*NumatopoInfo) RemoveTask added in v1.5.0

func (info *NumatopoInfo) RemoveTask(ti *TaskInfo)

RemoveTask is the function to update the used resource of per numa node

type PodGroup

type PodGroup struct {
	scheduling.PodGroup

	// Version represents the version of PodGroup
	Version string
}

PodGroup is a collection of Pod; used for batch workload.

func (*PodGroup) Clone added in v1.5.0

func (pg *PodGroup) Clone() *PodGroup

type PodGroupPhase

type PodGroupPhase string

PodGroupPhase is the phase of a pod group at the current time.

type PodResourceDecision added in v1.5.0

type PodResourceDecision struct {
	// NUMAResources is resource list with numa info indexed by numa id.
	NUMAResources map[int]v1.ResourceList `json:"numa,omitempty"`
}

PodResourceDecision is the resource allocation determined by the scheduler and passed to kubelet through a pod annotation.

type PrePredicateFn added in v1.7.0

type PrePredicateFn func(*TaskInfo) error

PrePredicateFn is the func declaration used to pre-predicate node for task.

type PredicateFn

type PredicateFn func(*TaskInfo, *NodeInfo) ([]*Status, error)

PredicateFn is the func declaration used to predicate node for task.

type QueueID

type QueueID types.UID

QueueID is UID type, serves as unique ID for each queue

type QueueInfo

type QueueInfo struct {
	UID  QueueID
	Name string

	Weight int32

	// Weights is a list of slash-separated float numbers.
	// Each of them is the weight corresponding to a
	// hierarchy level.
	Weights string
	// Hierarchy is a list of node names along the
	// path from the root to the node itself.
	Hierarchy string

	Queue *scheduling.Queue
}

QueueInfo will have all details about queue

func NewQueueInfo

func NewQueueInfo(queue *scheduling.Queue) *QueueInfo

NewQueueInfo creates new queueInfo object

func (*QueueInfo) Clone

func (q *QueueInfo) Clone() *QueueInfo

Clone is used to clone queueInfo object

func (*QueueInfo) Reclaimable

func (q *QueueInfo) Reclaimable() bool

Reclaimable return whether queue is reclaimable

type ResNumaSets added in v1.4.0

type ResNumaSets map[string]cpuset.CPUSet

ResNumaSets is the set map of the resource

func (ResNumaSets) Allocate added in v1.4.0

func (resSets ResNumaSets) Allocate(taskSets ResNumaSets)

Allocate is to remove the allocated resource which is assigned to task

func (ResNumaSets) Clone added in v1.4.0

func (resSets ResNumaSets) Clone() ResNumaSets

Clone is the copy action

func (ResNumaSets) Release added in v1.4.0

func (resSets ResNumaSets) Release(taskSets ResNumaSets)

Release is to reclaim the allocated resource which is assigned to task

type ReservedNodesFn added in v1.1.0

type ReservedNodesFn func()

ReservedNodesFn is the func declaration used to select the reserved nodes

type Resource

type Resource struct {
	MilliCPU float64
	Memory   float64

	// ScalarResources maps a resource name to its float64 quantity.
	// NOTE(review): presumably holds the resources other than MilliCPU and Memory — confirm.
	ScalarResources map[v1.ResourceName]float64

	// MaxTaskNum is only used by predicates; it should NOT
	// be accounted in other operators, e.g. Add.
	MaxTaskNum int
}

Resource struct defines all the resource types.

func EmptyResource

func EmptyResource() *Resource

EmptyResource creates an empty resource object and returns it.

func GetPodResourceRequest

func GetPodResourceRequest(pod *v1.Pod) *Resource

GetPodResourceRequest returns all the resource required for that pod

func GetPodResourceWithoutInitContainers

func GetPodResourceWithoutInitContainers(pod *v1.Pod) *Resource

GetPodResourceWithoutInitContainers returns Pod's resource request, it does not contain init containers' resource request.

func NewResource

func NewResource(rl v1.ResourceList) *Resource

NewResource creates a new resource object from resource list

func (*Resource) Add

func (r *Resource) Add(rr *Resource) *Resource

Add is used to add two given resources

func (*Resource) AddScalar

func (r *Resource) AddScalar(name v1.ResourceName, quantity float64)

AddScalar adds a resource by a scalar value of this resource.

func (*Resource) Clone

func (r *Resource) Clone() *Resource

Clone is used to clone a resource type, which is a deep copy function.

func (*Resource) Diff

func (r *Resource) Diff(rr *Resource, defaultValue DimensionDefaultValue) (*Resource, *Resource)

Diff calculates the difference between two resource objects. Note: if `defaultValue` equals `Infinity`, the difference between two values will be `Infinity`, marked as -1.

func (*Resource) Equal added in v1.4.0

func (r *Resource) Equal(rr *Resource, defaultValue DimensionDefaultValue) bool

Equal returns true only if the values in all dimensions are equal between r and rr; otherwise it returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"

func (*Resource) FitDelta

func (r *Resource) FitDelta(rr *Resource) *Resource

FitDelta computes the delta between a resource object representing available resources and an operand representing resources being requested. Any field that is less than 0 after the operation represents an insufficient resource.

func (*Resource) Get

func (r *Resource) Get(rn v1.ResourceName) float64

Get returns the resource value for that particular resource type

func (*Resource) IsEmpty

func (r *Resource) IsEmpty() bool

IsEmpty returns false if any kind of resource is not less than min value, otherwise returns true

func (*Resource) IsZero

func (r *Resource) IsZero(rn v1.ResourceName) bool

IsZero returns false if the given kind of resource is not less than min value

func (*Resource) Less

func (r *Resource) Less(rr *Resource, defaultValue DimensionDefaultValue) bool

Less returns true only if all dimensions of resources in r are less than those of rr; otherwise it returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"

func (*Resource) LessEqual

func (r *Resource) LessEqual(rr *Resource, defaultValue DimensionDefaultValue) bool

LessEqual returns true only if all dimensions of resources in r are less than or equal to those of rr; otherwise it returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"

func (*Resource) LessEqualPartly added in v1.4.0

func (r *Resource) LessEqualPartly(rr *Resource, defaultValue DimensionDefaultValue) bool

LessEqualPartly returns true if there exists any dimension whose resource amount in r is less than or equal to that in rr; otherwise it returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"

func (*Resource) LessEqualWithResourcesName added in v1.8.0

func (r *Resource) LessEqualWithResourcesName(rr *Resource, defaultValue DimensionDefaultValue) (bool, []string)

LessEqualWithResourcesName returns true and an empty []string only if all dimensions of resources in r are less than or equal to those of rr; otherwise it returns false and a list of strings showing which resources are insufficient. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'". This function is the same as LessEqual, and it will be merged into LessEqual in the future.

func (*Resource) LessPartly added in v1.4.0

func (r *Resource) LessPartly(rr *Resource, defaultValue DimensionDefaultValue) bool

LessPartly returns true if there exists any dimension whose resource amount in r is less than that in rr; otherwise it returns false. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"

func (*Resource) MinDimensionResource added in v1.1.1

func (r *Resource) MinDimensionResource(rr *Resource, defaultValue DimensionDefaultValue) *Resource

MinDimensionResource caps each dimension of r at the corresponding value in rr: any dimension of r that is greater than that of rr is reset to rr's value. E.g. if r is <cpu 2000.00, memory 4047845376.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> and rr is <cpu 3000.00, memory 1000.00>, the returned r is <cpu 2000.00, memory 1000.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00>. @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'"

func (*Resource) Multi

func (r *Resource) Multi(ratio float64) *Resource

Multi multiplies the resource by the provided ratio.

func (*Resource) ResourceNames

func (r *Resource) ResourceNames() ResourceNameList

ResourceNames returns all resource types

func (*Resource) SetMaxResource

func (r *Resource) SetMaxResource(rr *Resource)

SetMaxResource compares r with rr and takes the max value for each resource dimension.

func (*Resource) SetScalar

func (r *Resource) SetScalar(name v1.ResourceName, quantity float64)

SetScalar sets a resource by a scalar value of this resource.

func (*Resource) String

func (r *Resource) String() string

String returns resource details in string format

func (*Resource) Sub

func (r *Resource) Sub(rr *Resource) *Resource

Sub subtracts two Resource objects with assertion.

type ResourceInfo added in v1.4.0

type ResourceInfo struct {
	Allocatable        cpuset.CPUSet
	Capacity           int
	AllocatablePerNuma map[int]float64 // key: NUMA ID
	UsedPerNuma        map[int]float64 // key: NUMA ID
}

ResourceInfo is the allocatable information for the resource

type ResourceNameList added in v1.4.0

type ResourceNameList []v1.ResourceName

ResourceNameList defines a collection of resource names.

func (ResourceNameList) Contains added in v1.4.0

func (r ResourceNameList) Contains(rr ResourceNameList) bool

Contains reports whether rr is a subset of r.

type ScoredNode added in v1.6.0

type ScoredNode struct {
	NodeName string
	Score    int64
}

ScoredNode is the wrapper for node during Scoring.

type SiloClusterInfo added in v1.4.0

type SiloClusterInfo struct {
	UID     ClusterID
	Cluster *scheduling.Cluster
}

SiloClusterInfo holds all details about a silo cluster.

func NewSiloClusterInfo added in v1.4.0

func NewSiloClusterInfo(cluster *scheduling.Cluster) *SiloClusterInfo

NewSiloClusterInfo creates a new SiloClusterInfo object.

type Status added in v1.8.0

type Status struct {
	Code   int
	Reason string
}

func (Status) String added in v1.8.1

func (s Status) String() string

String returns the string representation of the status.

type TargetJobFn added in v1.1.0

type TargetJobFn func([]*JobInfo) *JobInfo

TargetJobFn is the func declaration used to select the target job that satisfies some conditions.

type TaskID

type TaskID types.UID

TaskID is UID type for Task

func PodKey

func PodKey(pod *v1.Pod) TaskID

PodKey returns the string key of a pod.

type TaskInfo

type TaskInfo struct {
	UID TaskID
	Job JobID

	Name      string
	Namespace string

	// Resreq is the resource that is used while the task is running.
	Resreq *Resource
	// InitResreq is the resource that is used to launch a task.
	InitResreq *Resource

	TransactionContext
	// LastTransaction holds the context of last scheduling transaction
	LastTransaction *TransactionContext

	Priority    int32
	VolumeReady bool
	Preemptable bool
	BestEffort  bool

	// RevocableZone supports setting the volcano.sh/revocable-zone annotation or label for a pod/podgroup.
	// Only the empty value or the * value is supported in this version; specifying a revocable zone name will be supported in future releases.
	// An empty value means the workload can not use revocable nodes.
	// A * value means the workload can use all the revocable nodes during the nodes' active revocable time.
	RevocableZone string

	NumaInfo   *TopologyInfo
	PodVolumes *volumescheduling.PodVolumes
	Pod        *v1.Pod

	// CustomBindErrHandler is a custom callback func called when binding the task fails.
	CustomBindErrHandler func() error
	// CustomBindErrHandlerSucceeded indicates whether CustomBindErrHandler is executed successfully.
	CustomBindErrHandlerSucceeded bool
}

TaskInfo holds all information about a task.

func NewTaskInfo

func NewTaskInfo(pod *v1.Pod) *TaskInfo

NewTaskInfo creates new taskInfo object for a Pod

func (*TaskInfo) ClearLastTxContext added in v1.4.0

func (ti *TaskInfo) ClearLastTxContext()

ClearLastTxContext clears the context of the last transaction for a task.

func (*TaskInfo) Clone

func (ti *TaskInfo) Clone() *TaskInfo

Clone is used for cloning a task

func (*TaskInfo) GenerateLastTxContext added in v1.4.0

func (ti *TaskInfo) GenerateLastTxContext()

GenerateLastTxContext generates and sets the context of the last transaction for a task.

func (*TaskInfo) GetTaskSpecKey added in v1.5.0

func (ti *TaskInfo) GetTaskSpecKey() TaskID

func (*TaskInfo) GetTransactionContext added in v1.4.0

func (ti *TaskInfo) GetTransactionContext() TransactionContext

GetTransactionContext gets the transaction context of a task.

func (*TaskInfo) SetPodResourceDecision added in v1.5.0

func (ti *TaskInfo) SetPodResourceDecision() error

func (TaskInfo) String

func (ti TaskInfo) String() string

String returns the taskInfo details in a string

func (*TaskInfo) UnsetPodResourceDecision added in v1.5.0

func (ti *TaskInfo) UnsetPodResourceDecision()

type TaskStatus

type TaskStatus int

TaskStatus defines the status of a task/pod.

const (
	// Pending means the task is pending in the apiserver.
	Pending TaskStatus = 1 << iota

	// Allocated means the scheduler assigns a host to it.
	Allocated

	// Pipelined means the scheduler assigns a host to wait for releasing resource.
	Pipelined

	// Binding means the scheduler sends a Bind request to the apiserver.
	Binding

	// Bound means the task/Pod is bound to a host.
	Bound

	// Running means a task is running on the host.
	Running

	// Releasing means a task/pod is deleted.
	Releasing

	// Succeeded means that all containers in the pod have voluntarily terminated
	// with a container exit code of 0, and the system is not going to restart any of these containers.
	Succeeded

	// Failed means that all containers in the pod have terminated, and at least one container has
	// terminated in a failure (exited with a non-zero exit code or was stopped by the system).
	Failed

	// Unknown means the status of task/pod is unknown to the scheduler.
	Unknown
)

func (TaskStatus) String

func (ts TaskStatus) String() string

type TopologyInfo added in v1.5.0

type TopologyInfo struct {
	Policy string
	ResMap map[int]v1.ResourceList // key: numa ID
}

func GetPodTopologyInfo added in v1.5.0

func GetPodTopologyInfo(pod *v1.Pod) *TopologyInfo

GetPodTopologyInfo returns the volcano.sh/numa-topology-policy value for the pod.

func (*TopologyInfo) Clone added in v1.5.0

func (info *TopologyInfo) Clone() *TopologyInfo

type TransactionContext added in v1.4.0

type TransactionContext struct {
	NodeName string
	Status   TaskStatus
}

TransactionContext holds all the fields that are needed by a scheduling transaction.

func (*TransactionContext) Clone added in v1.4.0

func (ctx *TransactionContext) Clone() *TransactionContext

Clone returns a clone of TransactionContext

type ValidateExFn

type ValidateExFn func(interface{}) *ValidateResult

ValidateExFn is the func declaration used to validate the result.

type ValidateFn

type ValidateFn func(interface{}) bool

ValidateFn is the func declaration used to check object's status.

type ValidateResult

type ValidateResult struct {
	Pass    bool
	Reason  string
	Message string
}

ValidateResult is a struct that can be used to determine the result of a validation.

type VictimTasksFn added in v1.2.0

type VictimTasksFn func([]*TaskInfo) []*TaskInfo

VictimTasksFn is the func declaration used to select victim tasks

type VoteFn added in v1.2.0

type VoteFn func(interface{}) int

VoteFn is the func declaration used to check object's complicated status.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL