simontype

package
v0.0.0-...-f2569be Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 5, 2023 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Index

Constants

View Source
// Shared constants of the simontype package. All of them are untyped
// constants: string literals, except for the three integer values at the end.
// The group comments below are derived from the identifier names and literal
// values only; the call sites are not visible from this declaration.
const (
	// Plugin name strings.
	SimonPluginName        = "Simon"
	OpenGpuSharePluginName = "Open-Gpu-Share"

	// Name strings for the *Score plugins.
	RandomScorePluginName        = "RandomScore"
	DotProductScorePluginName    = "DotProductScore"
	GpuClusteringScorePluginName = "GpuClusteringScore"
	GpuPackingScorePluginName    = "GpuPackingScore"
	BestFitScorePluginName       = "BestFitScore"
	FGDScorePluginName           = "FGDScore"

	// Default naming values: a name prefix for new nodes and a scheduler name.
	NewNodeNamePrefix    = "simon"
	DefaultSchedulerName = "simon-scheduler"

	// Error message strings for pod creation and deletion failures.
	CreatePodError = "failed to create pod"
	DeletePodError = "failed to delete pod"

	// Keys under the "simon/" prefix.
	// NOTE(review): the Anno prefix suggests these are annotation keys — confirm at the use sites.
	AnnoWorkloadKind      = "simon/workload-kind"
	AnnoWorkloadName      = "simon/workload-name"
	AnnoWorkloadNamespace = "simon/workload-namespace"
	AnnoNodeLocalStorage  = "simon/node-local-storage"
	AnnoPodLocalStorage   = "simon/pod-local-storage"
	AnnoNodeGpuShare      = "simon/node-gpu-share"
	AnnoPodUnscheduled    = "simon/pod-unscheduled"

	// Keys under the "simon/" prefix.
	// NOTE(review): the Label prefix suggests these are label keys — confirm at the use sites.
	LabelNewNode = "simon/new-node"
	LabelAppName = "simon/app-name"

	// NOTE(review): presumably environment variable names, judging by the Env prefix — confirm.
	EnvMaxCPU    = "MaxCPU"
	EnvMaxMemory = "MaxMemory"
	EnvMaxVG     = "MaxVG"

	// Kind name strings; each value is spelled exactly like its identifier.
	Pod                   = "Pod"
	Deployment            = "Deployment"
	ReplicaSet            = "ReplicaSet"
	ReplicationController = "ReplicationController"
	StatefulSet           = "StatefulSet"
	DaemonSet             = "DaemonSet"
	Job                   = "Job"
	CronJob               = "CronJob"

	// Node-related keys.
	HostName = "kubernetes.io/hostname"
	NodeIp   = "node-ip"

	// Miscellaneous values. NotesFileSuffix and SeparateSymbol are strings;
	// the remaining three are integers.
	// NOTE(review): the *HashCodeDigit values look like hash-suffix lengths and
	// MaxNumGpuPerNode like a per-node GPU cap — confirm where they are used.
	NotesFileSuffix       = "NOTES.txt"
	SeparateSymbol        = "-"
	WorkLoadHashCodeDigit = 10
	PodHashCodeDigit      = 5
	MaxNumGpuPerNode      = 8
)
View Source
// Default values related to "typical pods". Both are untyped integer constants.
// NOTE(review): how the threshold and the step are consumed is not visible
// here — confirm against the code that reads them.
const (
	DefaultTypicalPodPopularityThreshold = 60 // 60%; expressed as a whole-number percentage
	DefaultTypicalPodIncreaseStep        = 10 // NOTE(review): unit not stated in this declaration — confirm
)

Variables

This section is empty.

Functions

func AllocateExclusiveGpuId

func AllocateExclusiveGpuId(nodeRes NodeResource, podRes PodResource) (gpuId string)

Types

type GpuDimExtMethod

// GpuDimExtMethod is a string-valued option naming how the GPU resource
// dimensions of nodes and pods are made comparable. The supported values are
// the constants declared below.
type GpuDimExtMethod string
const (

	// MergeGpuDim adds the node resources of each gpu together as one dimension.
	// e.g., <3000 CPU, 6700 GPU>
	MergeGpuDim GpuDimExtMethod = "merge"

	// SeparateGpuDimAndShareOtherDim splits each node into multiple virtual nodes to be consistent with pod resource dimension.
	// Each virtual node contains a shared gpu or multiple fully free gpus, shares resources in other dimensions such as cpu, memory, etc.
	// e.g., <3000 CPU, 200 GPU>, <3000 CPU, 500 GPU>, <3000 CPU, 6000 GPU> =? <3000 CPU, 1000 GPU> * 6
	// NOTE(review): the "=?" above is unclear; it may mean that the 6000 GPU virtual node
	// corresponds to six fully free gpus of 1000 each — confirm and reword.
	SeparateGpuDimAndShareOtherDim GpuDimExtMethod = "share"

	// SeparateGpuDimAndDivideOtherDim is similar to SeparateGpuDimAndShareOtherDim.
	// The difference is that it divides the resources of other dimensions according to the amount of gpu resources left.
	// e.g., <89.55 CPU, 200 GPU>, <223.88 CPU, 500 GPU>, <2686.57 CPU, 6000 GPU>
	SeparateGpuDimAndDivideOtherDim GpuDimExtMethod = "divide"

	// ExtGpuDim is used to raise the resource dimension at the pod level to be consistent with node gpu resource dimension.
	// e.g., 1) Pod <100 CPU, 100 GPU, 0 GPU, 0 GPU>, Node <3000 CPU, 200 GPU, 500 GPU, 6000 GPU>
	//       2) Pod <100 CPU, 0 GPU, 100 GPU, 0 GPU>, Node <3000 CPU, 200 GPU, 500 GPU, 6000 GPU>
	//       3) Pod <100 CPU, 0 GPU, 0 GPU, 100 GPU>, Node <3000 CPU, 200 GPU, 500 GPU, 6000 GPU>
	ExtGpuDim GpuDimExtMethod = "extend"
)

type GpuPluginCfg

// GpuPluginCfg is a JSON-serializable configuration struct with two optional
// fields (both tagged omitempty): the GPU dimension extension method and the
// normalization method.
type GpuPluginCfg struct {
	DimExtMethod GpuDimExtMethod `json:"dimExtMethod,omitempty"`

	// By default, we do not apply any normalization.
	NormMethod NormMethod `json:"normMethod,omitempty"`
}

type GpuResource

// GpuResource pairs a GPU identifier with a milli-GPU amount. It is the
// element type of the list returned by NodeResource.ToFormalizedGpuResourceList.
type GpuResource struct {
	MilliGpuLeft int64  // NOTE(review): presumably the remaining (unallocated) milli-GPU of this GPU — confirm
	Id           string // NOTE(review): exact id format is not visible here
}

type GpuSelMethod

// GpuSelMethod is a string-valued option; it is the type of the
// OpenGpuSharePluginCfg.GpuSelMethod field (JSON key "gpuSelMethod").
// NOTE(review): the constant names suggest best-fit, worst-fit and random GPU
// selection strategies; the selection logic itself is not visible here.
type GpuSelMethod string
const (
	SelBestFitGpu  GpuSelMethod = "best"
	SelWorstFitGpu GpuSelMethod = "worst"
	SelRandomGpu   GpuSelMethod = "random"
)

type NodeResource

// NodeResource holds a node's name together with its CPU fields (left and
// capacity, in milli-CPU) and its GPU fields (per-GPU milli-GPU left, GPU
// count, GPU type and an affinity map).
type NodeResource struct {
	NodeName         string
	MilliCpuLeft     int64
	MilliCpuCapacity int64
	MilliGpuLeftList []int64 // The index is the GPU device index, so the order matters: do NOT sort it directly; use SortedMilliGpuLeftIndexList instead.
	GpuNumber        int
	GpuType          string
	GpuAffinity      map[string]int // NOTE(review): meaning of keys and values is not visible here — confirm
}

NodeResource is initialized by utils.GetNodeResourceViaPodList, utils.GetNodeResourceViaHandleAndName, or utils.GetNodeResourceViaNodeInfo.

func (NodeResource) Add

func (tnr NodeResource) Add(tpr PodResource, idl []int) (NodeResource, error)

func (NodeResource) Copy

func (tnr NodeResource) Copy() NodeResource

func (NodeResource) Flatten

func (tnr NodeResource) Flatten(remark string) NodeResourceFlat

func (NodeResource) GetFullyFreeGpuNum

func (tnr NodeResource) GetFullyFreeGpuNum() (free int)

func (NodeResource) GetTotalMilliGpuLeft

func (tnr NodeResource) GetTotalMilliGpuLeft() (total int64)

func (NodeResource) Repr

func (tnr NodeResource) Repr() string

func (NodeResource) SortedMilliGpuLeftIndexList

func (tnr NodeResource) SortedMilliGpuLeftIndexList(ascending bool) []int

func (NodeResource) Sub

func (tnr NodeResource) Sub(tpr PodResource) (NodeResource, error)

func (NodeResource) ToFormalizedGpuResourceList

func (tnr NodeResource) ToFormalizedGpuResourceList() (gpuResourceList []GpuResource)

func (NodeResource) ToResourceVec

func (tnr NodeResource) ToResourceVec() []float64

ToResourceVec returns a resource vector: [milli cpu left, total milli gpu left].

func (NodeResource) ToVirtualNodeResourceList

func (tnr NodeResource) ToVirtualNodeResourceList(method GpuDimExtMethod, podRes PodResource) (virtualNodeResourceList []VirtualNodeResource)

type NodeResourceFlat

// NodeResourceFlat is the return type of NodeResource.Flatten. Unlike
// NodeResource, it carries the GPU amount as a single string field and adds a
// free-form Remark.
type NodeResourceFlat struct {
	MilliCpu int64
	MilliGpu string // NOTE(review): presumably a textual encoding of the per-GPU values — confirm in Flatten
	GpuType  string
	Remark   string // Flatten takes a "remark" string parameter; presumably stored here — confirm
}

type NodeStatus

// NodeStatus groups a node pointer with a slice of pod pointers. It is the
// element type of SimulateResult.NodeStatus.
// NOTE(review): presumably Pods are the pods placed on Node — confirm against
// the code that fills this struct.
type NodeStatus struct {
	Node *v1.Node
	Pods []*v1.Pod
}

type NormMethod

// NormMethod is a string-valued option naming how a resource vector is
// normalized. It is the type of the GpuPluginCfg.NormMethod field.
type NormMethod string
const (
	// NormByNode indicates the resource vector will be normalized by node allocatable.
	NormByNode NormMethod = "node"
	// NormByPod indicates the resource vector will be normalized by pod request.
	NormByPod NormMethod = "pod"
	// NormByMax indicates the resource vector will be normalized by the machine's maximum available resources.
	NormByMax NormMethod = "max"
)

type OpenGpuSharePluginCfg

// OpenGpuSharePluginCfg embeds GpuPluginCfg and adds one optional field, the
// GPU selection method (JSON key "gpuSelMethod", omitted when empty).
type OpenGpuSharePluginCfg struct {
	GpuPluginCfg

	GpuSelMethod GpuSelMethod `json:"gpuSelMethod,omitempty"`
}

type PodResource

// PodResource holds a pod's CPU request (milli-CPU), its per-GPU milli-GPU
// request, the number of GPUs and the GPU type.
type PodResource struct {
	MilliCpu  int64
	MilliGpu  int64 // Milli GPU request per GPU, 0-1000
	GpuNumber int
	GpuType   string
}

func (PodResource) IsGpuShare

func (tpr PodResource) IsGpuShare() bool

IsGpuShare returns true if pod is a GPU-share pod, otherwise false.

func (PodResource) Less

func (tpr PodResource) Less(other PodResource) bool

func (PodResource) Repr

func (tpr PodResource) Repr() string

func (PodResource) ToResourceVec

func (tpr PodResource) ToResourceVec() []float64

ToResourceVec returns a resource vector: [milli cpu request, milli gpu request].

func (PodResource) ToVirtualPodResourceList

func (tpr PodResource) ToVirtualPodResourceList(method GpuDimExtMethod, nodeRes NodeResource) (virtualPodResourceList []VirtualPodResource)

func (PodResource) TotalMilliGpu

func (tpr PodResource) TotalMilliGpu() int64

type SchedulingMatchGroup

// SchedulingMatchGroup bundles a node resource vector, a pod resource vector
// and a GPU id string.
// NOTE(review): presumably one candidate node/pod pairing evaluated during
// scoring — confirm against the code that builds these groups.
type SchedulingMatchGroup struct {
	NodeResourceVec []float64
	PodResourceVec  []float64
	GpuId           string
}

type SimulateResult

// SimulateResult holds a list of UnscheduledPod entries and a list of
// NodeStatus entries.
// NOTE(review): presumably the outcome of one simulation run — the producer
// is not visible here.
type SimulateResult struct {
	UnscheduledPods []UnscheduledPod
	NodeStatus      []NodeStatus
}

type SkylinePodList

// SkylinePodList is a named slice of PodResource.
// NOTE(review): what makes a pod part of the "skyline" is not visible here — confirm.
type SkylinePodList []PodResource

type TargetPod

// TargetPod pairs a PodResource with a fractional percentage. It is the
// element type of TargetPodList.
type TargetPod struct {
	TargetPodResource PodResource
	Percentage        float64 // range: 0.0 - 1.0 (100%)
}

type TargetPodList

// TargetPodList is a named slice of TargetPod. The Len, Less and Swap methods
// listed for this type have the signatures required by sort.Interface.
// NOTE(review): the ordering implemented by Less is not visible here.
type TargetPodList []TargetPod

func (TargetPodList) Len

func (p TargetPodList) Len() int

func (TargetPodList) Less

func (p TargetPodList) Less(i, j int) bool

func (TargetPodList) Swap

func (p TargetPodList) Swap(i, j int)

type UnscheduledPod

// UnscheduledPod pairs a pod pointer with a reason string. It is the element
// type of SimulateResult.UnscheduledPods.
type UnscheduledPod struct {
	Pod    *v1.Pod
	Reason string // NOTE(review): presumably why the pod could not be scheduled — confirm
}

type VirtualNodeResource

// VirtualNodeResource is the element type of the list returned by
// NodeResource.ToVirtualNodeResourceList: a resource vector plus a GPU id string.
// NOTE(review): the vector layout depends on the GpuDimExtMethod passed to that
// method and is not visible here.
type VirtualNodeResource struct {
	ResourceVec []float64
	GpuId       string
}

type VirtualPodResource

// VirtualPodResource is the element type of the list returned by
// PodResource.ToVirtualPodResourceList: a resource vector plus a GPU id string.
// NOTE(review): the vector layout depends on the GpuDimExtMethod passed to that
// method and is not visible here.
type VirtualPodResource struct {
	ResourceVec []float64
	GpuId       string
}

Directories

Path Synopsis
open-gpu-share

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL