Documentation ¶
Index ¶
- Constants
- Variables
- func FilterActivePods(pods []*v1.Pod) []*v1.Pod
- func GPUAffinityFilter(nodeResources NodeResources, sharepod *faasv1.SharePod)
- func GPUAntiAffinityFilter(nodeResources NodeResources, sharepod *faasv1.SharePod)
- func GPUExclusionFilter(nodeResources NodeResources, sharepod *faasv1.SharePod)
- func IsPodActive(p *v1.Pod) bool
- func IsPodHot(p *v1.Pod) bool
- func PodKey(pod *v1.Pod) string
- func RandStr(length int) string
- func ScheduleAlgorithmBestFit(isGPUPod bool, gpu_request float64, gpu_mem int64, sharepod *faasv1.SharePod, ...) (schedNodeName string, schedGPUID string)
- func StartConfigManager(stopCh <-chan struct{}, kc kubernetes.Interface) error
- func UpdateNodeGPUInfo(nodeName string, uuid2mem *map[string]string)
- type ActivePodsWithRanks
- type ClientStatus
- type Controller
- type GPUInfo
- type GPUResource
- type NameList
- type NodeInfo
- type NodeResource
- type NodeResources
- type NodeStatus
- type PodRequest
- type RealPodControl
- type SharepodProbes
Constants ¶
const (
	// SuccessSynced is used as part of the Event 'reason' when a SharePod is synced
	SuccessSynced = "Synced"
	// ErrResourceExists is used as part of the Event 'reason' when a SharePod fails
	// to sync due to a Deployment of the same name already existing.
	ErrResourceExists = "ErrResourceExists"
	ErrValueError     = "ErrValueError"
	// MessageResourceExists is the message used for Events when a resource
	// fails to sync due to a Deployment already existing
	MessageResourceExists = "Resource %q already exists and is not managed by SharePod"
	// MessageResourceSynced is the message used for an Event fired when a SharePod
	// is synced successfully
	MessageResourceSynced = "SharePod synced successfully"

	SchedulerIpPath     = KubeShareLibraryPath + "/schedulerIP.txt"
	PodManagerPortStart = 50050
)
const (
LabelMinReplicas = "com.openfaas.scale.min"
)
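LabelMinReplicas mirrors the OpenFaaS scale-min label. A minimal sketch of reading it, where getMinReplicas, the int32 return type, and the fallback default are hypothetical (assumes strconv is imported):

	// Hypothetical helper: parse the minimum replica count from an
	// object's labels, falling back to a default when absent or invalid.
	func getMinReplicas(labels map[string]string, def int32) int32 {
		if v, ok := labels[LabelMinReplicas]; ok {
			if n, err := strconv.Atoi(v); err == nil && n > 0 {
				return int32(n)
			}
		}
		return def
	}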
Variables ¶
var (
KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
)
var (
ResourceQuantity1 = resource.MustParse("1")
)
Functions ¶
func FilterActivePods ¶ added in v0.1.21
func FilterActivePods(pods []*v1.Pod) []*v1.Pod

A more precise filter mechanism for pre-warm pods.
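The doc comment does not spell out the filtering rule. A minimal sketch of the usual shape of such a filter, modeled on the standard Kubernetes controller utilities (the pre-warm-specific logic is an assumption):

	// Sketch: keep only pods that IsPodActive accepts, i.e. pods that
	// are neither terminated nor marked for deletion.
	func filterActivePodsSketch(pods []*v1.Pod) []*v1.Pod {
		var active []*v1.Pod
		for _, p := range pods {
			if IsPodActive(p) {
				active = append(active, p)
			}
		}
		return active
	}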
func GPUAffinityFilter ¶ added in v0.1.21
func GPUAffinityFilter(nodeResources NodeResources, sharepod *faasv1.SharePod)
func GPUAntiAffinityFilter ¶ added in v0.1.21
func GPUAntiAffinityFilter(nodeResources NodeResources, sharepod *faasv1.SharePod)
func GPUExclusionFilter ¶ added in v0.1.21
func GPUExclusionFilter(nodeResources NodeResources, sharepod *faasv1.SharePod)
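None of the three GPU filters carries a doc comment. Judging by the tag fields on GPUResource, they prune GPU entries whose tags conflict with the SharePod's placement hints. A heavily hedged sketch of the affinity case, where wantTag stands in for however the SharePod expresses its requested tag:

	// Sketch: keep only GPUs that carry the requested affinity tag,
	// deleting non-matching entries from each node's free-GPU map.
	func filterByAffinityTagSketch(nodeResources NodeResources, wantTag string) {
		for _, node := range nodeResources {
			for id, gpu := range node.GpuFree {
				keep := false
				for _, t := range gpu.GPUAffinityTags {
					if t == wantTag {
						keep = true
						break
					}
				}
				if !keep {
					delete(node.GpuFree, id)
				}
			}
		}
	}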
func IsPodActive ¶ added in v0.1.21
func IsPodActive(p *v1.Pod) bool
func PodKey ¶ added in v0.1.21
PodKey returns a key unique to the given pod within a cluster. It's used so we consistently use the same key scheme in this module. It does exactly what cache.MetaNamespaceKeyFunc would have done except there's no possibility for error since we know the exact type.
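Because the input is known to be a *v1.Pod, the key can be built directly; a minimal sketch of the equivalent logic:

	// Sketch: same namespace/name key scheme as cache.MetaNamespaceKeyFunc,
	// but with no error path since the concrete type is known.
	func podKeySketch(pod *v1.Pod) string {
		return pod.Namespace + "/" + pod.Name
	}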
func ScheduleAlgorithmBestFit ¶ added in v0.1.21
func ScheduleAlgorithmBestFit(isGPUPod bool, gpu_request float64, gpu_mem int64, sharepod *faasv1.SharePod, ...) (schedNodeName string, schedGPUID string)
func StartConfigManager ¶
func StartConfigManager(stopCh <-chan struct{}, kc kubernetes.Interface) error
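A usage sketch, assuming an in-cluster client-go configuration and a stop channel tied to the process lifetime (imports: log, k8s.io/client-go/rest, k8s.io/client-go/kubernetes; this package's import path is omitted):

	stopCh := make(chan struct{})
	defer close(stopCh)

	cfg, err := rest.InClusterConfig() // assumes in-cluster deployment
	if err != nil {
		log.Fatal(err)
	}
	kc := kubernetes.NewForConfigOrDie(cfg)

	if err := StartConfigManager(stopCh, kc); err != nil {
		log.Fatalf("config manager: %v", err)
	}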
func UpdateNodeGPUInfo ¶
func UpdateNodeGPUInfo(nodeName string, uuid2mem *map[string]string)
Types ¶
type ActivePodsWithRanks ¶ added in v0.1.21
type ActivePodsWithRanks struct {
	Pods []*v1.Pod

	// Rank is a ranking of pods. This ranking is used during sorting when
	// comparing two pods that are both scheduled, in the same phase, and
	// having the same ready status.
	Rank []int

	// Now is a reference timestamp for doing logarithmic timestamp comparisons.
	// If zero, comparison happens without scaling.
	Now metav1.Time
}
ActivePodsWithRanks is a sortable list of pods and a list of corresponding ranks which will be considered during sorting. The two lists must have equal length. After sorting, the pods will be ordered as follows, applying each rule in turn until one matches:
1. If only one of the pods is assigned to a node, the pod that is not assigned comes before the pod that is.
2. If the pods' phases differ, a pending pod comes before a pod whose phase is unknown, and a pod whose phase is unknown comes before a running pod.
3. If exactly one of the pods is ready, the pod that is not ready comes before the ready pod.
4. If the controller.kubernetes.io/pod-deletion-cost annotation is set, then the pod with the lower value will come first.
5. If the pods' ranks differ, the pod with greater rank comes before the pod with lower rank.
6. If both pods are ready but have not been ready for the same amount of time, the pod that has been ready for a shorter amount of time comes before the pod that has been ready for longer.
7. If one pod has a container that has restarted more than any container in the other pod, the pod with the container with more restarts comes before the other pod.
8. If the pods' creation times differ, the pod that was created more recently comes before the older pod.
In 6 and 8, times are compared in a logarithmic scale. This allows a level of randomness among equivalent Pods when sorting. If two pods have the same logarithmic rank, they are sorted by UUID to provide a pseudorandom order.
If none of these rules matches, the second pod comes before the first pod.
The intention of this ordering is to put pods that should be preferred for deletion first in the list.
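A minimal usage sketch, assuming pods and ranks have already been built with equal length (both names are placeholders; imports: sort, metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"):

	// Sort so that the pod most preferred for deletion comes first.
	podsWithRanks := ActivePodsWithRanks{
		Pods: pods,         // []*v1.Pod
		Rank: ranks,        // []int, one rank per pod
		Now:  metav1.Now(), // reference time for logarithmic comparisons
	}
	sort.Sort(podsWithRanks)
	victim := podsWithRanks.Pods[0]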
func (ActivePodsWithRanks) Len ¶ added in v0.1.21
func (s ActivePodsWithRanks) Len() int
func (ActivePodsWithRanks) Less ¶ added in v0.1.21
func (s ActivePodsWithRanks) Less(i, j int) bool
Less compares two pods with corresponding ranks and returns true if the first one should be preferred for deletion.
func (ActivePodsWithRanks) Swap ¶ added in v0.1.21
func (s ActivePodsWithRanks) Swap(i, j int)
type ClientStatus ¶
type ClientStatus string
const (
	ClientReady    ClientStatus = "Ready"
	ClientNotReady ClientStatus = "NotReady"
)
type Controller ¶
type Controller struct {
// contains filtered or unexported fields
}
func NewController ¶
func NewController(
	kubeclient kubernetes.Interface,
	kubeshareclient clientset.Interface,
	nodeInformer coreinformers.NodeInformer,
	podInformer coreinformers.PodInformer,
	kubeshareInformer informers.SharePodInformer) *Controller
NewController returns a new SharePod controller
func (*Controller) Run ¶
func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) error
TODO: try running without stopCh, using a context instead.
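A hedged startup sketch; the client and informer arguments are assumed to have been built from the usual client-go clientset and the generated informer factories:

	c := NewController(kubeClient, kubeshareClient,
		nodeInformer, podInformer, sharepodInformer)

	stopCh := make(chan struct{})
	defer close(stopCh)

	// Run the controller with two workers until stopCh is closed.
	if err := c.Run(2, stopCh); err != nil {
		log.Fatalf("error running controller: %v", err)
	}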
type GPUResource ¶ added in v0.1.21
type GPUResource struct {
	GPUFreeReq int64
	// GPUFreeMem in bytes
	GPUFreeMem          int64
	GPUAffinityTags     []string
	GPUAntiAffinityTags []string
	// len(GPUExclusionTags) should be only one
	GPUExclusionTags []string
}
func (*GPUResource) DeepCopy ¶ added in v0.1.21
func (this *GPUResource) DeepCopy() *GPUResource
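The slice fields make a shallow copy unsafe to mutate. A minimal sketch of what a deep copy here plausibly does (not necessarily the actual implementation):

	// Sketch: copy the scalar fields and clone each tag slice so the
	// copy shares no backing arrays with the original.
	func (g *GPUResource) deepCopySketch() *GPUResource {
		out := &GPUResource{
			GPUFreeReq: g.GPUFreeReq,
			GPUFreeMem: g.GPUFreeMem,
		}
		out.GPUAffinityTags = append([]string(nil), g.GPUAffinityTags...)
		out.GPUAntiAffinityTags = append([]string(nil), g.GPUAntiAffinityTags...)
		out.GPUExclusionTags = append([]string(nil), g.GPUExclusionTags...)
		return out
	}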
type NameList ¶ added in v0.1.22
type NameList struct {
// contains filtered or unexported fields
}
TODO: extend this to a new struct that supports concurrent access for different SharePods.
type NodeResource ¶ added in v0.1.21
type NodeResource struct {
	CpuTotal int64
	MemTotal int64
	GpuTotal int
	// GpuMemTotal in bytes
	GpuMemTotal int64
	CpuFree     int64
	MemFree     int64

	/* Available GPU calculate */

	// Total GPU count - Pods using nvidia.com/gpu
	GpuFreeCount int

	// GPUs available usage (1.0 - SharePod usage)
	// GPUID to integer index mapping
	GpuFree map[string]*GPUResource
}
func (*NodeResource) DeepCopy ¶ added in v0.1.21
func (this *NodeResource) DeepCopy() *NodeResource
type NodeResources ¶ added in v0.1.21
type NodeResources map[string]*NodeResource
NodeResources holds the available resources in the cluster used to schedule training jobs.
func (*NodeResources) DeepCopy ¶ added in v0.1.21
func (this *NodeResources) DeepCopy() *NodeResources
func (*NodeResources) PrintMe ¶ added in v0.1.21
func (this *NodeResources) PrintMe()
type NodeStatus ¶
type NodeStatus struct {
	Conn          net.Conn
	LastHeartbeat time.Time
	ClientStatus  ClientStatus
}
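A minimal sketch of how the heartbeat fields and the ClientStatus constants fit together; the 30-second timeout is a hypothetical value, not taken from this package:

	const heartbeatTimeout = 30 * time.Second // hypothetical threshold

	// Sketch: derive the client status from the last heartbeat time.
	func refreshClientStatus(ns *NodeStatus) {
		if time.Since(ns.LastHeartbeat) > heartbeatTimeout {
			ns.ClientStatus = ClientNotReady
		} else {
			ns.ClientStatus = ClientReady
		}
	}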
type PodRequest ¶
func FindInQueue ¶
func FindInQueue(key string, pl *list.List) (*PodRequest, bool)
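A usage sketch, assuming the *list.List elements are *PodRequest values matched by key (how the key is derived from a request is not shown by the signature; handle is a hypothetical caller-side function):

	queue := list.New() // container/list; elements are *PodRequest

	if req, found := FindInQueue(key, queue); found {
		// req points at the queued request for this key.
		handle(req) // hypothetical handler
	} else {
		// Not queued yet; the caller can enqueue a new request.
	}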
type RealPodControl ¶ added in v0.1.21
type RealPodControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}
type SharepodProbes ¶ added in v0.1.23
type SharepodProbes struct{}