util

package
v6.0.0+incompatible Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 31, 2024 License: Apache-2.0, Apache-2.0 Imports: 20 Imported by: 0

Documentation

Overview

Package util is used for HuaWei infer common Ascend pin affinity schedule.

Package util is used for HuaWei infer common Ascend pin affinity schedule.

Package util is used for deployment util functions.

Package util is used for the total variable.

Package util is used for the total variable.

Package util is used for the total variable.

Package util is used for the total variable.

Index

Constants

View Source
// NPU job types. Values are assigned sequentially by iota, starting at 0.
const (
	// JobTypeWhole whole card for NPU Job (value 0).
	JobTypeWhole = iota
	// JobTypeDyCut Dynamic force cutting NPU vJob (value 1).
	JobTypeDyCut
	// JobTypeStCut Static force segmentation NPU Job (value 2).
	JobTypeStCut
	// JobTypeUnknown unknown NPU Job type (value 3).
	JobTypeUnknown
)

For Job type

View Source
// Task status values. Values are assigned sequentially by iota, starting at 0.
const (
	// TaskStatusInit task status value 0; presumably the initial status — confirm against callers.
	TaskStatusInit = iota
	// TaskStatusAllocate task status value 1; presumably set once resources are allocated — confirm.
	TaskStatusAllocate
	// TaskStatusWrBack task status value 2; presumably "write back" — confirm against callers.
	TaskStatusWrBack
	// TaskStatusRunning task status value 3.
	TaskStatusRunning
	// TaskStatusFailed task status value 4.
	TaskStatusFailed
)

for task status

View Source
// Shared constants: log levels, NPU index and core numbers, resource names,
// selector/label values and configmap names and keys.
const (
	// LogErrorLev for log error.
	LogErrorLev = 1
	// LogWarningLev for log warning.
	LogWarningLev = 2
	// LogInfoLev for log information.
	LogInfoLev = 3
	// LogDebugLev for log debug.
	LogDebugLev = 4
	// ErrorInt return -1 when get error for int
	ErrorInt = -1
	// NPUIndex2 the 2 index.
	NPUIndex2 = 2
	// NPUIndex3 the 3 index.
	NPUIndex3 = 3
	// NPUIndex8 the 8 index.
	NPUIndex8 = 8
	// NPUIndex16 the 16 index.
	NPUIndex16 = 16
	// NPUIndex7 the 7 index.
	NPUIndex7 = 7
	// NPUIndex4 the 4 index.
	NPUIndex4 = 4
	// NPUIndex5 the 5 index.
	NPUIndex5 = 5
	// NPUIndex6 the 6 index.
	NPUIndex6 = 6
	// NPUIndex1 the 1 index.
	NPUIndex1 = 1
	// NPUIndex0 the 0 index.
	NPUIndex0 = 0
	// NPUIndex9 the 9 index.
	NPUIndex9 = 9
	// NPUIndex10 the 10 index.
	NPUIndex10 = 10
	// NPUIndex11 the 11 index.
	NPUIndex11 = 11
	// NPUIndex12 the 12 index.
	NPUIndex12 = 12
	// NPUIndex13 the 13 index.
	NPUIndex13 = 13
	// NPUIndex14 the 14 index.
	NPUIndex14 = 14
	// NPUIndex15 the 15 index.
	NPUIndex15 = 15
	// CoreNum32 32 core 910
	CoreNum32 = 32
	// CoreNum3 3 core 910
	CoreNum3 = 3
	// CoreNum5 5 core 910
	CoreNum5 = 5
	// CoreNum10 10 core 910
	CoreNum10 = 10
	// CoreNum6 6 core 910
	CoreNum6 = 6
	// CoreNum12 12 core 910
	CoreNum12 = 12
	// CoreNum30 30 core 910
	CoreNum30 = 30
	// CoreNum20 20 core 910
	CoreNum20 = 20
	// CoreNum25 25 core 910
	CoreNum25 = 25
	// CoreNum24 24 core 910
	CoreNum24 = 24
	// CpuNum14 14 cpu 910
	CpuNum14 = 14
	// CpuNum6 6 cpu 910
	CpuNum6 = 6
	// MapInitNum for map init length.
	MapInitNum = 3
	// Base10 for const 10.
	Base10 = 10
	// BitSize64 for const 64
	BitSize64 = 64
	// MaxSliceNum max slice number
	MaxSliceNum = 128
	// NPUHexKilo for const 1000, volcano frame used.
	NPUHexKilo = 1000
	// HwPreName pre name
	HwPreName = "huawei.com/"
	// NPUCardPreName for NPU card pre-Name.
	NPUCardPreName = "huawei.com/Ascend"
	// HuaweiArchArm for arm.
	HuaweiArchArm = "huawei-arm"
	// HuaweiArchX86 for x86.
	HuaweiArchX86 = "huawei-x86"

	// Accelerator for custom tag.
	Accelerator = "accelerator"

	// CMSelectorKey selector key in scheduler configmap.
	CMSelectorKey = "selector"
	// CMInitParamKey init param key in scheduler configmap
	CMInitParamKey = "init-params"
	// AcceleratorType for selector.
	AcceleratorType = "accelerator-type"
	// CardAcceleratorType for card mode.
	CardAcceleratorType = "card"
	// Module910bx16AcceleratorType for module mode.
	Module910bx16AcceleratorType = "module-910b-16"
	// Module910bx8AcceleratorType for module mode.
	Module910bx8AcceleratorType = "module-910b-8"
	// Card910bx2AcceleratorType for module mode.
	// NOTE(review): the value is "card-910b-2", so "card mode" may be intended — confirm.
	Card910bx2AcceleratorType = "card-910b-2"
	// Card910bx2InferAcceleratorType for infer mode.
	Card910bx2InferAcceleratorType = "card-910b-infer"
	// ModuleAcceleratorType for module mode.
	ModuleAcceleratorType = "module"
	// ChipAcceleratorType for chip mode.
	ChipAcceleratorType = "chip"
	// HalfAcceleratorType for half mode
	HalfAcceleratorType = "half"
	// ServerType server type value takes Ascend310P-10-dual/Ascend910-32...
	ServerType = "servertype"
	// ServerTypeDual dual card
	ServerTypeDual = "dual"

	// NPU910CardName for judge 910 npu resource.
	NPU910CardName = "huawei.com/Ascend910"
	// NPU910CardNamePre for getting card number.
	NPU910CardNamePre = "Ascend910-"
	// NPU310PCardName for judge 310P npu resource.
	NPU310PCardName = "huawei.com/Ascend310P"
	// NPU310CardName for judge 310 npu resource.
	NPU310CardName = "huawei.com/Ascend310"
	// NPU310CardNamePre for getting card number.
	NPU310CardNamePre = "Ascend310-"
	// NPU310PCardNamePre for getting card number.
	NPU310PCardNamePre = "Ascend310P-"
	// AscendNPUPodRealUse for NPU pod real use cards.
	AscendNPUPodRealUse = "huawei.com/AscendReal"
	// AscendNPUCore for NPU core num, like 56; Records the chip name that the scheduler assigns to the pod.
	AscendNPUCore = "huawei.com/npu-core"
	// Ascend910bName for judge Ascend910b npu resource.
	Ascend910bName = "huawei.com/Ascend910b"

	// SegmentEnable for VNPU segment enable flag. Default is "false".
	SegmentEnable = "presetVirtualDevice"

	// UseClusterInfoManager for use cluster info manager, default is true
	UseClusterInfoManager = "useClusterInfoManager"

	// SubHealthyStrategyLabel sub-healthy handle strategy. default is grace exit
	SubHealthyStrategyLabel = "subHealthyStrategy"
	// SubHealthyIgnore ignore sub-healthy
	SubHealthyIgnore = "ignore"
	// SubHealthyGraceExit don't use sub-healthy node and grace exit
	SubHealthyGraceExit = "graceExit"
	// SubHealthyForceExit don't use sub-healthy node and force exit
	SubHealthyForceExit = "forceExit"
	// DevInfoNameSpace device-plugin install Namespace
	DevInfoNameSpace = "kube-system"
	// MindXDlNameSpace mindx dl Namespace
	MindXDlNameSpace = "mindx-dl"
	// DevInfoPreName like "mindx-dl-deviceinfo-ubuntu"
	DevInfoPreName = "mindx-dl-deviceinfo-"
	// NodeDCmInfoNamePrefix is for noded to report node healthy state
	NodeDCmInfoNamePrefix = "mindx-dl-nodeinfo-"
	// SwitchCmInfoNamePrefix is the prefix for switch fault configmap
	SwitchCmInfoNamePrefix = "mindx-dl-switchinfo-"
	// NodedNodeHealtyStatuskey is the key of node healthy status from configmap data of noded
	NodedNodeHealtyStatuskey = "nodedNodeHealtyStatus"
	// NodeSubHealthy means there is some fault on the node which is reported by nodeD, but will not immediately
	// make node unhealthy, this status will prevent new task scheduled on this node and reschedule will not consider
	// this node
	NodeSubHealthy = "SubHealthy"
	// NodeUnHealthyByNodeD is the node unhealthy status reported by nodeD configmap,
	// in this case pod will be rescheduling
	NodeUnHealthyByNodeD = "UnHealthy"
	// NodeHealthyByNodeD is the node healthy status reported by nodeD configmap
	NodeHealthyByNodeD = "Healthy"
	// NodeDEnableKey indicates if the label has been set
	NodeDEnableKey = "nodeDEnable"
	// NodeDEnableOnValue the value of NodeDEnableKey, which means nodeD has been enabled
	NodeDEnableOnValue = "on"
	// NodeDEnableOffValue the value of NodeDEnableKey, which means nodeD has not been enabled
	NodeDEnableOffValue = "off"

	// PreSeparateFaultCode PreSeparate fault Code
	PreSeparateFaultCode = "PreSeparate"

	// SwitchNodeHealtyStatuskey same as noded: healthy, subhealthy and unhealthy status are reported by switch info
	SwitchNodeHealtyStatuskey = "NodeStatus"
	// NpuSubHealthyKey annotation of npu sub-healthy status. true is sub-healthy
	NpuSubHealthyKey = "subHealthy"

	// DevInfoCMKey mindx-dl-deviceinfo configmap key
	DevInfoCMKey = "DeviceInfoCfg"
	// NodeInfoCMKey node info configmap key
	NodeInfoCMKey = "NodeInfo"
	// SwitchInfoCmKey is the key of switch info configmap
	SwitchInfoCmKey = "SwitchInfoCfg"
	// RePropertyCacheName rescheduling keyword in init env.cache
	RePropertyCacheName = "re-scheduling"
	// CmCheckCode Check code key
	CmCheckCode = "checkCode"
	// CmName Name of ReSchedulerConfigmap
	CmName = "vcjob-fault-npu-cm"
	// JobRecovery keywords for retain
	JobRecovery = "job-recovery"

	// DeleteOperator informer delete operator
	DeleteOperator = "delete"
	// AddOperator informer add operator
	AddOperator = "add"
	// UpdateOperator informer update operator
	UpdateOperator = "update"

	// CmConsumer who uses these configmap
	CmConsumer = "mx-consumer-volcano"
	// CmConsumerValue the value only for true
	CmConsumerValue = "true"
	// ClusterDeviceInfo the name of cluster device info configmap
	ClusterDeviceInfo = "cluster-info-device-"
	// ClusterNodeInfo the name of cluster node info configmap
	ClusterNodeInfo = "cluster-info-node-"
	// ClusterSwitchInfo the name of cluster switch info configmap
	ClusterSwitchInfo = "cluster-info-switch-"
	// ClusterD the name of ClusterD deployment
	ClusterD = "clusterd"

	// Pod910DeviceKey pod annotation key, for generate 910 hccl rank table
	Pod910DeviceKey = "ascend.kubectl.kubernetes.io/ascend-910-configuration"
	// PodPredicateTime set pod PodPredicateTime for using by device-plugin.
	PodPredicateTime = "predicate-time"
	// NodeNotMeetTopologyWarning node not satisfy the schedulable topology warning.
	NodeNotMeetTopologyWarning = "the npus on this node don't satisfy the schedulable topology"
	// ArgumentError argument nil error.
	ArgumentError = "invalid argument"
	// JobKindKey for define the Job kind:ascend-310P, ascend-910
	JobKindKey = "ring-controller.atlas"
	// JobKind910Value in ring-controller.atlas.
	JobKind910Value = "ascend-910"
	// JobKind310Value in ring-controller.atlas.
	JobKind310Value = "ascend-310"
	// JobKind310PValue 310p ring controller name
	JobKind310PValue = "ascend-310P"
	// JobKind910BValue 910B ring controller name
	JobKind910BValue = "ascend-910b"
	// DistributedJobKey flag for distributed job
	DistributedJobKey = "distributed-job"
	// DistributedJobValue indicate distributed job
	DistributedJobValue = "true"
	// StandaloneJobValue indicate standalone job
	StandaloneJobValue = "false"

	// SuperPodAnnoKey annotation key of super pod
	SuperPodAnnoKey = "sp-block"

	// DistributedInferKey distributed infer
	DistributedInferKey = "distributed"
	// DistributedInferLabel true or false
	DistributedInferLabel = "true"
)
View Source
// Affinity score values 0 through 8, assigned sequentially by iota.
const (
	// AffScore0 value 0 for scored.
	AffScore0 = iota
	// AffScore1 value 1 for scored.
	AffScore1
	// AffScore2 value 2 for scored.
	AffScore2
	// AffScore3 value 3 for scored.
	AffScore3
	// AffScore4 value 4 for scored.
	AffScore4
	// AffScore5 value 5 for scored.
	AffScore5
	// AffScore6 value 6 for scored.
	AffScore6
	// AffScore7 value 7 for scored.
	AffScore7
	// AffScore8 value 8 for scored.
	AffScore8
)
View Source
// Job enqueue results, PodGroup status strings and pod rescheduling tags.
const (
	// JobNotEnqueue job enqueue failed (-1)
	JobNotEnqueue = -1
	// JobEnqueue job enqueue success (1)
	JobEnqueue = 1
	// JobEnqueueSkip skip the judgement of ascend-volcano-plugin in the job enqueue phase (0)
	JobEnqueueSkip = 0
	// PodGroupInqueue the pg Inqueue status
	PodGroupInqueue = "Inqueue"
	// PodGroupPending the pg Pending status
	PodGroupPending = "Pending"
	// PodGroupRunning the pg Running status
	PodGroupRunning = "Running"
	// PodGroupUnknown the pg Unknown status
	PodGroupUnknown = "Unknown"
	// PodGroupUnschedulableType the pg Unschedulable Condition
	PodGroupUnschedulableType = "Unschedulable"

	// PodDeleteTimes the tag of single pod has been deleted
	PodDeleteTimes = "pod-delete-times"
	// EnableFunc enable the function
	EnableFunc = "on"
	// SinglePodTag the tag of single pod rescheduling
	SinglePodTag = "pod-rescheduling"
	// ProcessRecoverEnable the tag of process rescheduling
	ProcessRecoverEnable = "process-recover-enable"
	// BaseDeviceInfoKey base device info key
	BaseDeviceInfoKey = "baseDeviceInfos"
)
View Source
const (
	// TagOfPodPending the limitation on pod pending times.
	// NOTE(review): the value is "ready"; how it relates to a pending-times limit is not visible here — confirm.
	TagOfPodPending = "ready"
	// DefaultPodDeleteTimes default pod delete times, held as the string "0".
	DefaultPodDeleteTimes = "0"
)

Variables

This section is empty.

Functions

func ChangeIntArrToStr

func ChangeIntArrToStr(top []int, npuCardPreName string) string

ChangeIntArrToStr Convert []int to string. Like [0,1] -> "Ascend910-0,Ascend910-1".

func ChangeNodesToNodeMaps

func ChangeNodesToNodeMaps(nodes []*api.NodeInfo) map[string]*api.NodeInfo

ChangeNodesToNodeMaps change nodes slice into node maps

func ChangeTopToIntArray

func ChangeTopToIntArray(topStr string, npuCardPreName string) []int

ChangeTopToIntArray Change npu card ids from string to int array.

func CheckConfigMapIsDeviceInfo

func CheckConfigMapIsDeviceInfo(cm *v1.ConfigMap) bool

CheckConfigMapIsDeviceInfo check configmap is device info

func CheckConfigMapIsNodeInfo

func CheckConfigMapIsNodeInfo(cm *v1.ConfigMap) bool

CheckConfigMapIsNodeInfo check whether the configmap is kube-system/node-info-

func CheckStrInSlice

func CheckStrInSlice(str string, slice []string) bool

CheckStrInSlice return whether str in string slice

func ClusterDDeploymentIsExist

func ClusterDDeploymentIsExist(kubeClient kubernetes.Interface) bool

ClusterDDeploymentIsExist ClusterD deployment is exist

func ConvertErrSliceToError

func ConvertErrSliceToError(reErrors []error) error

ConvertErrSliceToError convert []error to one error.

func CreateOrUpdateConfigMap

func CreateOrUpdateConfigMap(k8s kubernetes.Interface, cm *v1.ConfigMap, cmName, nameSpace string) error

CreateOrUpdateConfigMap Create or update configMap.

func DeepCopyCmData

func DeepCopyCmData(cmData map[string]string) map[string]string

DeepCopyCmData returns a replica of the cmData

func GetConfigFromSchedulerConfigMap

func GetConfigFromSchedulerConfigMap(configKey string, configurations []config.Configuration) (*config.Configuration,
	error)

GetConfigFromSchedulerConfigMap get config info from yaml

func GetConfigMap

func GetConfigMap(client kubernetes.Interface, namespace, cmName string) (*v1.ConfigMap, error)

GetConfigMap Get config map from k8s.

func GetConfigMapWithRetry

func GetConfigMapWithRetry(client kubernetes.Interface, namespace, cmName string) (*v1.ConfigMap, error)

GetConfigMapWithRetry Get config map from k8s.

func GetDeployment

func GetDeployment(kubeClient kubernetes.Interface, namespace, depName string) (*v1.Deployment, error)

GetDeployment Get deployment from k8s.

func GetNpuNameFromJobRequire

func GetNpuNameFromJobRequire(npuName string) string

GetNpuNameFromJobRequire get npuName,if job require name is npu-core return huawei.com/Ascend310P

func GetReserveNodes

func GetReserveNodes(configurations []config.Configuration) (int, error)

GetReserveNodes get reserve nodes

func GetSizeOfSuperPod

func GetSizeOfSuperPod(configurations []config.Configuration) (int, error)

GetSizeOfSuperPod get size of super pod

func GetTaskInfoByNameFromSSN

func GetTaskInfoByNameFromSSN(ssn *framework.Session, taskName string) (*api.TaskInfo, error)

GetTaskInfoByNameFromSSN get corresponding api.TaskInfo object by given taskName

func GetTorNodeWithOneMinuteDelay

func GetTorNodeWithOneMinuteDelay(client kubernetes.Interface, namespace, cmName string) (*v1.ConfigMap, error)

GetTorNodeWithOneMinuteDelay get tor node configMap with one-minute delay

func GetVTaskUseTemplate

func GetVTaskUseTemplate(taskInf *api.TaskInfo) (string, error)

GetVTaskUseTemplate the format is : 0-vir04-3c_ndvpp,0-vir0

func InformerConfigmapFilter

func InformerConfigmapFilter(obj interface{}) bool

InformerConfigmapFilter is used to filter out cm need to be listened for ascend plugin

func IsConfigMapChanged

func IsConfigMapChanged(k8s kubernetes.Interface, cm *v1.ConfigMap, cmName, nameSpace string) bool

IsConfigMapChanged judges whether the cm is the same. true means no change.

func IsMapHasNPUResource

func IsMapHasNPUResource(resMap map[v1.ResourceName]float64, npuName string) bool

IsMapHasNPUResource Determines whether a target string exists in the map.

func IsNodeReady

func IsNodeReady(node *v1.Node) bool

IsNodeReady returns the node ready status

func IsSelectorMeetJob

func IsSelectorMeetJob(jobSelectors, conf map[string]string) bool

IsSelectorMeetJob check the selectors

func IsSliceContain

func IsSliceContain(keyword interface{}, targetSlice interface{}) bool

IsSliceContain judges whether keyword is in targetSlice

func MakeDataHash

func MakeDataHash(data interface{}) string

MakeDataHash check code for configmap

func Max

func Max(x, y int) int

Max return the bigger one

func Min

func Min(x, y int) int

Min return the smaller one

func ReferenceNameOfJob

func ReferenceNameOfJob(job *api.JobInfo) string

ReferenceNameOfJob get name of job

func ReferenceNameOfTask

func ReferenceNameOfTask(task *api.TaskInfo) string

ReferenceNameOfTask get pod OwnerReferences name

func RemoveCommonElement

func RemoveCommonElement(s1, s2 []int) []int

RemoveCommonElement remove common element from s1

func RemoveSliceDuplicateElement

func RemoveSliceDuplicateElement(languages []string) []string

RemoveSliceDuplicateElement remove duplicate element in slice

func SafePrint

func SafePrint(args ...interface{}) string

SafePrint safe print error

func UpdateConfigmapIncrementally

func UpdateConfigmapIncrementally(kubeClient kubernetes.Interface, ns, name string,
	newData map[string]string) (map[string]string, error)

UpdateConfigmapIncrementally update configmap Map data but keep the key value pair that new data does not have

func UuidOfJob

func UuidOfJob(job *api.JobInfo) types.UID

UuidOfJob get uid of job

Types

type ComJob

// ComJob holds the attributes that all vcJob have.
type ComJob struct {
	// Name is the job ID.
	Name               api.JobID
	// ReferenceName presumably the owner reference name of the job — confirm against ReferenceNameOfJob.
	ReferenceName      string
	// NameSpace is the job namespace.
	NameSpace          string
	// SubHealthyStrategy presumably one of the SubHealthy* strategy values — confirm.
	SubHealthyStrategy string
	// Annotation, Selector and Label are string key/value maps of the job.
	Annotation         map[string]string
	Selector           map[string]string
	Label              map[string]string
}

ComJob all vcJob has.

type Device

// Device is one device entry of an Instance; fields are serialized with the JSON tags below.
type Device struct {
	DeviceID      string `json:"device_id"` // device id
	DeviceIP      string `json:"device_ip"` // device ip
	// SuperDeviceID is serialized as "super_device_id" and omitted from JSON when empty.
	SuperDeviceID string `json:"super_device_id,omitempty"`
}

Device id for Instance

type Instance

// Instance is for annotation; it is serialized with the JSON tags below.
type Instance struct {
	PodName    string   `json:"pod_name"`  // pod Name
	ServerID   string   `json:"server_id"` // server id
	// SuperPodId is serialized as "super_pod_id".
	SuperPodId int32    `json:"super_pod_id"`
	Devices    []Device `json:"devices"` // dev
}

Instance is for annotation

type NPUJob

// NPUJob holds the attributes that only npu vcJob have.
type NPUJob struct {
	// the mapKey is taskID, not Name.
	Tasks             map[api.TaskID]NPUTask
	SelectServers     string
	// NPUTaskNum presumably the number of tasks requesting NPU — confirm against GetNPUTaskNumInJob.
	NPUTaskNum        int
	// SchedulingTaskNum presumably the number of tasks being scheduled — confirm against GetSchedulingTaskNum.
	SchedulingTaskNum int
	// ReqNPUName and ReqNPUNum: requested NPU resource name and number.
	ReqNPUName        string
	ReqNPUNum         int
	// SpBlockNPUNum presumably relates to the "sp-block" super pod annotation — confirm.
	SpBlockNPUNum     int
	// VJob is embedded by pointer and carries the job Type and Status; it may be nil.
	*VJob
}

NPUJob only npu vcJob have.

func (*NPUJob) GetNPUTaskNumInJob

func (nJob *NPUJob) GetNPUTaskNumInJob() int

GetNPUTaskNumInJob get the NPU task number in one job. for some task has no NPU.

func (*NPUJob) GetSchedulingTaskNum

func (nJob *NPUJob) GetSchedulingTaskNum() int

GetSchedulingTaskNum get the num of scheduling task

func (*NPUJob) GetVTaskNumInVJob

func (nJob *NPUJob) GetVTaskNumInVJob() int

GetVTaskNumInVJob get the NPU task number in one job. for some task has no NPU.

func (*NPUJob) IsNPUJob

func (nJob *NPUJob) IsNPUJob() bool

IsNPUJob Determine whether is the NPU job. Dynamic segmentation: huawei.com/npu-core. static segmentation: huawei.com/Ascend910-Y. no segmentation: huawei.com/Ascend910.

func (*NPUJob) IsVJob

func (nJob *NPUJob) IsVJob() bool

IsVJob Determine whether is the NPU virtual job. Dynamic segmentation: huawei.com/npu-core. static segmentation: huawei.com/Ascend910-Y. no segmentation: huawei.com/Ascend910.

func (*NPUJob) SetJobStatusByInf

func (nJob *NPUJob) SetJobStatusByInf(vcJob *api.JobInfo)

SetJobStatusByInf set vJob status by podGroup.

func (*NPUJob) SetJobType

func (nJob *NPUJob) SetJobType()

SetJobType set virtual job type must be used after isVJob. for all chips: 910, 310P

type NPUTask

// NPUTask holds what an npu task needs.
type NPUTask struct {
	// Name and NameSpace identify the task.
	Name       string
	NameSpace  string
	// ReqNPUName and ReqNPUNum: requested NPU resource name and number.
	ReqNPUName string
	ReqNPUNum  int
	// Selector the same as job.
	Selector   map[string]string
	Annotation map[string]string
	Label      map[string]string
	// NodeName presumably the node the task is bound to — confirm.
	NodeName   string
	// PodStatus is the pod phase.
	PodStatus  v1.PodPhase
	Index      int
	// VTask is embedded by pointer and carries Status, Type and Allocated; it may be nil.
	*VTask
}

NPUTask for npu task need.

func (*NPUTask) ComputeTaskType

func (asTask *NPUTask) ComputeTaskType() int

ComputeTaskType compute the task's type.

func (*NPUTask) DeleteRealPodByTask

func (asTask *NPUTask) DeleteRealPodByTask(ssn *framework.Session, waitTime int64) error

DeleteRealPodByTask generally used by force deletion

func (*NPUTask) EvictJobByTask

func (asTask *NPUTask) EvictJobByTask(ssn *framework.Session, reason string, taskName string) error

EvictJobByTask generally used by grace deletion

func (*NPUTask) ForceDeletePodByTaskInf

func (asTask *NPUTask) ForceDeletePodByTaskInf(ssn *framework.Session, reason string, nodeName string) error

ForceDeletePodByTaskInf Force delete pod by taskInf.

func (*NPUTask) GetRealPodByTask

func (asTask *NPUTask) GetRealPodByTask(ssn *framework.Session) (*v1.Pod, error)

GetRealPodByTask get pod specified by task name and namespace from kubernetes

func (*NPUTask) InitVTask

func (asTask *NPUTask) InitVTask(taskInf *api.TaskInfo) error

InitVTask init vNPU task.

func (*NPUTask) IsNPUTask

func (asTask *NPUTask) IsNPUTask() bool

IsNPUTask Determine whether is the NPU task. Dynamic segmentation: huawei.com/npu-core. static segmentation: huawei.com/Ascend910-Y. no segmentation: huawei.com/Ascend910.

func (*NPUTask) IsTaskInItsNode

func (asTask *NPUTask) IsTaskInItsNode(ssn *framework.Session, nodeName string) bool

IsTaskInItsNode check if task is on the node

func (*NPUTask) IsVNPUTask

func (asTask *NPUTask) IsVNPUTask() bool

IsVNPUTask Determine whether is the NPU virtual task. Dynamic segmentation: huawei.com/npu-core. static segmentation: huawei.com/Ascend910-Y. no segmentation: huawei.com/Ascend910.

func (*NPUTask) UpdatePodPendingReason

func (asTask *NPUTask) UpdatePodPendingReason(taskInfo *api.TaskInfo, reasonTmp string) error

UpdatePodPendingReason update pod pending reason.

type NpuBaseInfo

// NpuBaseInfo npu base info: an IP string and a super device ID.
type NpuBaseInfo struct {
	// IP presumably the device IP — confirm.
	IP            string
	SuperDeviceID uint32
}

NpuBaseInfo npu base info

type SchedulerJobAttr

// SchedulerJobAttr vcJob's attribute: the common job attributes plus the NPU-specific ones.
type SchedulerJobAttr struct {
	// ComJob is embedded by value.
	ComJob
	// NPUJob is embedded by pointer and may be nil.
	*NPUJob
}

SchedulerJobAttr vcJob's attribute.

func (SchedulerJobAttr) IsJobSinglePodDelete

func (sJob SchedulerJobAttr) IsJobSinglePodDelete() bool

IsJobSinglePodDelete valid job.

type TaskAllocated

// TaskAllocated Task allocated struct: the node, card IDs and physical names allocated to a task.
type TaskAllocated struct {
	// NodeName node name, like ubuntu
	NodeName string
	// CardName card IDs, element like 1
	CardName []int
	// PhysicsName physical names, element like Ascend310P-2c-100-1
	PhysicsName []string
}

TaskAllocated Task allocated struct.

type VJob

// VJob for dynamic NPU Job.
type VJob struct {
	// type: JobTypeWhole, JobTypeDyCut, JobTypeStCut.
	Type   int
	// Status presumably the podGroup status string — confirm against SetJobStatusByInf.
	Status string
}

VJob for dynamic NPU Job.

type VResource

// VResource resource dimensions.
type VResource struct {
	// Aicore presumably the AI core count — confirm.
	Aicore int
	// Aicpu presumably the AI CPU count — confirm.
	Aicpu  int
	// DVPP is kept as a string; its allowed values are not visible here.
	DVPP   string
}

VResource resource dimensions

func (*VResource) Add

func (vResource *VResource) Add(resource VResource)

Add add resource

func (VResource) BeGreater

func (vResource VResource) BeGreater(resource VResource) bool

BeGreater judge resource greater or equal to

func (*VResource) Sub

func (vResource *VResource) Sub(resource VResource)

Sub sub resource

type VTask

// VTask virtual NPU task struct.
type VTask struct {
	// status: TaskStatusInit...
	Status int
	// type: JobTypeWhole, JobTypeDyCut, JobTypeStCut.
	Type      int
	// Allocated records the node, card IDs and physical names allocated to the task.
	Allocated TaskAllocated
}

VTask virtual NPU task struct.

type VTemplate

// VTemplate for vNode resource.
type VTemplate struct {
	// ChipKind Ascend910/Ascend310P
	ChipKind   string
	// AICore presumably the AI core count of the template — confirm.
	AICore     int
	// AICPU presumably the AI CPU count of the template — confirm.
	AICPU      int
	// DVPPEnable is kept as a string; its allowed values are not visible here.
	DVPPEnable string
}

VTemplate for vNode resource

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL