cloud

package
v0.0.0-...-25cfa27 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 17, 2025 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Index

Constants

View Source
// Label keys, an annotation key, and event strings declared by this package.
// Keys built with keyPrefix share whatever prefix that identifier (declared
// elsewhere in the package) holds.
const (
	// LabelNodepoolManager is a keyPrefix-qualified label key.
	// NOTE(review): LabelNodepoolManagerTPUPodinator ("tpu-provisioner") is
	// presumably the value stored under that key to mark node pools managed by
	// this provisioner — confirm against the code that sets it.
	LabelNodepoolManager             = keyPrefix + "nodepool-manager"
	LabelNodepoolManagerTPUPodinator = "tpu-provisioner"

	// Label keys for the kind, name, and namespace of a "parent" object.
	// NOTE(review): presumably the object that owns the Pod a node pool was
	// created for — confirm against the caller.
	LabelParentKind      = keyPrefix + "tpu-provisioner-parent-kind"
	LabelParentName      = keyPrefix + "tpu-provisioner-parent-name"
	LabelParentNamespace = keyPrefix + "tpu-provisioner-parent-namespace"

	// Label keys for the name and namespace of a JobSet.
	// NOTE(review): presumably the source of NodePoolRef.CreatedForJobSet —
	// confirm against ListNodePools.
	LabelJobSetName      = keyPrefix + "tpu-provisioner-jobset-name"
	LabelJobSetNamespace = keyPrefix + "tpu-provisioner-jobset-namespace"

	// LabelProvisionerNodepoolID is a label key; unlike the keys above, it is
	// not qualified with keyPrefix.
	LabelProvisionerNodepoolID = "provisioner-nodepool-id"

	// AnnotationCopyLabels is a comma-separated list of labels to copy from the Pod to the node pool config (Nodes).
	AnnotationCopyLabels = "tpu-provisioner.cloud.google.com/copy-labels"

	// Strings naming the start, success, and failure of node pool creation.
	// NOTE(review): presumably used as Kubernetes event reasons — confirm
	// against the code that records events.
	EventNodePoolCreationStarted   = "NodePoolCreationStarted"
	EventNodePoolCreationSucceeded = "NodePoolCreationSucceeded"
	EventNodePoolCreationFailed    = "NodePoolCreationFailed"

	// Strings naming the start, success, and failure of node pool deletion.
	// NOTE(review): presumably used as Kubernetes event reasons — confirm.
	EventNodePoolDeletionStarted   = "NodePoolDeletionStarted"
	EventNodePoolDeletionSucceeded = "NodePoolDeletionSucceeded"
	EventNodePoolDeletionFailed    = "NodePoolDeletionFailed"

	// EventNodePoolNotFound names the case where a node pool could not be found.
	// NOTE(review): the exact code path that emits it is not visible here.
	EventNodePoolNotFound = "NodePoolNotFound"
)
View Source
// GKE label keys, supported TPU accelerator type names, and the TPU resource
// name used by this package.
const (
	// GKE labels: the TPU topology key, the TPU accelerator key, and the node
	// pool name key, respectively.
	GKETPUNodeSelector         = "cloud.google.com/gke-tpu-topology"
	GKEAcceleratorNodeSelector = "cloud.google.com/gke-tpu-accelerator"
	GKENodePoolNameLabel       = "cloud.google.com/gke-nodepool"

	// ICIResiliencyLabel is used for disabling ICI resiliency. By default, if the
	// label is not specified, the TPU slice is created in ICI resilient mode. To
	// disable ICI resiliency, the workload needs to use a node selector or
	// affinity with cloud.google.com/gke-tpu-ici-resiliency=false.
	ICIResiliencyLabel = "cloud.google.com/gke-tpu-ici-resiliency"

	// LocationHintLabel is used for passing in the desired Borg cell in which
	// the node pool's MIG should be provisioned.
	LocationHintLabel = "cloud.google.com/gke-location-hint"

	// Supported accelerator types.
	// NOTE(review): presumably the accepted values of the
	// GKEAcceleratorNodeSelector label — confirm against the code that reads it.
	V4PodSliceAccelerator  = "tpu-v4-podslice"
	V5ePodSliceAccelerator = "tpu-v5-lite-podslice"
	V5pPodSliceAccelerator = "tpu-v5p-slice"

	// Resource type labels.
	GoogleTPUResource = "google.com/tpu"
)

Variables

View Source
// ErrDuplicateRequest is a sentinel error with the message "duplicate request".
// Callers can test for it with errors.Is.
// NOTE(review): the conditions under which it is returned are not visible
// here — confirm against the provider implementation.
var ErrDuplicateRequest = errors.New("duplicate request")
View Source
// ErrNodePoolStopping is a sentinel error with the message "node pool stopping".
// Callers can test for it with errors.Is.
// NOTE(review): the conditions under which it is returned are not visible
// here — confirm against the provider implementation.
var ErrNodePoolStopping = errors.New("node pool stopping")

Functions

This section is empty.

Types

type GKE

// GKE bundles a container API service client, cluster-level settings, and an
// event recorder. The methods documented on *GKE (NodePoolLabelKey,
// EnsureNodePoolForPod, DeleteNodePoolForNode, DeleteNodePool, ListNodePools)
// match the Provider interface's method set.
type GKE struct {
	// Service is the containerv1beta1 API client; ClusterContext holds the
	// project, cluster, and node settings.
	// NOTE(review): presumably Service is used for all node pool calls — confirm.
	Service        *containerv1beta1.Service
	ClusterContext GKEContext

	// Recorder records Kubernetes events.
	// NOTE(review): presumably with the Event* strings declared in this
	// package — confirm.
	Recorder record.EventRecorder
	// contains filtered or unexported fields
}

func (*GKE) DeleteNodePool

func (g *GKE) DeleteNodePool(name string, eventObj client.Object, why string) error

func (*GKE) DeleteNodePoolForNode

func (g *GKE) DeleteNodePoolForNode(node *corev1.Node, why string) error

func (*GKE) EnsureNodePoolForPod

func (g *GKE) EnsureNodePoolForPod(p *corev1.Pod, why string) error

func (*GKE) ListNodePools

func (g *GKE) ListNodePools() ([]NodePoolRef, error)

func (*GKE) NodePoolLabelKey

func (g *GKE) NodePoolLabelKey() string

type GKEContext

// GKEContext holds project, cluster, and node configuration values. Its
// ClusterName, NodePoolName, and OpName methods each return a string.
// NOTE(review): presumably those strings are fully-qualified resource names
// built from these fields — confirm against the method bodies.
type GKEContext struct {
	// Project, cluster, and node settings.
	// NOTE(review): field meanings are inferred from their names only; confirm
	// accepted formats (e.g. zone vs. region for ClusterLocation) against the
	// code that populates and consumes them.
	ProjectID          string
	ClusterLocation    string
	Cluster            string
	NodeZone           string
	NodeServiceAccount string
	NodeSecondaryDisk  string
	NodeTags           []string
	// PodToNodeLabels is a list of key=value pairs that will be copied from the Pod
	// to the Node.
	PodToNodeLabels []string
	NodeSecureBoot  bool
	ForceOnDemand   bool
}

func (GKEContext) ClusterName

func (c GKEContext) ClusterName() string

func (GKEContext) NodePoolName

func (c GKEContext) NodePoolName(name string) string

func (GKEContext) OpName

func (c GKEContext) OpName(op string) string

type Mock

// Mock is an empty struct with no fields. The methods documented on *Mock
// match the Provider interface's method set.
type Mock struct{}

Mock is useful for local development and debugging: it lets you see what the controller would do without it actually doing anything.

func (*Mock) DeleteNodePool

func (m *Mock) DeleteNodePool(string, client.Object, string) error

func (*Mock) DeleteNodePoolForNode

func (m *Mock) DeleteNodePoolForNode(*corev1.Node, string) error

func (*Mock) EnsureNodePoolForPod

func (m *Mock) EnsureNodePoolForPod(*corev1.Pod, string) error

func (*Mock) ListNodePools

func (m *Mock) ListNodePools() ([]NodePoolRef, error)

func (*Mock) NodePoolLabelKey

func (m *Mock) NodePoolLabelKey() string

TODO: Find a better mock node pool label key.

type NodePoolRef

// NodePoolRef describes a single node pool. It is the element type of the
// slice returned by ListNodePools.
type NodePoolRef struct {
	// Name is the node pool's name.
	Name string

	// CreationTime is a timestamp for the node pool.
	// NOTE(review): presumably when the pool was created, as reported by the
	// provider — confirm.
	CreationTime time.Time

	// CreatedForJobSet is a namespace/name pair.
	// NOTE(review): presumably identifies the JobSet the pool was created for,
	// taken from the JobSet labels — confirm.
	CreatedForJobSet types.NamespacedName

	// Error and Message carry status information.
	// NOTE(review): presumably Error marks a pool in an error state and Message
	// holds the accompanying detail — confirm against ListNodePools.
	Error   bool
	Message string
}

type Provider

// Provider is the node pool management interface. The method sets documented
// for both *GKE and *Mock match it.
type Provider interface {
	// NodePoolLabelKey returns a label key as a string.
	// NOTE(review): presumably the Node label whose value is the node pool
	// name — confirm.
	NodePoolLabelKey() string
	// EnsureNodePoolForPod takes a Pod and a "why" string (parameter names per
	// the GKE implementation's signature) and returns an error.
	EnsureNodePoolForPod(*corev1.Pod, string) error
	// DeleteNodePoolForNode takes a Node and a "why" string and returns an error.
	DeleteNodePoolForNode(*corev1.Node, string) error
	// DeleteNodePool takes a node pool name, an object named eventObj in the GKE
	// implementation's signature, and a "why" string, and returns an error.
	DeleteNodePool(string, client.Object, string) error
	// ListNodePools returns a slice of NodePoolRef values, or an error.
	ListNodePools() ([]NodePoolRef, error)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL