Documentation ¶
Overview ¶
Package testcluster wraps the Kubernetes library for common test operations. It also provides a TestCluster abstraction for interacting with clusters.
Index ¶
- Constants
- Variables
- func GetIPFromService(service *v13.Service) string
- func SetContainerResources(pod *v13.Pod, containerName string, requests ContainerResourcesRequest) (*v13.Pod, error)
- func SetNodePlacementPolicyCompact(nodepool *cspb.NodePool, tpuTopology string) error
- type AcceleratorType
- type CPUArchitecture
- type ContainerResourcesRequest
- type KubernetesClient
- type KubernetesReq
- type Namespace
- func (n *Namespace) Cleanup(ctx context.Context) error
- func (n *Namespace) GetPersistentVolume(name, size string) *v13.PersistentVolumeClaim
- func (n *Namespace) GetService(name string, spec v13.ServiceSpec) *v13.Service
- func (n *Namespace) NewAlpinePod(name, image string, cmd []string) *v13.Pod
- func (n *Namespace) NewPod(name string) *v13.Pod
- func (n *Namespace) ProbeResources(ctx context.Context, requests ContainerResourcesRequest) error
- func (n *Namespace) Reset(ctx context.Context) error
- func (n *Namespace) WaitForResources(ctx context.Context, requests ContainerResourcesRequest) error
- type NodePool
- type NodePoolType
- type RuntimeType
- type TestCluster
- func (t *TestCluster) ConfigureDaemonSetForRuntimeTestNodepool(ctx context.Context, ds *appsv1.DaemonSet) error
- func (t *TestCluster) ConfigurePodForClientNodepool(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
- func (t *TestCluster) ConfigurePodForRuntimeTestNodepool(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
- func (t *TestCluster) ConfigurePodForTertiaryNodepool(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
- func (t *TestCluster) ContainerDurationSecondsByName(ctx context.Context, pod *v13.Pod, containerName string) (time.Duration, error)
- func (t *TestCluster) CreateDaemonset(ctx context.Context, ds *appsv1.DaemonSet) (*appsv1.DaemonSet, error)
- func (t *TestCluster) CreatePersistentVolume(ctx context.Context, volume *v13.PersistentVolumeClaim) (*v13.PersistentVolumeClaim, error)
- func (t *TestCluster) CreatePod(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
- func (t *TestCluster) CreateService(ctx context.Context, service *v13.Service) (*v13.Service, error)
- func (t *TestCluster) DeleteDaemonset(ctx context.Context, ds *appsv1.DaemonSet) error
- func (t *TestCluster) DeletePersistentVolume(ctx context.Context, volume *v13.PersistentVolumeClaim) error
- func (t *TestCluster) DeletePod(ctx context.Context, pod *v13.Pod) error
- func (t *TestCluster) DeleteService(ctx context.Context, service *v13.Service) error
- func (t *TestCluster) GetDaemonset(ctx context.Context, ds *appsv1.DaemonSet) (*appsv1.DaemonSet, error)
- func (t *TestCluster) GetGVisorRuntimeLabelMap() map[string]string
- func (t *TestCluster) GetGVisorRuntimeToleration() v13.Toleration
- func (t *TestCluster) GetLogReader(ctx context.Context, pod *v13.Pod, opts v13.PodLogOptions) (io.ReadCloser, error)
- func (t *TestCluster) GetName() string
- func (t *TestCluster) GetPod(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
- func (t *TestCluster) GetPodsInDaemonSet(ctx context.Context, ds *appsv1.DaemonSet) ([]v13.Pod, error)
- func (t *TestCluster) GetService(ctx context.Context, service *v13.Service) (*v13.Service, error)
- func (t *TestCluster) HasGVisorTestRuntime(ctx context.Context) (bool, error)
- func (t *TestCluster) ListPods(ctx context.Context, namespace string) (*v13.PodList, error)
- func (t *TestCluster) ListServices(ctx context.Context, namespace string) (*v13.ServiceList, error)
- func (t *TestCluster) Namespace(namespace string) *Namespace
- func (t *TestCluster) OverrideTestNodepoolRuntime(testRuntime RuntimeType)
- func (t *TestCluster) ReadPodLogs(ctx context.Context, pod *v13.Pod) (string, error)
- func (t *TestCluster) RuntimeTestNodepoolArchitecture(ctx context.Context) (CPUArchitecture, error)
- func (t *TestCluster) StreamDaemonSetLogs(ctx context.Context, ds *appsv1.DaemonSet, opts v13.PodLogOptions, ...) error
- func (t *TestCluster) WaitForDaemonset(ctx context.Context, ds *appsv1.DaemonSet) error
- func (t *TestCluster) WaitForPodCompleted(ctx context.Context, pod *v13.Pod) error
- func (t *TestCluster) WaitForPodRunning(ctx context.Context, pod *v13.Pod) error
- func (t *TestCluster) WaitForPodTerminated(ctx context.Context, pod *v13.Pod) (v13.PodPhase, error)
- func (t *TestCluster) WaitForServiceReady(ctx context.Context, service *v13.Service) error
- type UnstableClient
Constants ¶
const ( RuntimeTypeGVisor = RuntimeType("gvisor") RuntimeTypeUnsandboxed = RuntimeType("runc") RuntimeTypeGVisorTPU = RuntimeType("gvisor-tpu") RuntimeTypeUnsandboxedTPU = RuntimeType("runc-tpu") )
List of known runtime types.
const ( // NamespaceDefault is the default namespace for Kubernetes. NamespaceDefault = v13.NamespaceDefault // NamespaceSanity is used for objects that are part of sanity checks. NamespaceSanity = "sanity" // NamespaceBenchmark is used for objects that are part of benchmarks. NamespaceBenchmark = "benchmark" )
Common namespace names.
const ( // NodePoolTypeKey is the key to mark a nodepool as a "test-runtime-nodepool" or a "client-nodepool" NodePoolTypeKey = "nodepool-type" // NodepoolRuntimeKey is the key to mark the runtime used by a nodepool. NodepoolRuntimeKey = "runtime" // NodepoolNumAcceleratorsKey is the key to mark the number of accelerators in a nodepool. NodepoolNumAcceleratorsKey = "num-accelerators" // NodepoolTPUTopologyKey is the key to mark the TPU topology used by a nodepool. NodepoolTPUTopologyKey = "tpu-topology" // NodepoolInstanceTypeKey is the key to mark the instance type used by a nodepool. NodepoolInstanceTypeKey = "node.kubernetes.io/instance-type" // Name of the TPU accelerator key used in Pod.Spec.NodeSelector. NodepoolTPUAcceleratorSelectorKey = "cloud.google.com/gke-tpu-accelerator" // Name of the TPU topology key used in Pod.Spec.NodeSelector. NodepoolTPUTopologySelectorKey = "cloud.google.com/gke-tpu-topology" )
Nodepool keys.
const ( // CPUArchitectureX86 is the x86 CPU architecture. CPUArchitectureX86 = CPUArchitecture("amd64") // CPUArchitectureARM is the ARM CPU architecture. CPUArchitectureARM = CPUArchitecture("arm64") )
const ( AcceleratorTypeTeslaT4GPU = AcceleratorType("nvidia-tesla-t4") AcceleratorTypeA100GPU = AcceleratorType("nvidia-tesla-a100") AcceleratorTypeL4GPU = AcceleratorType("nvidia-tesla-l4") AcceleratorTypeV4PodTPU = AcceleratorType("tpu-v4-pod") )
List of supported accelerators (GPUs and TPUs).
Variables ¶
var ( // DefaultMachineType is the default machine type to use for specs and create-default. DefaultMachineType = "n2-standard-4" // DefaultNvidiaMachineType is the default machine type for nvidia. DefaultNvidiaMachineType = "n1-standard-4" // TPUAcceleratorMachineTypeMap maps TPU types to the machine type to use. TPUAcceleratorMachineTypeMap = map[AcceleratorType]string{ AcceleratorTypeV4PodTPU: "ct4p-hightpu-4t", } )
Default machine types.
Functions ¶
func GetIPFromService ¶
GetIPFromService returns the IP on a service.
func SetContainerResources ¶
func SetContainerResources(pod *v13.Pod, containerName string, requests ContainerResourcesRequest) (*v13.Pod, error)
SetContainerResources sets container resources. Sets both the resource limits and requests as container runtimes honor them differently. `containerName` is optional if the pod has exactly one container.
func SetNodePlacementPolicyCompact ¶
SetNodePlacementPolicyCompact sets the node placement policy to COMPACT with the given TPU topology. This is done by reflection because the NodePool_PlacementPolicy proto message isn't available in the latest exported version of the genproto API. This is only used for TPU nodepools, so it is not critical for most benchmarks.
Types ¶
type CPUArchitecture ¶
type CPUArchitecture string
CPUArchitecture is the CPU architecture of a node. It is stored under the archKey label in node labels.
type ContainerResourcesRequest ¶
type ContainerResourcesRequest struct { CPUResources string // CPUResources to request. Note: Will be overridden by flag above. MemoryResources string // MemoryResources to request. Note: Will be overridden by flag above. GPU bool }
ContainerResourcesRequest holds arguments to set requested resources on a container.
func (ContainerResourcesRequest) String ¶
func (crr ContainerResourcesRequest) String() string
String returns a string representation of the `ContainerResourcesRequest`.
type KubernetesClient ¶
type KubernetesClient interface { // Do performs a request with a Kubernetes client. Do(context.Context, KubernetesReq) error }
KubernetesClient is an interface that wraps Kubernetes requests.
func NewRetryableClient ¶
func NewRetryableClient(ctx context.Context, client UnstableClient) (KubernetesClient, error)
NewRetryableClient creates a new retryable Kubernetes client. It takes an `UnstableClient` as input, which is used to create new instances of Kubernetes clients as needed, and to determine whether a request should be retried. This can be safely used concurrently, in which case additional Kubernetes clients will be created as needed, and reused when possible (but never garbage-collected, unless they start emitting retriable errors). It will immediately create an initial Kubernetes client from the `UnstableClient` as the initial client to use.
type KubernetesReq ¶
type KubernetesReq func(context.Context, kubernetes.Interface) error
KubernetesReq is a function that performs a request with a Kubernetes client.
type Namespace ¶
type Namespace struct { Namespace string // contains filtered or unexported fields }
Namespace represents a Kubernetes object namespace. It can contain pods or services or other Kubernetes objects. It is useful in tests that create multiple objects and want to ensure their mutual destruction, as well as for re-running tests and ensuring that the objects from past runs are cleaned up properly on the next run.
func (*Namespace) GetPersistentVolume ¶
func (n *Namespace) GetPersistentVolume(name, size string) *v13.PersistentVolumeClaim
GetPersistentVolume gets a persistent volume spec for benchmarks.
func (*Namespace) GetService ¶
GetService gets a service spec for benchmarks.
func (*Namespace) NewAlpinePod ¶
NewAlpinePod returns an alpine pod template.
func (*Namespace) ProbeResources ¶
func (n *Namespace) ProbeResources(ctx context.Context, requests ContainerResourcesRequest) error
ProbeResources verifies that a pod requesting the given resources can be scheduled.
func (*Namespace) Reset ¶
Reset deletes this namespace if it exists, and unconditionally creates a new namespace of this name. This should be used at the beginning of tests, so that the namespace is empty and ready to be used.
func (*Namespace) WaitForResources ¶
func (n *Namespace) WaitForResources(ctx context.Context, requests ContainerResourcesRequest) error
WaitForResources checks that a pod requesting the given resources can be scheduled. If it cannot, it will loop until the given context expires or the resources become available.
type NodePool ¶
type NodePool struct {
// contains filtered or unexported fields
}
NodePool is a set of nodes in a TestCluster. These nodes share a set of relevant labels and are used to segment the set of nodes in a Kubernetes cluster. In the context of Kubernetes tests and benchmarks, these pools are used to separate where workloads of each type schedule and run. NodePools are expected to be uniform (i.e. same amount of resources and reasonably similar hardware) so that simple pod scheduling can determine where to consume resources.
type NodePoolType ¶
type NodePoolType string
NodePoolType is the type of a NodePool.
const ( // TestRuntimeNodepoolName is the value that marks a "test-runtime-nodepool", or a nodepool where // w/ the runtime under test. TestRuntimeNodepoolName NodePoolType = "test-runtime-nodepool" // ClientNodepoolName is the value that marks a client nodepool. Usually this is a plain GKE // nodepool ClientNodepoolName NodePoolType = "client-nodepool" // TertiaryNodepoolName is the value that marks the tertiary nodepool. // This could either be a plain GKE nodepool or could be gVisor-enabled, // as configured during test range creation. TertiaryNodepoolName NodePoolType = "tertiary-nodepool" )
Nodepool names.
type RuntimeType ¶
type RuntimeType string
RuntimeType is a supported runtime for the test nodepool.
func (RuntimeType) ApplyNodepool ¶
func (t RuntimeType) ApplyNodepool(nodepool *cspb.NodePool)
ApplyNodepool modifies the nodepool to configure it to use the runtime.
func (RuntimeType) ApplyPodSpec ¶
func (t RuntimeType) ApplyPodSpec(podSpec *v13.PodSpec)
ApplyPodSpec modifies a PodSpec to use this runtime.
type TestCluster ¶
type TestCluster struct {
// contains filtered or unexported fields
}
TestCluster wraps clusters with their individual ClientSets so that helper methods can be called.
func NewTestClusterFromClient ¶
func NewTestClusterFromClient(clusterName string, client kubernetes.Interface) *TestCluster
NewTestClusterFromClient returns a new TestCluster client with a given client.
func NewTestClusterFromKubernetesClient ¶
func NewTestClusterFromKubernetesClient(clusterName string, client KubernetesClient) *TestCluster
NewTestClusterFromKubernetesClient returns a new TestCluster client with a given KubernetesClient.
func NewTestClusterFromProto ¶
NewTestClusterFromProto returns a new TestCluster client from a proto.
func (*TestCluster) ConfigureDaemonSetForRuntimeTestNodepool ¶
func (t *TestCluster) ConfigureDaemonSetForRuntimeTestNodepool(ctx context.Context, ds *appsv1.DaemonSet) error
ConfigureDaemonSetForRuntimeTestNodepool configures the DaemonSet to run on the test runtime.
func (*TestCluster) ConfigurePodForClientNodepool ¶
func (t *TestCluster) ConfigurePodForClientNodepool(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
ConfigurePodForClientNodepool configures the pod to run on the client nodepool.
func (*TestCluster) ConfigurePodForRuntimeTestNodepool ¶
func (t *TestCluster) ConfigurePodForRuntimeTestNodepool(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
ConfigurePodForRuntimeTestNodepool configures the pod to run on the test runtime.
func (*TestCluster) ConfigurePodForTertiaryNodepool ¶
func (t *TestCluster) ConfigurePodForTertiaryNodepool(ctx context.Context, pod *v13.Pod) (*v13.Pod, error)
ConfigurePodForTertiaryNodepool configures the pod to run on the tertiary nodepool.
func (*TestCluster) ContainerDurationSecondsByName ¶
func (t *TestCluster) ContainerDurationSecondsByName(ctx context.Context, pod *v13.Pod, containerName string) (time.Duration, error)
ContainerDurationSecondsByName gets the run duration of the named container, as reported by the kubelet. The kubelet reports durations at second granularity.
func (*TestCluster) CreateDaemonset ¶
func (t *TestCluster) CreateDaemonset(ctx context.Context, ds *appsv1.DaemonSet) (*appsv1.DaemonSet, error)
CreateDaemonset creates a daemonset with default options.
func (*TestCluster) CreatePersistentVolume ¶
func (t *TestCluster) CreatePersistentVolume(ctx context.Context, volume *v13.PersistentVolumeClaim) (*v13.PersistentVolumeClaim, error)
CreatePersistentVolume creates a persistent volume.
func (*TestCluster) CreateService ¶
func (t *TestCluster) CreateService(ctx context.Context, service *v13.Service) (*v13.Service, error)
CreateService is a helper method to create a service in a cluster.
func (*TestCluster) DeleteDaemonset ¶
DeleteDaemonset deletes a daemonset from this cluster.
func (*TestCluster) DeletePersistentVolume ¶
func (t *TestCluster) DeletePersistentVolume(ctx context.Context, volume *v13.PersistentVolumeClaim) error
DeletePersistentVolume deletes a persistent volume.
func (*TestCluster) DeleteService ¶
DeleteService is a helper to delete a given service.
func (*TestCluster) GetDaemonset ¶
func (t *TestCluster) GetDaemonset(ctx context.Context, ds *appsv1.DaemonSet) (*appsv1.DaemonSet, error)
GetDaemonset gets a daemonset.
func (*TestCluster) GetGVisorRuntimeLabelMap ¶
func (t *TestCluster) GetGVisorRuntimeLabelMap() map[string]string
GetGVisorRuntimeLabelMap returns the gVisor runtime key-value pair used on gVisor-runtime-enabled nodes.
func (*TestCluster) GetGVisorRuntimeToleration ¶
func (t *TestCluster) GetGVisorRuntimeToleration() v13.Toleration
GetGVisorRuntimeToleration returns a pod scheduling toleration that allows the pod to schedule on gVisor-runtime-enabled nodes.
func (*TestCluster) GetLogReader ¶
func (t *TestCluster) GetLogReader(ctx context.Context, pod *v13.Pod, opts v13.PodLogOptions) (io.ReadCloser, error)
GetLogReader gets an io.ReadCloser from which logs can be read. It is the caller's responsibility to close it.
func (*TestCluster) GetName ¶
func (t *TestCluster) GetName() string
GetName returns this cluster's name.
func (*TestCluster) GetPodsInDaemonSet ¶
func (t *TestCluster) GetPodsInDaemonSet(ctx context.Context, ds *appsv1.DaemonSet) ([]v13.Pod, error)
GetPodsInDaemonSet returns the list of pods of the given DaemonSet.
func (*TestCluster) GetService ¶
GetService is a helper method to get a service in a cluster.
func (*TestCluster) HasGVisorTestRuntime ¶
func (t *TestCluster) HasGVisorTestRuntime(ctx context.Context) (bool, error)
HasGVisorTestRuntime returns whether the test nodes in this cluster use the gVisor runtime.
func (*TestCluster) ListServices ¶
func (t *TestCluster) ListServices(ctx context.Context, namespace string) (*v13.ServiceList, error)
ListServices is a helper method to list services in a cluster.
func (*TestCluster) Namespace ¶
func (t *TestCluster) Namespace(namespace string) *Namespace
Namespace returns a new namespace in this cluster.
func (*TestCluster) OverrideTestNodepoolRuntime ¶
func (t *TestCluster) OverrideTestNodepoolRuntime(testRuntime RuntimeType)
OverrideTestNodepoolRuntime overrides the runtime used for pods running on the test nodepool. If unset, the test nodepool's default runtime is used.
func (*TestCluster) ReadPodLogs ¶
ReadPodLogs reads logs from a pod.
func (*TestCluster) RuntimeTestNodepoolArchitecture ¶
func (t *TestCluster) RuntimeTestNodepoolArchitecture(ctx context.Context) (CPUArchitecture, error)
RuntimeTestNodepoolArchitecture returns the CPU architecture of the test nodepool.
func (*TestCluster) StreamDaemonSetLogs ¶
func (t *TestCluster) StreamDaemonSetLogs(ctx context.Context, ds *appsv1.DaemonSet, opts v13.PodLogOptions, fn func(context.Context, v13.Pod, io.ReadCloser) error) error
StreamDaemonSetLogs streams the contents of a container from the given DaemonSet. The callback function is called once per node that the DaemonSet schedules on, with the reader corresponding to that node. The callback function is expected to close the reader. StreamDaemonSetLogs returns once the DaemonSet is ready everywhere that it is meant to be scheduled.
func (*TestCluster) WaitForDaemonset ¶
WaitForDaemonset waits until a daemonset has propagated containers across the affected nodes.
func (*TestCluster) WaitForPodCompleted ¶
WaitForPodCompleted is a helper method to wait for a pod to be completed.
func (*TestCluster) WaitForPodRunning ¶
WaitForPodRunning is a helper method to wait for a pod to be running.
func (*TestCluster) WaitForPodTerminated ¶
WaitForPodTerminated is a helper method to wait for a pod to exit, whether it succeeded or failed.
func (*TestCluster) WaitForServiceReady ¶
WaitForServiceReady waits until a service is ready.
type UnstableClient ¶
type UnstableClient interface { // Client creates a new instance of a Kubernetes client. // This function may also block (in a context-respecting manner) // in order to implement backoff between Kubernetes client creation // attempts. Client(context.Context) (kubernetes.Interface, error) // RetryError returns whether the given error should be retried. // numAttempt is the number of attempts made so far. // This function may also block (in a context-respecting manner) // in order to implement backoff between request retries. RetryError(ctx context.Context, err error, numAttempt int) bool }
UnstableClient is a Kubernetes client factory that can create new instances of Kubernetes clients and determine whether a request should be retried.