podhandler

package
v0.1.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 8, 2024 License: Apache-2.0 Imports: 46 Imported by: 0

Documentation

Index

Constants

View Source
// Well-known numeric user and group IDs.
const (
	// RootUID and RootGID are the IDs of the superuser account and its group.
	RootUID   = 0
	RootGID   = 0
	// NobodyUID and NobodyGID are the IDs conventionally given to the
	// unprivileged "nobody" account and group on Linux.
	NobodyUID = 65534
	NobodyGID = 65534
)
View Source
const (
	// CustomSlurmFlags is the annotation key through which users supply
	// extra Slurm flags; JobFields.CustomFlags is documented as being
	// populated from 'slurm.hpk.io/flags' annotations.
	CustomSlurmFlags = "slurm.hpk.io/flags"
)
View Source
// GenerateEnvTemplate is the template used to generate environment variables;
// its inputs are described by GenerateEnvFields.
const GenerateEnvTemplate = `` /* 263-byte string literal not displayed */

GenerateEnvTemplate is used to generate environment variables. This is needed for variables that consume information from the downward API (like .status.podIP).

View Source
// HostScriptTemplate is the template of the batch script submitted for a pod.
// Its inputs are provided by JobFields.
//
// The rendered script:
//   - declares #SBATCH directives for the job name, the stdout/stderr paths,
//     every entry of .CustomFlags, and (only when set) the CPU and memory
//     requests;
//   - writes PauseScriptTemplate to .VirtualEnv.ConstructorFilePath through a
//     quoted heredoc, so the shell copies it without expanding it;
//   - creates the work directory /tmp/<namespace>_<name> and registers an
//     EXIT trap that removes it;
//   - execs Apptainer on the pause image to run the constructor script,
//     applying the cgroup file only when .HostEnv.EnableCgroupV2 is set.
//
// NOTE(review): the Apptainer invocation is prefixed with the shell builtin
// `exec`, which replaces the shell process. Once that succeeds, the EXIT trap
// and the `|| echo ... **SYSTEMERROR**` fallback apparently never run —
// confirm whether this is intended (e.g. so that SIGTERM reaches Apptainer
// directly) before relying on the cleanup or on SysErrorFilePath.
const HostScriptTemplate = `#!/bin/bash
#SBATCH --job-name={{.Pod.Name}}
#SBATCH --output={{.VirtualEnv.StdoutPath}}
#SBATCH --error={{.VirtualEnv.StderrPath}}
{{- range $index, $flag := .CustomFlags}}
#SBATCH {{$flag}}
{{end}}
#SBATCH --signal=B:TERM@60 # tells the controller
                            # to send SIGTERM to the job 60 secs
                            # before its time ends to give it a
                            # chance for better cleanup.

{{- if .ResourceRequest.CPU}}
#SBATCH --ntasks-per-node={{.ResourceRequest.CPU}}
{{end}}

{{- if .ResourceRequest.Memory}}
#SBATCH --mem={{.ResourceRequest.Memory}} 
{{end}} 

#### BEGIN SECTION: VirtualEnvironment Builder ####
# Description
# 	Builds a script for running a Virtual Environment
# 	that resembles the semantics of a Pause Environment.
cat > {{.VirtualEnv.ConstructorFilePath}} << 'PAUSE_EOF'
` + PauseScriptTemplate + `
PAUSE_EOF
#### END SECTION: VirtualEnvironment Builder ####


#### BEGIN SECTION: Host Environment ####
# Description
# 	Stuff to run outside the virtual environment

# exit when any command fails
#set -um pipeline
set -u

echo "[Host] Starting the Constructor for the Virtual Environment ..."
chmod +x  {{.VirtualEnv.ConstructorFilePath}}

export workdir=/tmp/{{.Pod.Namespace}}_{{.Pod.Name}}
echo "[Host] Creating workdir: ${workdir} "
mkdir -p ${workdir}
trap 'echo [HOST] Deleting workdir ${workdir}; rm -rf ${workdir}' EXIT

# --network-args "portmap=8080:80/tcp"
# --container is needed to start a separate /dev/sh
exec {{$.HostEnv.ApptainerBin}} exec --containall --net --fakeroot --scratch /scratch --workdir ${workdir} \
{{- if .HostEnv.EnableCgroupV2}}
--apply-cgroups {{.VirtualEnv.CgroupFilePath}} 		\
{{- end}}
--env PARENT=${PPID}								\
--bind $HOME,/tmp										\
--hostname {{.Pod.Name}}							\
{{$.PauseImageFilePath}} sh -ci {{.VirtualEnv.ConstructorFilePath}} ||
echo "[HOST] **SYSTEMERROR** apptainer exited with code $?" | tee {{.VirtualEnv.SysErrorFilePath}}

#### END SECTION: Host Environment ####
`
View Source
// PauseScriptTemplate provides the template for building pods.
// HostScriptTemplate embeds it inside a heredoc that is written to the
// constructor file of the virtual environment.
const PauseScriptTemplate = `` /* 5384-byte string literal not displayed */
PauseScriptTemplate provides the template for building pods.

Remarks:

--userns is needed to maintain the user's permissions.
--pid is not needed in order for different containers in the same pod to share the same pid space

Variables

View Source
// NotFoundBackoff is the recommended backoff for a resource that is required
// but has not been created yet, for instance when mounting configmap volumes
// to pods.
//
// NOTE(review): assuming the usual wait.Backoff semantics (Duration is the
// initial delay and is multiplied by Factor after every step), the delays grow
// as 2s, 10s, 50s, ... and no Cap is set — confirm that this growth is
// intended for all 10 steps.
var NotFoundBackoff = wait.Backoff{
	Steps:    10,
	Duration: 2 * time.Second,
	Factor:   5.0,
	Jitter:   0.1,
}

NotFoundBackoff is the recommended backoff for a resource that is required but has not been created yet, for instance when mounting configmap volumes to pods. TODO: in a future version, the backoff could adjust itself depending on the load of the controller.

Functions

func CreatePod

func CreatePod(ctx context.Context, pod *corev1.Pod, watcher filenotify.FileWatcher)

func DeletePod

func DeletePod(podKey client.ObjectKey, watcher filenotify.FileWatcher) bool

DeletePod takes a Pod Reference and deletes the Pod from the provider. DeletePod may be called multiple times for the same pod.

Notice that by using the reference, we operate on the local copy instead of the remote one. This serves two purposes: 1) we can extract updated information from .spec (Kubernetes only fetches .Status); 2) we can have "fresh" information that is not yet propagated to Kubernetes.

func DetermineEffectiveRunAsUser

func DetermineEffectiveRunAsUser(sc *corev1.SecurityContext) (uid int64, gid int64)

func DetermineEffectiveSecurityContext

func DetermineEffectiveSecurityContext(pod *corev1.Pod, container *corev1.Container) *corev1.SecurityContext

DetermineEffectiveSecurityContext returns a synthesized SecurityContext for reading effective configurations from the provided pod's and container's security context. The container's fields take precedence in cases where both are set.

func EscapeSingleQuote

func EscapeSingleQuote(str ...interface{}) string

func FromServices

func FromServices(ctx context.Context, namespace string) []corev1.EnvVar

FromServices builds environment variables that a container is started with, which tell the container where to find the services it may need, which are provided as an argument.

func HumanReadableCode

func HumanReadableCode(code int) string

HumanReadableCode translates the exit code into a human-readable form. Source: https://komodor.com/learn/exit-codes-in-containers-and-kubernetes-the-complete-guide/

func LoadPodFromFile

func LoadPodFromFile(filePath string) (*corev1.Pod, error)

LoadPodFromFile will read, decode, and return a Pod from a file.

func LoadPodFromKey

func LoadPodFromKey(podRef client.ObjectKey) (*corev1.Pod, error)

LoadPodFromKey wraps LoadPodFromFile with filePath discovery.

func ParseTemplate

func ParseTemplate(text string) (*template.Template, error)

ParseTemplate returns a custom 'text/template' enhanced with functions for processing HPK templates.

func SavePodToFile

func SavePodToFile(_ context.Context, pod *corev1.Pod) error

func SyncContainerStatuses

func SyncContainerStatuses(pod *corev1.Pod)

func UpdateStatusFromRuntime

func UpdateStatusFromRuntime(pod *corev1.Pod)

UpdateStatusFromRuntime performs a deep investigation of the running conditions of the pod to resolve its current status.

func ValidateScript

func ValidateScript(filepath string) error

ValidateScript runs the bash -n <filename.sh> to validate the generated script.

Types

type Classifier

// Classifier splits jobs into Pending, Running, Successful, and Failed.
// It is meant to be reused: Reset is called at every reconciliation cycle
// instead of allocating a new Classifier.
type Classifier struct {
	// contains filtered or unexported fields
}

Classifier splits jobs into Pending, Running, Successful, and Failed. To relieve the garbage collector, we use an embeddable structure that we reset at every reconciliation cycle.

func (*Classifier) Classify

func (in *Classifier) Classify(name string, status *corev1.ContainerStatus)

Classify the object based on the standard Frisbee lifecycle.

func (*Classifier) GetFailedJobs

func (in *Classifier) GetFailedJobs(jobNames ...string) []*corev1.ContainerStatus

func (*Classifier) GetPendingJobs

func (in *Classifier) GetPendingJobs(jobNames ...string) []*corev1.ContainerStatus

func (*Classifier) GetRunningJobs

func (in *Classifier) GetRunningJobs(jobNames ...string) []*corev1.ContainerStatus

func (*Classifier) GetSuccessfulJobs

func (in *Classifier) GetSuccessfulJobs(jobNames ...string) []*corev1.ContainerStatus

func (*Classifier) ListAll

func (in *Classifier) ListAll() string

func (*Classifier) ListFailedJobs

func (in *Classifier) ListFailedJobs() []string

func (*Classifier) ListPendingJobs

func (in *Classifier) ListPendingJobs() []string

func (*Classifier) ListRunningJobs

func (in *Classifier) ListRunningJobs() []string

func (*Classifier) ListSuccessfulJobs

func (in *Classifier) ListSuccessfulJobs() []string

func (*Classifier) NumAll

func (in *Classifier) NumAll() string

func (*Classifier) NumFailedJobs

func (in *Classifier) NumFailedJobs() int

func (*Classifier) NumPendingJobs

func (in *Classifier) NumPendingJobs() int

func (*Classifier) NumRunningJobs

func (in *Classifier) NumRunningJobs() int

func (*Classifier) NumSuccessfulJobs

func (in *Classifier) NumSuccessfulJobs() int

func (*Classifier) Reset

func (in *Classifier) Reset()

type Container

// Container holds the per-container values of a pod. JobFields carries one
// slice of these for the init containers and one for the regular containers.
type Container struct {
	// InstanceName is needed for apptainer start.
	InstanceName string // instance://podName_containerName

	// The UID to run the entrypoint of the container process.
	// May also be set in PodSecurityContext.  If set in both SecurityContext and
	// PodSecurityContext, the value specified in SecurityContext takes precedence.
	RunAsUser int64

	// The GID to run the entrypoint of the container process.
	// May also be set in PodSecurityContext.  If set in both SecurityContext and
	// PodSecurityContext, the value specified in SecurityContext takes precedence.
	RunAsGroup int64

	// ImageFilePath identifies the image of the container.
	ImageFilePath string // format: REGISTRY://image:tag

	// EnvFilePath is presumably the path of the file that holds the
	// container's environment variables — TODO confirm against the writer.
	EnvFilePath string

	// Binds is presumably the list of bind-mount specifications passed to the
	// container runtime — TODO confirm the expected format.
	Binds []string

	// Command is presumably the entrypoint of the container — TODO confirm.
	Command []string

	// Args holds the arguments of the container.
	Args []string // space separated args

	// ExecutionMode selects how the container is started.
	ExecutionMode string // exec or run

	// LogsPath instructs process to write stdout and stderr into the specified path.
	LogsPath string

	// JobIDPath points to the file where the process id of the container is stored.
	// This is used to know when the container has started.
	JobIDPath string

	// ExitCodePath is the path where the embedded Container command will write its exit code
	ExitCodePath string
}

The Container creates a new instance within the Pod and resembles the "Container" semantics.

type GenerateEnvFields

// GenerateEnvFields provide the inputs to GenerateEnvTemplate.
// It is declared as an alias of an anonymous struct type.
type GenerateEnvFields = struct {
	// Variables lists the environment variables handed to the template.
	Variables []corev1.EnvVar
}

GenerateEnvFields provide the inputs to GenerateEnvTemplate.

type JobFields

// JobFields provide the inputs to HostScriptTemplate.
type JobFields struct {
	// Pod is the namespace and name of the pod. HostScriptTemplate uses the
	// name for the job name and hostname, and both parts for the work
	// directory under /tmp.
	Pod types.NamespacedName

	// PauseImageFilePath contains the name of the image for the pause container.
	PauseImageFilePath string

	// VirtualEnv is the equivalent of a Pod.
	VirtualEnv compute.VirtualEnvironment

	// HostEnv describes the host side; HostScriptTemplate reads its
	// ApptainerBin and EnableCgroupV2 fields.
	HostEnv compute.HostEnvironment

	// InitContainers is a list of init container requests to be executed.
	InitContainers []Container

	// Containers is a list of container requests to be executed.
	Containers []Container

	// ResourceRequest are reserved resources for the job.
	ResourceRequest resources.ResourceList

	// CustomFlags are flags given by the user via 'slurm.hpk.io/flags' annotations
	CustomFlags []string
}

JobFields provide the inputs to HostScriptTemplate.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL