tasks

package
v0.38.0-rc6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 1, 2024 License: Apache-2.0 Imports: 31 Imported by: 2

Documentation

Index

Constants

View Source
const (

	// SingularityEntrypointWrapperScript is the file name of the Singularity entrypoint
	// wrapper script.
	SingularityEntrypointWrapperScript = "singularity-entrypoint-wrapper.sh"

	// StartupHookScript is the file name of the script to run on task startup; the
	// script's contents are filled in dynamically.
	StartupHookScript = "dynamic-tcd-startup-hook.sh"
)
View Source
const (
	// DefaultWorkDir is the default workdir.
	DefaultWorkDir = "/run/determined/workdir"

	// RunDir is the directory under which DefaultWorkDir and the etc files below live.
	RunDir = "/run/determined"

	// PasswdPath, ShadowPath and GroupPath are the locations of the passwd, shadow and
	// group files inside the etc directory under RunDir.
	PasswdPath = "/run/determined/etc/passwd"
	ShadowPath = "/run/determined/etc/shadow"
	GroupPath  = "/run/determined/etc/group"

	// DtrainSSHPortBase is the start of the range for Dtrain ports.
	// NOTE(review): the *Base constants are presumably paired with the port-name
	// constants further down (note the Dtrain/DTrain casing difference) — confirm.
	DtrainSSHPortBase = 12350
	// InterTrainProcessCommPort1Base is the start of the range for intertraincomm1 ports.
	InterTrainProcessCommPort1Base = 12360
	// InterTrainProcessCommPort2Base is the start of the range for intertraincomm2 ports.
	InterTrainProcessCommPort2Base = 12365
	// C10DPortBase is the start of the range for c10D ports.
	C10DPortBase = 29400
	// DTrainSSHPort is the name of a port.
	DTrainSSHPort = "DTRAIN_SSH_PORT"
	// InterTrainProcessCommPort1 is the name of a port.
	InterTrainProcessCommPort1 = "INTER_TRAIN_PROCESS_COMM_PORT_1"
	// InterTrainProcessCommPort2 is the name of a port.
	InterTrainProcessCommPort2 = "INTER_TRAIN_PROCESS_COMM_PORT_2"
	// C10DPort is the name of a port.
	C10DPort = "C10D_PORT"
)

File location constants.

View Source
const (

	// ManifestName is the name the DAI RM gives to the HPC job manifests it creates.
	ManifestName = "det"
)

Variables

This section is empty.

Functions

func ToDockerMounts

func ToDockerMounts(bindMounts []expconf.BindMount, workDir string) []mount.Mount

ToDockerMounts converts expconf bind mounts to container mounts.

func TrialSpecProxyPorts

func TrialSpecProxyPorts(
	taskSpec *TaskSpec,
	expConfig expconf.ExperimentConfigV0,
) expconf.ProxyPortsConfig

TrialSpecProxyPorts combines user-defined and system proxy configs. This static function is public because the trial actor builds `TrialSpec` instances late.

func ValidatePbs

func ValidatePbs(pbsOptions []string) []error

ValidatePbs checks that the specified PBS options are allowed. If any are not, messages describing them are returned as a slice of errors.

func ValidateSlurm

func ValidateSlurm(slurmOptions []string) []error

ValidateSlurm checks that the specified Slurm options are allowed. If any are not, messages describing them are returned as a slice of errors.

Types

type GCCkptSpec

type GCCkptSpec struct {
	// Base is the TaskSpec carried by this spec.
	// NOTE(review): presumably the starting point for ToTaskSpec — confirm.
	Base TaskSpec

	// NOTE(review): presumably identify the experiment whose checkpoints are collected
	// and what to remove; ToDelete is a single string whose format is not visible
	// here — confirm.
	ExperimentID int
	LegacyConfig expconf.LegacyConfig
	ToDelete     string
	// If len(CheckpointGlobs) == 0 then we won't delete any checkpoint files
	// and just refresh the state of the checkpoint.
	CheckpointGlobs    []string
	DeleteTensorboards bool
}

GCCkptSpec is a description of a task for running checkpoint GC.

func (GCCkptSpec) ToTaskSpec

func (g GCCkptSpec) ToTaskSpec() TaskSpec

ToTaskSpec generates a TaskSpec.

type GenericCommandSpec

type GenericCommandSpec struct {
	// Base is the TaskSpec carried by this spec.
	// NOTE(review): presumably the starting point for ToTaskSpec — confirm.
	Base TaskSpec

	// CommandID identifies the command; its format is not visible here.
	CommandID string

	// Config is the command's configuration.
	Config model.CommandConfig

	// Deprecated: kept so we can still marshal to this.
	// Please use command.CreateGeneric.modelDef instead.
	// NOTE(review): the notice sits directly above UserFiles; whether it also covers
	// AdditionalFiles is not visible here — confirm.
	UserFiles       archive.Archive
	AdditionalFiles archive.Archive

	// Metadata uses an unexported type, so it can only be constructed within this package.
	Metadata genericCommandSpecMetadata

	// Keys is a pointer here (TrialSpec.Keys is a value).
	// NOTE(review): presumably nil when the command needs no SSH keys — confirm.
	Keys *ssh.PrivateAndPublicKeys

	// NOTE(review): presumably enable idle-timeout watching for the proxy and the
	// runner respectively — confirm.
	WatchProxyIdleTimeout  bool
	WatchRunnerIdleTimeout bool

	// TaskType is the type of task this command runs as.
	TaskType model.TaskType
}

GenericCommandSpec is a description of a task for running a command.

func (*GenericCommandSpec) MakeEnvPorts

func (s *GenericCommandSpec) MakeEnvPorts()

MakeEnvPorts fills in `Environment.Ports` i.e. exposed ports for container config.

func (*GenericCommandSpec) ProxyPorts

ProxyPorts combines user-defined and system proxy configs.

func (GenericCommandSpec) ToTaskSpec

func (s GenericCommandSpec) ToTaskSpec() TaskSpec

ToTaskSpec generates a TaskSpec.

type GenericTaskSpec

type GenericTaskSpec struct {
	// Base is the TaskSpec carried by this spec; the remaining fields in this group
	// are identifiers and a registration timestamp for the task.
	// NOTE(review): presumably ProjectID/WorkspaceID/JobID refer to the owning project,
	// workspace and job — confirm.
	Base           TaskSpec
	ProjectID      int
	WorkspaceID    int
	RegisteredTime time.Time
	JobID          model.JobID

	// GenericTaskConfig is the configuration of the generic task.
	GenericTaskConfig model.GenericTaskConfig
}

GenericTaskSpec is the generic task spec.

func (GenericTaskSpec) ResourcePool

func (s GenericTaskSpec) ResourcePool() string

ResourcePool returns the resource pool.

func (GenericTaskSpec) SetJobPriority

func (s GenericTaskSpec) SetJobPriority(priority int) error

SetJobPriority todo.

func (GenericTaskSpec) SetResourcePool

func (s GenericTaskSpec) SetResourcePool(resourcePool string) error

SetResourcePool todo.

func (GenericTaskSpec) SetWeight

func (s GenericTaskSpec) SetWeight(weight float64) error

SetWeight todo.

func (GenericTaskSpec) ToTaskSpec

func (s GenericTaskSpec) ToTaskSpec() TaskSpec

ToTaskSpec converts the generic task spec to the common task spec.

func (GenericTaskSpec) ToV1Job

func (s GenericTaskSpec) ToV1Job() (*jobv1.Job, error)

ToV1Job todo.

type TaskSpec

type TaskSpec struct {
	// Fields that are only for task logic.
	Description string
	// LoggingFields are fields to include in each record of structured logging.
	LoggingFields map[string]string
	// LogRetentionDays is the number of days to retain logs for.
	// NOTE(review): it is a pointer, so nil is distinguishable from zero; what nil
	// means is not visible here — confirm.
	LogRetentionDays *int16

	// Fields that are set on the cluster level.
	ClusterID   string
	HarnessPath string
	MasterCert  []byte
	SSHConfig   config.SSHConfig

	// NOTE(review): presumably the toggle and API key for Segment reporting — confirm.
	SegmentEnabled bool
	SegmentAPIKey  string

	// Fields that are set on a per-request basis.
	// TaskContainerDefaults should be removed from TaskSpec once we move to using the same
	// schema for the cluster-level defaults and the request-level configuration.
	TaskContainerDefaults model.TaskContainerDefaultsConfig
	Environment           expconf.EnvironmentConfig
	ResourcesConfig       expconf.ResourcesConfig
	WorkDir               string
	Owner                 *model.User
	AgentUserGroup        *model.AgentUserGroup
	ExtraArchives         []cproto.RunArchive
	ExtraEnvVars          map[string]string
	ExtraPodLabels        map[string]string
	Entrypoint            []string
	Mounts                []mount.Mount
	// UseHostMode is whether host mode networking would be desirable for this task.
	// This is used by Docker only.
	// NOTE(review): the units of ShmSize below are not visible here (presumably
	// bytes) — confirm.
	UseHostMode bool
	ShmSize     int64

	// The parent task of an allocation.
	TaskID string

	// Fields that are set on a per-resources basis.
	AllocationID           string
	AllocationSessionToken string
	ResourcesID            string
	ContainerID            string
	Devices                []device.Device

	// NOTE(review): SlurmConfig and PbsConfig presumably only matter for HPC launches
	// (see ToDispatcherManifest) — confirm.
	UserSessionToken string
	TaskType         model.TaskType
	SlurmConfig      expconf.SlurmConfig
	PbsConfig        expconf.PbsConfig

	// ExtraProxyPorts holds additional proxy port configs.
	// NOTE(review): presumably merged with user-defined ones by the ProxyPorts
	// helpers — confirm.
	ExtraProxyPorts expconf.ProxyPortsConfig

	// NOTE(review): presumably the names of the owning workspace and project plus
	// user-supplied labels — confirm.
	Workspace string
	Project   string
	Labels    []string
	// Ports required by trial or commands and their respective base port values.
	UniqueExposedPortRequests map[string]int

	// For testing only.
	DontShipLogs bool
}

TaskSpec defines the spec of a task.

func (*TaskSpec) Archives

func (t *TaskSpec) Archives() ([]cproto.RunArchive, []cproto.RunArchive)

Archives returns all the archives.

func (*TaskSpec) Clone

func (t *TaskSpec) Clone() (*TaskSpec, error)

Clone deep copies a TaskSpec.

func (TaskSpec) EnvVars

func (t TaskSpec) EnvVars() map[string]string

EnvVars returns all the environment variables.

func (*TaskSpec) LogShipperWrappedEntrypoint

func (t *TaskSpec) LogShipperWrappedEntrypoint() []string

LogShipperWrappedEntrypoint returns the configured Entrypoint wrapped with ship_logs.py.

func (*TaskSpec) ResolveWorkDir

func (t *TaskSpec) ResolveWorkDir()

ResolveWorkDir resolves the work dir.

func (*TaskSpec) ToDispatcherManifest

func (t *TaskSpec) ToDispatcherManifest(
	syslog *logrus.Entry,
	allocationID string,
	tlsEnabled bool,
	masterHost string,
	masterPort int,
	certificateName string,
	numSlots int,
	slotType device.Type,
	slurmPartition string,
	tresSupported bool,
	gresSupported bool,
	containerRunType string,
	isPbsLauncher bool,
	labelMode *string,
	disabledNodes []string,
) (*launcher.Manifest, string, string, error)

ToDispatcherManifest creates the manifest that will be ultimately sent to the launcher. Returns:

Manifest, launchingUserName, PayloadName, err

Note: Cannot pass "req *sproto.AllocateRequest" as an argument, as it requires import of "github.com/determined-ai/determined/master/internal/sproto", which results in an "import cycle not allowed" error.

func (*TaskSpec) ToDockerSpec

func (t *TaskSpec) ToDockerSpec() cproto.Spec

ToDockerSpec converts a task spec to a docker container spec.

func (*TaskSpec) WarnUnsupportedOptions

func (t *TaskSpec) WarnUnsupportedOptions(
	userConfiguredPriority bool,
	containerRunType string,
) string

WarnUnsupportedOptions gives warnings for user configurations that are not supported by HPC launcher.

type TaskSpecifier

type TaskSpecifier interface {
	// ToTaskSpec returns the TaskSpec for this specifier. It must only be called once
	// per specifier.
	ToTaskSpec() TaskSpec
}

TaskSpecifier creates a TaskSpec. ToTaskSpec must only be called once per specifier.

type TrialSpec

type TrialSpec struct {
	// Base is the TaskSpec carried by this spec.
	// NOTE(review): presumably the starting point for ToTaskSpec — confirm.
	Base TaskSpec

	// Identifiers, configuration and progress state of the trial being run.
	// NOTE(review): LatestCheckpoint is a pointer, presumably nil when the trial has no
	// checkpoint yet; the meaning of TrialRunID and StepsCompleted is not visible
	// here — confirm.
	ExperimentID     int
	TrialID          int
	TrialRunID       int
	ExperimentConfig expconf.ExperimentConfig
	HParams          map[string]interface{}
	TrialSeed        uint32
	LatestCheckpoint *model.Checkpoint
	StepsCompleted   int

	// Keys is stored by value here (GenericCommandSpec.Keys is a pointer).
	Keys ssh.PrivateAndPublicKeys
}

TrialSpec is a description of a task for running a trial container.

func (*TrialSpec) MakeEnvPorts

func (s *TrialSpec) MakeEnvPorts() expconf.EnvironmentConfigV0

MakeEnvPorts fills in `Environment.Ports` i.e. exposed ports for container config.

func (*TrialSpec) ProxyPorts

func (s *TrialSpec) ProxyPorts() expconf.ProxyPortsConfig

ProxyPorts combines user-defined and system proxy configs.

func (TrialSpec) ToTaskSpec

func (s TrialSpec) ToTaskSpec() TaskSpec

ToTaskSpec generates a TaskSpec.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL