Documentation ¶
Index ¶
- Constants
- func ToDockerMounts(bindMounts []expconf.BindMount, workDir string) []mount.Mount
- func TrialSpecProxyPorts(taskSpec *TaskSpec, expConfig expconf.ExperimentConfigV0) expconf.ProxyPortsConfig
- func ValidatePbs(pbsOptions []string) []error
- func ValidateSlurm(slurmOptions []string) []error
- type GCCkptSpec
- type GenericCommandSpec
- type GenericTaskSpec
- func (s GenericTaskSpec) ResourcePool() string
- func (s GenericTaskSpec) SetJobPriority(priority int) error
- func (s GenericTaskSpec) SetResourcePool(resourcePool string) error
- func (s GenericTaskSpec) SetWeight(weight float64) error
- func (s GenericTaskSpec) ToTaskSpec() TaskSpec
- func (s GenericTaskSpec) ToV1Job() (*jobv1.Job, error)
- type TaskSpec
- func (t *TaskSpec) Archives() ([]cproto.RunArchive, []cproto.RunArchive)
- func (t *TaskSpec) Clone() (*TaskSpec, error)
- func (t TaskSpec) EnvVars() map[string]string
- func (t *TaskSpec) LogShipperWrappedEntrypoint() []string
- func (t *TaskSpec) ResolveWorkDir()
- func (t *TaskSpec) ToDispatcherManifest(syslog *logrus.Entry, allocationID string, tlsEnabled bool, masterHost string, ...) (*launcher.Manifest, string, string, error)
- func (t *TaskSpec) ToDockerSpec() cproto.Spec
- func (t *TaskSpec) WarnUnsupportedOptions(userConfiguredPriority bool, containerRunType string) string
- type TaskSpecifier
- type TrialSpec
Constants ¶
const ( // SingularityEntrypointWrapperScript is just the name of the singularity entrypoint wrapper. SingularityEntrypointWrapperScript = "singularity-entrypoint-wrapper.sh" // StartupHookScript contains the script to run on task startup filled in dynamically. StartupHookScript = "dynamic-tcd-startup-hook.sh" )
const ( // DefaultWorkDir is the default workdir. DefaultWorkDir = "/run/determined/workdir" RunDir = "/run/determined" PasswdPath = "/run/determined/etc/passwd" ShadowPath = "/run/determined/etc/shadow" GroupPath = "/run/determined/etc/group" // DtrainSSHPortBase is starting range for Dtrain ports. DtrainSSHPortBase = 12350 // InterTrainProcessCommPort1Base is starting range for intertraincomm1 ports. InterTrainProcessCommPort1Base = 12360 // InterTrainProcessCommPort2Base is starting range for intertraincomm2 ports. InterTrainProcessCommPort2Base = 12365 // C10DPortBase is starting range for c10D ports. C10DPortBase = 29400 // DTrainSSHPort is the name of a port. DTrainSSHPort = "DTRAIN_SSH_PORT" // InterTrainProcessCommPort1 is the name of a port. InterTrainProcessCommPort1 = "INTER_TRAIN_PROCESS_COMM_PORT_1" // InterTrainProcessCommPort2 is the name of a port. InterTrainProcessCommPort2 = "INTER_TRAIN_PROCESS_COMM_PORT_2" // C10DPort is the name of a port. C10DPort = "C10D_PORT" )
File location constants.
const (
// ManifestName is the name used by DAI RM when creating HPC job manifests.
ManifestName = "det"
)
Variables ¶
This section is empty.
Functions ¶
func ToDockerMounts ¶
ToDockerMounts converts expconf bind mounts to container mounts.
func TrialSpecProxyPorts ¶
func TrialSpecProxyPorts( taskSpec *TaskSpec, expConfig expconf.ExperimentConfigV0, ) expconf.ProxyPortsConfig
TrialSpecProxyPorts combines user-defined and system proxy configs. This static function is public because the trial actor builds `TrialSpec` instances late.
func ValidatePbs ¶
ValidatePbs checks that the specified PBS options are allowed. If any are not, messages are returned as a slice of errors.
func ValidateSlurm ¶
ValidateSlurm checks that the specified Slurm options are allowed. If any are not, messages are returned as a slice of errors.
Types ¶
type GCCkptSpec ¶
type GCCkptSpec struct { Base TaskSpec ExperimentID int LegacyConfig expconf.LegacyConfig ToDelete string // If len(CheckpointGlobs) == 0 then we won't delete any checkpoint files // and just refresh the state of the checkpoint. CheckpointGlobs []string DeleteTensorboards bool }
GCCkptSpec is a description of a task for running checkpoint GC.
func (GCCkptSpec) ToTaskSpec ¶
func (g GCCkptSpec) ToTaskSpec() TaskSpec
ToTaskSpec generates a TaskSpec.
type GenericCommandSpec ¶
type GenericCommandSpec struct { Base TaskSpec CommandID string Config model.CommandConfig // Deprecated: kept so we can still marshal to this. // Please use command.CreateGeneric.modelDef instead. UserFiles archive.Archive AdditionalFiles archive.Archive Metadata genericCommandSpecMetadata Keys *ssh.PrivateAndPublicKeys WatchProxyIdleTimeout bool WatchRunnerIdleTimeout bool TaskType model.TaskType }
GenericCommandSpec is a description of a task for running a command.
func (*GenericCommandSpec) MakeEnvPorts ¶
func (s *GenericCommandSpec) MakeEnvPorts()
MakeEnvPorts fills in `Environment.Ports`, i.e., the exposed ports for the container config.
func (*GenericCommandSpec) ProxyPorts ¶
func (s *GenericCommandSpec) ProxyPorts() expconf.ProxyPortsConfig
ProxyPorts combines user-defined and system proxy configs.
func (GenericCommandSpec) ToTaskSpec ¶
func (s GenericCommandSpec) ToTaskSpec() TaskSpec
ToTaskSpec generates a TaskSpec.
type GenericTaskSpec ¶
type GenericTaskSpec struct { Base TaskSpec ProjectID int WorkspaceID int RegisteredTime time.Time JobID model.JobID GenericTaskConfig model.GenericTaskConfig }
GenericTaskSpec is the generic task spec.
func (GenericTaskSpec) ResourcePool ¶
func (s GenericTaskSpec) ResourcePool() string
ResourcePool returns the resource pool.
func (GenericTaskSpec) SetJobPriority ¶
func (s GenericTaskSpec) SetJobPriority(priority int) error
SetJobPriority todo.
func (GenericTaskSpec) SetResourcePool ¶
func (s GenericTaskSpec) SetResourcePool(resourcePool string) error
SetResourcePool todo.
func (GenericTaskSpec) SetWeight ¶
func (s GenericTaskSpec) SetWeight(weight float64) error
SetWeight todo.
func (GenericTaskSpec) ToTaskSpec ¶
func (s GenericTaskSpec) ToTaskSpec() TaskSpec
ToTaskSpec converts the generic task spec to the common task spec.
type TaskSpec ¶
type TaskSpec struct { // Fields that are only for task logics. Description string // LoggingFields are fields to include in each record of structured logging. LoggingFields map[string]string // LogRetentionDays is the number of days to retain logs for. LogRetentionDays *int16 // Fields that are set on the cluster level. ClusterID string HarnessPath string MasterCert []byte SSHConfig config.SSHConfig SegmentEnabled bool SegmentAPIKey string // Fields that are set on the per-request basis. // TaskContainerDefaults should be removed from TaskSpec once we move to using the same // schema for the cluster-level defaults and the request-level configuration. TaskContainerDefaults model.TaskContainerDefaultsConfig Environment expconf.EnvironmentConfig ResourcesConfig expconf.ResourcesConfig WorkDir string Owner *model.User AgentUserGroup *model.AgentUserGroup ExtraArchives []cproto.RunArchive ExtraEnvVars map[string]string ExtraPodLabels map[string]string Entrypoint []string Mounts []mount.Mount // UseHostMode is whether host mode networking would be desirable for this task. // This is used by Docker only. UseHostMode bool ShmSize int64 // The parent task of an allocation. TaskID string // Fields that are set on per-resources basis. AllocationID string AllocationSessionToken string ResourcesID string ContainerID string Devices []device.Device UserSessionToken string TaskType model.TaskType SlurmConfig expconf.SlurmConfig PbsConfig expconf.PbsConfig ExtraProxyPorts expconf.ProxyPortsConfig Workspace string Project string Labels []string // Ports required by trial or commands and their respective base port values. UniqueExposedPortRequests map[string]int // For testing only. DontShipLogs bool }
TaskSpec defines the spec of a task.
func (*TaskSpec) Archives ¶
func (t *TaskSpec) Archives() ([]cproto.RunArchive, []cproto.RunArchive)
Archives returns all the archives.
func (*TaskSpec) LogShipperWrappedEntrypoint ¶
LogShipperWrappedEntrypoint returns the configured Entrypoint wrapped with ship_logs.py.
func (*TaskSpec) ResolveWorkDir ¶
func (t *TaskSpec) ResolveWorkDir()
ResolveWorkDir resolves the work dir.
func (*TaskSpec) ToDispatcherManifest ¶
func (t *TaskSpec) ToDispatcherManifest( syslog *logrus.Entry, allocationID string, tlsEnabled bool, masterHost string, masterPort int, certificateName string, numSlots int, slotType device.Type, slurmPartition string, tresSupported bool, gresSupported bool, containerRunType string, isPbsLauncher bool, labelMode *string, disabledNodes []string, ) (*launcher.Manifest, string, string, error)
ToDispatcherManifest creates the manifest that will ultimately be sent to the launcher. Returns:
Manifest, launchingUserName, PayloadName, err
Note: Cannot pass "req *sproto.AllocateRequest" as an argument, as it requires import of "github.com/determined-ai/determined/master/internal/sproto", which results in an "import cycle not allowed" error.
func (*TaskSpec) ToDockerSpec ¶
ToDockerSpec converts a task spec to a docker container spec.
type TaskSpecifier ¶
type TaskSpecifier interface {
ToTaskSpec() TaskSpec
}
TaskSpecifier creates a TaskSpec. ToTaskSpec must only be called once per specifier.
type TrialSpec ¶
type TrialSpec struct { Base TaskSpec ExperimentID int TrialID int TrialRunID int ExperimentConfig expconf.ExperimentConfig HParams map[string]interface{} TrialSeed uint32 LatestCheckpoint *model.Checkpoint StepsCompleted int Keys ssh.PrivateAndPublicKeys }
TrialSpec is a description of a task for running a trial container.
func (*TrialSpec) MakeEnvPorts ¶
func (s *TrialSpec) MakeEnvPorts() expconf.EnvironmentConfigV0
MakeEnvPorts fills in `Environment.Ports`, i.e., the exposed ports for the container config.
func (*TrialSpec) ProxyPorts ¶
func (s *TrialSpec) ProxyPorts() expconf.ProxyPortsConfig
ProxyPorts combines user-defined and system proxy configs.
func (TrialSpec) ToTaskSpec ¶
ToTaskSpec generates a TaskSpec.