Documentation ¶
Index ¶
- Constants
- Variables
- func FmtInstances(instances []*Instance) string
- func MostProgressedExperimentState(state1 experimentv1.State, state2 experimentv1.State) experimentv1.State
- func ProjectsToProto(ps []*Project) []*projectv1.Project
- func StateToProto(state State) experimentv1.State
- func StatesToStrings(inStates map[State]bool) []string
- func TaskLogLevelFromLogrus(l logrus.Level) string
- func TaskLogLevelFromProto(l logv1.LogLevel) string
- func TaskLogLevelToProto(l string) logv1.LogLevel
- func TrialMetricsJSONPath(isValidation bool) string
- func TrialSummaryMetricsJSONPath(metricGroup MetricGroup) string
- func UsingCustomImage(req *apiv1.LaunchTensorboardRequest) bool
- func ValidatePrioritySetting(priority *int) []error
- type AcceleratorData
- type AccessScopeID
- type AccessScopeSet
- type ActivityType
- type AgentStats
- type AgentSummary
- type AgentUserGroup
- type AgentsSummary
- type Allocation
- type AllocationID
- type AllocationSession
- type AllocationState
- type AuthTokenKeypair
- type BindMount
- type BindMountsConfig
- type Checkpoint
- type CheckpointTrainingMetadata
- type CheckpointV2
- type CommandConfig
- type ConfigFile
- type DefaultLoggingConfig
- type DeviceConfig
- type DevicesConfig
- type Duration
- type ElasticLoggingConfig
- type ElasticSecurityConfig
- type EntityType
- type Environment
- type ExitedReason
- type Experiment
- type ExtendedFloat64
- type ExternalSessions
- type FullUser
- type Group
- type GroupMembership
- type Groups
- type Instance
- type InstanceState
- type InstanceStats
- type InstanceType
- type JSONObj
- type Job
- type JobID
- type JobType
- type KubernetesTaskContainerDefaults
- type LoggingConfig
- type MetricGroup
- type MetricIdentifier
- type Model
- type ModelVersion
- type Project
- type Projects
- type ProxyPort
- type ProxyPortsConfig
- type RequestID
- type ResourceAggregates
- type ResourcesConfig
- type RuntimeItem
- type RuntimeItems
- type SessionID
- type SlotSummary
- type SlotsSummary
- type Snapshotter
- type State
- type StateWithReason
- type StorageSize
- type TLSClientConfig
- type Task
- type TaskContainerDefaultsConfig
- func (c TaskContainerDefaultsConfig) Merge(other TaskContainerDefaultsConfig) (TaskContainerDefaultsConfig, error)
- func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.ExperimentConfig)
- func (c *TaskContainerDefaultsConfig) UnmarshalJSON(data []byte) error
- func (c *TaskContainerDefaultsConfig) Validate() []error
- type TaskContextDirectory
- type TaskID
- type TaskLog
- type TaskLogBatch
- type TaskLogVersion
- type TaskStats
- type TaskType
- type Template
- type Trial
- type TrialLog
- type TrialLogBatch
- type TrialMetrics
- type TrialProfilerMetricsBatch
- type TrialProfilerMetricsBatchBatch
- type TrialTaskID
- type User
- type UserActivity
- type UserID
- type UserSession
- type UserWebSetting
- type Users
- type WorkloadManagerType
- type WorkloadSequencerType
- type Workspace
- type WorkspacePin
- type WorkspaceState
Constants ¶
const ( // NotebookIdleTypeKernelsOrTerminals indicates that a notebook should be considered active if any // kernels or terminals are open. NotebookIdleTypeKernelsOrTerminals = "kernels_or_terminals" // NotebookIdleTypeKernelConnections indicates that a notebook should be considered active if any // connections to kernels are open. NotebookIdleTypeKernelConnections = "kernel_connections" // NotebookIdleTypeActivity indicates that a notebook should be considered active if any kernel is // running a command or any terminal is inputting or outputting data. NotebookIdleTypeActivity = "activity" )
const ( // ActiveState constant. ActiveState State = "ACTIVE" // CanceledState constant. CanceledState State = "CANCELED" // CompletedState constant. CompletedState State = "COMPLETED" // ErrorState constant. ErrorState State = "ERROR" // PausedState constant. PausedState State = "PAUSED" // StoppingKilledState constant. StoppingKilledState State = "STOPPING_KILLED" // StoppingCanceledState constant. StoppingCanceledState State = "STOPPING_CANCELED" // StoppingCompletedState constant. StoppingCompletedState State = "STOPPING_COMPLETED" // StoppingErrorState constant. StoppingErrorState State = "STOPPING_ERROR" // DeletingState constant. DeletingState State = "DELETING" // DeleteFailedState constant. DeleteFailedState State = "DELETE_FAILED" // DeletedState constant. DeletedState State = "DELETED" // PartiallyDeletedState constant. PartiallyDeletedState State = "PARTIALLY_DELETED" // RunningState constant. Currently only used by unmanaged trials. RunningState State = "RUNNING" // TrialWorkloadSequencerType constant. TrialWorkloadSequencerType WorkloadSequencerType = "TRIAL_WORKLOAD_SEQUENCER" )
const ( // MinUserSchedulingPriority is the smallest priority users may specify. MinUserSchedulingPriority = 1 // MaxUserSchedulingPriority is the largest priority users may specify. MaxUserSchedulingPriority = 99 )
const ( // LogLevelTrace is the trace task log level. LogLevelTrace = tasklog.LogLevelTrace // LogLevelDebug is the debug task log level. LogLevelDebug = tasklog.LogLevelDebug // LogLevelInfo is the info task log level. LogLevelInfo = tasklog.LogLevelInfo // LogLevelWarning is the warn task log level. LogLevelWarning = tasklog.LogLevelWarning // LogLevelError is the error task log level. LogLevelError = tasklog.LogLevelError // LogLevelCritical is the critical task log level. LogLevelCritical = tasklog.LogLevelCritical // LogLevelUnspecified is the unspecified task log level. LogLevelUnspecified = tasklog.LogLevelUnspecified )
const ( // DefaultWorkspaceID is a special, always-existing, workspace titled "Uncategorized". DefaultWorkspaceID = 1 // DefaultProjectID is the default project ID for the default workspace. DefaultProjectID = 1 )
const BCryptCost = 15
BCryptCost is a stopgap until we implement sane master-configuration.
const ( // DeterminedK8ContainerName is the name of the container that executes the task within Kubernetes // pods that are launched by Determined. DeterminedK8ContainerName = "determined-container" )
const ( // RFC3339MicroTrailingZeroes unlike time.RFC3339Nano is a time format specifier that preserves // trailing zeroes. RFC3339MicroTrailingZeroes = "2006-01-02T15:04:05.000000Z07:00" )
const (
// StepsCompletedMetadataKey is the key within metadata to find steps completed now, if it exists.
StepsCompletedMetadataKey = "steps_completed"
)
Variables ¶
var ( // EmptyPassword is the empty password (i.e., the empty string). EmptyPassword = null.NewString("", false) // NoPasswordLogin is a password that prevents the user from logging in // directly. They can still login via external authentication methods like // OAuth. NoPasswordLogin = null.NewString("", true) )
var CheckpointReverseTransitions = reverseTransitions(CheckpointTransitions)
CheckpointReverseTransitions lists possible ancestor states.
var CheckpointTransitions = map[State]map[State]bool{ ActiveState: { CompletedState: true, ErrorState: true, }, CompletedState: { DeletedState: true, }, DeletedState: {}, ErrorState: {}, }
CheckpointTransitions maps checkpoint states to their possible transitions.
var DeletingStates = map[State]bool{ DeletedState: true, DeleteFailedState: true, DeletingState: true, }
DeletingStates are the valid deleting states.
var ExperimentReverseTransitions = reverseTransitions(ExperimentTransitions)
ExperimentReverseTransitions lists possible ancestor states.
var ExperimentTransitions = map[State]map[State]bool{ ActiveState: { PausedState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, }, PausedState: { ActiveState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, }, StoppingCanceledState: { CanceledState: true, StoppingKilledState: true, StoppingErrorState: true, ErrorState: true, }, StoppingKilledState: { CanceledState: true, StoppingErrorState: true, ErrorState: true, }, StoppingCompletedState: { CompletedState: true, StoppingErrorState: true, ErrorState: true, }, StoppingErrorState: { ActiveState: true, ErrorState: true, }, CanceledState: { DeletingState: true, }, CompletedState: { DeletingState: true, }, ErrorState: { DeletingState: true, }, DeletingState: { DeletedState: true, DeleteFailedState: true, }, DeleteFailedState: { DeletingState: true, }, DeletedState: {}, }
ExperimentTransitions maps experiment states to their possible transitions.
var ManualStates = map[State]bool{ ActiveState: true, PausedState: true, StoppingCanceledState: true, StoppingKilledState: true, }
ManualStates are the states the user can set an experiment to.
var NonTerminalStates = func() []State { var states []State for s := range ExperimentTransitions { if !TerminalStates[s] && !DeletingStates[s] { states = append(states, s) } } return states }()
NonTerminalStates are the states in which an experiment can be canceled or killed.
var RunningStates = map[State]bool{ ActiveState: true, PausedState: true, }
RunningStates are the valid running states.
var StepReverseTransitions = reverseTransitions(StepTransitions)
StepReverseTransitions lists possible ancestor states.
var StepTransitions = map[State]map[State]bool{ ActiveState: { CompletedState: true, ErrorState: true, }, CompletedState: {}, ErrorState: {}, }
StepTransitions maps step and validation states to their possible transitions.
var StoppingStates = map[State]bool{ StoppingCanceledState: true, StoppingKilledState: true, StoppingCompletedState: true, StoppingErrorState: true, }
StoppingStates are the valid stopping states.
var StoppingToTerminalStates = map[State]State{ StoppingKilledState: CanceledState, StoppingCanceledState: CanceledState, StoppingCompletedState: CompletedState, StoppingErrorState: ErrorState, }
StoppingToTerminalStates maps from stopping states to the corresponding terminal states.
var TerminalStates = map[State]bool{ CanceledState: true, CompletedState: true, ErrorState: true, }
TerminalStates are the valid terminal states.
var TrialReverseTransitions = reverseTransitions(TrialTransitions)
TrialReverseTransitions lists possible ancestor states.
var TrialTransitions = map[State]map[State]bool{ ActiveState: { PausedState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, CompletedState: true, }, CanceledState: { ActiveState: true, }, CompletedState: { ActiveState: true, }, ErrorState: { ActiveState: true, }, PausedState: { ActiveState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, }, StoppingCompletedState: { StoppingCanceledState: true, StoppingKilledState: true, StoppingErrorState: true, CompletedState: true, ErrorState: true, }, StoppingCanceledState: { StoppingKilledState: true, StoppingErrorState: true, CanceledState: true, ErrorState: true, }, StoppingKilledState: { StoppingErrorState: true, CanceledState: true, ErrorState: true, }, StoppingErrorState: { ActiveState: true, ErrorState: true, }, }
TrialTransitions maps trial states to their possible transitions. Trials are mostly the same as experiments, but immediate exits through ErrorState are allowed, since a trial can die immediately and let the RM clean up after it.
Functions ¶
func FmtInstances ¶
FmtInstances formats instance ids and states to print.
func MostProgressedExperimentState ¶
func MostProgressedExperimentState( state1 experimentv1.State, state2 experimentv1.State, ) experimentv1.State
MostProgressedExperimentState returns the more advanced active state based on experimentStateIndex (Queued -> Pulling -> Starting -> Running).
func ProjectsToProto ¶
ProjectsToProto converts a slice of projects to its protobuf representation.
func StateToProto ¶
func StateToProto(state State) experimentv1.State
StateToProto maps State to experimentv1.State.
func StatesToStrings ¶
StatesToStrings converts a State map to a list of strings for db queries.
func TaskLogLevelFromLogrus ¶
TaskLogLevelFromLogrus returns an equivalent task log level from a logrus level.
func TaskLogLevelFromProto ¶
TaskLogLevelFromProto returns a task log level from its protobuf repr.
func TaskLogLevelToProto ¶
TaskLogLevelToProto returns a protobuf task log level from its string repr.
func TrialMetricsJSONPath ¶
TrialMetricsJSONPath returns the legacy JSON path to the metrics field in the metrics table.
func TrialSummaryMetricsJSONPath ¶
func TrialSummaryMetricsJSONPath(metricGroup MetricGroup) string
TrialSummaryMetricsJSONPath returns the JSON path to the trials metric summary.
func UsingCustomImage ¶
func UsingCustomImage(req *apiv1.LaunchTensorboardRequest) bool
UsingCustomImage checks for an image argument in the request. It is currently only used for TensorBoard. Parse errors are ignored: an unexpected error while parsing is treated as not using a custom image.
func ValidatePrioritySetting ¶
ValidatePrioritySetting checks that priority if set is within a valid range.
Types ¶
type AcceleratorData ¶
type AcceleratorData struct { bun.BaseModel `bun:"table:allocation_accelerators"` ContainerID string `db:"container_id" bun:"container_id"` AllocationID AllocationID `db:"allocation_id" bun:"allocation_id,notnull"` NodeName string `db:"node_name" bun:"node_name,notnull"` AcceleratorType string `db:"accelerator_type" bun:"accelerator_type,notnull"` AcceleratorUuids []string `db:"accelerator_uuids" bun:"accelerator_uuids,array"` ID *int `db:"id" bun:"id,pk,autoincrement"` }
AcceleratorData is the model for allocation accelerator data in the database.
func (AcceleratorData) Proto ¶
func (a AcceleratorData) Proto() *apiv1.AcceleratorData
Proto returns the proto representation of the accelerator data.
type AccessScopeSet ¶
type AccessScopeSet = map[AccessScopeID]bool
AccessScopeSet is a set of access scopes.
type ActivityType ¶
type ActivityType string
ActivityType describes a user activity.
const ( // ActivityTypeGet represents a get request. ActivityTypeGet ActivityType = "GET" )
type AgentStats ¶
type AgentStats struct { ResourcePool string `db:"resource_pool"` AgentID string `db:"agent_id"` Slots int `db:"slots"` }
AgentStats stores the start/end status of instance.
type AgentSummary ¶
type AgentSummary struct { ID string `json:"id"` RegisteredTime time.Time `json:"registered_time"` Slots SlotsSummary `json:"slots"` NumContainers int `json:"num_containers"` ResourcePool []string `json:"resource_pool"` Addresses []string `json:"addresses"` Enabled bool `json:"enabled"` Draining bool `json:"draining"` Version string `json:"version"` }
AgentSummary summarizes the state of an agent.
func (AgentSummary) ToProto ¶
func (a AgentSummary) ToProto() *agentv1.Agent
ToProto converts an agent summary to a proto struct.
type AgentUserGroup ¶
type AgentUserGroup struct { bun.BaseModel `bun:"table:agent_user_groups"` ID int `db:"id" bun:"id,pk,autoincrement" json:"id"` UserID UserID `db:"user_id" json:"user_id"` // The User is the username on an agent host machine. This may be different // from the username of the user in the User database. User string `db:"user_" bun:"user_" json:"user"` UID int `db:"uid" json:"uid"` // The Group is the primary group of the user. Group string `db:"group_" bun:"group_" json:"group"` GID int `db:"gid" json:"gid"` }
An AgentUserGroup represents a username and primary group for a user on an agent host machine. There is at most one AgentUserGroup for each User.
func AgentUserGroupFromProto ¶
func AgentUserGroupFromProto(aug *userv1.AgentUserGroup) (*AgentUserGroup, error)
AgentUserGroupFromProto converts an agent user group from its proto representation to the model type.
func (*AgentUserGroup) OwnArchive ¶
func (c *AgentUserGroup) OwnArchive(oldArchive archive.Archive) archive.Archive
OwnArchive will return an archive.Archive modified to be owned by the AgentUserGroup, or unmodified if c is nil.
func (*AgentUserGroup) OwnedArchiveItem ¶
func (c *AgentUserGroup) OwnedArchiveItem( path string, content []byte, mode int, fileType byte, ) archive.Item
OwnedArchiveItem will create an archive.Item owned by the AgentUserGroup, or by root if c is nil.
func (AgentUserGroup) Validate ¶
func (c AgentUserGroup) Validate() []error
Validate validates the fields of the AgentUserGroup.
type AgentsSummary ¶
type AgentsSummary map[string]AgentSummary
AgentsSummary is a map of agent IDs to a summary of the agent.
type Allocation ¶
type Allocation struct { bun.BaseModel `bun:"table:allocations"` AllocationID AllocationID `db:"allocation_id" bun:"allocation_id,pk"` TaskID TaskID `db:"task_id" bun:"task_id,notnull"` Slots int `db:"slots" bun:"slots,notnull"` ResourcePool string `db:"resource_pool" bun:"resource_pool,notnull"` StartTime *time.Time `db:"start_time" bun:"start_time"` EndTime *time.Time `db:"end_time" bun:"end_time"` State *AllocationState `db:"state" bun:"state"` IsReady *bool `db:"is_ready" bun:"is_ready"` Ports map[string]int `db:"ports" bun:"ports,notnull"` // ProxyAddress stores the explicitly provided task-provided proxy address for resource // managers that do not supply us with it. Comes from `determined.exec.prep_container --proxy`. ProxyAddress *string `db:"proxy_address" bun:"proxy_address"` ExitReason *string `db:"exit_reason" bun:"exit_reason"` ExitErr *string `db:"exit_error" bun:"exit_error"` StatusCode *int32 `db:"status_code" bun:"status_code"` }
Allocation is the model for an allocation in the database.
func (Allocation) Proto ¶
func (a Allocation) Proto() *taskv1.Allocation
Proto returns the proto representation of the allocation.
type AllocationID ¶
type AllocationID string
AllocationID is the ID of an allocation of a task. It is usually of the form TaskID.allocation_number, maybe with some other metadata if different types of allocations run.
func NewAllocationID ¶
func NewAllocationID(in *string) *AllocationID
NewAllocationID casts string ptr to AllocationID ptr.
func (AllocationID) String ¶
func (a AllocationID) String() string
func (AllocationID) ToTaskID ¶
func (a AllocationID) ToTaskID() TaskID
ToTaskID converts an AllocationID to its taskID.
type AllocationSession ¶
type AllocationSession struct { bun.BaseModel `bun:"table:allocation_sessions"` ID SessionID `db:"id" json:"id"` AllocationID AllocationID `db:"allocation_id" json:"allocation_id"` OwnerID *UserID `db:"owner_id" json:"owner_id"` }
AllocationSession corresponds to a row in the "allocation_sessions" DB table.
type AllocationState ¶
type AllocationState string
AllocationState represents the current state of the task. Value indicates a partial ordering.
const ( // AllocationStatePending state denotes that the command is awaiting allocation. AllocationStatePending AllocationState = "PENDING" // AllocationStateWaiting state denotes that the command is waiting on data. AllocationStateWaiting AllocationState = "WAITING" // AllocationStateAssigned state denotes that the command has been assigned to an agent but has // not started yet. AllocationStateAssigned AllocationState = "ASSIGNED" // AllocationStatePulling state denotes that the command's base image is being pulled from the // Docker registry. AllocationStatePulling AllocationState = "PULLING" // AllocationStateStarting state denotes that the image has been pulled and the task is being // started, but the task is not ready yet. AllocationStateStarting AllocationState = "STARTING" // AllocationStateRunning state denotes that the service in the command is running. AllocationStateRunning AllocationState = "RUNNING" // AllocationStateTerminated state denotes that the command has exited or has been aborted. AllocationStateTerminated AllocationState = "TERMINATED" // AllocationStateTerminating state denotes that the command is terminating. AllocationStateTerminating AllocationState = "TERMINATING" )
func MostProgressedAllocationState ¶
func MostProgressedAllocationState(states ...AllocationState) AllocationState
MostProgressedAllocationState returns the furthest progressed state. E.g., a call with PENDING, PULLING and STARTING returns STARTING.
func (AllocationState) Proto ¶
func (s AllocationState) Proto() taskv1.State
Proto returns the proto representation of the task state.
type AuthTokenKeypair ¶
type AuthTokenKeypair struct { bun.BaseModel `bun:"table:auth_token_keypair"` PublicKey ed25519.PublicKey `db:"public_key"` PrivateKey ed25519.PrivateKey `db:"private_key"` }
AuthTokenKeypair stores the public/private keypair used for asymmetric signing of authentication tokens.
type BindMount ¶
type BindMount struct { HostPath string `json:"host_path"` ContainerPath string `json:"container_path"` ReadOnly bool `json:"read_only"` Propagation string `json:"propagation"` }
BindMount configures trial runner filesystem bind mounts.
func ToModelBindMount ¶
ToModelBindMount converts new expconf bind mounts into old model bind mounts.
func (*BindMount) UnmarshalJSON ¶
UnmarshalJSON implements the json.Unmarshaler interface.
type BindMountsConfig ¶
type BindMountsConfig []BindMount
BindMountsConfig is the configuration for bind mounts.
func (BindMountsConfig) ToExpconf ¶
func (b BindMountsConfig) ToExpconf() expconf.BindMountsConfig
ToExpconf translates old model objects into an expconf object.
func (*BindMountsConfig) UnmarshalJSON ¶
func (b *BindMountsConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type Checkpoint ¶
type Checkpoint struct { bun.BaseModel `bun:"table:checkpoints_view"` ID int `db:"id"` UUID *uuid.UUID `db:"uuid"` TaskID *TaskID `db:"task_id"` AllocationID *AllocationID `db:"allocation_id"` ReportTime time.Time `db:"report_time"` State State `db:"state"` Resources JSONObj `db:"resources"` Metadata JSONObj `db:"metadata"` Size int64 `db:"size"` CheckpointTrainingMetadata }
Checkpoint represents a row from the `checkpoints_view` view.
type CheckpointTrainingMetadata ¶
type CheckpointTrainingMetadata struct { TrialID int `db:"trial_id"` ExperimentID int `db:"experiment_id"` ExperimentConfig JSONObj `db:"experiment_config"` HParams JSONObj `db:"hparams" bun:"hparams"` TrainingMetrics JSONObj `db:"training_metrics"` ValidationMetrics JSONObj `db:"validation_metrics"` SearcherMetric *float64 `db:"searcher_metric"` StepsCompleted int `db:"steps_completed"` }
CheckpointTrainingMetadata is a substruct of checkpoints encapsulating training specific information.
type CheckpointV2 ¶
type CheckpointV2 struct { bun.BaseModel `bun:"table:checkpoints_v2"` ID int `db:"id" bun:"id,pk,autoincrement"` UUID uuid.UUID `db:"uuid"` TaskID TaskID `db:"task_id"` AllocationID *AllocationID `db:"allocation_id"` ReportTime time.Time `db:"report_time"` State State `db:"state"` Resources map[string]int64 `db:"resources"` Metadata map[string]interface{} `db:"metadata"` Size int64 `db:"size"` }
CheckpointV2 represents a row from the `checkpoints_v2` table.
type CommandConfig ¶
type CommandConfig struct { Description string `json:"description"` BindMounts BindMountsConfig `json:"bind_mounts"` Environment Environment `json:"environment"` Resources ResourcesConfig `json:"resources"` Entrypoint []string `json:"entrypoint"` TensorBoardArgs []string `json:"tensorboard_args,omitempty"` IdleTimeout *Duration `json:"idle_timeout"` NotebookIdleType string `json:"notebook_idle_type"` WorkDir *string `json:"work_dir"` Debug bool `json:"debug"` Pbs expconf.PbsConfig `json:"pbs,omitempty"` Slurm expconf.SlurmConfig `json:"slurm,omitempty"` }
CommandConfig holds the necessary configurations to launch a command task in the cluster.
func DefaultConfig ¶
func DefaultConfig(taskContainerDefaults *TaskContainerDefaultsConfig) CommandConfig
DefaultConfig is the default configuration used by all commands (e.g., commands, notebooks, shells) if a request does not specify any configuration options.
func (*CommandConfig) Validate ¶
func (c *CommandConfig) Validate() []error
Validate implements the check.Validatable interface.
type ConfigFile ¶
ConfigFile represents a row from the `config_files` table.
type DefaultLoggingConfig ¶
type DefaultLoggingConfig struct{}
DefaultLoggingConfig configures logging for tasks using HTTP to the master.
type DeviceConfig ¶
type DeviceConfig struct { HostPath string `json:"host_path"` ContainerPath string `json:"container_path"` Mode string `json:"mode"` }
DeviceConfig configures container device access.
func (DeviceConfig) ToExpconf ¶
func (d DeviceConfig) ToExpconf() expconf.Device
ToExpconf translates old model objects into an expconf object.
func (*DeviceConfig) UnmarshalJSON ¶
func (d *DeviceConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type DevicesConfig ¶
type DevicesConfig []DeviceConfig
DevicesConfig is the configuration for devices. It is a named type because it needs custom merging behavior (via UnmarshalJSON).
func (DevicesConfig) ToExpconf ¶
func (d DevicesConfig) ToExpconf() expconf.DevicesConfig
ToExpconf translates old model objects into an expconf object.
func (*DevicesConfig) UnmarshalJSON ¶
func (d *DevicesConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface so that DeviceConfigs are additive.
type Duration ¶
Duration is a JSON (un)marshallable version of time.Duration.
func (Duration) MarshalJSON ¶
MarshalJSON implements the json.Marshaler interface.
func (*Duration) UnmarshalJSON ¶
UnmarshalJSON implements the json.Unmarshaler interface.
type ElasticLoggingConfig ¶
type ElasticLoggingConfig struct { Host string `json:"host"` Port int `json:"port"` Security ElasticSecurityConfig `json:"security"` }
ElasticLoggingConfig configures logging for tasks using Elastic.
func (*ElasticLoggingConfig) Resolve ¶
func (o *ElasticLoggingConfig) Resolve() error
Resolve resolves the configuration.
type ElasticSecurityConfig ¶
type ElasticSecurityConfig struct { Username *string `json:"username"` Password *string `json:"password"` TLS TLSClientConfig `json:"tls"` }
ElasticSecurityConfig configures security-related options for the elastic logging backend.
func (*ElasticSecurityConfig) Resolve ¶
func (o *ElasticSecurityConfig) Resolve() error
Resolve resolves the configuration.
func (ElasticSecurityConfig) Validate ¶
func (o ElasticSecurityConfig) Validate() []error
Validate implements the check.Validatable interface.
type EntityType ¶
type EntityType string
EntityType represents an entity.
const ( // EntityTypeProject represents a project. EntityTypeProject EntityType = "Project" )
type Environment ¶
type Environment struct { Image RuntimeItem `json:"image"` EnvironmentVariables RuntimeItems `json:"environment_variables,omitempty"` ProxyPorts ProxyPortsConfig `json:"proxy_ports"` Ports map[string]int `json:"ports"` RegistryAuth *types.AuthConfig `json:"registry_auth,omitempty"` ForcePullImage bool `json:"force_pull_image"` PodSpec *k8sV1.Pod `json:"pod_spec"` AddCapabilities []string `json:"add_capabilities"` DropCapabilities []string `json:"drop_capabilities"` }
Environment configures the environment of a Determined command or experiment.
func DefaultEnvConfig ¶
func DefaultEnvConfig(taskContainerDefaults *TaskContainerDefaultsConfig) Environment
DefaultEnvConfig returns the default environment configuration.
func (Environment) ToExpconf ¶
func (e Environment) ToExpconf() expconf.EnvironmentConfig
ToExpconf translates old model objects into an expconf object.
func (Environment) Validate ¶
func (e Environment) Validate() []error
Validate implements the check.Validatable interface.
type ExitedReason ¶
type ExitedReason string
ExitedReason defines why a workload exited early.
const ( // Errored signals the searcher that the workload errored out. Errored ExitedReason = "ERRORED" // UserRequestedStop signals the searcher that the user requested a cancelation, from code. UserRequestedStop ExitedReason = "USER_REQUESTED_STOP" // UserCanceled signals the searcher that the user requested a cancelation, from the CLI or UI. UserCanceled ExitedReason = "USER_CANCELED" // InvalidHP signals the searcher that the user raised an InvalidHP exception. InvalidHP ExitedReason = "INVALID_HP" // InitInvalidHP signals the searcher that the user raised an InvalidHP exception // in the trial init. InitInvalidHP ExitedReason = "INIT_INVALID_HP" )
func ExitedReasonFromProto ¶
func ExitedReasonFromProto(r trialv1.TrialEarlyExit_ExitedReason) ExitedReason
ExitedReasonFromProto returns an ExitedReason from its protobuf representation.
func (ExitedReason) ToSearcherProto ¶
func (r ExitedReason) ToSearcherProto() experimentv1.TrialExitedEarly_ExitedReason
ToSearcherProto converts an ExitedReason to its protobuf representation for searcher purposes.
type Experiment ¶
type Experiment struct { ID int `db:"id" bun:"id,pk"` JobID JobID `db:"job_id"` State State `db:"state"` Notes string `db:"notes"` // Offer a LegacyConfig rather than ExperimentConfig since most of the system is about querying // experiments which ran some time in the past, which is exactly what LegacyConfig is for. Config expconf.LegacyConfig `db:"config"` OriginalConfig string `db:"original_config"` // The model definition is stored as a .tar.gz file (raw bytes). ModelDefinitionBytes []byte `db:"model_definition" bun:"model_definition"` StartTime time.Time `db:"start_time"` EndTime *time.Time `db:"end_time"` ParentID *int `db:"parent_id"` Archived bool `db:"archived"` GitRemote *string `db:"git_remote"` GitCommit *string `db:"git_commit"` GitCommitter *string `db:"git_committer"` GitCommitDate *time.Time `db:"git_commit_date"` OwnerID *UserID `db:"owner_id"` Username string `db:"username"` ProjectID int `db:"project_id"` Unmanaged bool `db:"unmanaged"` ExternalExperimentID *string `db:"external_experiment_id"` Progress *float64 }
Experiment represents a row from the `experiments` table.
func ExperimentFromProto ¶
func ExperimentFromProto(e *experimentv1.Experiment) (*Experiment, error)
ExperimentFromProto converts an experimentv1.Experiment to a model.Experiment.
func NewExperiment ¶
func NewExperiment( config expconf.ExperimentConfig, originalConfig string, modelDefinitionBytes []byte, parentID *int, archived bool, gitRemote, gitCommit, gitCommitter *string, gitCommitDate *time.Time, projectID int, unmanaged bool, ) (*Experiment, error)
NewExperiment creates a new experiment struct in the paused state. Note that the experiment ID will not be set.
func (*Experiment) Transition ¶
func (e *Experiment) Transition(state State) (bool, error)
Transition changes the state of the experiment to the new state. If the state was not modified, the first return value is false. If the state transition is illegal, an error is returned.
type ExtendedFloat64 ¶
type ExtendedFloat64 float64
ExtendedFloat64 handles serializing floats to JSON, including special cases for infinite values.
func (ExtendedFloat64) MarshalJSON ¶
func (f ExtendedFloat64) MarshalJSON() ([]byte, error)
MarshalJSON implements the json.Marshaler interface.
func (*ExtendedFloat64) UnmarshalJSON ¶
func (f *ExtendedFloat64) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type ExternalSessions ¶
type ExternalSessions struct { LoginURI string `json:"login_uri"` LogoutURI string `json:"logout_uri"` JwtKey string `json:"jwt_key"` }
ExternalSessions provides an integration point for an external service to issue JWTs to control access to the cluster.
func (ExternalSessions) Enabled ¶
func (e ExternalSessions) Enabled() bool
Enabled returns whether or not external sessions are enabled.
type FullUser ¶
type FullUser struct { ID UserID `db:"id" json:"id"` DisplayName null.String `db:"display_name" json:"display_name"` Username string `db:"username" json:"username"` Name string `db:"name" json:"name"` Admin bool `db:"admin" json:"admin"` Active bool `db:"active" json:"active"` ModifiedAt time.Time `db:"modified_at" json:"modified_at"` Remote bool `db:"remote" json:"remote"` LastAuthAt *time.Time `db:"last_auth_at" json:"last_auth_at"` AgentUID null.Int `db:"agent_uid" json:"agent_uid"` AgentGID null.Int `db:"agent_gid" json:"agent_gid"` AgentUser null.String `db:"agent_user" json:"agent_user"` AgentGroup null.String `db:"agent_group" json:"agent_group"` }
A FullUser is a User joined with any other user relations.
type Group ¶
type Group struct { bun.BaseModel `bun:"table:groups,alias:groups"` ID int `bun:"id,pk,autoincrement" json:"id"` Name string `bun:"group_name,notnull" json:"name"` OwnerID UserID `bun:"user_id,nullzero" json:"userId,omitempty"` }
Group represents a user group as it's stored in the database.
type GroupMembership ¶
type GroupMembership struct { bun.BaseModel `bun:"table:user_group_membership"` UserID UserID `bun:"user_id,notnull"` GroupID int `bun:"group_id,notnull"` }
GroupMembership represents a user's membership to a group as it's stored in the database.
type Groups ¶
type Groups []Group
Groups is a slice of Group objects—primarily useful for its methods.
type Instance ¶
type Instance struct { ID string LaunchTime time.Time LastStateChangeTime time.Time AgentName string State InstanceState }
Instance connects a provider's name for a compute resource to the Determined agent name.
type InstanceState ¶
type InstanceState string
InstanceState is an enum type that describes an instance state.
const ( // Unknown describes the instance state cannot be recognized. Unknown InstanceState = "Unknown" // Starting describes the instance is starting up. Starting InstanceState = "Starting" // Running describes the instance is running. Running InstanceState = "Running" // Stopping describes the instance is stopping. Stopping InstanceState = "Stopping" // Stopped describes the instance is stopped. Stopped InstanceState = "Stopped" // Terminating is when the instance is in the process of being terminated. Terminating InstanceState = "Terminating" // SpotRequestPendingAWS indicates that the instance is actually a pending AWS spot request. SpotRequestPendingAWS InstanceState = "SpotRequestPendingAWS" )
type InstanceStats ¶
type InstanceStats struct { ResourcePool string `db:"resource_pool"` InstanceID string `db:"instance_id"` Slots int `db:"slots"` }
InstanceStats stores the start/end status of an instance.
type InstanceType ¶
InstanceType describes an instance type.
type JSONObj ¶
type JSONObj map[string]interface{}
JSONObj is a JSON object that converts to a []byte in SQL queries.
type Job ¶
type Job struct { bun.BaseModel `bun:"table:jobs"` JobID JobID `db:"job_id" bun:"job_id,pk"` JobType JobType `db:"job_type" bun:"job_type"` OwnerID *UserID `db:"owner_id" bun:"owner_id"` QPos decimal.Decimal `db:"q_position" bun:"q_position"` }
Job is the model for a job in the database.
type JobType ¶
type JobType string
JobType is the type of a job.
const ( // JobTypeNotebook is the "NOTEBOOK" job type for the enum public.job_type in Postgres. JobTypeNotebook JobType = "NOTEBOOK" // JobTypeShell is the "SHELL" job type for the enum public.job_type in Postgres. JobTypeShell JobType = "SHELL" // JobTypeCommand is the "COMMAND" job type for the enum public.job_type in Postgres. JobTypeCommand JobType = "COMMAND" // JobTypeTensorboard is the "TENSORBOARD" job type for the enum public.job_type in Postgres. JobTypeTensorboard JobType = "TENSORBOARD" // JobTypeExperiment is the "EXPERIMENT" job type for the enum public.job_type in Postgres. JobTypeExperiment JobType = "EXPERIMENT" // JobTypeCheckpointGC is the "CHECKPOINT_GC" job type for the enum public.job_type in Postgres. JobTypeCheckpointGC JobType = "CHECKPOINT_GC" )
func JobTypeFromProto ¶
JobTypeFromProto maps a jobv1.Type to JobType.
type KubernetesTaskContainerDefaults ¶
type KubernetesTaskContainerDefaults struct {
MaxSlotsPerPod *int `json:"max_slots_per_pod"`
}
KubernetesTaskContainerDefaults is task container defaults specific to Kubernetes.
type LoggingConfig ¶
type LoggingConfig struct { DefaultLoggingConfig *DefaultLoggingConfig `union:"type,default" json:"-"` ElasticLoggingConfig *ElasticLoggingConfig `union:"type,elastic" json:"-"` }
LoggingConfig configures logging for tasks (currently only trials) in Determined.
func (LoggingConfig) MarshalJSON ¶
func (c LoggingConfig) MarshalJSON() ([]byte, error)
MarshalJSON serializes LoggingConfig.
func (LoggingConfig) Resolve ¶
func (c LoggingConfig) Resolve() error
Resolve resolves the parts of the LoggingConfig that must be evaluated on the master machine.
func (*LoggingConfig) UnmarshalJSON ¶
func (c *LoggingConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON deserializes LoggingConfig.
type MetricGroup ¶
type MetricGroup string
MetricGroup denotes which custom group a metric belongs to.
const ( // ValidationMetricGroup designates metrics from validation runs. ValidationMetricGroup MetricGroup = "validation" // TrainingMetricGroup designates metrics from training runs. TrainingMetricGroup MetricGroup = "training" // InferenceMetricGroup designates metrics from inference runs. InferenceMetricGroup MetricGroup = "inference" )
func TrialSummaryMetricGroup ¶
func TrialSummaryMetricGroup(jsonPath string) MetricGroup
TrialSummaryMetricGroup returns the metric group for the given summary JSON path.
func (MetricGroup) ToProto ¶
func (t MetricGroup) ToProto() apiv1.MetricType
ToProto returns the proto representation of the metric group.
func (MetricGroup) ToString ¶
func (t MetricGroup) ToString() string
ToString returns the string representation of the metric group.
func (MetricGroup) Validate ¶
func (t MetricGroup) Validate() error
Validate validates the metric group.
type MetricIdentifier ¶
type MetricIdentifier struct { Group MetricGroup Name metricName }
MetricIdentifier packages metric group and name together.
func DeserializeMetricIdentifier ¶
func DeserializeMetricIdentifier(s string) (*MetricIdentifier, error)
DeserializeMetricIdentifier deserializes a metric identifier from a string.
func (MetricIdentifier) ToProto ¶
func (m MetricIdentifier) ToProto() *metricv1.MetricIdentifier
ToProto returns the proto representation of the metric identifier.
type Model ¶
type Model struct { ID int `db:"id" json:"id"` Name string `db:"name" json:"name"` Description string `db:"description" json:"description"` Metadata JSONObj `db:"metadata" json:"metadata"` CreationTime time.Time `db:"creation_time" json:"creation_time"` LastUpdatedTime time.Time `db:"last_updated_time" json:"last_updated_time"` Labels []string `db:"labels" json:"labels"` Username string `db:"username" json:"username"` Archived bool `db:"archived" json:"archived"` NumVersions int `db:"num_versions" json:"num_versions"` WorkspaceID int `db:"workspace_id" json:"workspace_id"` }
Model represents a row from the `models` table.
type ModelVersion ¶
type ModelVersion struct { ID int `db:"id" json:"id"` Version int `db:"version" json:"version"` CheckpointID int `db:"checkpoint_id" json:"checkpoint_id"` CreationTime time.Time `db:"creation_time" json:"creation_time"` ModelID int `db:"model_id" json:"model_id"` Metadata JSONObj `db:"metadata" json:"metadata"` Name string `db:"name" json:"name"` LastUpdatedTime time.Time `db:"last_updated_time" json:"last_updated_time"` Comment string `db:"comment" json:"comment"` Notes string `db:"readme" json:"notes"` Username string `db:"username" json:"username"` }
ModelVersion represents a row from the `model_versions` table.
type Project ¶
type Project struct { bun.BaseModel `bun:"table:projects"` ID int `bun:"id,pk,autoincrement"` Name string `bun:"name"` CreatedAt time.Time `bun:"created_at,scanonly"` Archived bool `bun:"archived"` WorkspaceID int `bun:"workspace_id"` WorkspaceName string `bun:"workspace_name"` UserID int `bun:"user_id"` Username string `bun:"username"` Immutable bool `bun:"immutable"` Description string `bun:"description"` Notes []*projectv1.Note `bun:"notes,type:jsonb"` NumActiveExperiments int32 `bun:"num_active_experiments"` NumExperiments int32 `bun:"num_experiments"` State WorkspaceState `bun:"state"` ErrorMessage string `bun:"error_message"` LastExperimentStartedAt time.Time `bun:"last_experiment_started_at"` }
Project is the bun model of a project.
type ProxyPort ¶
type ProxyPort struct { ProxyPort int `json:"proxy_port"` ProxyTCP bool `json:"proxy_tcp"` Unauthenticated bool `json:"unauthenticated"` DefaultServiceID bool `json:"default_service_id"` }
ProxyPort is a legacy-style clone of expconf.ProxyPort. TODO(ilia): migrate command config to expconf.
type ProxyPortsConfig ¶
type ProxyPortsConfig []ProxyPort
ProxyPortsConfig is a legacy-style clone of expconf.ProxyPortsConfig.
func (ProxyPortsConfig) ToExpconf ¶
func (p ProxyPortsConfig) ToExpconf() expconf.ProxyPortsConfig
ToExpconf translates old model objects into an expconf object.
type RequestID ¶
RequestID links all operations with the same ID to a single trial create request.
func MustParseRequestID ¶
MustParseRequestID decodes s into a request id or panics.
func NewRequestID ¶
NewRequestID returns a new request ID using the provided reader.
func ParseRequestID ¶
ParseRequestID decodes s into a request id or returns an error.
func (RequestID) Before ¶
Before determines whether this UUID is strictly lexicographically less (comparing the sequences of bytes) than another one.
func (RequestID) MarshalText ¶
MarshalText returns the marshaled form of this ID, which is the string form of the underlying UUID.
func (*RequestID) UnmarshalText ¶
UnmarshalText unmarshals this ID from a text representation.
type ResourceAggregates ¶
type ResourceAggregates struct { Date *time.Time AggregationType string AggregationKey string Seconds float32 }
ResourceAggregates is the model for resource_aggregates in the database.
type ResourcesConfig ¶
type ResourcesConfig struct { Slots int `json:"slots"` MaxSlots *int `json:"max_slots,omitempty"` Weight float64 `json:"weight"` NativeParallel bool `json:"native_parallel,omitempty"` ShmSize *StorageSize `json:"shm_size,omitempty"` ResourcePool string `json:"resource_pool"` Priority *int `json:"priority,omitempty"` Devices DevicesConfig `json:"devices"` // Deprecated: Use ResourcePool instead. AgentLabel string `json:"agent_label,omitempty"` }
ResourcesConfig configures resource usage for an experiment, command, notebook, or tensorboard.
func DefaultResourcesConfig ¶
func DefaultResourcesConfig(taskContainerDefaults *TaskContainerDefaultsConfig) ResourcesConfig
DefaultResourcesConfig returns the default resources configuration.
func ParseJustResources ¶
func ParseJustResources(configBytes []byte) ResourcesConfig
ParseJustResources is a helper function for breaking the circular dependency where we need the TaskContainerDefaults to unmarshal an ExperimentConfig, but we need the Resources.ResourcePool setting to know which TaskContainerDefaults to use. It does not throw errors; if unmarshalling fails that can just get caught later.
func (ResourcesConfig) ToExpconf ¶
func (r ResourcesConfig) ToExpconf() expconf.ResourcesConfig
ToExpconf translates old model objects into an expconf object.
func (ResourcesConfig) Validate ¶
func (r ResourcesConfig) Validate() []error
Validate implements the check.Validatable interface.
type RuntimeItem ¶
type RuntimeItem struct { CPU string `json:"cpu,omitempty"` CUDA string `json:"cuda,omitempty"` ROCM string `json:"rocm,omitempty"` }
RuntimeItem configures the runtime image.
func (RuntimeItem) For ¶
func (r RuntimeItem) For(deviceType device.Type) string
For returns the value for the provided device type.
func (RuntimeItem) ToExpconf ¶
func (r RuntimeItem) ToExpconf() expconf.EnvironmentImageMap
ToExpconf translates old model objects into an expconf object.
func (*RuntimeItem) UnmarshalJSON ¶
func (r *RuntimeItem) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type RuntimeItems ¶
type RuntimeItems struct { CPU []string `json:"cpu,omitempty"` CUDA []string `json:"cuda,omitempty"` ROCM []string `json:"rocm,omitempty"` }
RuntimeItems configures the runtime environment variables.
func (*RuntimeItems) For ¶
func (r *RuntimeItems) For(deviceType device.Type) []string
For returns the value for the provided device type.
func (RuntimeItems) ToExpconf ¶
func (r RuntimeItems) ToExpconf() expconf.EnvironmentVariablesMap
ToExpconf translates old model objects into an expconf object.
func (*RuntimeItems) UnmarshalJSON ¶
func (r *RuntimeItems) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type SlotSummary ¶
type SlotSummary struct { ID string `json:"id"` Device device.Device `json:"device"` Enabled bool `json:"enabled"` Container *cproto.Container `json:"container"` Draining bool `json:"draining"` }
SlotSummary summarizes the state of a slot.
func (SlotSummary) ToProto ¶
func (s SlotSummary) ToProto() *agentv1.Slot
ToProto converts a SlotSummary to its protobuf representation.
type SlotsSummary ¶
type SlotsSummary map[string]SlotSummary
SlotsSummary contains a summary for a number of slots.
type Snapshotter ¶
type Snapshotter interface { Snapshot() (json.RawMessage, error) Restore(json.RawMessage) error }
Snapshotter is any object that implements how to save and restore its state.
type State ¶
type State string
State is the run state of an experiment / trial / step / etc.
func StateFromProto ¶
func StateFromProto(state experimentv1.State) State
StateFromProto maps experimentv1.State to State.
type StateWithReason ¶
StateWithReason is the run state of an experiment with an informational reason used for logging purposes.
type StorageSize ¶
type StorageSize int64
StorageSize is a named type for custom marshaling behavior for shm_size.
func (*StorageSize) UnmarshalJSON ¶
func (d *StorageSize) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type TLSClientConfig ¶
type TLSClientConfig struct { Enabled bool `json:"enabled"` SkipVerify bool `json:"skip_verify"` CertificatePath string `json:"certificate"` CertificateName string `json:"certificate_name"` CertBytes []byte }
TLSClientConfig configures how to make a TLS connection.
func MakeTLSConfig ¶
func MakeTLSConfig(cert *tls.Certificate) (TLSClientConfig, error)
MakeTLSConfig constructs a TLSClientConfig to use the provided tls.Certificate.
func (*TLSClientConfig) Resolve ¶
func (t *TLSClientConfig) Resolve() error
Resolve resolves the configuration.
func (TLSClientConfig) Validate ¶
func (t TLSClientConfig) Validate() []error
Validate implements the check.Validatable interface.
type Task ¶
type Task struct { bun.BaseModel `bun:"table:tasks"` TaskID TaskID `db:"task_id" bun:"task_id,pk"` JobID *JobID `db:"job_id"` TaskType TaskType `db:"task_type"` StartTime time.Time `db:"start_time"` EndTime *time.Time `db:"end_time"` // LogVersion indicates how the logs were stored. LogVersion TaskLogVersion `db:"log_version"` // Relations. Job *Job `bun:"rel:belongs-to,join:job_id=job_id"` }
Task is the model for a task in the database.
type TaskContainerDefaultsConfig ¶
type TaskContainerDefaultsConfig struct { DtrainNetworkInterface string `json:"dtrain_network_interface,omitempty"` NCCLPortRange string `json:"nccl_port_range,omitempty"` GLOOPortRange string `json:"gloo_port_range,omitempty"` ShmSizeBytes int64 `json:"shm_size_bytes,omitempty"` NetworkMode container.NetworkMode `json:"network_mode,omitempty"` // TODO(DET-9855) we should move these over to KubernetesTaskContainerDefaults. CPUPodSpec *k8sV1.Pod `json:"cpu_pod_spec"` GPUPodSpec *k8sV1.Pod `json:"gpu_pod_spec"` Image *RuntimeItem `json:"image,omitempty"` RegistryAuth *types.AuthConfig `json:"registry_auth,omitempty"` ForcePullImage bool `json:"force_pull_image,omitempty"` EnvironmentVariables *RuntimeItems `json:"environment_variables,omitempty"` AddCapabilities []string `json:"add_capabilities"` DropCapabilities []string `json:"drop_capabilities"` Devices DevicesConfig `json:"devices"` BindMounts BindMountsConfig `json:"bind_mounts"` WorkDir *string `json:"work_dir"` Slurm expconf.SlurmConfigV0 `json:"slurm"` Pbs expconf.PbsConfigV0 `json:"pbs"` LogPolicies expconf.LogPoliciesConfig // TODO(DET-9856) we should probably eventually move this to expconf and allow setting // on a per task level. Kubernetes *KubernetesTaskContainerDefaults `json:"kubernetes"` }
TaskContainerDefaultsConfig configures docker defaults for all containers. If you add a field to this, you must update the merge impl.
func DefaultTaskContainerDefaults ¶
func DefaultTaskContainerDefaults() *TaskContainerDefaultsConfig
DefaultTaskContainerDefaults returns the default for TaskContainerDefaultsConfig.
func (TaskContainerDefaultsConfig) Merge ¶
func (c TaskContainerDefaultsConfig) Merge( other TaskContainerDefaultsConfig, ) (TaskContainerDefaultsConfig, error)
Merge merges other into self, preferring other. The result is a deepcopy of self, with deep copies of values taken from other.
func (*TaskContainerDefaultsConfig) MergeIntoExpConfig ¶
func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.ExperimentConfig)
MergeIntoExpConfig sets any unset ExperimentConfig values from TaskContainerDefaults.
func (*TaskContainerDefaultsConfig) UnmarshalJSON ¶
func (c *TaskContainerDefaultsConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface. Setting defaults here is necessary over our usual "Define a default struct and unmarshal into it" strategy because there are places (resource pool configs) where we need to know if the task container defaults were set at all or if they were not; if they were set then that resource pool's task container defaults are used instead of the toplevel master config's settings. To know if the user set them at the resource pool level, the resource pool has to have a nullable pointer, which is not compatible with our usual strategy for defaults.
func (*TaskContainerDefaultsConfig) Validate ¶
func (c *TaskContainerDefaultsConfig) Validate() []error
Validate implements the check.Validatable interface.
type TaskContextDirectory ¶
type TaskContextDirectory struct { bun.BaseModel `bun:"table:task_context_directory"` TaskID TaskID `bun:"task_id"` ContextDirectory []byte `bun:"context_directory"` }
TaskContextDirectory represents a row in the database for a task's context directory. It is currently used only for notebooks, tensorboards, and commands. Trials aren't in it because they are stored on experiments.model_def. In addition, trials can have many tasks but currently can only have one model_def. We would end up duplicating a lot of data by migrating each experiment's model_def over to this table. Also, that migration would be pretty painful.
type TaskLog ¶
type TaskLog struct { // A task log should have one of these IDs after being persisted. All should be unique. ID *int `db:"id" json:"id,omitempty"` // The body of an Elasticsearch log response will look something like // { _id: ..., _source: { ... }} where _source is the rest of this struct. // StringID doesn't have serialization tags because it is not part of // _source and populated from _id. StringID *string `json:"-"` TaskID string `db:"task_id" json:"task_id"` AllocationID *string `db:"allocation_id" json:"allocation_id"` AgentID *string `db:"agent_id" json:"agent_id,omitempty"` // In the case of k8s, container_id is a pod name instead. ContainerID *string `db:"container_id" json:"container_id,omitempty"` RankID *int `db:"rank_id" json:"rank_id,omitempty"` Timestamp *time.Time `db:"timestamp" json:"timestamp"` Level *string `db:"level" json:"level"` Log string `db:"log" json:"log"` Source *string `db:"source" json:"source,omitempty"` StdType *string `db:"stdtype" json:"stdtype,omitempty"` }
TaskLog represents a structured log emitted by an allocation.
func TaskLogFromProto ¶
TaskLogFromProto converts a proto task log to a model task log.
type TaskLogBatch ¶
type TaskLogBatch []*TaskLog
TaskLogBatch represents a batch of model.TaskLog.
func (TaskLogBatch) ForEach ¶
func (t TaskLogBatch) ForEach(f func(interface{}) error) error
ForEach implements logs.Batch.
type TaskLogVersion ¶
type TaskLogVersion int32
TaskLogVersion is the version for our log-storing scheme. Useful because changing designs would involve either a really costly migration or versioning schemes and we pick the latter.
const ( TaskLogVersion0 TaskLogVersion = 0 TaskLogVersion1 TaskLogVersion = 1 CurrentTaskLogVersion = TaskLogVersion1 )
CurrentTaskLogVersion describes the current scheme in which we store task logs. To avoid a migration that in some cases would be extremely costly, we record the log version so that we can just read old logs the old way and do the new however we please.
type TaskStats ¶
type TaskStats struct { AllocationID AllocationID EventType string // ContainerID is sent by the agent. This won't always be present in the database // This is a weird table since sometimes it is one row per allocation // (like in record queued stats) and sometimes it is many per allocation like in // pulled time. ContainerID *cproto.ID StartTime *time.Time EndTime *time.Time }
TaskStats is the model for task stats in the database.
type TaskType ¶
type TaskType string
TaskType is the type of a task.
const ( // TaskTypeTrial is the "TRIAL" task type for the enum public.task_type in Postgres. TaskTypeTrial TaskType = "TRIAL" // TaskTypeNotebook is the "NOTEBOOK" task type for the enum public.task_type in Postgres. TaskTypeNotebook TaskType = "NOTEBOOK" // TaskTypeShell is the "SHELL" task type for the enum public.task_type in Postgres. TaskTypeShell TaskType = "SHELL" // TaskTypeCommand is the "COMMAND" task type for the enum public.task_type in Postgres. TaskTypeCommand TaskType = "COMMAND" // TaskTypeTensorboard is the "TENSORBOARD" task type for the enum public.task_type in Postgres. TaskTypeTensorboard TaskType = "TENSORBOARD" // TaskTypeCheckpointGC is the "CHECKPOINT_GC" task type for the enum public.task_type in Postgres. TaskTypeCheckpointGC TaskType = "CHECKPOINT_GC" )
type Template ¶
type Template struct { Name string `db:"name" json:"name"` Config []byte `db:"config" json:"config" bun:"config"` WorkspaceID int `db:"workspace_id" json:"workspace_id"` }
Template represents a row from the `templates` table.
type Trial ¶
type Trial struct { bun.BaseModel `bun:"table:trials"` ID int `db:"id" bun:",pk,autoincrement"` RequestID *RequestID `db:"request_id"` ExperimentID int `db:"experiment_id"` State State `db:"state"` StartTime time.Time `db:"start_time"` EndTime *time.Time `db:"end_time"` HParams map[string]any `db:"hparams" bun:"hparams"` WarmStartCheckpointID *int `db:"warm_start_checkpoint_id"` Seed int64 `db:"seed"` TotalBatches int `db:"total_batches"` ExternalTrialID *string `db:"external_trial_id"` }
Trial represents a row from the `trials` table.
type TrialLog ¶
type TrialLog struct { // A trial log should have one of these IDs. All should be unique. // TODO(Brad): This must be int64. ID *int `db:"id" json:"id,omitempty"` // The body of an Elasticsearch log response will look something like // { _id: ..., _source: { ... }} where _source is the rest of this struct. // StringID doesn't have serialization tags because it is not part of // _source and populated from _id. StringID *string `json:"-"` TrialID int `db:"trial_id" json:"trial_id"` Message string `db:"message" json:"message,omitempty"` AgentID *string `db:"agent_id" json:"agent_id,omitempty"` // In the case of k8s, container_id is a pod name instead. ContainerID *string `db:"container_id" json:"container_id,omitempty"` RankID *int `db:"rank_id" json:"rank_id,omitempty"` Timestamp *time.Time `db:"timestamp" json:"timestamp"` Level *string `db:"level" json:"level"` Log *string `db:"log" json:"log"` Source *string `db:"source" json:"source,omitempty"` StdType *string `db:"stdtype" json:"stdtype,omitempty"` }
TrialLog represents a row from the `trial_logs` table.
type TrialLogBatch ¶
type TrialLogBatch []*TrialLog
TrialLogBatch represents a batch of model.TrialLog.
func (TrialLogBatch) ForEach ¶
func (t TrialLogBatch) ForEach(f func(interface{}) error) error
ForEach implements logs.Batch.
type TrialMetrics ¶
type TrialMetrics struct { ID int `db:"id" json:"id"` TrialID int `db:"trial_id" json:"trial_id"` TrialRunID int `db:"trial_run_id" json:"-"` TotalBatches int `db:"total_batches" json:"total_batches"` EndTime *time.Time `db:"end_time" json:"end_time"` Metrics JSONObj `db:"metrics" json:"metrics"` }
TrialMetrics represents a row from the `steps` or `validations` table.
type TrialProfilerMetricsBatch ¶
type TrialProfilerMetricsBatch struct { Values pgtype.Float4Array `db:"values"` Batches pgtype.Int4Array `db:"batches"` Timestamps pgtype.TimestamptzArray `db:"timestamps"` Labels []byte `db:"labels"` }
TrialProfilerMetricsBatch represents a row from the `trial_profiler_metrics` table.
func (*TrialProfilerMetricsBatch) ToProto ¶
func (t *TrialProfilerMetricsBatch) ToProto() (*trialv1.TrialProfilerMetricsBatch, error)
ToProto converts a TrialProfilerMetricsBatch to its protobuf representation.
type TrialProfilerMetricsBatchBatch ¶
type TrialProfilerMetricsBatchBatch []*trialv1.TrialProfilerMetricsBatch
TrialProfilerMetricsBatchBatch represents a batch of trialv1.TrialProfilerMetricsBatch.
func (TrialProfilerMetricsBatchBatch) ForEach ¶
func (t TrialProfilerMetricsBatchBatch) ForEach(f func(interface{}) error) error
ForEach implements logs.Batch.
func (TrialProfilerMetricsBatchBatch) Size ¶
func (t TrialProfilerMetricsBatchBatch) Size() int
Size implements logs.Batch.
type TrialTaskID ¶
TrialTaskID represents a row from the `trial_id_task_id` table.
type User ¶
type User struct { bun.BaseModel `bun:"table:users"` ID UserID `db:"id" bun:"id,pk,autoincrement" json:"id"` Username string `db:"username" json:"username"` PasswordHash null.String `db:"password_hash" json:"-"` DisplayName null.String `db:"display_name" json:"display_name"` Admin bool `db:"admin" json:"admin"` Active bool `db:"active" json:"active"` ModifiedAt time.Time `db:"modified_at" json:"modified_at"` Remote bool `db:"remote" json:"remote"` LastAuthAt *time.Time `db:"last_auth_at" json:"last_auth_at"` }
User corresponds to a row in the "users" DB table.
func (*User) UpdatePasswordHash ¶
UpdatePasswordHash updates the model's password hash employing necessary cryptographic techniques.
func (User) ValidatePassword ¶
ValidatePassword checks that the supplied password is correct.
type UserActivity ¶
type UserActivity struct { bun.BaseModel `bun:"table:activity"` UserID UserID `db:"user_id" json:"user_id"` ActivityType ActivityType `db:"activity_type" json:"activity_type"` EntityType EntityType `db:"entity_type" json:"entity_type"` EntityID int32 `db:"entity_id" json:"entity_id"` ActivityTime time.Time `db:"activity_time" json:"activity_time"` }
UserActivity is a record of user activity.
func UserActivityFromProto ¶
func UserActivityFromProto( a userv1.ActivityType, e userv1.EntityType, entityID int32, userID int32, timestamp time.Time, ) *UserActivity
UserActivityFromProto returns a model UserActivity from a proto definition.
type UserSession ¶
type UserSession struct { bun.BaseModel `bun:"table:user_sessions"` ID SessionID `db:"id" json:"id"` UserID UserID `db:"user_id" json:"user_id"` Expiry time.Time `db:"expiry" json:"expiry"` }
UserSession corresponds to a row in the "user_sessions" DB table.
type UserWebSetting ¶
UserWebSetting is a record of a user's web setting.
type WorkloadManagerType ¶
type WorkloadManagerType string
WorkloadManagerType indicates which type of workloads the harness should prepare to receive.
type WorkloadSequencerType ¶
type WorkloadSequencerType string
WorkloadSequencerType is the type of sequencer that a trial actor should use.
type Workspace ¶
type Workspace struct { bun.BaseModel `bun:"table:workspaces"` ID int `bun:"id,pk,autoincrement"` Name string `bun:"name"` Archived bool `bun:"archived"` CreatedAt time.Time `bun:"created_at,scanonly"` UserID UserID `bun:"user_id"` Immutable bool `bun:"immutable"` State *WorkspaceState `bun:"state"` AgentUID *int32 `bun:"uid"` AgentUser *string `bun:"user_"` AgentGID *int32 `bun:"gid"` AgentGroup *string `bun:"group_"` CheckpointStorageConfig *expconf.CheckpointStorageConfig `bun:"checkpoint_storage_config"` DefaultComputePool string `bun:"default_compute_pool"` DefaultAuxPool string `bun:"default_aux_pool"` }
Workspace is the bun model of a workspace.
type WorkspacePin ¶
type WorkspacePin struct { bun.BaseModel `bun:"table:workspace_pins"` WorkspaceID int `bun:"workspace_id"` UserID UserID `bun:"user_id"` }
WorkspacePin is the bun model of a workspace pin.
type WorkspaceState ¶
type WorkspaceState string
WorkspaceState is the state of a workspace with regard to being deleted.
const ( // WorkspaceStateDeleting constant. WorkspaceStateDeleting WorkspaceState = "DELETING" // WorkspaceStateDeleteFailed constant. WorkspaceStateDeleteFailed WorkspaceState = "DELETE_FAILED" // WorkspaceStateDeleted constant. WorkspaceStateDeleted WorkspaceState = "DELETED" )
func (*WorkspaceState) ToProto ¶
func (s *WorkspaceState) ToProto() workspacev1.WorkspaceState
ToProto converts a WorkspaceState to its proto representation, workspacev1.WorkspaceState.
Source Files ¶
- agent.go
- agent_user_group.go
- auth_token_keypair.go
- command_config.go
- compat.go
- config_file.go
- defaults.go
- duration.go
- environment_config.go
- experiment.go
- experiment_config.go
- instance.go
- job.go
- logging_config.go
- metrics.go
- model.go
- project.go
- searcher.go
- task.go
- task_container_defaults.go
- task_session.go
- template.go
- types.go
- user.go
- user_activity.go
- user_group.go
- workspace.go