Documentation ¶
Index ¶
- Constants
- Variables
- func FmtInstances(instances []*Instance) string
- func HashPassword(password string) (string, error)
- func MostProgressedExperimentState(state1 experimentv1.State, state2 experimentv1.State) experimentv1.State
- func ProjectsToProto(ps []*Project) []*projectv1.Project
- func SortableSlotIndex(i int) string
- func StateToProto(state State) experimentv1.State
- func StatesToStrings(inStates map[State]bool) []string
- func SummarizeSlots(slots map[string]*agentv1.Slot) *agentv1.SlotStats
- func TaskLogLevelFromLogrus(l logrus.Level) string
- func TaskLogLevelFromProto(l logv1.LogLevel) string
- func TaskLogLevelToProto(l string) logv1.LogLevel
- func TrialMetricsJSONPath(isValidation bool) string
- func TrialSummaryMetricsJSONPath(metricGroup MetricGroup) string
- func UsingCustomImage(req *apiv1.LaunchTensorboardRequest) bool
- func ValidatePrioritySetting(priority *int) []error
- type AcceleratorData
- type AccessScopeID
- type AccessScopeSet
- type ActivityType
- type AgentStats
- type AgentSummary
- type AgentUserGroup
- type AgentsSummary
- type Allocation
- type AllocationID
- type AllocationSession
- type AllocationState
- type AllocationWorkspaceRecord
- type AuthTokenKeypair
- type BindMount
- type BindMountsConfig
- type Checkpoint
- type CheckpointTrainingMetadata
- type CheckpointV2
- type ClusterID
- type ClusterMessage
- type CommandConfig
- type ConfigFile
- type DefaultLoggingConfig
- type DeviceConfig
- type DevicesConfig
- type Duration
- type ElasticLoggingConfig
- type ElasticSecurityConfig
- type EntityType
- type Environment
- type ExitedReason
- type Experiment
- type ExtendedFloat64
- type ExternalSessions
- type FullUser
- type GenericTaskConfig
- type Group
- type GroupMembership
- type Groups
- type HealthCheck
- type HealthStatus
- type Instance
- type InstanceState
- type InstanceStats
- type InstanceType
- type InvalidationMap
- type JSONObj
- type JWT
- type Job
- type JobID
- type JobType
- type KubernetesTaskContainerDefaults
- type LogRetentionPolicy
- type LoggingConfig
- type MetricGroup
- type MetricIdentifier
- type NotebookSession
- type OAuthClient
- type OAuthToken
- func (t *OAuthToken) GetAccess() string
- func (t *OAuthToken) GetAccessCreateAt() time.Time
- func (t *OAuthToken) GetAccessExpiresIn() time.Duration
- func (t *OAuthToken) GetClientID() string
- func (t *OAuthToken) GetCode() string
- func (t *OAuthToken) GetCodeCreateAt() time.Time
- func (t *OAuthToken) GetCodeExpiresIn() time.Duration
- func (t *OAuthToken) GetRedirectURI() string
- func (t *OAuthToken) GetRefresh() string
- func (t *OAuthToken) GetRefreshCreateAt() time.Time
- func (t *OAuthToken) GetRefreshExpiresIn() time.Duration
- func (t *OAuthToken) GetScope() string
- func (t *OAuthToken) GetUserID() string
- func (t *OAuthToken) New() oauth2.TokenInfo
- func (t *OAuthToken) SetAccess(access string)
- func (t *OAuthToken) SetAccessCreateAt(createAt time.Time)
- func (t *OAuthToken) SetAccessExpiresIn(exp time.Duration)
- func (t *OAuthToken) SetClientID(clientID string)
- func (t *OAuthToken) SetCode(code string)
- func (t *OAuthToken) SetCodeCreateAt(createAt time.Time)
- func (t *OAuthToken) SetCodeExpiresIn(exp time.Duration)
- func (t *OAuthToken) SetRedirectURI(redirectURI string)
- func (t *OAuthToken) SetRefresh(refresh string)
- func (t *OAuthToken) SetRefreshCreateAt(createAt time.Time)
- func (t *OAuthToken) SetRefreshExpiresIn(exp time.Duration)
- func (t *OAuthToken) SetScope(scope string)
- func (t *OAuthToken) SetUserID(userID string)
- type OrgID
- type OrgRoleClaims
- type PatchOperation
- type PatchRequest
- type PatchSchemas
- type Project
- type ProjectHparam
- type Projects
- type ProxyPort
- type ProxyPortsConfig
- type RequestID
- type ResourceAggregates
- type ResourceManagerHealth
- type ResourcesConfig
- type Role
- type RoleAssignmentScope
- type Run
- type RunCheckpoints
- type RunHparam
- type RunMetadata
- type RunMetadataIndex
- type RunTaskID
- type RuntimeItem
- type RuntimeItems
- type SCIMEmail
- type SCIMEmails
- type SCIMError
- type SCIMErrorSchemas
- type SCIMGroup
- type SCIMGroupMeta
- type SCIMGroupResourceType
- type SCIMGroupSchemas
- type SCIMGroups
- type SCIMListSchemas
- type SCIMName
- type SCIMUser
- type SCIMUserMeta
- type SCIMUserResourceType
- type SCIMUserSchemas
- type SCIMUsers
- type SessionID
- type SlotSummary
- type SlotsSummary
- type Snapshotter
- type State
- type StateWithReason
- type StorageBackendID
- type StorageSize
- type TLSClientConfig
- type Task
- type TaskContainerDefaultsConfig
- func (c TaskContainerDefaultsConfig) Merge(other TaskContainerDefaultsConfig) (TaskContainerDefaultsConfig, error)
- func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.ExperimentConfig)
- func (c *TaskContainerDefaultsConfig) UnmarshalJSON(data []byte) error
- func (c *TaskContainerDefaultsConfig) Validate() []error
- type TaskContextDirectory
- type TaskID
- type TaskLog
- type TaskLogBatch
- type TaskLogVersion
- type TaskState
- type TaskStats
- type TaskType
- type Template
- type Trial
- type TrialLog
- type TrialLogBatch
- type TrialMetrics
- type TrialProfilerMetricsBatch
- type TrialProfilerMetricsBatchBatch
- type TrialV2
- type UUID
- type User
- type UserActivity
- type UserID
- type UserSession
- type UserWebSetting
- type Users
- type WorkloadManagerType
- type WorkloadSequencerType
- type Workspace
- type WorkspaceNamespace
- type WorkspacePin
- type WorkspaceState
Constants ¶
const ( // NotebookIdleTypeKernelsOrTerminals indicates that a notebook should be considered active if any // kernels or terminals are open. NotebookIdleTypeKernelsOrTerminals = "kernels_or_terminals" // NotebookIdleTypeKernelConnections indicates that a notebook should be considered active if any // connections to kernels are open. NotebookIdleTypeKernelConnections = "kernel_connections" // NotebookIdleTypeActivity indicates that a notebook should be considered active if any kernel is // running a command or any terminal is inputting or outputting data. NotebookIdleTypeActivity = "activity" )
const ( // ActiveState constant. ActiveState State = "ACTIVE" // CanceledState constant. CanceledState State = "CANCELED" // CompletedState constant. CompletedState State = "COMPLETED" // ErrorState constant. ErrorState State = "ERROR" // PausedState constant. PausedState State = "PAUSED" // StoppingKilledState constant. StoppingKilledState State = "STOPPING_KILLED" // StoppingCanceledState constant. StoppingCanceledState State = "STOPPING_CANCELED" // StoppingCompletedState constant. StoppingCompletedState State = "STOPPING_COMPLETED" // StoppingErrorState constant. StoppingErrorState State = "STOPPING_ERROR" // DeletingState constant. DeletingState State = "DELETING" // DeleteFailedState constant. DeleteFailedState State = "DELETE_FAILED" // DeletedState constant. DeletedState State = "DELETED" // PartiallyDeletedState constant. PartiallyDeletedState State = "PARTIALLY_DELETED" // RunningState constant. Currently only used by unmanaged trials. RunningState State = "RUNNING" // TrialWorkloadSequencerType constant. TrialWorkloadSequencerType WorkloadSequencerType = "TRIAL_WORKLOAD_SEQUENCER" )
const ( // MinUserSchedulingPriority is the smallest priority users may specify. MinUserSchedulingPriority = 1 // MaxUserSchedulingPriority is the largest priority users may specify. MaxUserSchedulingPriority = 99 )
const ( // TaskTypeTrial is the "TRIAL" job type for the enum public.job_type in Postgres. TaskTypeTrial TaskType = "TRIAL" // TaskTypeNotebook is the "NOTEBOOK" job type for the enum public.job_type in Postgres. TaskTypeNotebook TaskType = "NOTEBOOK" // TaskTypeShell is the "SHELL" job type for the enum public.job_type in Postgres. TaskTypeShell TaskType = "SHELL" // TaskTypeCommand is the "COMMAND" job type for the enum public.job_type in Postgres. TaskTypeCommand TaskType = "COMMAND" // TaskTypeTensorboard is the "TENSORBOARD" task type for the enum.task_type in Postgres. TaskTypeTensorboard TaskType = "TENSORBOARD" // TaskTypeCheckpointGC is the "CHECKPOINT_GC" job type for the enum public.job_type in Postgres. TaskTypeCheckpointGC TaskType = "CHECKPOINT_GC" // TaskTypeGeneric is the "GENERIC" job type for the enum public.job_type in Postgres. TaskTypeGeneric TaskType = "GENERIC" // GlobalAccessScopeID represents global permission access. GlobalAccessScopeID AccessScopeID = 0 // AggregationTypeQueued is the type of aggregation for queued tasks. AggregationTypeQueued = "queued" )
const ( // LogLevelTrace is the trace task log level. LogLevelTrace = tasklog.LogLevelTrace // LogLevelDebug is the debug task log level. LogLevelDebug = tasklog.LogLevelDebug // LogLevelInfo is the info task log level. LogLevelInfo = tasklog.LogLevelInfo // LogLevelWarning is the warn task log level. LogLevelWarning = tasklog.LogLevelWarning // LogLevelError is the error task log level. LogLevelError = tasklog.LogLevelError // LogLevelCritical is the critical task log level. LogLevelCritical = tasklog.LogLevelCritical // LogLevelUnspecified is the unspecified task log level. LogLevelUnspecified = tasklog.LogLevelUnspecified )
const ( // DefaultWorkspaceID is a special, always-existing, workspace titled "Uncategorized". DefaultWorkspaceID = 1 // DefaultWorkspaceName is the default workspace name, which is always present, and always has ID 1. DefaultWorkspaceName = "Uncategorized" // DefaultProjectID is the default project ID for the default workspace. DefaultProjectID = 1 )
const BCryptCost = 15
BCryptCost is a stopgap until we implement sane master-configuration.
const ( // DeterminedK8ContainerName is the name of the container that executes the task within Kubernetes // pods that are launched by Determined. DeterminedK8ContainerName = "determined-container" )
const NotebookSessionEnvVar = "DET_NOTEBOOK_TOKEN"
NotebookSessionEnvVar is the environment variable name for notebook task tokens.
const ( // RFC3339MicroTrailingZeroes unlike time.RFC3339Nano is a time format specifier that preserves // trailing zeroes. RFC3339MicroTrailingZeroes = "2006-01-02T15:04:05.000000Z07:00" )
const (
// StepsCompletedMetadataKey is the key within metadata to find steps completed now, if it exists.
StepsCompletedMetadataKey = "steps_completed"
)
Variables ¶
var ( // EmptyPassword is the empty password (i.e., the empty string). EmptyPassword = null.NewString("", false) // NoPasswordLogin is a password that prevents the user from logging in // directly. They can still login via external authentication methods like // OAuth. NoPasswordLogin = null.NewString("", true) )
var CheckpointReverseTransitions = reverseTransitions(CheckpointTransitions)
CheckpointReverseTransitions lists possible ancestor states.
var CheckpointTransitions = map[State]map[State]bool{ ActiveState: { CompletedState: true, ErrorState: true, }, CompletedState: { DeletedState: true, }, DeletedState: {}, ErrorState: {}, }
CheckpointTransitions maps checkpoint states to their possible transitions.
var DeletingStates = map[State]bool{ DeletedState: true, DeleteFailedState: true, DeletingState: true, }
DeletingStates are the valid deleting states.
var ExperimentReverseTransitions = reverseTransitions(ExperimentTransitions)
ExperimentReverseTransitions lists possible ancestor states.
var ExperimentTransitions = map[State]map[State]bool{ ActiveState: { PausedState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, }, PausedState: { ActiveState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, }, StoppingCanceledState: { CanceledState: true, StoppingKilledState: true, StoppingErrorState: true, ErrorState: true, }, StoppingKilledState: { CanceledState: true, StoppingErrorState: true, ErrorState: true, }, StoppingCompletedState: { CompletedState: true, StoppingErrorState: true, ErrorState: true, }, StoppingErrorState: { ActiveState: true, ErrorState: true, }, CanceledState: { DeletingState: true, }, CompletedState: { DeletingState: true, }, ErrorState: { DeletingState: true, }, DeletingState: { DeletedState: true, DeleteFailedState: true, }, DeleteFailedState: { DeletingState: true, }, DeletedState: {}, }
ExperimentTransitions maps experiment states to their possible transitions.
var ManualStates = map[State]bool{ ActiveState: true, PausedState: true, StoppingCanceledState: true, StoppingKilledState: true, }
ManualStates are the states the user can set an experiment to.
var NonTerminalStates = func() []State { var states []State for s := range ExperimentTransitions { if !TerminalStates[s] && !DeletingStates[s] { states = append(states, s) } } return states }()
NonTerminalStates are the states in which an experiment can be canceled or killed.
var ProfilingMetricGroups = []MetricGroup{
"gpu", "cpu", "memory", "disk", "network",
}
ProfilingMetricGroups designates metrics from profiling runs.
var RunningStates = map[State]bool{ ActiveState: true, PausedState: true, }
RunningStates are the valid running states.
var StepReverseTransitions = reverseTransitions(StepTransitions)
StepReverseTransitions lists possible ancestor states.
var StepTransitions = map[State]map[State]bool{ ActiveState: { CompletedState: true, ErrorState: true, }, CompletedState: {}, ErrorState: {}, }
StepTransitions maps step and validation states to their possible transitions.
var StoppingStates = map[State]bool{ StoppingCanceledState: true, StoppingKilledState: true, StoppingCompletedState: true, StoppingErrorState: true, }
StoppingStates are the valid stopping states.
var StoppingToTerminalStates = map[State]State{ StoppingKilledState: CanceledState, StoppingCanceledState: CanceledState, StoppingCompletedState: CompletedState, StoppingErrorState: ErrorState, }
StoppingToTerminalStates maps from stopping states to the corresponding terminal states.
var TerminalStates = map[State]bool{ CanceledState: true, CompletedState: true, ErrorState: true, }
TerminalStates are the valid terminal states.
var TrialReverseTransitions = reverseTransitions(TrialTransitions)
TrialReverseTransitions lists possible ancestor states.
var TrialTransitions = map[State]map[State]bool{ ActiveState: { PausedState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, CompletedState: true, }, CanceledState: { ActiveState: true, }, CompletedState: { ActiveState: true, }, ErrorState: { ActiveState: true, }, PausedState: { ActiveState: true, StoppingKilledState: true, StoppingCanceledState: true, StoppingCompletedState: true, StoppingErrorState: true, ErrorState: true, }, StoppingCompletedState: { StoppingCanceledState: true, StoppingKilledState: true, StoppingErrorState: true, CompletedState: true, ErrorState: true, }, StoppingCanceledState: { StoppingKilledState: true, StoppingErrorState: true, CanceledState: true, ErrorState: true, }, StoppingKilledState: { StoppingErrorState: true, CanceledState: true, ErrorState: true, }, StoppingErrorState: { ActiveState: true, ErrorState: true, }, }
TrialTransitions maps trial states to their possible transitions. Trials are mostly the same as experiments, but immediate exits through ErrorState are allowed, since a trial can die immediately and let the RM clean up after it.
Functions ¶
func FmtInstances ¶
FmtInstances formats instance ids and states to print.
func HashPassword ¶
HashPassword hashes the user's password.
func MostProgressedExperimentState ¶
func MostProgressedExperimentState( state1 experimentv1.State, state2 experimentv1.State, ) experimentv1.State
MostProgressedExperimentState returns the more advanced active state based on experimentStateIndex (Queued -> Pulling -> Starting -> Running).
func ProjectsToProto ¶
ProjectsToProto converts a slice of projects to its protobuf representation.
func SortableSlotIndex ¶
SortableSlotIndex returns a slot index that sorts in the expected numeric order when compared lexicographically.
This is a hack to fix a bug seen by the webui. The webui displays a list of slots and whether they are filled, so it expects the order in which slots are filled to be consistent. In Kubernetes this is an illusion; we don't know what slot is running what job. Our API returns a map of slot IDs to slots. This map gets parsed and displayed in the frontend lexicographically. Just using indexes breaks when there are more than 10 GPUs per agent since it will go 1,10,11 instead of 1,2,3,4.
To fix this on our side, we just pad the numbers with 0s so they sort correctly in the response.
func StateToProto ¶
func StateToProto(state State) experimentv1.State
StateToProto maps State to experimentv1.State.
func StatesToStrings ¶
StatesToStrings converts a State map to a list of strings for db queries.
func SummarizeSlots ¶
SummarizeSlots summarizes a set of slots.
func TaskLogLevelFromLogrus ¶
TaskLogLevelFromLogrus returns an equivalent task log level from a logrus level.
func TaskLogLevelFromProto ¶
TaskLogLevelFromProto returns a task log level from its protobuf repr.
func TaskLogLevelToProto ¶
TaskLogLevelToProto returns a protobuf task log level from its string repr.
func TrialMetricsJSONPath ¶
TrialMetricsJSONPath returns the legacy JSON path to the metrics field in the metrics table.
func TrialSummaryMetricsJSONPath ¶
func TrialSummaryMetricsJSONPath(metricGroup MetricGroup) string
TrialSummaryMetricsJSONPath returns the JSON path to the trials metric summary.
func UsingCustomImage ¶
func UsingCustomImage(req *apiv1.LaunchTensorboardRequest) bool
UsingCustomImage checks for an image argument in the request. It is currently only used for TensorBoard. Errors are ignored because an unexpected error when parsing is treated as not using a custom image.
func ValidatePrioritySetting ¶
ValidatePrioritySetting checks that the priority, if set, is within the valid range.
Types ¶
type AcceleratorData ¶
type AcceleratorData struct { bun.BaseModel `bun:"table:allocation_accelerators"` ContainerID string `db:"container_id" bun:"container_id"` AllocationID AllocationID `db:"allocation_id" bun:"allocation_id,notnull"` NodeName string `db:"node_name" bun:"node_name,notnull"` AcceleratorType string `db:"accelerator_type" bun:"accelerator_type,notnull"` AcceleratorUuids []string `db:"accelerator_uuids" bun:"accelerator_uuids,array"` ID *int `db:"id" bun:"id,pk,autoincrement"` }
AcceleratorData is the model for an allocation's accelerator data in the database.
func (AcceleratorData) Proto ¶
func (a AcceleratorData) Proto() *apiv1.AcceleratorData
Proto returns the proto representation of the accelerator data.
type AccessScopeSet ¶
type AccessScopeSet = map[AccessScopeID]bool
AccessScopeSet is a set of access scopes.
type ActivityType ¶
type ActivityType string
ActivityType describes a user activity.
const ( // ActivityTypeGet represents a get request. ActivityTypeGet ActivityType = "GET" )
type AgentStats ¶
type AgentStats struct { ResourcePool string `db:"resource_pool"` AgentID string `db:"agent_id"` Slots int `db:"slots"` }
AgentStats stores the start/end status of an agent, identified by its resource pool, agent ID, and slot count.
type AgentSummary ¶
type AgentSummary struct { ID string `json:"id"` RegisteredTime time.Time `json:"registered_time"` Slots SlotsSummary `json:"slots"` NumContainers int `json:"num_containers"` ResourcePool []string `json:"resource_pool"` Addresses []string `json:"addresses"` Enabled bool `json:"enabled"` Draining bool `json:"draining"` Version string `json:"version"` }
AgentSummary summarizes the state of an agent.
func (AgentSummary) ToProto ¶
func (a AgentSummary) ToProto() *agentv1.Agent
ToProto converts an agent summary to a proto struct.
type AgentUserGroup ¶
type AgentUserGroup struct { bun.BaseModel `bun:"table:agent_user_groups"` ID int `db:"id" bun:"id,pk,autoincrement" json:"id"` UserID UserID `db:"user_id" json:"user_id"` // The User is the username on an agent host machine. This may be different // from the username of the user in the User database. User string `db:"user_" bun:"user_" json:"user"` UID int `db:"uid" json:"uid"` // The Group is the primary group of the user. Group string `db:"group_" bun:"group_" json:"group"` GID int `db:"gid" json:"gid"` }
An AgentUserGroup represents a username and primary group for a user on an agent host machine. There is at most one AgentUserGroup for each User.
func AgentUserGroupFromProto ¶
func AgentUserGroupFromProto(aug *userv1.AgentUserGroup) (*AgentUserGroup, error)
AgentUserGroupFromProto converts an agent user group from proto to model.
func (*AgentUserGroup) OwnArchive ¶
func (c *AgentUserGroup) OwnArchive(oldArchive archive.Archive) archive.Archive
OwnArchive will return an archive.Archive modified to be owned by the AgentUserGroup, or unmodified if c is nil.
func (*AgentUserGroup) OwnedArchiveItem ¶
func (c *AgentUserGroup) OwnedArchiveItem( path string, content []byte, mode int, fileType byte, ) archive.Item
OwnedArchiveItem will create an archive.Item owned by the AgentUserGroup, or by root if c is nil.
func (AgentUserGroup) Validate ¶
func (c AgentUserGroup) Validate() []error
Validate validates the fields of the AgentUserGroup.
type AgentsSummary ¶
type AgentsSummary map[string]AgentSummary
AgentsSummary is a map of agent IDs to a summary of the agent.
type Allocation ¶
type Allocation struct { bun.BaseModel `bun:"table:allocations"` AllocationID AllocationID `db:"allocation_id" bun:"allocation_id,pk"` TaskID TaskID `db:"task_id" bun:"task_id,notnull"` Slots int `db:"slots" bun:"slots,notnull"` ResourcePool string `db:"resource_pool" bun:"resource_pool,notnull"` StartTime *time.Time `db:"start_time" bun:"start_time"` EndTime *time.Time `db:"end_time" bun:"end_time"` State *AllocationState `db:"state" bun:"state"` IsReady *bool `db:"is_ready" bun:"is_ready"` Ports map[string]int `db:"ports" bun:"ports,notnull"` // ProxyAddress stores the explicitly provided task-provided proxy address for resource // managers that do not supply us with it. Comes from `determined.exec.prep_container --proxy`. ProxyAddress *string `db:"proxy_address" bun:"proxy_address"` ExitReason *string `db:"exit_reason" bun:"exit_reason"` ExitErr *string `db:"exit_error" bun:"exit_error"` StatusCode *int32 `db:"status_code" bun:"status_code"` }
Allocation is the model for an allocation in the database.
func (Allocation) Proto ¶
func (a Allocation) Proto() *taskv1.Allocation
Proto returns the proto representation of the allocation.
type AllocationID ¶
type AllocationID string
AllocationID is the ID of an allocation of a task. It is usually of the form TaskID.allocation_number, maybe with some other metadata if different types of allocations run.
func NewAllocationID ¶
func NewAllocationID(in *string) *AllocationID
NewAllocationID casts string ptr to AllocationID ptr.
func (AllocationID) GetAllocationSpecifier ¶
func (a AllocationID) GetAllocationSpecifier() (int, error)
GetAllocationSpecifier retrieves the number at the end of the allocation's ID.
func (AllocationID) String ¶
func (a AllocationID) String() string
func (AllocationID) ToTaskID ¶
func (a AllocationID) ToTaskID() TaskID
ToTaskID converts an AllocationID to its taskID.
type AllocationSession ¶
type AllocationSession struct { bun.BaseModel `bun:"table:allocation_sessions"` ID SessionID `db:"id" bun:"id,pk,autoincrement" json:"id"` AllocationID AllocationID `db:"allocation_id" bun:"allocation_id" json:"allocation_id"` OwnerID *UserID `db:"owner_id" bun:"owner_id" json:"owner_id"` }
AllocationSession corresponds to a row in the "allocation_sessions" DB table.
type AllocationState ¶
type AllocationState string
AllocationState represents the current state of the task. Value indicates a partial ordering.
const ( // AllocationStatePending state denotes that the command is awaiting allocation. AllocationStatePending AllocationState = "PENDING" // AllocationStateWaiting state denotes that the command is waiting on data. AllocationStateWaiting AllocationState = "WAITING" // AllocationStateAssigned state denotes that the command has been assigned to an agent but has // not started yet. AllocationStateAssigned AllocationState = "ASSIGNED" // AllocationStatePulling state denotes that the command's base image is being pulled from the // Docker registry. AllocationStatePulling AllocationState = "PULLING" // AllocationStateStarting state denotes that the image has been pulled and the task is being // started, but the task is not ready yet. AllocationStateStarting AllocationState = "STARTING" // AllocationStateRunning state denotes that the service in the command is running. AllocationStateRunning AllocationState = "RUNNING" // AllocationStateTerminated state denotes that the command has exited or has been aborted. AllocationStateTerminated AllocationState = "TERMINATED" // AllocationStateTerminating state denotes that the command is terminating. AllocationStateTerminating AllocationState = "TERMINATING" )
func MostProgressedAllocationState ¶
func MostProgressedAllocationState(states ...AllocationState) AllocationState
MostProgressedAllocationState returns the furthest progressed state. E.g., a call with PENDING, PULLING and STARTING returns STARTING.
func (*AllocationState) Proto ¶
func (s *AllocationState) Proto() taskv1.State
Proto returns the proto representation of the task state.
type AllocationWorkspaceRecord ¶
type AllocationWorkspaceRecord struct { bun.BaseModel `bun:"table:allocation_workspace_info"` AllocationID AllocationID `db:"allocation_id" bun:"allocation_id,notnull"` ExperimentID int `db:"experiment_id" bun:"experiment_id"` WorkspaceID int `db:"workspace_id" bun:"workspace_id,notnull"` WorkspaceName string `db:"workspace_name" bun:"workspace_name,notnull"` }
AllocationWorkspaceRecord is the model for persisting the workspace and experiment information associated with an allocation.
type AuthTokenKeypair ¶
type AuthTokenKeypair struct { bun.BaseModel `bun:"table:auth_token_keypair"` PublicKey ed25519.PublicKey `db:"public_key"` PrivateKey ed25519.PrivateKey `db:"private_key"` }
AuthTokenKeypair stores the public/private keypair used for asymmetric encryption of authentication tokens.
type BindMount ¶
type BindMount struct { HostPath string `json:"host_path"` ContainerPath string `json:"container_path"` ReadOnly bool `json:"read_only"` Propagation string `json:"propagation"` }
BindMount configures trial runner filesystem bind mounts.
func ToModelBindMount ¶
ToModelBindMount converts new expconf bind mounts into old model bind mounts.
func (*BindMount) UnmarshalJSON ¶
UnmarshalJSON implements the json.Unmarshaler interface.
type BindMountsConfig ¶
type BindMountsConfig []BindMount
BindMountsConfig is the configuration for bind mounts.
func (BindMountsConfig) ToExpconf ¶
func (b BindMountsConfig) ToExpconf() expconf.BindMountsConfig
ToExpconf translates old model objects into an expconf object.
func (*BindMountsConfig) UnmarshalJSON ¶
func (b *BindMountsConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type Checkpoint ¶
type Checkpoint struct { bun.BaseModel `bun:"table:checkpoints_view"` ID int `db:"id"` UUID *uuid.UUID `db:"uuid"` TaskID *TaskID `db:"task_id"` AllocationID *AllocationID `db:"allocation_id"` ReportTime time.Time `db:"report_time"` State State `db:"state"` Resources JSONObj `db:"resources"` Metadata JSONObj `db:"metadata"` Size int64 `db:"size"` StorageID *StorageBackendID `db:"storage_id"` CheckpointTrainingMetadata }
Checkpoint represents a row from the `checkpoints_view` view.
type CheckpointTrainingMetadata ¶
type CheckpointTrainingMetadata struct { TrialID int `db:"trial_id"` ExperimentID int `db:"experiment_id"` ExperimentConfig JSONObj `db:"experiment_config"` HParams JSONObj `db:"hparams" bun:"hparams"` TrainingMetrics JSONObj `db:"training_metrics"` ValidationMetrics JSONObj `db:"validation_metrics"` SearcherMetric *float64 `db:"searcher_metric"` StepsCompleted int `db:"steps_completed"` }
CheckpointTrainingMetadata is a substruct of checkpoints encapsulating training specific information.
type CheckpointV2 ¶
type CheckpointV2 struct { bun.BaseModel `bun:"table:checkpoints_v2"` ID int `db:"id" bun:"id,pk,autoincrement"` UUID uuid.UUID `db:"uuid"` TaskID TaskID `db:"task_id"` AllocationID *AllocationID `db:"allocation_id"` ReportTime time.Time `db:"report_time"` State State `db:"state"` Resources map[string]int64 `db:"resources"` Metadata map[string]interface{} `db:"metadata"` Size int64 `db:"size"` // Can be nil for checkpoints older than this feature. // Also can be nil when a user creates a checkpoint without a checkpoint storage config. StorageID *StorageBackendID `db:"storage_id"` }
CheckpointV2 represents a row from the `checkpoints_v2` table.
type ClusterMessage ¶
type ClusterMessage struct { CreatedBy int Message string StartTime time.Time EndTime sql.NullTime CreatedTime sql.NullTime }
ClusterMessage represents a server status from the `cluster_messages` table.
func (*ClusterMessage) ToProto ¶
func (m *ClusterMessage) ToProto() *apiv1.ClusterMessage
ToProto converts m to a type suitable for gRPC protobuf response.
type CommandConfig ¶
type CommandConfig struct { Description string `json:"description"` BindMounts BindMountsConfig `json:"bind_mounts"` Environment Environment `json:"environment"` Resources ResourcesConfig `json:"resources"` Entrypoint []string `json:"entrypoint"` TensorBoardArgs []string `json:"tensorboard_args,omitempty"` IdleTimeout *Duration `json:"idle_timeout"` NotebookIdleType string `json:"notebook_idle_type"` WorkDir *string `json:"work_dir"` Debug bool `json:"debug"` Pbs expconf.PbsConfig `json:"pbs,omitempty"` Slurm expconf.SlurmConfig `json:"slurm,omitempty"` }
CommandConfig holds the necessary configurations to launch a command task in the cluster.
func DefaultConfig ¶
func DefaultConfig(taskContainerDefaults *TaskContainerDefaultsConfig) CommandConfig
DefaultConfig is the default configuration used by all commands (e.g., commands, notebooks, shells) if a request does not specify any configuration options.
func (*CommandConfig) Validate ¶
func (c *CommandConfig) Validate() []error
Validate implements the check.Validatable interface.
type ConfigFile ¶
ConfigFile represents a row from the `config_files` table.
type DefaultLoggingConfig ¶
type DefaultLoggingConfig struct{}
DefaultLoggingConfig configures logging for tasks using HTTP to the master.
type DeviceConfig ¶
type DeviceConfig struct { HostPath string `json:"host_path"` ContainerPath string `json:"container_path"` Mode string `json:"mode"` }
DeviceConfig configures container device access.
func (DeviceConfig) ToExpconf ¶
func (d DeviceConfig) ToExpconf() expconf.Device
ToExpconf translates old model objects into an expconf object.
func (*DeviceConfig) UnmarshalJSON ¶
func (d *DeviceConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type DevicesConfig ¶
type DevicesConfig []DeviceConfig
DevicesConfig is the configuration for devices. It is a named type because it needs custom merging behavior (via UnmarshalJSON).
func (DevicesConfig) ToExpconf ¶
func (d DevicesConfig) ToExpconf() expconf.DevicesConfig
ToExpconf translates old model objects into an expconf object.
func (*DevicesConfig) UnmarshalJSON ¶
func (d *DevicesConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface so that DeviceConfigs are additive.
type Duration ¶
Duration is a JSON (un)marshallable version of time.Duration.
func (Duration) MarshalJSON ¶
MarshalJSON implements the json.Marshaler interface.
func (*Duration) UnmarshalJSON ¶
UnmarshalJSON implements the json.Unmarshaler interface.
type ElasticLoggingConfig ¶
type ElasticLoggingConfig struct { Host string `json:"host"` Port int `json:"port"` Security ElasticSecurityConfig `json:"security"` }
ElasticLoggingConfig configures logging for tasks using Elastic.
func (*ElasticLoggingConfig) Resolve ¶
func (o *ElasticLoggingConfig) Resolve() error
Resolve resolves the configuration.
type ElasticSecurityConfig ¶
type ElasticSecurityConfig struct { Username *string `json:"username"` Password *string `json:"password"` TLS TLSClientConfig `json:"tls"` }
ElasticSecurityConfig configures security-related options for the elastic logging backend.
func (*ElasticSecurityConfig) Resolve ¶
func (o *ElasticSecurityConfig) Resolve() error
Resolve resolves the configuration.
func (ElasticSecurityConfig) Validate ¶
func (o ElasticSecurityConfig) Validate() []error
Validate implements the check.Validatable interface.
type EntityType ¶
type EntityType string
EntityType represents an entity.
const ( // EntityTypeProject represents a project. EntityTypeProject EntityType = "Project" )
type Environment ¶
type Environment struct { Image RuntimeItem `json:"image"` EnvironmentVariables RuntimeItems `json:"environment_variables,omitempty"` ProxyPorts ProxyPortsConfig `json:"proxy_ports"` Ports map[string]int `json:"ports"` RegistryAuth *registry.AuthConfig `json:"registry_auth,omitempty"` ForcePullImage bool `json:"force_pull_image"` PodSpec *k8sV1.Pod `json:"pod_spec"` AddCapabilities []string `json:"add_capabilities"` DropCapabilities []string `json:"drop_capabilities"` }
Environment configures the environment of a Determined command or experiment.
func DefaultEnvConfig ¶
func DefaultEnvConfig(taskContainerDefaults *TaskContainerDefaultsConfig) Environment
DefaultEnvConfig returns the default environment configuration.
func (Environment) ToExpconf ¶
func (e Environment) ToExpconf() expconf.EnvironmentConfig
ToExpconf translates old model objects into an expconf object.
func (Environment) Validate ¶
func (e Environment) Validate() []error
Validate implements the check.Validatable interface.
type ExitedReason ¶
type ExitedReason string
ExitedReason defines why a workload exited early.
const ( // Errored signals the searcher that the workload errored out. Errored ExitedReason = "ERRORED" // UserRequestedStop signals the searcher that the user requested a cancelation, from code. UserRequestedStop ExitedReason = "USER_REQUESTED_STOP" // UserCanceled signals the searcher that the user requested a cancelation, from the CLI or UI. UserCanceled ExitedReason = "USER_CANCELED" // InvalidHP signals the searcher that the user raised an InvalidHP exception. InvalidHP ExitedReason = "INVALID_HP" // InitInvalidHP signals the searcher that the user raised an InvalidHP exception // in the trial init. InitInvalidHP ExitedReason = "INIT_INVALID_HP" )
func ExitedReasonFromProto ¶
func ExitedReasonFromProto(r trialv1.TrialEarlyExit_ExitedReason) ExitedReason
ExitedReasonFromProto returns an ExitedReason from its protobuf representation.
func (ExitedReason) ToSearcherProto ¶
func (r ExitedReason) ToSearcherProto() experimentv1.TrialExitedEarly_ExitedReason
ToSearcherProto converts an ExitedReason to its protobuf representation for searcher purposes.
type Experiment ¶
type Experiment struct { ID int `db:"id" bun:"id,pk"` JobID JobID `db:"job_id"` State State `db:"state"` Notes string `db:"notes"` // Offer a LegacyConfig rather than ExperimentConfig since most of the system is about querying // experiments which ran some time in the past, which is exactly what LegacyConfig is for. Config expconf.LegacyConfig `db:"config"` OriginalConfig string `db:"original_config"` StartTime time.Time `db:"start_time"` EndTime *time.Time `db:"end_time"` ParentID *int `db:"parent_id"` Archived bool `db:"archived"` OwnerID *UserID `db:"owner_id"` Username string `db:"username"` ProjectID int `db:"project_id"` Unmanaged bool `db:"unmanaged"` ExternalExperimentID *string `db:"external_experiment_id"` Progress *float64 }
Experiment represents a row from the `experiments` table.
func ExperimentFromProto ¶
func ExperimentFromProto(e *experimentv1.Experiment) (*Experiment, error)
ExperimentFromProto converts an experimentv1.Experiment to a model.Experiment.
func NewExperiment ¶
func NewExperiment( config expconf.ExperimentConfig, originalConfig string, parentID *int, archived bool, projectID int, unmanaged bool, ) (*Experiment, error)
NewExperiment creates a new experiment struct in the paused state. Note that the experiment ID will not be set.
func (*Experiment) Transition ¶
func (e *Experiment) Transition(state State) (bool, error)
Transition changes the state of the experiment to the new state. If the state was not modified the first return value returns false. If the state transition is illegal, an error is returned.
type ExtendedFloat64 ¶
type ExtendedFloat64 float64
ExtendedFloat64 handles serializing floats to JSON, including special cases for infinite values.
func (ExtendedFloat64) MarshalJSON ¶
func (f ExtendedFloat64) MarshalJSON() ([]byte, error)
MarshalJSON implements the json.Marshaler interface.
func (*ExtendedFloat64) UnmarshalJSON ¶
func (f *ExtendedFloat64) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type ExternalSessions ¶
type ExternalSessions struct { LoginURI string `json:"login_uri"` LogoutURI string `json:"logout_uri"` InvalidationURI string `json:"invalidation_uri"` JwtKey string `json:"jwt_key"` OrgID OrgID `json:"org_id"` ClusterID ClusterID `json:"cluster_id"` Invalidations *InvalidationMap }
ExternalSessions provides an integration point for an external service to issue JWTs to control access to the cluster.
func (*ExternalSessions) Enabled ¶
func (e *ExternalSessions) Enabled() bool
Enabled returns whether or not external sessions are enabled.
func (*ExternalSessions) StartInvalidationPoll ¶
func (e *ExternalSessions) StartInvalidationPoll(cert *tls.Certificate)
StartInvalidationPoll polls for new invalidations every minute.
func (*ExternalSessions) Validate ¶
func (e *ExternalSessions) Validate(claims *JWT) error
Validate returns an error if the provided JWT is invalidated.
type FullUser ¶
type FullUser struct { ID UserID `db:"id" json:"id"` DisplayName null.String `db:"display_name" json:"display_name"` Username string `db:"username" json:"username"` Name string `db:"name" json:"name"` Admin bool `db:"admin" json:"admin"` Active bool `db:"active" json:"active"` ModifiedAt time.Time `db:"modified_at" json:"modified_at"` Remote bool `db:"remote" json:"remote"` LastAuthAt *time.Time `db:"last_auth_at" json:"last_auth_at"` AgentUID null.Int `db:"agent_uid" json:"agent_uid"` AgentGID null.Int `db:"agent_gid" json:"agent_gid"` AgentUser null.String `db:"agent_user" json:"agent_user"` AgentGroup null.String `db:"agent_group" json:"agent_group"` }
A FullUser is a User joined with any other user relations.
type GenericTaskConfig ¶
type GenericTaskConfig struct { BindMounts BindMountsConfig `json:"bind_mounts"` Environment Environment `json:"environment"` Resources expconf.ResourcesConfig `json:"resources"` Entrypoint []string `json:"entrypoint"` WorkDir *string `json:"work_dir"` Debug bool `json:"debug"` Pbs expconf.PbsConfig `json:"pbs,omitempty"` Slurm expconf.SlurmConfig `json:"slurm,omitempty"` }
GenericTaskConfig is like expconf or command config, but for generic tasks.
func DefaultConfigGenericTaskConfig ¶
func DefaultConfigGenericTaskConfig( taskContainerDefaults *TaskContainerDefaultsConfig, ) GenericTaskConfig
DefaultConfigGenericTaskConfig merges task container defaults into a default generic task config struct.
func (*GenericTaskConfig) Validate ¶
func (c *GenericTaskConfig) Validate() []error
Validate implements the check.Validatable interface.
type Group ¶
type Group struct { bun.BaseModel `bun:"table:groups,alias:groups"` ID int `bun:"id,pk,autoincrement" json:"id"` Name string `bun:"group_name,notnull" json:"name"` OwnerID UserID `bun:"user_id,nullzero" json:"userId,omitempty"` }
Group represents a user group as it's stored in the database.
type GroupMembership ¶
type GroupMembership struct { bun.BaseModel `bun:"table:user_group_membership"` UserID UserID `bun:"user_id,notnull"` GroupID int `bun:"group_id,notnull"` }
GroupMembership represents a user's membership to a group as it's stored in the database.
type Groups ¶
type Groups []Group
Groups is a slice of Group objects—primarily useful for its methods.
type HealthCheck ¶
type HealthCheck struct { Status HealthStatus `json:"status"` Database HealthStatus `json:"database"` ResourceManagers []ResourceManagerHealth `json:"resource_managers"` }
HealthCheck is the response to the health check request.
type HealthStatus ¶
type HealthStatus string
HealthStatus is the up or down informational status.
const ( // Healthy indicates passing the health check. Healthy HealthStatus = "up" // Unhealthy indicates failing the health check. Unhealthy HealthStatus = "down" )
type Instance ¶
type Instance struct { ID string LaunchTime time.Time LastStateChangeTime time.Time AgentName string State InstanceState }
Instance connects a provider's name for a compute resource to the Determined agent name.
type InstanceState ¶
type InstanceState string
InstanceState is an enum type that describes an instance state.
const ( // Unknown describes the instance state cannot be recognized. Unknown InstanceState = "Unknown" // Starting describes the instance is starting up. Starting InstanceState = "Starting" // Running describes the instance is running. Running InstanceState = "Running" // Stopping describes the instance is stopping. Stopping InstanceState = "Stopping" // Stopped describes the instance is stopped. Stopped InstanceState = "Stopped" // Terminating is when the instance is in the process of being terminated. Terminating InstanceState = "Terminating" // SpotRequestPendingAWS indicates that the instance is actually a pending AWS spot request. SpotRequestPendingAWS InstanceState = "SpotRequestPendingAWS" )
type InstanceStats ¶
type InstanceStats struct { ResourcePool string `db:"resource_pool"` InstanceID string `db:"instance_id"` Slots int `db:"slots"` }
InstanceStats stores the start/end status of an instance.
type InstanceType ¶
InstanceType describes an instance type.
type InvalidationMap ¶
type InvalidationMap struct { DefaultTime time.Time `json:"defaultTime"` LastUpdated time.Time `json:"lastUpdated"` InvalidationTimes map[string]map[string]time.Time `json:"invalidationTimes"` }
InvalidationMap tracks times before which users should be considered invalid.
func (*InvalidationMap) GetInvalidatonTime ¶
func (im *InvalidationMap) GetInvalidatonTime(id string) time.Time
GetInvalidatonTime returns the token invalidation time for the specified user.
type JSONObj ¶
type JSONObj map[string]interface{}
JSONObj is a JSON object that converts to a []byte in SQL queries.
type JWT ¶
type JWT struct { jwt.StandardClaims UserID string // SaaS user IDs are strings, unlike Determined's int-based type Email string Name string OrgRoles map[OrgID]OrgRoleClaims }
JWT defines the claims that are serialized and signed to make a bearer token.
type Job ¶
type Job struct { bun.BaseModel `bun:"table:jobs"` JobID JobID `db:"job_id" bun:"job_id,pk"` JobType JobType `db:"job_type" bun:"job_type"` OwnerID *UserID `db:"owner_id" bun:"owner_id"` QPos decimal.Decimal `db:"q_position" bun:"q_position"` }
Job is the model for a job in the database.
type JobType ¶
type JobType string
JobType is the type of a job.
const ( // JobTypeNotebook is the "NOTEBOOK" job type for the enum public.job_type in Postgres. JobTypeNotebook JobType = "NOTEBOOK" // JobTypeShell is the "SHELL" job type for the enum public.job_type in Postgres. JobTypeShell JobType = "SHELL" // JobTypeCommand is the "COMMAND" job type for the enum public.job_type in Postgres. JobTypeCommand JobType = "COMMAND" // JobTypeTensorboard is the "TENSORBOARD" job type for the enum.job_type in Postgres. JobTypeTensorboard JobType = "TENSORBOARD" // JobTypeExperiment is the "EXPERIMENT" job type for the enum.job_type in Postgres. JobTypeExperiment JobType = "EXPERIMENT" // JobTypeCheckpointGC is the "CheckpointGC" job type for enum.job_type in Postgres. JobTypeCheckpointGC JobType = "CHECKPOINT_GC" // JobTypeGeneric is the "GENERIC" job type for enum.job_type in Postgres. JobTypeGeneric JobType = "GENERIC" )
func JobTypeFromProto ¶
JobTypeFromProto maps a jobv1.Type to JobType.
type KubernetesTaskContainerDefaults ¶
type KubernetesTaskContainerDefaults struct {
MaxSlotsPerPod *int `json:"max_slots_per_pod"`
}
KubernetesTaskContainerDefaults is task container defaults specific to Kubernetes.
type LogRetentionPolicy ¶
type LogRetentionPolicy struct { // Days is the default number of days to retain logs for. LogRetentionDays *int16 `json:"log_retention_days"` // Schedule is a time duration or cron expression interval to cleanup logs. Schedule *string `json:"schedule"` }
LogRetentionPolicy configures the default log retention policy for trials and tasks.
func (LogRetentionPolicy) Validate ¶
func (p LogRetentionPolicy) Validate() []error
Validate implements the check.Validatable interface.
type LoggingConfig ¶
type LoggingConfig struct { DefaultLoggingConfig *DefaultLoggingConfig `union:"type,default" json:"-"` ElasticLoggingConfig *ElasticLoggingConfig `union:"type,elastic" json:"-"` }
LoggingConfig configures logging for tasks (currently only trials) in Determined.
func (LoggingConfig) MarshalJSON ¶
func (c LoggingConfig) MarshalJSON() ([]byte, error)
MarshalJSON serializes LoggingConfig.
func (LoggingConfig) Resolve ¶
func (c LoggingConfig) Resolve() error
Resolve resolves the parts of the LoggingConfig that must be evaluated on the master machine.
func (*LoggingConfig) UnmarshalJSON ¶
func (c *LoggingConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON deserializes LoggingConfig.
type MetricGroup ¶
type MetricGroup string
MetricGroup denotes what custom group the metric is.
const ( // ValidationMetricGroup designates metrics from validation runs. ValidationMetricGroup MetricGroup = "validation" // TrainingMetricGroup designates metrics from training runs. TrainingMetricGroup MetricGroup = "training" // InferenceMetricGroup designates metrics from inference runs. InferenceMetricGroup MetricGroup = "inference" )
func TrialSummaryMetricGroup ¶
func TrialSummaryMetricGroup(jsonPath string) MetricGroup
TrialSummaryMetricGroup returns the metric group for the given summary JSON path.
func (MetricGroup) ToProto ¶
func (t MetricGroup) ToProto() apiv1.MetricType
ToProto returns the proto representation of the metric group.
func (MetricGroup) ToString ¶
func (t MetricGroup) ToString() string
ToString returns the string representation of the metric group.
func (MetricGroup) Validate ¶
func (t MetricGroup) Validate() error
Validate validates the metric group.
type MetricIdentifier ¶
type MetricIdentifier struct { Group MetricGroup Name metricName }
MetricIdentifier packages metric group and name together.
func DeserializeMetricIdentifier ¶
func DeserializeMetricIdentifier(s string) (*MetricIdentifier, error)
DeserializeMetricIdentifier deserializes a metric identifier from a string.
func (MetricIdentifier) ToProto ¶
func (m MetricIdentifier) ToProto() *metricv1.MetricIdentifier
ToProto returns the proto representation of the metric identifier.
type NotebookSession ¶
type NotebookSession struct { bun.BaseModel `bun:"table:notebook_sessions"` ID SessionID `db:"id" bun:"id,pk,autoincrement" json:"id"` TaskID TaskID `db:"task_id" bun:"task_id" json:"task_id"` // SessionID is only used for notebooks launched before UserID column was added. SessionID *SessionID `bun:"-" json:"user_session_id"` UserID UserID `db:"user_id" bun:"user_id" json:"user_id"` }
NotebookSession corresponds to a row in the "notebook_sessions" DB table.
type OAuthClient ¶
type OAuthClient struct { ID string `db:"id" json:"id"` Secret string `db:"secret" json:"secret"` Domain string `db:"domain" json:"domain"` Name string `db:"name" json:"name"` }
OAuthClient represents one OAuth client application.
func (OAuthClient) GetDomain ¶
func (c OAuthClient) GetDomain() string
GetDomain implements the oauth2.ClientInfo interface.
func (OAuthClient) GetID ¶
func (c OAuthClient) GetID() string
GetID implements the oauth2.ClientInfo interface.
func (OAuthClient) GetSecret ¶
func (c OAuthClient) GetSecret() string
GetSecret implements the oauth2.ClientInfo interface.
func (OAuthClient) GetUserID ¶
func (c OAuthClient) GetUserID() string
GetUserID implements the oauth2.ClientInfo interface.
type OAuthToken ¶
type OAuthToken struct { Access string `db:"access" json:"access"` AccessCreateAt time.Time `db:"access_create_at" json:"access_create_at"` AccessExpiresIn time.Duration `db:"access_expires_in" json:"access_expires_in"` ClientID string `db:"client_id" json:"client_id"` Code string `db:"code" json:"code"` CodeCreateAt time.Time `db:"code_create_at" json:"code_create_at"` CodeExpiresIn time.Duration `db:"code_expires_in" json:"code_expires_in"` RedirectURI string `db:"redirect_uri" json:"redirect_uri"` Refresh string `db:"refresh" json:"refresh"` RefreshCreateAt time.Time `db:"refresh_create_at" json:"refresh_create_at"` RefreshExpiresIn time.Duration `db:"refresh_expires_in" json:"refresh_expires_in"` Scope string `db:"scope" json:"scope"` UserID string `db:"user_id" json:"user_id"` ID int `db:"id" json:"id"` }
OAuthToken represents an OAuth token.
func (*OAuthToken) GetAccessCreateAt ¶
func (t *OAuthToken) GetAccessCreateAt() time.Time
GetAccessCreateAt create Time.
func (*OAuthToken) GetAccessExpiresIn ¶
func (t *OAuthToken) GetAccessExpiresIn() time.Duration
GetAccessExpiresIn the lifetime in seconds of the access token.
func (*OAuthToken) GetClientID ¶
func (t *OAuthToken) GetClientID() string
GetClientID the client id.
func (*OAuthToken) GetCodeCreateAt ¶
func (t *OAuthToken) GetCodeCreateAt() time.Time
GetCodeCreateAt create Time.
func (*OAuthToken) GetCodeExpiresIn ¶
func (t *OAuthToken) GetCodeExpiresIn() time.Duration
GetCodeExpiresIn the lifetime in seconds of the authorization code.
func (*OAuthToken) GetRedirectURI ¶
func (t *OAuthToken) GetRedirectURI() string
GetRedirectURI redirect URI.
func (*OAuthToken) GetRefreshCreateAt ¶
func (t *OAuthToken) GetRefreshCreateAt() time.Time
GetRefreshCreateAt create Time.
func (*OAuthToken) GetRefreshExpiresIn ¶
func (t *OAuthToken) GetRefreshExpiresIn() time.Duration
GetRefreshExpiresIn the lifetime in seconds of the refresh token.
func (*OAuthToken) GetScope ¶
func (t *OAuthToken) GetScope() string
GetScope get scope of authorization.
func (*OAuthToken) New ¶
func (t *OAuthToken) New() oauth2.TokenInfo
New creates a new token model instance.
func (*OAuthToken) SetAccess ¶
func (t *OAuthToken) SetAccess(access string)
SetAccess access Token.
func (*OAuthToken) SetAccessCreateAt ¶
func (t *OAuthToken) SetAccessCreateAt(createAt time.Time)
SetAccessCreateAt create Time.
func (*OAuthToken) SetAccessExpiresIn ¶
func (t *OAuthToken) SetAccessExpiresIn(exp time.Duration)
SetAccessExpiresIn the lifetime in seconds of the access token.
func (*OAuthToken) SetClientID ¶
func (t *OAuthToken) SetClientID(clientID string)
SetClientID the client id.
func (*OAuthToken) SetCodeCreateAt ¶
func (t *OAuthToken) SetCodeCreateAt(createAt time.Time)
SetCodeCreateAt create Time.
func (*OAuthToken) SetCodeExpiresIn ¶
func (t *OAuthToken) SetCodeExpiresIn(exp time.Duration)
SetCodeExpiresIn the lifetime in seconds of the authorization code.
func (*OAuthToken) SetRedirectURI ¶
func (t *OAuthToken) SetRedirectURI(redirectURI string)
SetRedirectURI redirect URI.
func (*OAuthToken) SetRefresh ¶
func (t *OAuthToken) SetRefresh(refresh string)
SetRefresh refresh Token.
func (*OAuthToken) SetRefreshCreateAt ¶
func (t *OAuthToken) SetRefreshCreateAt(createAt time.Time)
SetRefreshCreateAt create Time.
func (*OAuthToken) SetRefreshExpiresIn ¶
func (t *OAuthToken) SetRefreshExpiresIn(exp time.Duration)
SetRefreshExpiresIn the lifetime in seconds of the refresh token.
func (*OAuthToken) SetScope ¶
func (t *OAuthToken) SetScope(scope string)
SetScope sets the scope of authorization.
type OrgRoleClaims ¶
type OrgRoleClaims struct { Role Role DefaultClusterRole Role ClusterRoles map[ClusterID]Role ClusterActivations map[ClusterID]bool }
OrgRoleClaims is the specification of all permissions a user has in a given org.
type PatchOperation ¶
type PatchOperation struct { // Op is one of add, remove, replace, move, copy, test. Op string `json:"op"` // Path is the field to update. Path string `json:"path"` // Value is the new value. Value json.RawMessage `json:"value"` }
A PatchOperation is an RFC 6902 JSON Patch operation.
type PatchRequest ¶
type PatchRequest struct { Schemas PatchSchemas `json:"schemas"` Operations []PatchOperation `json:"operations"` }
A PatchRequest is a SCIM patch request.
type PatchSchemas ¶
type PatchSchemas struct{}
PatchSchemas is a constant schemas field for a patch.
func (PatchSchemas) MarshalJSON ¶
func (s PatchSchemas) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*PatchSchemas) UnmarshalJSON ¶
func (s *PatchSchemas) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
type Project ¶
type Project struct { bun.BaseModel `bun:"table:projects"` ID int `bun:"id,pk,autoincrement"` Name string `bun:"name"` CreatedAt time.Time `bun:"created_at,scanonly"` Archived bool `bun:"archived"` WorkspaceID int `bun:"workspace_id"` WorkspaceName string `bun:"workspace_name,scanonly"` UserID int `bun:"user_id"` Username string `bun:"username,scanonly"` Immutable bool `bun:"immutable"` Description string `bun:"description"` Notes []*projectv1.Note `bun:"notes,type:jsonb,nullzero"` NumActiveExperiments int32 `bun:"num_active_experiments,scanonly"` NumExperiments int32 `bun:"num_experiments,scanonly"` NumRuns int32 `bun:"num_runs,scanonly"` State WorkspaceState `bun:"state,default:'UNSPECIFIED'::workspace_state"` ErrorMessage string `bun:"error_message"` LastExperimentStartedAt time.Time `bun:"last_experiment_started_at,scanonly"` Key string `bun:"key"` }
Project is the bun model of a project.
type ProjectHparam ¶
type ProjectHparam struct { bun.BaseModel `bun:"table:project_hparams"` ProjectID int `db:"project_id" bun:"project_id"` HParam string `bun:"hparam"` Type string `bun:"type"` }
ProjectHparam represents a row from the `project_hparams` table.
type ProxyPort ¶
type ProxyPort struct { ProxyPort int `json:"proxy_port"` ProxyTCP bool `json:"proxy_tcp"` Unauthenticated bool `json:"unauthenticated"` DefaultServiceID bool `json:"default_service_id"` }
ProxyPort is a legacy-style clone of expconf.ProxyPort. TODO(ilia): migrate command config to expconf.
type ProxyPortsConfig ¶
type ProxyPortsConfig []ProxyPort
ProxyPortsConfig is a legacy-style clone of expconf.ProxyPortsConfig.
func (ProxyPortsConfig) ToExpconf ¶
func (p ProxyPortsConfig) ToExpconf() expconf.ProxyPortsConfig
ToExpconf translates old model objects into an expconf object.
type RequestID ¶
RequestID links all operations with the same ID to a single trial create request.
func MustParseRequestID ¶ added in v0.750.0
MustParseRequestID decodes s into a request id or panics.
func NewRequestID ¶
NewRequestID returns a new request ID using the provided reader.
func ParseRequestID ¶ added in v0.750.0
ParseRequestID decodes s into a request id or returns an error.
func (RequestID) Before ¶
Before determines whether this UUID is strictly lexicographically less (comparing the sequences of bytes) than another one.
func (RequestID) MarshalText ¶
MarshalText returns the marshaled form of this ID, which is the string form of the underlying UUID.
func (*RequestID) UnmarshalText ¶
UnmarshalText unmarshals this ID from a text representation.
type ResourceAggregates ¶
type ResourceAggregates struct { Date *time.Time // AggregationType is the type of aggregation. E.g. "total", "queued", "resource_pool", "username" AggregationType string AggregationKey string Seconds float32 }
ResourceAggregates is the model for resource_aggregates in the database.
type ResourceManagerHealth ¶
type ResourceManagerHealth struct { ClusterName string `json:"cluster_name"` Status HealthStatus `json:"status"` }
ResourceManagerHealth is a pair of resource manager name and health status.
type ResourcesConfig ¶
type ResourcesConfig struct { Slots int `json:"slots"` MaxSlots *int `json:"max_slots,omitempty"` Weight float64 `json:"weight"` NativeParallel bool `json:"native_parallel,omitempty"` ShmSize *StorageSize `json:"shm_size,omitempty"` ResourcePool string `json:"resource_pool"` Priority *int `json:"priority,omitempty"` IsSingleNode *bool `json:"is_single_node"` Devices DevicesConfig `json:"devices"` }
ResourcesConfig configures resource usage for a command, notebook, tensorboard, generic task, or old experiment (new experiment uses ResourcesConfigV0).
func DefaultResourcesConfig ¶
func DefaultResourcesConfig(taskContainerDefaults *TaskContainerDefaultsConfig) ResourcesConfig
DefaultResourcesConfig returns the default resources configuration.
func ParseJustResources ¶
func ParseJustResources(configBytes []byte) ResourcesConfig
ParseJustResources is a helper function for breaking the circular dependency where we need the TaskContainerDefaults to unmarshal an ExperimentConfig, but we need the Resources.ResourcePool setting to know which TaskContainerDefaults to use. It does not throw errors; if unmarshalling fails that can just get caught later.
func (ResourcesConfig) ToExpconf ¶
func (r ResourcesConfig) ToExpconf() expconf.ResourcesConfig
ToExpconf translates old model objects into an expconf object.
func (ResourcesConfig) Validate ¶
func (r ResourcesConfig) Validate() []error
Validate implements the check.Validatable interface.
type RoleAssignmentScope ¶
type RoleAssignmentScope struct { bun.BaseModel `bun:"table:role_assignment_scopes"` ID int `bun:"id,pk,autoincrement" json:"id"` WorkspaceID sql.NullInt32 `bun:"scope_workspace_id" json:"workspace_id"` }
RoleAssignmentScope represents a RoleAssignmentScope as it's stored in the database.
type Run ¶
type Run struct { bun.BaseModel `bun:"table:runs"` ID int `db:"id" bun:",pk,autoincrement"` ProjectID int `db:"project_id" bun:"project_id"` ExperimentID int `db:"experiment_id"` State State `db:"state"` StartTime time.Time `db:"start_time"` EndTime *time.Time `db:"end_time"` HParams map[string]any `db:"hparams" bun:"hparams"` WarmStartCheckpointID *int `db:"warm_start_checkpoint_id"` TotalBatches int `db:"total_batches"` ExternalRunID *string `db:"external_run_id"` RestartID int `db:"restart_id"` Restarts int `db:"restarts"` RunnerState string `db:"runner_state"` LastActivity *time.Time `db:"last_activity"` LogRetentionDays *int16 `db:"log_retention_days"` Metadata map[string]any `db:"metadata" bun:"metadata,scanonly"` LocalID int `db:"local_id"` }
Run represents a row from the `runs` table.
type RunCheckpoints ¶
type RunCheckpoints struct { bun.BaseModel `bun:"table:run_checkpoints"` RunID int `bun:"run_id"` CheckpointID uuid.UUID `bun:"checkpoint_id"` }
RunCheckpoints represents a row from the `run_checkpoints` table.
type RunHparam ¶
type RunHparam struct { bun.BaseModel `bun:"table:run_hparams"` RunID int `bun:"run_id"` HParam string `bun:"hparam"` NumberVal *float64 `bun:"number_val"` TextVal *string `bun:"text_val"` BoolVal *bool `bun:"bool_val"` }
RunHparam represents a row from the `run_hparams` table.
type RunMetadata ¶
type RunMetadata struct { bun.BaseModel `bun:"table:runs_metadata"` RunID int Metadata map[string]interface{} }
RunMetadata is the bun model of a runMetadata entry.
type RunMetadataIndex ¶
type RunMetadataIndex struct { bun.BaseModel `bun:"table:runs_metadata_index"` ID int `bun:"id,pk,autoincrement"` RunID int `bun:"run_id"` FlatKey string `bun:"flat_key"` StringValue *string `bun:"string_value"` IntegerValue *int `bun:"integer_value"` FloatValue *float64 `bun:"float_value"` BooleanValue *bool `bun:"boolean_value"` TimestampValue *string `bun:"timestamp_value"` ProjectID int `bun:"project_id"` IsArrayElement bool `bun:"is_array_element"` }
RunMetadataIndex is the bun model of a runMetadataIndex entry.
type RuntimeItem ¶
type RuntimeItem struct { CPU string `json:"cpu,omitempty"` CUDA string `json:"cuda,omitempty"` ROCM string `json:"rocm,omitempty"` }
RuntimeItem configures the runtime image.
func (RuntimeItem) For ¶
func (r RuntimeItem) For(deviceType device.Type) string
For returns the value for the provided device type.
func (RuntimeItem) ToExpconf ¶
func (r RuntimeItem) ToExpconf() expconf.EnvironmentImageMap
ToExpconf translates old model objects into an expconf object.
func (*RuntimeItem) UnmarshalJSON ¶
func (r *RuntimeItem) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type RuntimeItems ¶
type RuntimeItems struct { CPU []string `json:"cpu,omitempty"` CUDA []string `json:"cuda,omitempty"` ROCM []string `json:"rocm,omitempty"` }
RuntimeItems configures the runtime environment variables.
func (*RuntimeItems) For ¶
func (r *RuntimeItems) For(deviceType device.Type) []string
For returns the value for the provided device type.
func (RuntimeItems) ToExpconf ¶
func (r RuntimeItems) ToExpconf() expconf.EnvironmentVariablesMap
ToExpconf translates old model objects into an expconf object.
func (*RuntimeItems) UnmarshalJSON ¶
func (r *RuntimeItems) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type SCIMEmail ¶
type SCIMEmail struct { Type string `json:"type"` SValue string `json:"value"` Primary bool `json:"primary"` }
SCIMEmail is an email address in SCIM.
type SCIMEmails ¶
type SCIMEmails []SCIMEmail
SCIMEmails is a list of emails in SCIM.
func SCIMEmailsFromJWT ¶
func SCIMEmailsFromJWT(claims *JWT) SCIMEmails
SCIMEmailsFromJWT returns a consistent SCIMEmails struct wrapping the single email in a JWT.
func (*SCIMEmails) Scan ¶
func (e *SCIMEmails) Scan(value interface{}) error
Scan implements sql.Scanner.
type SCIMError ¶
type SCIMError struct { Detail string `json:"detail,omitempty"` Status int `json:"status"` SCIMType string `json:"scimType,omitempty"` Schemas SCIMErrorSchemas `json:"schemas"` }
SCIMError is an error in SCIM.
type SCIMErrorSchemas ¶
type SCIMErrorSchemas struct{}
SCIMErrorSchemas is the constant schemas field for errors.
func (SCIMErrorSchemas) MarshalJSON ¶
func (s SCIMErrorSchemas) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*SCIMErrorSchemas) UnmarshalJSON ¶
func (s *SCIMErrorSchemas) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
type SCIMGroup ¶
type SCIMGroup struct { ID UUID `json:"id"` DisplayName string `json:"displayName"` Members []*SCIMUser `json:"members"` Schemas SCIMGroupSchemas `json:"schemas"` Meta *SCIMGroupMeta `json:"meta"` }
SCIMGroup is a group in SCIM.
type SCIMGroupMeta ¶
type SCIMGroupMeta struct {
ResourceType SCIMGroupResourceType `json:"resourceType"`
}
SCIMGroupMeta is the metadata for a group in SCIM.
type SCIMGroupResourceType ¶
type SCIMGroupResourceType struct{}
SCIMGroupResourceType is the constant resource type field for groups.
func (SCIMGroupResourceType) MarshalJSON ¶
func (s SCIMGroupResourceType) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*SCIMGroupResourceType) UnmarshalJSON ¶
func (s *SCIMGroupResourceType) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
type SCIMGroupSchemas ¶
type SCIMGroupSchemas struct{}
SCIMGroupSchemas is the constant schemas field for a group.
func (SCIMGroupSchemas) MarshalJSON ¶
func (s SCIMGroupSchemas) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*SCIMGroupSchemas) UnmarshalJSON ¶
func (s *SCIMGroupSchemas) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
type SCIMGroups ¶
type SCIMGroups struct { TotalResults int `json:"totalResults"` StartIndex int `json:"startIndex"` Resources []*SCIMGroup `json:"Resources"` ItemsPerPage int `json:"itemsPerPage"` Schemas SCIMListSchemas `json:"schemas"` }
SCIMGroups is a list of groups in SCIM.
func (*SCIMGroups) SetSCIMFields ¶
func (g *SCIMGroups) SetSCIMFields(serverRoot *url.URL) error
SetSCIMFields sets the location field for all users given the URL of the master.
type SCIMListSchemas ¶
type SCIMListSchemas struct{}
SCIMListSchemas is the constant schemas field for lists.
func (SCIMListSchemas) MarshalJSON ¶
func (s SCIMListSchemas) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*SCIMListSchemas) UnmarshalJSON ¶
func (s *SCIMListSchemas) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
type SCIMName ¶
SCIMName is a name in SCIM.
func SCIMNameFromJWT ¶
SCIMNameFromJWT returns a consistent SCIMName struct wrapping the single name in a JWT.
type SCIMUser ¶
type SCIMUser struct { ID UUID `bun:"id" json:"id"` Username string `bun:"username" json:"userName"` ExternalID string `bun:"external_id" json:"externalId"` Name SCIMName `bun:"name" json:"name"` DisplayName null.String `bun:"display_name" json:"displayName"` Emails SCIMEmails `bun:"emails" json:"emails"` Active bool `bun:"active" json:"active"` PasswordHash null.String `bun:"password_hash" json:"password_hash,omitempty"` Password string `json:"password,omitempty"` Schemas SCIMUserSchemas `json:"schemas"` Meta *SCIMUserMeta `json:"meta"` UserID UserID `bun:"user_id" json:"-"` RawAttributes map[string]interface{} `bun:"raw_attributes" json:"-"` }
SCIMUser is a user in SCIM.
func (*SCIMUser) Sanitize ¶
func (u *SCIMUser) Sanitize()
Sanitize sanitizes the user of external data that could be provided, but should always be ignored. See https://tools.ietf.org/html/rfc7643#section-3.1 for why meta must be cleared.
func (*SCIMUser) SetSCIMFields ¶
SetSCIMFields sets the location field for a user given the URL of the master and makes other changes, such as removing password fields from the model.
func (*SCIMUser) UpdatePasswordHash ¶
UpdatePasswordHash updates the SCIMUser's password hash.
func (SCIMUser) ValidateChanges ¶
ValidateChanges checks that a patch for a user satisfies the expected invariants.
type SCIMUserMeta ¶
type SCIMUserMeta struct { ResourceType SCIMUserResourceType `json:"resourceType"` Location string `json:"location"` }
SCIMUserMeta is the metadata for a user in SCIM.
type SCIMUserResourceType ¶
type SCIMUserResourceType struct{}
SCIMUserResourceType is the constant resource type field for users.
func (SCIMUserResourceType) MarshalJSON ¶
func (s SCIMUserResourceType) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*SCIMUserResourceType) UnmarshalJSON ¶
func (s *SCIMUserResourceType) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
type SCIMUserSchemas ¶
type SCIMUserSchemas struct{}
SCIMUserSchemas is the constant schemas field for a user.
func (SCIMUserSchemas) MarshalJSON ¶
func (s SCIMUserSchemas) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*SCIMUserSchemas) UnmarshalJSON ¶
func (s *SCIMUserSchemas) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
type SCIMUsers ¶
type SCIMUsers struct { TotalResults int `json:"totalResults"` StartIndex int `json:"startIndex"` Resources []*SCIMUser `json:"Resources"` ItemsPerPage int `json:"itemsPerPage"` Schemas SCIMListSchemas `json:"schemas"` }
SCIMUsers is a list of users in SCIM.
type SlotSummary ¶
type SlotSummary struct { ID string `json:"id"` Device device.Device `json:"device"` Enabled bool `json:"enabled"` Container *cproto.Container `json:"container"` Draining bool `json:"draining"` }
SlotSummary summarizes the state of a slot.
func (SlotSummary) ToProto ¶
func (s SlotSummary) ToProto() *agentv1.Slot
ToProto converts a SlotSummary to its protobuf representation.
type SlotsSummary ¶
type SlotsSummary map[string]SlotSummary
SlotsSummary contains a summary for a number of slots.
type Snapshotter ¶
type Snapshotter interface { Snapshot() (json.RawMessage, error) Restore(json.RawMessage) error }
Snapshotter is any object that implements how to save and restore its state.
type State ¶
type State string
State is the run state of an experiment / trial / step / etc.
func StateFromProto ¶
func StateFromProto(state experimentv1.State) State
StateFromProto maps experimentv1.State to State.
type StateWithReason ¶
StateWithReason is the run state of an experiment with an informational reason used for logging purposes.
type StorageBackendID ¶
type StorageBackendID int
StorageBackendID is the ID for the storage backend. Storage backend ID isn't backfilled so checkpoints older than 0.27.1 won't have this. There are also some cases where a user can create a checkpoint without this so don't rely on this always being set.
type StorageSize ¶
type StorageSize int64
StorageSize is a named type for custom marshaling behavior for shm_size.
func (*StorageSize) UnmarshalJSON ¶
func (d *StorageSize) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface.
type TLSClientConfig ¶
type TLSClientConfig struct { Enabled bool `json:"enabled"` SkipVerify bool `json:"skip_verify"` CertificatePath string `json:"certificate"` CertificateName string `json:"certificate_name"` CertBytes []byte }
TLSClientConfig configures how to make a TLS connection.
func MakeTLSConfig ¶
func MakeTLSConfig(cert *tls.Certificate) (TLSClientConfig, error)
MakeTLSConfig constructs a TLSClientConfig to use the provided tls.Certificate.
func (*TLSClientConfig) Resolve ¶
func (t *TLSClientConfig) Resolve() error
Resolve resolves the configuration.
func (TLSClientConfig) Validate ¶
func (t TLSClientConfig) Validate() []error
Validate implements the check.Validatable interface.
type Task ¶
type Task struct { bun.BaseModel `bun:"table:tasks"` TaskID TaskID `db:"task_id" bun:"task_id,pk"` JobID *JobID `db:"job_id"` TaskType TaskType `db:"task_type"` StartTime time.Time `db:"start_time"` EndTime *time.Time `db:"end_time"` // LogVersion indicates how the logs were stored. LogVersion TaskLogVersion `db:"log_version"` // Relations. Job *Job `bun:"rel:belongs-to,join:job_id=job_id"` ParentID *TaskID `db:"parent_id"` ForkedFrom *string `db:"forked_from"` State *TaskState `db:"task_state" bun:"task_state"` Config *string `db:"config"` NoPause *bool `db:"no_pause"` }
Task is the model for a task in the database.
type TaskContainerDefaultsConfig ¶
type TaskContainerDefaultsConfig struct { DtrainNetworkInterface string `json:"dtrain_network_interface,omitempty"` NCCLPortRange string `json:"nccl_port_range,omitempty"` GLOOPortRange string `json:"gloo_port_range,omitempty"` ShmSizeBytes int64 `json:"shm_size_bytes,omitempty"` NetworkMode container.NetworkMode `json:"network_mode,omitempty"` // TODO(DET-9855) we should move these over to KubernetesTaskContainerDefaults. CPUPodSpec *k8sV1.Pod `json:"cpu_pod_spec"` GPUPodSpec *k8sV1.Pod `json:"gpu_pod_spec"` CheckpointGCPodSpec *k8sV1.Pod `json:"checkpoint_gc_pod_spec"` Image *RuntimeItem `json:"image,omitempty"` RegistryAuth *registry.AuthConfig `json:"registry_auth,omitempty"` ForcePullImage bool `json:"force_pull_image,omitempty"` EnvironmentVariables *RuntimeItems `json:"environment_variables,omitempty"` AddCapabilities []string `json:"add_capabilities"` DropCapabilities []string `json:"drop_capabilities"` Devices DevicesConfig `json:"devices"` BindMounts BindMountsConfig `json:"bind_mounts"` WorkDir *string `json:"work_dir"` Slurm expconf.SlurmConfigV0 `json:"slurm"` Pbs expconf.PbsConfigV0 `json:"pbs"` StartupHook string `json:"startup_hook"` LogPolicies expconf.LogPoliciesConfig `json:"log_policies"` // TODO(DET-9856) we should probably eventually move this to expconf and allow setting // on a per task level. Kubernetes *KubernetesTaskContainerDefaults `json:"kubernetes"` }
TaskContainerDefaultsConfig configures docker defaults for all containers. If you add a field to this, you must update the merge impl.
func DefaultTaskContainerDefaults ¶
func DefaultTaskContainerDefaults() *TaskContainerDefaultsConfig
DefaultTaskContainerDefaults returns the default for TaskContainerDefaultsConfig.
func (TaskContainerDefaultsConfig) Merge ¶
func (c TaskContainerDefaultsConfig) Merge( other TaskContainerDefaultsConfig, ) (TaskContainerDefaultsConfig, error)
Merge merges other into self, preferring other. The result is a deepcopy of self, with deep copies of values taken from other.
func (*TaskContainerDefaultsConfig) MergeIntoExpConfig ¶
func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.ExperimentConfig)
MergeIntoExpConfig sets any unset ExperimentConfig values from TaskContainerDefaults.
func (*TaskContainerDefaultsConfig) UnmarshalJSON ¶
func (c *TaskContainerDefaultsConfig) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface. Setting defaults here is necessary over our usual "Define a default struct and unmarshal into it" strategy because there are places (resource pool configs) where we need to know if the task container defaults were set at all or if they were not; if they were set then that resource pool's task container defaults are used instead of the toplevel master config's settings. To know if the user set them at the resource pool level, the resource pool has to have a nullable pointer, which is not compatible with our usual strategy for defaults.
func (*TaskContainerDefaultsConfig) Validate ¶
func (c *TaskContainerDefaultsConfig) Validate() []error
Validate implements the check.Validatable interface.
type TaskContextDirectory ¶
type TaskContextDirectory struct { bun.BaseModel `bun:"table:task_context_directory"` TaskID TaskID `bun:"task_id"` ContextDirectory []byte `bun:"context_directory"` }
TaskContextDirectory represents a row in the database for a task's context directory. This is currently only for notebooks, tensorboards, and commands. Trials aren't in it because they are stored on experiments.model_def. In addition, trials can have many tasks but currently can only have one model_def. We would end up duplicating a lot of data migrating experiment's model_def over to this table. Also, that migration would be pretty painful.
type TaskLog ¶
type TaskLog struct { // A task log should have one of these IDs after being persisted. All should be unique. ID *int `db:"id" bun:"id,pk,autoincrement" json:"id,omitempty"` // The body of an Elasticsearch log response will look something like // { _id: ..., _source: { ... }} where _source is the rest of this struct. // StringID doesn't have serialization tags because it is not part of // _source and populated from _id. StringID *string `json:"-"` TaskID string `db:"task_id" json:"task_id"` AllocationID *string `db:"allocation_id" json:"allocation_id"` AgentID *string `db:"agent_id" json:"agent_id,omitempty"` // In the case of k8s, container_id is a pod name instead. ContainerID *string `db:"container_id" json:"container_id,omitempty"` RankID *int `db:"rank_id" json:"rank_id,omitempty"` Timestamp *time.Time `db:"timestamp" json:"timestamp"` Level *string `db:"level" json:"level"` Log string `db:"log" json:"log"` Source *string `db:"source" json:"source,omitempty"` StdType *string `db:"stdtype" json:"stdtype,omitempty"` }
TaskLog represents a structured log emitted by an allocation.
func TaskLogFromProto ¶
TaskLogFromProto converts a proto task log to a model task log.
type TaskLogBatch ¶
type TaskLogBatch []*TaskLog
TaskLogBatch represents a batch of model.TaskLog.
func (TaskLogBatch) ForEach ¶
func (t TaskLogBatch) ForEach(f func(interface{}) error) error
ForEach implements logs.Batch.
type TaskLogVersion ¶
type TaskLogVersion int32
TaskLogVersion is the version for our log-storing scheme. Useful because changing designs would involve either a really costly migration or versioning schemes and we pick the latter.
const ( TaskLogVersion0 TaskLogVersion = 0 TaskLogVersion1 TaskLogVersion = 1 CurrentTaskLogVersion = TaskLogVersion1 )
CurrentTaskLogVersion describes the current scheme in which we store task logs. To avoid a migration that in some cases would be extremely costly, we record the log version so that we can just read old logs the old way and do the new however we please.
type TaskState ¶
type TaskState string
TaskState represents the state of a generic task.
const ( // TaskStateActive denotes that task is running. TaskStateActive TaskState = "ACTIVE" // TaskStateCanceled denotes that task is killed. TaskStateCanceled TaskState = "CANCELED" // TaskStateCompleted denotes that task has finished running. TaskStateCompleted TaskState = "COMPLETED" // TaskStateError denotes that task has exited with an error. TaskStateError TaskState = "ERROR" // TaskStatePaused denotes that task has been paused. TaskStatePaused TaskState = "PAUSED" // TaskStateStoppingPaused denotes that the task is in the process of being paused. TaskStateStoppingPaused TaskState = "STOPPING_PAUSED" // TaskStateStoppingCanceled denotes that the task is in the process of being canceled. TaskStateStoppingCanceled TaskState = "STOPPING_CANCELED" // TaskStateStoppingCompleted denotes that the task is in the process of being completed. TaskStateStoppingCompleted TaskState = "STOPPING_COMPLETED" // TaskStateStoppingError denotes that the task is in the process of returning an error. TaskStateStoppingError TaskState = "STOPPING_ERROR" )
type TaskStats ¶
type TaskStats struct { AllocationID AllocationID EventType string // ContainerID is sent by the agent. This won't always be present in the database // This is a weird table since sometimes it is one row per allocation // (like in record queued stats) and sometimes it is many per allocation like in // pulled time. ContainerID *cproto.ID StartTime *time.Time EndTime *time.Time }
TaskStats is the model for task stats in the database.
type Template ¶
type Template struct { Name string `db:"name" json:"name"` Config []byte `db:"config" json:"config" bun:"config"` WorkspaceID int `db:"workspace_id" json:"workspace_id"` }
Template represents a row from the `templates` table.
type Trial ¶
type Trial struct { bun.BaseModel `bun:"table:trials"` ID int `db:"id" bun:",pk,autoincrement"` RequestID *RequestID `db:"request_id"` ExperimentID int `db:"experiment_id"` State State `db:"state"` StartTime time.Time `db:"start_time"` EndTime *time.Time `db:"end_time"` HParams map[string]any `db:"hparams" bun:"hparams"` WarmStartCheckpointID *int `db:"warm_start_checkpoint_id"` Seed int64 `db:"seed"` TotalBatches int `db:"total_batches"` ExternalTrialID *string `db:"external_trial_id"` RunID int `db:"run_id"` // run_id as in restart_id not "runs" id. Restarts int `db:"restarts"` RunnerState string `db:"runner_state"` LastActivity *time.Time `db:"last_activity"` LogRetentionDays *int16 `db:"log_retention_days"` Metadata map[string]any `db:"metadata" bun:"metadata"` }
Trial represents a row from the `trials` table.
type TrialLog ¶
type TrialLog struct { // A trial log should have one of these IDs. All should be unique. // TODO(Brad): This must be int64. ID *int `db:"id" json:"id,omitempty"` // The body of an Elasticsearch log response will look something like // { _id: ..., _source: { ... }} where _source is the rest of this struct. // StringID doesn't have serialization tags because it is not part of // _source and populated from _id. StringID *string `json:"-"` TrialID int `db:"trial_id" json:"trial_id"` Message string `db:"message" json:"message,omitempty"` AgentID *string `db:"agent_id" json:"agent_id,omitempty"` // In the case of k8s, container_id is a pod name instead. ContainerID *string `db:"container_id" json:"container_id,omitempty"` RankID *int `db:"rank_id" json:"rank_id,omitempty"` Timestamp *time.Time `db:"timestamp" json:"timestamp"` Level *string `db:"level" json:"level"` Log *string `db:"log" json:"log"` Source *string `db:"source" json:"source,omitempty"` StdType *string `db:"stdtype" json:"stdtype,omitempty"` }
TrialLog represents a row from the `trial_logs` table.
type TrialLogBatch ¶
type TrialLogBatch []*TrialLog
TrialLogBatch represents a batch of model.TrialLog.
func (TrialLogBatch) ForEach ¶
func (t TrialLogBatch) ForEach(f func(interface{}) error) error
ForEach implements logs.Batch.
type TrialMetrics ¶
type TrialMetrics struct { ID int `db:"id" json:"id"` TrialID int `db:"trial_id" json:"trial_id"` TrialRunID int `db:"trial_run_id" json:"-"` TotalBatches int `db:"total_batches" json:"total_batches"` EndTime *time.Time `db:"end_time" json:"end_time"` Metrics JSONObj `db:"metrics" json:"metrics"` }
TrialMetrics represents a row from the `steps` or `validations` table.
type TrialProfilerMetricsBatch ¶
type TrialProfilerMetricsBatch struct { Values pgtype.Float4Array `db:"values"` Batches pgtype.Int4Array `db:"batches"` Timestamps pgtype.TimestamptzArray `db:"timestamps"` Labels []byte `db:"labels"` }
TrialProfilerMetricsBatch represents a row from the `trial_profiler_metrics` table.
func (*TrialProfilerMetricsBatch) ToProto ¶
func (t *TrialProfilerMetricsBatch) ToProto() (*trialv1.TrialProfilerMetricsBatch, error)
ToProto converts a TrialProfilerMetricsBatch to its protobuf representation.
type TrialProfilerMetricsBatchBatch ¶
type TrialProfilerMetricsBatchBatch []*trialv1.TrialProfilerMetricsBatch
TrialProfilerMetricsBatchBatch represents a batch of trialv1.TrialProfilerMetricsBatch.
func (TrialProfilerMetricsBatchBatch) ForEach ¶
func (t TrialProfilerMetricsBatchBatch) ForEach(f func(interface{}) error) error
ForEach implements logs.Batch.
func (TrialProfilerMetricsBatchBatch) Size ¶
func (t TrialProfilerMetricsBatchBatch) Size() int
Size implements logs.Batch.
type TrialV2 ¶
type TrialV2 struct { bun.BaseModel `bun:"table:trials_v2"` RunID int `bun:"run_id"` RequestID *RequestID `bun:"request_id"` Seed int64 `bun:"seed"` }
TrialV2 represents a row from the `trials_v2` table.
type UUID ¶
UUID is a UUID that converts to a nullable string in SQL queries.
func ParseUUID ¶
ParseUUID initializes a non-null UUID from a string. It returns an error if the string does not follow the format of a UUID.
func (UUID) MarshalJSON ¶
MarshalJSON implements the json.Marshaler interface.
func (UUID) String ¶
String returns the string representation of the UUID. If this UUID is null, return the empty string.
func (*UUID) UnmarshalJSON ¶
UnmarshalJSON implements the json.Unmarshaler interface.
type User ¶
type User struct { bun.BaseModel `bun:"table:users"` ID UserID `db:"id" bun:"id,pk,autoincrement" json:"id"` Username string `db:"username" json:"username"` PasswordHash null.String `db:"password_hash" json:"-"` DisplayName null.String `db:"display_name" json:"display_name"` Admin bool `db:"admin" json:"admin"` Active bool `db:"active" json:"active"` ModifiedAt time.Time `db:"modified_at" json:"modified_at"` Remote bool `db:"remote" json:"remote"` LastAuthAt *time.Time `db:"last_auth_at" json:"last_auth_at"` }
User corresponds to a row in the "users" DB table.
func (*User) UpdatePasswordHash ¶
UpdatePasswordHash updates the model's password hash employing necessary cryptographic techniques.
func (User) ValidatePassword ¶
ValidatePassword checks that the supplied password is correct.
type UserActivity ¶
type UserActivity struct { bun.BaseModel `bun:"table:activity"` UserID UserID `db:"user_id" json:"user_id"` ActivityType ActivityType `db:"activity_type" json:"activity_type"` EntityType EntityType `db:"entity_type" json:"entity_type"` EntityID int32 `db:"entity_id" json:"entity_id"` ActivityTime time.Time `db:"activity_time" json:"activity_time"` }
UserActivity is a record of user activity.
func UserActivityFromProto ¶
func UserActivityFromProto( a userv1.ActivityType, e userv1.EntityType, entityID int32, userID int32, timestamp time.Time, ) *UserActivity
UserActivityFromProto returns a model UserActivity from a proto definition.
type UserSession ¶
type UserSession struct { bun.BaseModel `bun:"table:user_sessions"` ID SessionID `db:"id" json:"id"` UserID UserID `db:"user_id" json:"user_id"` Expiry time.Time `db:"expiry" json:"expiry"` InheritedClaims map[string]string `bun:"-"` // InheritedClaims contains the OIDC raw ID token when OIDC is enabled }
UserSession corresponds to a row in the "user_sessions" DB table.
type UserWebSetting ¶
UserWebSetting is a record of a user web setting.
func (UserWebSetting) Proto ¶
func (s UserWebSetting) Proto() *userv1.UserWebSetting
Proto returns the protobuf representation.
type WorkloadManagerType ¶
type WorkloadManagerType string
WorkloadManagerType indicates which type of workloads the harness should prepare to receive.
type WorkloadSequencerType ¶
type WorkloadSequencerType string
WorkloadSequencerType is the type of sequencer that a trial actor should use.
type Workspace ¶
type Workspace struct { bun.BaseModel `bun:"table:workspaces"` ID int `bun:"id,pk,autoincrement"` Name string `bun:"name"` Archived bool `bun:"archived"` CreatedAt time.Time `bun:"created_at,scanonly"` UserID UserID `bun:"user_id"` Immutable bool `bun:"immutable"` State *WorkspaceState `bun:"state"` AgentUID *int32 `bun:"uid"` AgentUser *string `bun:"user_"` AgentGID *int32 `bun:"gid"` AgentGroup *string `bun:"group_"` CheckpointStorageConfig *expconf.CheckpointStorageConfig `bun:"checkpoint_storage_config"` DefaultComputePool string `bun:"default_compute_pool"` DefaultAuxPool string `bun:"default_aux_pool"` }
Workspace is the bun model of a workspace.
type WorkspaceNamespace ¶
type WorkspaceNamespace struct { bun.BaseModel `bun:"table:workspace_namespace_bindings"` WorkspaceID int `bun:"workspace_id"` Namespace string `bun:"namespace"` ClusterName string `bun:"cluster_name"` AutoCreateNamespace bool `bun:"auto_create_namespace"` }
WorkspaceNamespace is the bun model of a workspace-namespace binding.
func (WorkspaceNamespace) ToProto ¶
func (wn WorkspaceNamespace) ToProto() *workspacev1.WorkspaceNamespaceBinding
ToProto converts a WorkspaceNamespace to its protobuf representation.
type WorkspacePin ¶
type WorkspacePin struct { bun.BaseModel `bun:"table:workspace_pins"` WorkspaceID int `bun:"workspace_id"` UserID UserID `bun:"user_id"` }
WorkspacePin is the bun model of a workspace pin.
type WorkspaceState ¶
type WorkspaceState string
WorkspaceState is the state of the workspace with regard to being deleted.
const ( // WorkspaceStateDeleting constant. WorkspaceStateDeleting WorkspaceState = "DELETING" // WorkspaceStateDeleteFailed constant. WorkspaceStateDeleteFailed WorkspaceState = "DELETE_FAILED" // WorkspaceStateDeleted constant. WorkspaceStateDeleted WorkspaceState = "DELETED" )
func (*WorkspaceState) ToProto ¶
func (s *WorkspaceState) ToProto() workspacev1.WorkspaceState
ToProto converts a WorkspaceState to a proto workspacev1.WorkspaceState.
Source Files ¶
- agent.go
- agent_user_group.go
- auth_token_keypair.go
- command_config.go
- compat.go
- config_file.go
- defaults.go
- duration.go
- environment_config.go
- experiment.go
- experiment_config.go
- generic_task_config.go
- health.go
- instance.go
- job.go
- logging_config.go
- maintenance_message.go
- metrics.go
- model.go
- notebook_session.go
- oauth.go
- patch.go
- project.go
- rbac.go
- run.go
- saas.go
- scim.go
- scim_group.go
- scim_user.go
- searcher.go
- task.go
- task_container_defaults.go
- task_session.go
- template.go
- types.go
- user.go
- user_activity.go
- user_group.go
- workspace.go