model

package
v0.38.0-rc1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 28, 2024 License: Apache-2.0 Imports: 69 Imported by: 4

Documentation

Index

Constants

View Source
const (
	// NotebookIdleTypeKernelsOrTerminals indicates that a notebook should be considered active if any
	// kernels or terminals are open.
	NotebookIdleTypeKernelsOrTerminals = "kernels_or_terminals"
	// NotebookIdleTypeKernelConnections indicates that a notebook should be considered active if any
	// connections to kernels are open.
	NotebookIdleTypeKernelConnections = "kernel_connections"
	// NotebookIdleTypeActivity indicates that a notebook should be considered active if any kernel is
	// running a command or any terminal is inputting or outputting data.
	NotebookIdleTypeActivity = "activity"
)
View Source
const (
	// ActiveState constant.
	ActiveState State = "ACTIVE"
	// CanceledState constant.
	CanceledState State = "CANCELED"
	// CompletedState constant.
	CompletedState State = "COMPLETED"
	// ErrorState constant.
	ErrorState State = "ERROR"
	// PausedState constant.
	PausedState State = "PAUSED"
	// StoppingKilledState constant.
	StoppingKilledState State = "STOPPING_KILLED"
	// StoppingCanceledState constant.
	StoppingCanceledState State = "STOPPING_CANCELED"
	// StoppingCompletedState constant.
	StoppingCompletedState State = "STOPPING_COMPLETED"
	// StoppingErrorState constant.
	StoppingErrorState State = "STOPPING_ERROR"
	// DeletingState constant.
	DeletingState State = "DELETING"
	// DeleteFailedState constant.
	DeleteFailedState State = "DELETE_FAILED"
	// DeletedState constant.
	DeletedState State = "DELETED"
	// PartiallyDeletedState constant.
	PartiallyDeletedState State = "PARTIALLY_DELETED"
	// RunningState constant. Currently only used by unmanaged trials.
	RunningState State = "RUNNING"

	// TrialWorkloadSequencerType constant.
	TrialWorkloadSequencerType WorkloadSequencerType = "TRIAL_WORKLOAD_SEQUENCER"
)
View Source
const (
	// MinUserSchedulingPriority is the smallest priority users may specify.
	MinUserSchedulingPriority = 1
	// MaxUserSchedulingPriority is the largest priority users may specify.
	MaxUserSchedulingPriority = 99
)
View Source
const (
	// TaskTypeTrial is the "TRIAL" job type for the enum public.job_type in Postgres.
	TaskTypeTrial TaskType = "TRIAL"
	// TaskTypeNotebook is the "NOTEBOOK" job type for the enum public.job_type in Postgres.
	TaskTypeNotebook TaskType = "NOTEBOOK"
	// TaskTypeShell is the "SHELL" job type for the enum public.job_type in Postgres.
	TaskTypeShell TaskType = "SHELL"
	// TaskTypeCommand is the "COMMAND" job type for the enum public.job_type in Postgres.
	TaskTypeCommand TaskType = "COMMAND"
	// TaskTypeTensorboard is the "TENSORBOARD" task type for the enum.task_type in Postgres.
	TaskTypeTensorboard TaskType = "TENSORBOARD"
	// TaskTypeCheckpointGC is the "CHECKPOINT_GC" job type for the enum public.job_type in Postgres.
	TaskTypeCheckpointGC TaskType = "CHECKPOINT_GC"
	// TaskTypeGeneric is the "GENERIC" job type for the enum public.job_type in Postgres.
	TaskTypeGeneric TaskType = "GENERIC"
	// GlobalAccessScopeID represents global permission access.
	GlobalAccessScopeID AccessScopeID = 0
	// AggregationTypeQueued is the type of aggregation for queued tasks.
	AggregationTypeQueued = "queued"
)
View Source
const (

	// LogLevelTrace is the trace task log level.
	LogLevelTrace = tasklog.LogLevelTrace
	// LogLevelDebug is the debug task log level.
	LogLevelDebug = tasklog.LogLevelDebug
	// LogLevelInfo is the info task log level.
	LogLevelInfo = tasklog.LogLevelInfo
	// LogLevelWarning is the warn task log level.
	LogLevelWarning = tasklog.LogLevelWarning
	// LogLevelError is the error task log level.
	LogLevelError = tasklog.LogLevelError
	// LogLevelCritical is the critical task log level.
	LogLevelCritical = tasklog.LogLevelCritical
	// LogLevelUnspecified is the unspecified task log level.
	LogLevelUnspecified = tasklog.LogLevelUnspecified
)
View Source
const (
	// UnknownType constant.
	UnknownType string = "UNSPECIFIED"
	// ExperimentType constant.
	ExperimentType string = "EXPERIMENT"
	// NTSCType constant.
	NTSCType string = "NTSC"
)
View Source
const (
	// DefaultWorkspaceID is a special, always-existing, workspace titled "Uncategorized".
	DefaultWorkspaceID = 1
	// DefaultWorkspaceName is the default workspace name, which is always present, and always has ID 1.
	DefaultWorkspaceName = "Uncategorized"
	// DefaultProjectID is the default project ID for the default workspace.
	DefaultProjectID = 1
)
View Source
const BCryptCost = 15

BCryptCost is a stopgap until we implement sane master-configuration.

View Source
const DefaultPreemptionTimeout = 60 * 60

DefaultPreemptionTimeout is the number of seconds to wait for a preempted task to exit gracefully.

View Source
const (
	// DeterminedK8ContainerName is the name of the container that executes the task within Kubernetes
	// pods that are launched by Determined.
	DeterminedK8ContainerName = "determined-container"
)
View Source
const NotebookSessionEnvVar = "DET_NOTEBOOK_TOKEN"

NotebookSessionEnvVar is the environment variable name for notebook task tokens.

View Source
const (
	// RFC3339MicroTrailingZeroes unlike time.RFC3339Nano is a time format specifier that preserves
	// trailing zeroes.
	RFC3339MicroTrailingZeroes = "2006-01-02T15:04:05.000000Z07:00"
)
View Source
const (
	// StepsCompletedMetadataKey is the key within metadata to find steps completed now, if it exists.
	StepsCompletedMetadataKey = "steps_completed"
)

Variables

View Source
var (
	// EmptyPassword is the empty password (i.e., the empty string).
	EmptyPassword = null.NewString("", false)

	// NoPasswordLogin is a password that prevents the user from logging in
	// directly. They can still login via external authentication methods like
	// OAuth.
	NoPasswordLogin = null.NewString("", true)
)
View Source
var CheckpointReverseTransitions = reverseTransitions(CheckpointTransitions)

CheckpointReverseTransitions lists possible ancestor states.

View Source
// CheckpointTransitions maps each checkpoint state to the set of states it may move to next.
// A state with an empty inner map has no outgoing transitions.
var CheckpointTransitions = map[State]map[State]bool{
	// An active checkpoint may move to completed or to error.
	ActiveState: {
		CompletedState: true,
		ErrorState:     true,
	},
	// A completed checkpoint may only move to deleted.
	CompletedState: {
		DeletedState: true,
	},
	// Deleted and errored checkpoints cannot transition any further.
	DeletedState: {},
	ErrorState:   {},
}

CheckpointTransitions maps checkpoint states to their possible transitions.

DeletingStates are the valid deleting states.

View Source
var ExperimentReverseTransitions = reverseTransitions(ExperimentTransitions)

ExperimentReverseTransitions lists possible ancestor states.

ExperimentTransitions maps experiment states to their possible transitions.

ManualStates are the states the user can set an experiment to.

View Source
// NonTerminalStates collects every state that appears as a key in ExperimentTransitions and is
// neither a terminal state nor a deleting state. The slice is built once at package
// initialization; because it is produced by ranging over a map, its element order is unspecified.
var NonTerminalStates = func() []State {
	var nonTerminal []State
	for candidate := range ExperimentTransitions {
		// Skip anything already finished or on its way to being deleted.
		if TerminalStates[candidate] || DeletingStates[candidate] {
			continue
		}
		nonTerminal = append(nonTerminal, candidate)
	}
	return nonTerminal
}()

NonTerminalStates are the states in which an experiment can be canceled or killed.

View Source
var ProfilingMetricGroups = []MetricGroup{
	"gpu", "cpu", "memory", "disk", "network",
}

ProfilingMetricGroups designates metrics from profiling runs.

View Source
var RunningStates = map[State]bool{
	ActiveState: true,
	PausedState: true,
}

RunningStates are the valid running states.

View Source
var StepReverseTransitions = reverseTransitions(StepTransitions)

StepReverseTransitions lists possible ancestor states.

View Source
// StepTransitions maps each step/validation state to the set of states it may move to next.
// A state with an empty inner map has no outgoing transitions.
var StepTransitions = map[State]map[State]bool{
	// An active step may move to completed or to error.
	ActiveState: {
		CompletedState: true,
		ErrorState:     true,
	},
	// Completed and errored steps cannot transition any further.
	CompletedState: {},
	ErrorState:     {},
}

StepTransitions maps step and validation states to their possible transitions.

StoppingStates are the valid stopping states.

StoppingToTerminalStates maps from stopping states to the corresponding terminal states.

TerminalStates are the valid terminal states.

View Source
var TrialReverseTransitions = reverseTransitions(TrialTransitions)

TrialReverseTransitions lists possible ancestor states.

TrialTransitions maps trial states to their possible transitions. Trials are mostly the same as experiments, except that immediate exits through ErrorState are allowed, since a trial can die immediately and let the RM clean it up.

Functions

func FmtInstances

func FmtInstances(instances []*Instance) string

FmtInstances formats instance ids and states to print.

func HashPassword

func HashPassword(password string) (string, error)

HashPassword hashes the user's password.

func MostProgressedExperimentState

func MostProgressedExperimentState(
	state1 experimentv1.State, state2 experimentv1.State,
) experimentv1.State

MostProgressedExperimentState returns the more advanced active state based on experimentStateIndex (Queued -> Pulling -> Starting -> Running).

func ProjectsToProto

func ProjectsToProto(ps []*Project) []*projectv1.Project

ProjectsToProto converts a slice of projects to its protobuf representation.

func SortableSlotIndex

func SortableSlotIndex(i int) string

SortableSlotIndex returns a slot index that will sort the way you want it to.

This is a hack to fix a bug seen by the webui. The webui displays a list of slots and whether they are filled, so it expects the order in which slots are filled to be consistent. In Kubernetes this is an illusion; we don't know what slot is running what job. Our API returns a map of slot IDs to slots. This map gets parsed and displayed in the frontend lexicographically. Just using plain indexes breaks when there are more than 10 GPUs per agent, since the order will go 1,10,11 instead of 1,2,3,4.

To fix this on our side, we just pad the numbers with 0s so they sort correctly in the response.

func StateToProto

func StateToProto(state State) experimentv1.State

StateToProto maps State to experimentv1.State.

func StatesToStrings

func StatesToStrings(inStates map[State]bool) []string

StatesToStrings converts a State map to a list of strings for db queries.

func SummarizeSlots

func SummarizeSlots(slots map[string]*agentv1.Slot) *agentv1.SlotStats

SummarizeSlots summarizes a set of slots.

func TaskLogLevelFromLogrus

func TaskLogLevelFromLogrus(l logrus.Level) string

TaskLogLevelFromLogrus returns an equivalent task log level from a logrus level.

func TaskLogLevelFromProto

func TaskLogLevelFromProto(l logv1.LogLevel) string

TaskLogLevelFromProto returns a task log level from its protobuf repr.

func TaskLogLevelToProto

func TaskLogLevelToProto(l string) logv1.LogLevel

TaskLogLevelToProto returns a protobuf task log level from its string repr.

func TrialMetricsJSONPath

func TrialMetricsJSONPath(isValidation bool) string

TrialMetricsJSONPath returns the legacy JSON path to the metrics field in the metrics table.

func TrialSummaryMetricsJSONPath

func TrialSummaryMetricsJSONPath(metricGroup MetricGroup) string

TrialSummaryMetricsJSONPath returns the JSON path to the trials metric summary.

func UsingCustomImage

func UsingCustomImage(req *apiv1.LaunchTensorboardRequest) bool

UsingCustomImage checks for an image argument in the request. It is currently only used for TensorBoard. The error is ignored because an unexpected parsing error is treated as not using a custom image.

func ValidatePrioritySetting

func ValidatePrioritySetting(priority *int) []error

ValidatePrioritySetting checks that priority if set is within a valid range.

Types

type AcceleratorData

type AcceleratorData struct {
	bun.BaseModel `bun:"table:allocation_accelerators"`

	ContainerID      string       `db:"container_id" bun:"container_id"`
	AllocationID     AllocationID `db:"allocation_id" bun:"allocation_id,notnull"`
	NodeName         string       `db:"node_name" bun:"node_name,notnull"`
	AcceleratorType  string       `db:"accelerator_type" bun:"accelerator_type,notnull"`
	AcceleratorUuids []string     `db:"accelerator_uuids" bun:"accelerator_uuids,array"`
	ID               *int         `db:"id" bun:"id,pk,autoincrement"`
}

AcceleratorData is the model for an allocation accelerator data in the database.

func (AcceleratorData) Proto

Proto returns the proto representation of the task state.

type AccessScopeID

type AccessScopeID int

AccessScopeID is an identifier for an access scope.

type AccessScopeSet

type AccessScopeSet = map[AccessScopeID]bool

AccessScopeSet is a set of access scopes.

type ActivityType

type ActivityType string

ActivityType describes a user activity.

const (
	// ActivityTypeGet represents a get request.
	ActivityTypeGet ActivityType = "GET"
)

type AgentStats

type AgentStats struct {
	ResourcePool string `db:"resource_pool"`
	AgentID      string `db:"agent_id"`
	Slots        int    `db:"slots"`
}

AgentStats stores the start/end status of instance.

type AgentSummary

type AgentSummary struct {
	ID             string       `json:"id"`
	RegisteredTime time.Time    `json:"registered_time"`
	Slots          SlotsSummary `json:"slots"`
	NumContainers  int          `json:"num_containers"`
	ResourcePool   []string     `json:"resource_pool"`
	Addresses      []string     `json:"addresses"`
	Enabled        bool         `json:"enabled"`
	Draining       bool         `json:"draining"`
	Version        string       `json:"version"`
}

AgentSummary summarizes the state on an agent.

func (AgentSummary) ToProto

func (a AgentSummary) ToProto() *agentv1.Agent

ToProto converts an agent summary to a proto struct.

type AgentUserGroup

type AgentUserGroup struct {
	bun.BaseModel `bun:"table:agent_user_groups"`

	ID int `db:"id" bun:"id,pk,autoincrement" json:"id"`

	UserID UserID `db:"user_id" json:"user_id"`

	// The User is the username on an agent host machine. This may be different
	// from the username of the user in the User database.
	User string `db:"user_" bun:"user_" json:"user"`
	UID  int    `db:"uid" json:"uid"`

	// The Group is the primary group of the user.
	Group string `db:"group_" bun:"group_" json:"group"`
	GID   int    `db:"gid" json:"gid"`
}

An AgentUserGroup represents a username and primary group for a user on an agent host machine. There is at most one AgentUserGroup for each User.

func AgentUserGroupFromProto

func AgentUserGroupFromProto(aug *userv1.AgentUserGroup) (*AgentUserGroup, error)

AgentUserGroupFromProto converts an agent user group from proto to model.

func (*AgentUserGroup) OwnArchive

func (c *AgentUserGroup) OwnArchive(oldArchive archive.Archive) archive.Archive

OwnArchive will return an archive.Archive modified to be owned by the AgentUserGroup, or unmodified if c is nil.

func (*AgentUserGroup) OwnedArchiveItem

func (c *AgentUserGroup) OwnedArchiveItem(
	path string, content []byte, mode int, fileType byte,
) archive.Item

OwnedArchiveItem will create an archive.Item owned by the AgentUserGroup, or by root if c is nil.

func (AgentUserGroup) Validate

func (c AgentUserGroup) Validate() []error

Validate validates the fields of the AgentUserGroup.

type AgentsSummary

type AgentsSummary map[string]AgentSummary

AgentsSummary is a map of agent IDs to a summary of the agent.

type Allocation

type Allocation struct {
	bun.BaseModel `bun:"table:allocations"`

	AllocationID AllocationID     `db:"allocation_id" bun:"allocation_id,pk"`
	TaskID       TaskID           `db:"task_id" bun:"task_id,notnull"`
	Slots        int              `db:"slots" bun:"slots,notnull"`
	ResourcePool string           `db:"resource_pool" bun:"resource_pool,notnull"`
	StartTime    *time.Time       `db:"start_time" bun:"start_time"`
	EndTime      *time.Time       `db:"end_time" bun:"end_time"`
	State        *AllocationState `db:"state" bun:"state"`
	IsReady      *bool            `db:"is_ready" bun:"is_ready"`
	Ports        map[string]int   `db:"ports" bun:"ports,notnull"`
	// ProxyAddress stores the explicitly provided task-provided proxy address for resource
	// managers that do not supply us with it. Comes from `determined.exec.prep_container --proxy`.
	ProxyAddress *string `db:"proxy_address" bun:"proxy_address"`
	ExitReason   *string `db:"exit_reason" bun:"exit_reason"`
	ExitErr      *string `db:"exit_error" bun:"exit_error"`
	StatusCode   *int32  `db:"status_code" bun:"status_code"`
}

Allocation is the model for an allocation in the database.

func (Allocation) Proto

func (a Allocation) Proto() *taskv1.Allocation

Proto returns the proto representation of the allocation state.

type AllocationID

type AllocationID string

AllocationID is the ID of an allocation of a task. It is usually of the form TaskID.allocation_number, maybe with some other metadata if different types of allocations run.

func NewAllocationID

func NewAllocationID(in *string) *AllocationID

NewAllocationID casts string ptr to AllocationID ptr.

func (AllocationID) GetAllocationSpecifier

func (a AllocationID) GetAllocationSpecifier() (int, error)

GetAllocationSpecifier retrieves number at the end of the allocation's id.

func (AllocationID) String

func (a AllocationID) String() string

func (AllocationID) ToTaskID

func (a AllocationID) ToTaskID() TaskID

ToTaskID converts an AllocationID to its taskID.

type AllocationSession

type AllocationSession struct {
	bun.BaseModel `bun:"table:allocation_sessions"`
	ID            SessionID    `db:"id" bun:"id,pk,autoincrement" json:"id"`
	AllocationID  AllocationID `db:"allocation_id" bun:"allocation_id" json:"allocation_id"`
	OwnerID       *UserID      `db:"owner_id" bun:"owner_id" json:"owner_id"`
}

AllocationSession corresponds to a row in the "allocation_sessions" DB table.

type AllocationState

type AllocationState string

AllocationState represents the current state of the task. Value indicates a partial ordering.

const (
	// AllocationStatePending state denotes that the command is awaiting allocation.
	AllocationStatePending AllocationState = "PENDING"
	// AllocationStateWaiting state denotes that the command is waiting on data.
	AllocationStateWaiting AllocationState = "WAITING"
	// AllocationStateAssigned state denotes that the command has been assigned to an agent but has
	// not started yet.
	AllocationStateAssigned AllocationState = "ASSIGNED"
	// AllocationStatePulling state denotes that the command's base image is being pulled from the
	// Docker registry.
	AllocationStatePulling AllocationState = "PULLING"
	// AllocationStateStarting state denotes that the image has been pulled and the task is being
	// started, but the task is not ready yet.
	AllocationStateStarting AllocationState = "STARTING"
	// AllocationStateRunning state denotes that the service in the command is running.
	AllocationStateRunning AllocationState = "RUNNING"
	// AllocationStateTerminated state denotes that the command has exited or has been aborted.
	AllocationStateTerminated AllocationState = "TERMINATED"
	// AllocationStateTerminating state denotes that the command is terminating.
	AllocationStateTerminating AllocationState = "TERMINATING"
)

func MostProgressedAllocationState

func MostProgressedAllocationState(states ...AllocationState) AllocationState

MostProgressedAllocationState returns the furthest progressed state. E.g., a call with PENDING, PULLING and STARTING returns STARTING.

func (*AllocationState) Proto

func (s *AllocationState) Proto() taskv1.State

Proto returns the proto representation of the task state.

type AllocationWorkspaceRecord

type AllocationWorkspaceRecord struct {
	bun.BaseModel `bun:"table:allocation_workspace_info"`
	AllocationID  AllocationID `db:"allocation_id" bun:"allocation_id,notnull"`
	ExperimentID  int          `db:"experiment_id" bun:"experiment_id"`
	WorkspaceID   int          `db:"workspace_id" bun:"workspace_id,notnull"`
	WorkspaceName string       `db:"workspace_name" bun:"workspace_name,notnull"`
}

AllocationWorkspaceRecord is the model for persisting the workspace and experiment information associated with an allocation.

type AuthTokenKeypair

type AuthTokenKeypair struct {
	bun.BaseModel `bun:"table:auth_token_keypair"`
	PublicKey     ed25519.PublicKey  `db:"public_key"`
	PrivateKey    ed25519.PrivateKey `db:"private_key"`
}

AuthTokenKeypair stores the public/private keypair used for asymmetric encryption of authentication tokens.

type BindMount

type BindMount struct {
	HostPath      string `json:"host_path"`
	ContainerPath string `json:"container_path"`
	ReadOnly      bool   `json:"read_only"`
	Propagation   string `json:"propagation"`
}

BindMount configures trial runner filesystem bind mounts.

func ToModelBindMount

func ToModelBindMount(b expconf.BindMount) BindMount

ToModelBindMount converts new expconf bind mounts into old model bind mounts.

func (BindMount) ToExpconf

func (b BindMount) ToExpconf() expconf.BindMount

ToExpconf translates old model objects into an expconf object.

func (*BindMount) UnmarshalJSON

func (b *BindMount) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

func (BindMount) Validate

func (b BindMount) Validate() []error

Validate implements the check.Validatable interface.

type BindMountsConfig

type BindMountsConfig []BindMount

BindMountsConfig is the configuration for bind mounts.

func (BindMountsConfig) ToExpconf

ToExpconf translates old model objects into an expconf object.

func (*BindMountsConfig) UnmarshalJSON

func (b *BindMountsConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type Checkpoint

type Checkpoint struct {
	bun.BaseModel `bun:"table:checkpoints_view"`
	ID            int `db:"id"`

	UUID         *uuid.UUID        `db:"uuid"`
	TaskID       *TaskID           `db:"task_id"`
	AllocationID *AllocationID     `db:"allocation_id"`
	ReportTime   time.Time         `db:"report_time"`
	State        State             `db:"state"`
	Resources    JSONObj           `db:"resources"`
	Metadata     JSONObj           `db:"metadata"`
	Size         int64             `db:"size"`
	StorageID    *StorageBackendID `db:"storage_id"`

	CheckpointTrainingMetadata
}

Checkpoint represents a row from the `checkpoints_view` view.

type CheckpointTrainingMetadata

type CheckpointTrainingMetadata struct {
	TrialID           int      `db:"trial_id"`
	ExperimentID      int      `db:"experiment_id"`
	ExperimentConfig  JSONObj  `db:"experiment_config"`
	HParams           JSONObj  `db:"hparams" bun:"hparams"`
	TrainingMetrics   JSONObj  `db:"training_metrics"`
	ValidationMetrics JSONObj  `db:"validation_metrics"`
	SearcherMetric    *float64 `db:"searcher_metric"`
	StepsCompleted    int      `db:"steps_completed"`
}

CheckpointTrainingMetadata is a substruct of checkpoints encapsulating training specific information.

type CheckpointV2

type CheckpointV2 struct {
	bun.BaseModel `bun:"table:checkpoints_v2"`
	ID            int                    `db:"id" bun:"id,pk,autoincrement"`
	UUID          uuid.UUID              `db:"uuid"`
	TaskID        TaskID                 `db:"task_id"`
	AllocationID  *AllocationID          `db:"allocation_id"`
	ReportTime    time.Time              `db:"report_time"`
	State         State                  `db:"state"`
	Resources     map[string]int64       `db:"resources"`
	Metadata      map[string]interface{} `db:"metadata"`
	Size          int64                  `db:"size"`
	// Can be nil for checkpoints older than this feature.
	// Also can be nil when a user creates a checkpoint without a checkpoint storage config.
	StorageID *StorageBackendID `db:"storage_id"`
}

CheckpointV2 represents a row from the `checkpoints_v2` table.

type ClusterID

type ClusterID string

ClusterID is a string intended specifically as a cluster ID.

type ClusterMessage

type ClusterMessage struct {
	CreatedBy   int
	Message     string
	StartTime   time.Time
	EndTime     sql.NullTime
	CreatedTime sql.NullTime
}

ClusterMessage represents a server status from the `cluster_messages` table.

func (*ClusterMessage) ToProto

func (m *ClusterMessage) ToProto() *apiv1.ClusterMessage

ToProto converts m to a type suitable for gRPC protobuf response.

type CommandConfig

type CommandConfig struct {
	Description      string              `json:"description"`
	BindMounts       BindMountsConfig    `json:"bind_mounts"`
	Environment      Environment         `json:"environment"`
	Resources        ResourcesConfig     `json:"resources"`
	Entrypoint       []string            `json:"entrypoint"`
	TensorBoardArgs  []string            `json:"tensorboard_args,omitempty"`
	IdleTimeout      *Duration           `json:"idle_timeout"`
	NotebookIdleType string              `json:"notebook_idle_type"`
	WorkDir          *string             `json:"work_dir"`
	Debug            bool                `json:"debug"`
	Pbs              expconf.PbsConfig   `json:"pbs,omitempty"`
	Slurm            expconf.SlurmConfig `json:"slurm,omitempty"`
}

CommandConfig holds the necessary configurations to launch a command task in the cluster.

func DefaultConfig

func DefaultConfig(taskContainerDefaults *TaskContainerDefaultsConfig) CommandConfig

DefaultConfig is the default configuration used by all commands (e.g., commands, notebooks, shells) if a request does not specify any configuration options.

func (*CommandConfig) Validate

func (c *CommandConfig) Validate() []error

Validate implements the check.Validatable interface.

type ConfigFile

type ConfigFile struct {
	ID      int    `db:"id" json:"id"`
	Content []byte `db:"content"`
}

ConfigFile represents a row from the `config_files` table.

type Constraints

type Constraints struct {
	ResourceConstraints *ResourceConstraints `json:"resources"`
	PriorityLimit       *int                 `json:"priority_limit"`
}

Constraints are non-overridable workload constraints. Submitted workloads whose config's respective field(s) exceed defined constraints within a given scope are rejected.

type DefaultLoggingConfig

type DefaultLoggingConfig struct{}

DefaultLoggingConfig configures logging for tasks using HTTP to the master.

type DeviceConfig

type DeviceConfig struct {
	HostPath      string `json:"host_path"`
	ContainerPath string `json:"container_path"`
	Mode          string `json:"mode"`
}

DeviceConfig configures container device access.

func (DeviceConfig) ToExpconf

func (d DeviceConfig) ToExpconf() expconf.Device

ToExpconf translates old model objects into an expconf object.

func (*DeviceConfig) UnmarshalJSON

func (d *DeviceConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type DevicesConfig

type DevicesConfig []DeviceConfig

DevicesConfig is the configuration for devices. It is a named type because it needs custom merging behavior (via UnmarshalJSON).

func (DevicesConfig) ToExpconf

func (d DevicesConfig) ToExpconf() expconf.DevicesConfig

ToExpconf translates old model objects into an expconf object.

func (*DevicesConfig) UnmarshalJSON

func (d *DevicesConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface so that DeviceConfigs are additive.

type Duration

type Duration time.Duration

Duration is a JSON (un)marshallable version of time.Duration.

func (Duration) MarshalJSON

func (d Duration) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (*Duration) UnmarshalJSON

func (d *Duration) UnmarshalJSON(b []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type ElasticLoggingConfig

type ElasticLoggingConfig struct {
	Host     string                `json:"host"`
	Port     int                   `json:"port"`
	Security ElasticSecurityConfig `json:"security"`
}

ElasticLoggingConfig configures logging for tasks using Elastic.

func (*ElasticLoggingConfig) Resolve

func (o *ElasticLoggingConfig) Resolve() error

Resolve resolves the configuration.

type ElasticSecurityConfig

type ElasticSecurityConfig struct {
	Username *string         `json:"username"`
	Password *string         `json:"password"`
	TLS      TLSClientConfig `json:"tls"`
}

ElasticSecurityConfig configures security-related options for the elastic logging backend.

func (*ElasticSecurityConfig) Resolve

func (o *ElasticSecurityConfig) Resolve() error

Resolve resolves the configuration.

func (ElasticSecurityConfig) Validate

func (o ElasticSecurityConfig) Validate() []error

Validate implements the check.Validatable interface.

type EntityType

type EntityType string

EntityType represents an entity.

const (
	// EntityTypeProject represents a project.
	EntityTypeProject EntityType = "Project"
)

type Environment

type Environment struct {
	Image                RuntimeItem      `json:"image"`
	EnvironmentVariables RuntimeItems     `json:"environment_variables,omitempty"`
	ProxyPorts           ProxyPortsConfig `json:"proxy_ports"`

	Ports          map[string]int       `json:"ports"`
	RegistryAuth   *registry.AuthConfig `json:"registry_auth,omitempty"`
	ForcePullImage bool                 `json:"force_pull_image"`
	PodSpec        *k8sV1.Pod           `json:"pod_spec"`

	AddCapabilities  []string `json:"add_capabilities"`
	DropCapabilities []string `json:"drop_capabilities"`
}

Environment configures the environment of a Determined command or experiment.

func DefaultEnvConfig

func DefaultEnvConfig(taskContainerDefaults *TaskContainerDefaultsConfig) Environment

DefaultEnvConfig returns the default environment configuration.

func (Environment) ToExpconf

func (e Environment) ToExpconf() expconf.EnvironmentConfig

ToExpconf translates old model objects into an expconf object.

func (Environment) Validate

func (e Environment) Validate() []error

Validate implements the check.Validatable interface.

type ExitedReason

type ExitedReason string

ExitedReason defines why a workload exited early.

const (
	// Errored signals the searcher that the workload errored out.
	Errored ExitedReason = "ERRORED"
	// UserRequestedStop signals the searcher that the user requested a cancelation, from code.
	UserRequestedStop ExitedReason = "USER_REQUESTED_STOP"
	// UserCanceled signals the searcher that the user requested a cancelation, from the CLI or UI.
	UserCanceled ExitedReason = "USER_CANCELED"
	// InvalidHP signals the searcher that the user raised an InvalidHP exception.
	InvalidHP ExitedReason = "INVALID_HP"
	// InitInvalidHP signals the searcher that the user raised an InvalidHP exception
	// in the trial init.
	InitInvalidHP ExitedReason = "INIT_INVALID_HP"
)

func ExitedReasonFromProto

func ExitedReasonFromProto(r trialv1.TrialEarlyExit_ExitedReason) ExitedReason

ExitedReasonFromProto returns an ExitedReason from its protobuf representation.

func (ExitedReason) ToSearcherProto

ToSearcherProto converts an ExitedReason to its protobuf representation for searcher purposes.

type Experiment

type Experiment struct {
	ID    int    `db:"id" bun:"id,pk"`
	JobID JobID  `db:"job_id"`
	State State  `db:"state"`
	Notes string `db:"notes"`

	// Offer a LegacyConfig rather than ExperimentConfig since most of the system is about querying
	// experiments which ran some time in the past, which is exactly what LegacyConfig is for.
	Config         expconf.LegacyConfig `db:"config"`
	OriginalConfig string               `db:"original_config"`

	StartTime            time.Time  `db:"start_time"`
	EndTime              *time.Time `db:"end_time"`
	ParentID             *int       `db:"parent_id"`
	Archived             bool       `db:"archived"`
	OwnerID              *UserID    `db:"owner_id"`
	Username             string     `db:"username"`
	ProjectID            int        `db:"project_id"`
	Unmanaged            bool       `db:"unmanaged"`
	ExternalExperimentID *string    `db:"external_experiment_id"`
	Progress             *float64
}

Experiment represents a row from the `experiments` table.

func ExperimentFromProto

func ExperimentFromProto(e *experimentv1.Experiment) (*Experiment, error)

ExperimentFromProto converts an experimentv1.Experiment to a model.Experiment.

func NewExperiment

func NewExperiment(
	config expconf.ExperimentConfig,
	originalConfig string,
	parentID *int,
	archived bool,
	projectID int,
	unmanaged bool,
) (*Experiment, error)

NewExperiment creates a new experiment struct in the paused state. Note that the experiment ID will not be set.

func (*Experiment) Transition

func (e *Experiment) Transition(state State) (bool, error)

Transition changes the state of the experiment to the new state. If the state was not modified, the first return value is false. If the state transition is illegal, an error is returned.

type ExtendedFloat64

type ExtendedFloat64 float64

ExtendedFloat64 handles serializing floats to JSON, including special cases for infinite values.

func (ExtendedFloat64) MarshalJSON

func (f ExtendedFloat64) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (*ExtendedFloat64) UnmarshalJSON

func (f *ExtendedFloat64) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type ExternalSessions

type ExternalSessions struct {
	LoginURI        string    `json:"login_uri"`
	LogoutURI       string    `json:"logout_uri"`
	InvalidationURI string    `json:"invalidation_uri"`
	JwtKey          string    `json:"jwt_key"`
	OrgID           OrgID     `json:"org_id"`
	ClusterID       ClusterID `json:"cluster_id"`
	Invalidations   *InvalidationMap
}

ExternalSessions provides an integration point for an external service to issue JWTs to control access to the cluster.

func (*ExternalSessions) Enabled

func (e *ExternalSessions) Enabled() bool

Enabled returns whether or not external sessions are enabled.

func (*ExternalSessions) StartInvalidationPoll

func (e *ExternalSessions) StartInvalidationPoll(cert *tls.Certificate)

StartInvalidationPoll polls for new invalidations every minute.

func (*ExternalSessions) Validate

func (e *ExternalSessions) Validate(claims *JWT) error

Validate returns an error if the provided JWT is invalidated.

type FullUser

type FullUser struct {
	ID          UserID      `db:"id" json:"id"`
	DisplayName null.String `db:"display_name" json:"display_name"`
	Username    string      `db:"username" json:"username"`
	Name        string      `db:"name" json:"name"`
	Admin       bool        `db:"admin" json:"admin"`
	Active      bool        `db:"active" json:"active"`
	ModifiedAt  time.Time   `db:"modified_at" json:"modified_at"`
	Remote      bool        `db:"remote" json:"remote"`
	LastAuthAt  *time.Time  `db:"last_auth_at" json:"last_auth_at"`

	AgentUID   null.Int    `db:"agent_uid" json:"agent_uid"`
	AgentGID   null.Int    `db:"agent_gid" json:"agent_gid"`
	AgentUser  null.String `db:"agent_user" json:"agent_user"`
	AgentGroup null.String `db:"agent_group" json:"agent_group"`
}

A FullUser is a User joined with any other user relations.

func (FullUser) ToUser

func (u FullUser) ToUser() User

ToUser converts a FullUser model to just a User model.

type GenericTaskConfig

type GenericTaskConfig struct {
	BindMounts  BindMountsConfig        `json:"bind_mounts"`
	Environment Environment             `json:"environment"`
	Resources   expconf.ResourcesConfig `json:"resources"`
	Entrypoint  []string                `json:"entrypoint"`
	WorkDir     *string                 `json:"work_dir"`
	Debug       bool                    `json:"debug"`

	Pbs               expconf.PbsConfig   `json:"pbs,omitempty"`
	Slurm             expconf.SlurmConfig `json:"slurm,omitempty"`
	PreemptionTimeout int                 `json:"preemption_timeout,omitempty"`
}

GenericTaskConfig like expconf or command config but for generic tasks.

func DefaultConfigGenericTaskConfig

func DefaultConfigGenericTaskConfig(
	taskContainerDefaults *TaskContainerDefaultsConfig,
) GenericTaskConfig

DefaultConfigGenericTaskConfig merges task container defaults into a default generic task config struct.

func (*GenericTaskConfig) Validate

func (c *GenericTaskConfig) Validate() []error

Validate implements the check.Validatable interface.

type Group

type Group struct {
	bun.BaseModel `bun:"table:groups,alias:groups"`

	ID      int    `bun:"id,pk,autoincrement" json:"id"`
	Name    string `bun:"group_name,notnull"  json:"name"`
	OwnerID UserID `bun:"user_id,nullzero"    json:"userId,omitempty"`
}

Group represents a user group as it's stored in the database.

func (*Group) Proto

func (g *Group) Proto() *groupv1.Group

Proto converts a group to its protobuf representation.

type GroupMembership

type GroupMembership struct {
	bun.BaseModel `bun:"table:user_group_membership"`

	UserID  UserID `bun:"user_id,notnull"`
	GroupID int    `bun:"group_id,notnull"`
}

GroupMembership represents a user's membership to a group as it's stored in the database.

type Groups

type Groups []Group

Groups is a slice of Group objects—primarily useful for its methods.

func (Groups) Proto

func (gs Groups) Proto() []*groupv1.Group

Proto converts Groups into its protobuf representation.

type HealthCheck

type HealthCheck struct {
	Status           HealthStatus            `json:"status"`
	Database         HealthStatus            `json:"database"`
	ResourceManagers []ResourceManagerHealth `json:"resource_managers"`
}

HealthCheck is the response to the health check request.

type HealthStatus

type HealthStatus string

HealthStatus is the up or down informational status.

const (
	// Healthy indicates passing the health check.
	Healthy HealthStatus = "up"
	// Unhealthy indicates failing the health check.
	Unhealthy HealthStatus = "down"
)

type Instance

type Instance struct {
	ID                  string
	LaunchTime          time.Time
	LastStateChangeTime time.Time
	AgentName           string
	State               InstanceState
}

Instance connects a provider's name for a compute resource to the Determined agent name.

func (Instance) Equals

func (inst Instance) Equals(other Instance) bool

Equals checks if this instance is the same resource as instance `other`.

func (Instance) String

func (inst Instance) String() string

type InstanceState

type InstanceState string

InstanceState is an enum type that describes an instance state.

const (
	// Unknown describes the instance state cannot be recognized.
	Unknown InstanceState = "Unknown"
	// Starting describes the instance is starting up.
	Starting InstanceState = "Starting"
	// Running describes the instance is running.
	Running InstanceState = "Running"
	// Stopping describes the instance is stopping.
	Stopping InstanceState = "Stopping"
	// Stopped describes the instance is stopped.
	Stopped InstanceState = "Stopped"
	// Terminating is when the instance is in the process of being terminated.
	Terminating InstanceState = "Terminating"
	// SpotRequestPendingAWS indicates that the instance is actually a pending AWS spot request.
	SpotRequestPendingAWS InstanceState = "SpotRequestPendingAWS"
)

type InstanceStats

type InstanceStats struct {
	ResourcePool string `db:"resource_pool"`
	InstanceID   string `db:"instance_id"`
	Slots        int    `db:"slots"`
}

InstanceStats stores the start/end status of an instance.

type InstanceType

type InstanceType interface {
	Name() string
	Slots() int
}

InstanceType describes an instance type.

type InvalidationMap

type InvalidationMap struct {
	DefaultTime       time.Time                       `json:"defaultTime"`
	LastUpdated       time.Time                       `json:"lastUpdated"`
	InvalidationTimes map[string]map[string]time.Time `json:"invalidationTimes"`
}

InvalidationMap tracks times before which users should be considered invalid.

func (*InvalidationMap) GetInvalidatonTime

func (im *InvalidationMap) GetInvalidatonTime(id string) time.Time

GetInvalidatonTime returns the token invalidation time for the specified user.

type JSONObj

type JSONObj map[string]interface{}

JSONObj is a JSON object that converts to a []byte in SQL queries.

func (*JSONObj) Scan

func (j *JSONObj) Scan(src interface{}) error

Scan unmarshals JSON in []byte to map[string]interface{}.

func (JSONObj) Value

func (j JSONObj) Value() (driver.Value, error)

Value marshals the JSONObj to a []byte.

type JWT

type JWT struct {
	jwt.StandardClaims
	UserID   string // SaaS user IDs are strings, unlike Determined's int-based type
	Email    string
	Name     string
	OrgRoles map[OrgID]OrgRoleClaims
}

JWT defines the claims that are serialized and signed to make a bearer token.

type Job

type Job struct {
	bun.BaseModel `bun:"table:jobs"`

	JobID   JobID           `db:"job_id" bun:"job_id,pk"`
	JobType JobType         `db:"job_type" bun:"job_type"`
	OwnerID *UserID         `db:"owner_id" bun:"owner_id"`
	QPos    decimal.Decimal `db:"q_position" bun:"q_position"`
}

Job is the model for a job in the database.

type JobID

type JobID string

JobID is the unique ID of a job among all jobs.

func NewJobID

func NewJobID() JobID

NewJobID returns a random, globally unique job ID.

func (JobID) String

func (id JobID) String() string

String represents the job ID as a string.

type JobType

type JobType string

JobType is the type of a job.

const (
	// JobTypeNotebook is the "NOTEBOOK" job type for the enum public.job_type in Postgres.
	JobTypeNotebook JobType = "NOTEBOOK"
	// JobTypeShell is the "SHELL" job type for the enum public.job_type in Postgres.
	JobTypeShell JobType = "SHELL"
	// JobTypeCommand is the "COMMAND" job type for the enum public.job_type in Postgres.
	JobTypeCommand JobType = "COMMAND"
	// JobTypeTensorboard is the "TENSORBOARD" job type for the enum public.job_type in Postgres.
	JobTypeTensorboard JobType = "TENSORBOARD"
	// JobTypeExperiment is the "EXPERIMENT" job type for the enum public.job_type in Postgres.
	JobTypeExperiment JobType = "EXPERIMENT"
	// JobTypeCheckpointGC is the "CHECKPOINT_GC" job type for the enum public.job_type in Postgres.
	JobTypeCheckpointGC JobType = "CHECKPOINT_GC"
	// JobTypeGeneric is the "GENERIC" job type for the enum public.job_type in Postgres.
	JobTypeGeneric JobType = "GENERIC"
)

func JobTypeFromProto

func JobTypeFromProto(t jobv1.Type) JobType

JobTypeFromProto maps a jobv1.Type to JobType.

func (JobType) Proto

func (jt JobType) Proto() jobv1.Type

Proto returns the proto representation of the job type.

type KubernetesTaskContainerDefaults

type KubernetesTaskContainerDefaults struct {
	MaxSlotsPerPod *int `json:"max_slots_per_pod"`
}

KubernetesTaskContainerDefaults is task container defaults specific to Kubernetes.

type LogRetentionPolicy

type LogRetentionPolicy struct {
	// LogRetentionDays is the default number of days to retain logs for.
	LogRetentionDays *int16 `json:"log_retention_days"`
	// Schedule is a time duration or cron expression interval to cleanup logs.
	Schedule *string `json:"schedule"`
}

LogRetentionPolicy configures the default log retention policy for trials and tasks.

func (LogRetentionPolicy) Validate

func (p LogRetentionPolicy) Validate() []error

Validate implements the check.Validatable interface.

type LoggingConfig

type LoggingConfig struct {
	DefaultLoggingConfig *DefaultLoggingConfig `union:"type,default" json:"-"`
	ElasticLoggingConfig *ElasticLoggingConfig `union:"type,elastic" json:"-"`
}

LoggingConfig configures logging for tasks (currently only trials) in Determined.

func (LoggingConfig) MarshalJSON

func (c LoggingConfig) MarshalJSON() ([]byte, error)

MarshalJSON serializes LoggingConfig.

func (LoggingConfig) Resolve

func (c LoggingConfig) Resolve() error

Resolve resolves the parts of the LoggingConfig that must be evaluated on the master machine.

func (*LoggingConfig) UnmarshalJSON

func (c *LoggingConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON deserializes LoggingConfig.

type MetricGroup

type MetricGroup string

MetricGroup denotes what custom group the metric is.

const (
	// ValidationMetricGroup designates metrics from validation runs.
	ValidationMetricGroup MetricGroup = "validation"
	// TrainingMetricGroup designates metrics from training runs.
	TrainingMetricGroup MetricGroup = "training"
	// InferenceMetricGroup designates metrics from inference runs.
	InferenceMetricGroup MetricGroup = "inference"
)

func TrialSummaryMetricGroup

func TrialSummaryMetricGroup(jsonPath string) MetricGroup

TrialSummaryMetricGroup returns the metric group for the given summary JSON path.

func (MetricGroup) ToProto

func (t MetricGroup) ToProto() apiv1.MetricType

ToProto returns the proto representation of the metric group.

func (MetricGroup) ToString

func (t MetricGroup) ToString() string

ToString returns the string representation of the metric group.

func (MetricGroup) Validate

func (t MetricGroup) Validate() error

Validate validates the metric group.

type MetricIdentifier

type MetricIdentifier struct {
	Group MetricGroup
	Name  metricName
}

MetricIdentifier packages metric group and name together.

func DeserializeMetricIdentifier

func DeserializeMetricIdentifier(s string) (*MetricIdentifier, error)

DeserializeMetricIdentifier deserializes a metric identifier from a string.

func (MetricIdentifier) ToProto

ToProto returns the proto representation of the metric identifier.

type NotebookSession

type NotebookSession struct {
	bun.BaseModel `bun:"table:notebook_sessions"`
	ID            SessionID `db:"id" bun:"id,pk,autoincrement" json:"id"`
	TaskID        TaskID    `db:"task_id" bun:"task_id" json:"task_id"`
	// SessionID is only used for notebooks launched before UserID column was added.
	SessionID *SessionID `bun:"-" json:"user_session_id"`
	UserID    UserID     `db:"user_id" bun:"user_id" json:"user_id"`
}

NotebookSession corresponds to a row in the "notebook_sessions" DB table.

type OAuthClient

type OAuthClient struct {
	ID     string `db:"id" json:"id"`
	Secret string `db:"secret" json:"secret"`
	Domain string `db:"domain" json:"domain"`
	Name   string `db:"name" json:"name"`
}

OAuthClient represents one OAuth client application.

func (OAuthClient) GetDomain

func (c OAuthClient) GetDomain() string

GetDomain implements the oauth2.ClientInfo interface.

func (OAuthClient) GetID

func (c OAuthClient) GetID() string

GetID implements the oauth2.ClientInfo interface.

func (OAuthClient) GetSecret

func (c OAuthClient) GetSecret() string

GetSecret implements the oauth2.ClientInfo interface.

func (OAuthClient) GetUserID

func (c OAuthClient) GetUserID() string

GetUserID implements the oauth2.ClientInfo interface.

type OAuthToken

type OAuthToken struct {
	Access           string        `db:"access" json:"access"`
	AccessCreateAt   time.Time     `db:"access_create_at" json:"access_create_at"`
	AccessExpiresIn  time.Duration `db:"access_expires_in" json:"access_expires_in"`
	ClientID         string        `db:"client_id" json:"client_id"`
	Code             string        `db:"code" json:"code"`
	CodeCreateAt     time.Time     `db:"code_create_at" json:"code_create_at"`
	CodeExpiresIn    time.Duration `db:"code_expires_in" json:"code_expires_in"`
	RedirectURI      string        `db:"redirect_uri" json:"redirect_uri"`
	Refresh          string        `db:"refresh" json:"refresh"`
	RefreshCreateAt  time.Time     `db:"refresh_create_at" json:"refresh_create_at"`
	RefreshExpiresIn time.Duration `db:"refresh_expires_in" json:"refresh_expires_in"`
	Scope            string        `db:"scope" json:"scope"`
	UserID           string        `db:"user_id" json:"user_id"`

	ID int `db:"id" json:"id"`
}

OAuthToken represents an OAuth token.

func (*OAuthToken) GetAccess

func (t *OAuthToken) GetAccess() string

GetAccess access Token.

func (*OAuthToken) GetAccessCreateAt

func (t *OAuthToken) GetAccessCreateAt() time.Time

GetAccessCreateAt create Time.

func (*OAuthToken) GetAccessExpiresIn

func (t *OAuthToken) GetAccessExpiresIn() time.Duration

GetAccessExpiresIn the lifetime in seconds of the access token.

func (*OAuthToken) GetClientID

func (t *OAuthToken) GetClientID() string

GetClientID the client id.

func (*OAuthToken) GetCode

func (t *OAuthToken) GetCode() string

GetCode authorization code.

func (*OAuthToken) GetCodeCreateAt

func (t *OAuthToken) GetCodeCreateAt() time.Time

GetCodeCreateAt create Time.

func (*OAuthToken) GetCodeExpiresIn

func (t *OAuthToken) GetCodeExpiresIn() time.Duration

GetCodeExpiresIn the lifetime in seconds of the authorization code.

func (*OAuthToken) GetRedirectURI

func (t *OAuthToken) GetRedirectURI() string

GetRedirectURI redirect URI.

func (*OAuthToken) GetRefresh

func (t *OAuthToken) GetRefresh() string

GetRefresh refresh Token.

func (*OAuthToken) GetRefreshCreateAt

func (t *OAuthToken) GetRefreshCreateAt() time.Time

GetRefreshCreateAt create Time.

func (*OAuthToken) GetRefreshExpiresIn

func (t *OAuthToken) GetRefreshExpiresIn() time.Duration

GetRefreshExpiresIn the lifetime in seconds of the refresh token.

func (*OAuthToken) GetScope

func (t *OAuthToken) GetScope() string

GetScope get scope of authorization.

func (*OAuthToken) GetUserID

func (t *OAuthToken) GetUserID() string

GetUserID the user id.

func (*OAuthToken) New

func (t *OAuthToken) New() oauth2.TokenInfo

New creates a new token model instance.

func (*OAuthToken) SetAccess

func (t *OAuthToken) SetAccess(access string)

SetAccess access Token.

func (*OAuthToken) SetAccessCreateAt

func (t *OAuthToken) SetAccessCreateAt(createAt time.Time)

SetAccessCreateAt create Time.

func (*OAuthToken) SetAccessExpiresIn

func (t *OAuthToken) SetAccessExpiresIn(exp time.Duration)

SetAccessExpiresIn the lifetime in seconds of the access token.

func (*OAuthToken) SetClientID

func (t *OAuthToken) SetClientID(clientID string)

SetClientID the client id.

func (*OAuthToken) SetCode

func (t *OAuthToken) SetCode(code string)

SetCode authorization code.

func (*OAuthToken) SetCodeCreateAt

func (t *OAuthToken) SetCodeCreateAt(createAt time.Time)

SetCodeCreateAt create Time.

func (*OAuthToken) SetCodeExpiresIn

func (t *OAuthToken) SetCodeExpiresIn(exp time.Duration)

SetCodeExpiresIn the lifetime in seconds of the authorization code.

func (*OAuthToken) SetRedirectURI

func (t *OAuthToken) SetRedirectURI(redirectURI string)

SetRedirectURI redirect URI.

func (*OAuthToken) SetRefresh

func (t *OAuthToken) SetRefresh(refresh string)

SetRefresh refresh Token.

func (*OAuthToken) SetRefreshCreateAt

func (t *OAuthToken) SetRefreshCreateAt(createAt time.Time)

SetRefreshCreateAt create Time.

func (*OAuthToken) SetRefreshExpiresIn

func (t *OAuthToken) SetRefreshExpiresIn(exp time.Duration)

SetRefreshExpiresIn the lifetime in seconds of the refresh token.

func (*OAuthToken) SetScope

func (t *OAuthToken) SetScope(scope string)

SetScope sets the scope of authorization.

func (*OAuthToken) SetUserID

func (t *OAuthToken) SetUserID(userID string)

SetUserID the user id.

type OrgID

type OrgID string

OrgID is a string intended specifically as an organization ID.

type OrgRoleClaims

type OrgRoleClaims struct {
	Role               Role
	DefaultClusterRole Role
	ClusterRoles       map[ClusterID]Role
	ClusterActivations map[ClusterID]bool
}

OrgRoleClaims is the specification of all permissions a user has in a given org.

type PatchOperation

type PatchOperation struct {
	// Op is one of add, remove, replace, move, copy, test.
	Op string `json:"op"`

	// Path is the field to update.
	Path string `json:"path"`

	// Value is the new value.
	Value json.RawMessage `json:"value"`
}

A PatchOperation is an RFC 6902 JSON Patch operation.

https://tools.ietf.org/html/rfc6902

type PatchRequest

type PatchRequest struct {
	Schemas    PatchSchemas     `json:"schemas"`
	Operations []PatchOperation `json:"operations"`
}

A PatchRequest is a SCIM patch request.

type PatchSchemas

type PatchSchemas struct{}

PatchSchemas is a constant schemas field for a patch.

func (PatchSchemas) MarshalJSON

func (s PatchSchemas) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*PatchSchemas) UnmarshalJSON

func (s *PatchSchemas) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler.

type Project

type Project struct {
	bun.BaseModel           `bun:"table:projects"`
	ID                      int               `bun:"id,pk,autoincrement"`
	Name                    string            `bun:"name"`
	CreatedAt               time.Time         `bun:"created_at,scanonly"`
	Archived                bool              `bun:"archived"`
	WorkspaceID             int               `bun:"workspace_id"`
	WorkspaceName           string            `bun:"workspace_name,scanonly"`
	UserID                  int               `bun:"user_id"`
	Username                string            `bun:"username,scanonly"`
	Immutable               bool              `bun:"immutable"`
	Description             string            `bun:"description"`
	Notes                   []*projectv1.Note `bun:"notes,type:jsonb,nullzero"`
	NumActiveExperiments    int32             `bun:"num_active_experiments,scanonly"`
	NumExperiments          int32             `bun:"num_experiments,scanonly"`
	NumRuns                 int32             `bun:"num_runs,scanonly"`
	State                   WorkspaceState    `bun:"state,default:'UNSPECIFIED'::workspace_state"`
	ErrorMessage            string            `bun:"error_message"`
	LastExperimentStartedAt time.Time         `bun:"last_experiment_started_at,scanonly"`
	Key                     string            `bun:"key"`
}

Project is the bun model of a project.

func (Project) Proto

func (p Project) Proto() *projectv1.Project

Proto converts a bun model of a project to a proto object.

type ProjectHparam

type ProjectHparam struct {
	bun.BaseModel `bun:"table:project_hparams"`

	ProjectID int    `db:"project_id" bun:"project_id"`
	HParam    string `bun:"hparam"`
	Type      string `bun:"type"`
}

ProjectHparam represents a row from the `project_hparams` table.

type Projects

type Projects []*Project

Projects is an array of project instances.

type ProxyPort

type ProxyPort struct {
	ProxyPort        int  `json:"proxy_port"`
	ProxyTCP         bool `json:"proxy_tcp"`
	Unauthenticated  bool `json:"unauthenticated"`
	DefaultServiceID bool `json:"default_service_id"`
}

ProxyPort is a legacy-style clone of expconf.ProxyPort. TODO(ilia): migrate command config to expconf.

func (ProxyPort) ToExpconf

func (p ProxyPort) ToExpconf() expconf.ProxyPort

ToExpconf translates old model objects into an expconf object.

type ProxyPortsConfig

type ProxyPortsConfig []ProxyPort

ProxyPortsConfig is a legacy-style clone of expconf.ProxyPortsConfig.

func (ProxyPortsConfig) ToExpconf

ToExpconf translates old model objects into an expconf object.

type RequestID

type RequestID uuid.UUID

RequestID links all operations with the same ID to a single trial create request.

func NewRequestID

func NewRequestID(r io.Reader) RequestID

NewRequestID returns a new request ID using the provided reader.

func (RequestID) Before

func (r RequestID) Before(s RequestID) bool

Before determines whether this UUID is strictly lexicographically less (comparing the sequences of bytes) than another one.

func (RequestID) MarshalText

func (r RequestID) MarshalText() ([]byte, error)

MarshalText returns the marshaled form of this ID, which is the string form of the underlying UUID.

func (*RequestID) Scan

func (r *RequestID) Scan(value interface{}) error

Scan implements the sql.Scanner interface.

func (RequestID) String

func (r RequestID) String() string

func (*RequestID) UnmarshalText

func (r *RequestID) UnmarshalText(data []byte) error

UnmarshalText unmarshals this ID from a text representation.

func (RequestID) Value

func (r RequestID) Value() (driver.Value, error)

Value implements the driver.Valuer interface.

type ResourceAggregates

type ResourceAggregates struct {
	Date *time.Time
	// AggregationType is the type of aggregation. E.g. "total", "queued", "resource_pool", "username"
	AggregationType string
	AggregationKey  string
	Seconds         float32
}

ResourceAggregates is the model for resource_aggregates in the database.

type ResourceConstraints

type ResourceConstraints struct {
	MaxSlots *int `json:"max_slots"`
}

ResourceConstraints are non-overridable resource constraints. Submitted workloads that request resource quantities exceeding defined resource constraints in a given scope are rejected.

type ResourceManagerHealth

type ResourceManagerHealth struct {
	ClusterName string       `json:"cluster_name"`
	Status      HealthStatus `json:"status"`
}

ResourceManagerHealth is a pair of resource manager name and health status.

type ResourcesConfig

type ResourcesConfig struct {
	Slots int `json:"slots"`

	MaxSlots       *int         `json:"max_slots,omitempty"`
	Weight         float64      `json:"weight"`
	NativeParallel bool         `json:"native_parallel,omitempty"`
	ShmSize        *StorageSize `json:"shm_size,omitempty"`
	ResourcePool   string       `json:"resource_pool"`
	Priority       *int         `json:"priority,omitempty"`
	IsSingleNode   *bool        `json:"is_single_node"`

	Devices DevicesConfig `json:"devices"`
}

ResourcesConfig configures resource usage for a command, notebook, tensorboard, generic task, or old experiment (new experiment uses ResourcesConfigV0).

func DefaultResourcesConfig

func DefaultResourcesConfig(taskContainerDefaults *TaskContainerDefaultsConfig) ResourcesConfig

DefaultResourcesConfig returns the default resources configuration.

func ParseJustResources

func ParseJustResources(configBytes []byte) ResourcesConfig

ParseJustResources is a helper function for breaking the circular dependency where we need the TaskContainerDefaults to unmarshal an ExperimentConfig, but we need the Resources.ResourcePool setting to know which TaskContainerDefaults to use. It does not throw errors; if unmarshalling fails that can just get caught later.

func (ResourcesConfig) ToExpconf

func (r ResourcesConfig) ToExpconf() expconf.ResourcesConfig

ToExpconf translates old model objects into an expconf object.

func (ResourcesConfig) Validate

func (r ResourcesConfig) Validate() []error

Validate implements the check.Validatable interface.

type Role

type Role string

Role is a string intended specifically as an access level.

const (
	// NoRole implies previous access has been revoked.
	NoRole Role = "none"

	// UserRole implies normal worker access.
	UserRole Role = "user"

	// AdminRole implies management / administrative access.
	AdminRole Role = "admin"
)

type RoleAssignmentScope

type RoleAssignmentScope struct {
	bun.BaseModel `bun:"table:role_assignment_scopes"`

	ID          int           `bun:"id,pk,autoincrement" json:"id"`
	WorkspaceID sql.NullInt32 `bun:"scope_workspace_id"  json:"workspace_id"`
}

RoleAssignmentScope represents a RoleAssignmentScope as it's stored in the database.

type Run

type Run struct {
	bun.BaseModel `bun:"table:runs"`

	ID                    int            `db:"id" bun:",pk,autoincrement"`
	ProjectID             int            `db:"project_id" bun:"project_id"`
	ExperimentID          int            `db:"experiment_id"`
	State                 State          `db:"state"`
	StartTime             time.Time      `db:"start_time"`
	EndTime               *time.Time     `db:"end_time"`
	HParams               map[string]any `db:"hparams" bun:"hparams"`
	WarmStartCheckpointID *int           `db:"warm_start_checkpoint_id"`
	TotalBatches          int            `db:"total_batches"`
	ExternalRunID         *string        `db:"external_run_id"`
	RestartID             int            `db:"restart_id"`
	Restarts              int            `db:"restarts"`
	RunnerState           string         `db:"runner_state"`
	LastActivity          *time.Time     `db:"last_activity"`
	LogRetentionDays      *int16         `db:"log_retention_days"`
	Metadata              map[string]any `db:"metadata" bun:"metadata,scanonly"`
	LocalID               int            `db:"local_id"`
}

Run represents a row from the `runs` table.

type RunCheckpoints

type RunCheckpoints struct {
	bun.BaseModel `bun:"table:run_checkpoints"`
	RunID         int       `bun:"run_id"`
	CheckpointID  uuid.UUID `bun:"checkpoint_id"`
}

RunCheckpoints represents a row from the `run_checkpoints` table.

type RunHparam

type RunHparam struct {
	bun.BaseModel `bun:"table:run_hparams"`

	RunID     int      `bun:"run_id"`
	HParam    string   `bun:"hparam"`
	NumberVal *float64 `bun:"number_val"`
	TextVal   *string  `bun:"text_val"`
	BoolVal   *bool    `bun:"bool_val"`
}

RunHparam represents a row from the `run_hparams` table.

type RunMetadata

type RunMetadata struct {
	bun.BaseModel `bun:"table:runs_metadata"`
	RunID         int
	Metadata      map[string]interface{}
}

RunMetadata is the bun model of a runMetadata entry.

type RunMetadataIndex

type RunMetadataIndex struct {
	bun.BaseModel  `bun:"table:runs_metadata_index"`
	ID             int      `bun:"id,pk,autoincrement"`
	RunID          int      `bun:"run_id"`
	FlatKey        string   `bun:"flat_key"`
	StringValue    *string  `bun:"string_value"`
	IntegerValue   *int     `bun:"integer_value"`
	FloatValue     *float64 `bun:"float_value"`
	BooleanValue   *bool    `bun:"boolean_value"`
	TimestampValue *string  `bun:"timestamp_value"`
	ProjectID      int      `bun:"project_id"`
	IsArrayElement bool     `bun:"is_array_element"`
}

RunMetadataIndex is the bun model of a runMetadataIndex entry.

type RunTaskID

type RunTaskID struct {
	bun.BaseModel `bun:"table:run_id_task_id"`

	RunID  int
	TaskID TaskID
}

RunTaskID represents a row from the `run_id_task_id` table.

type RuntimeItem

type RuntimeItem struct {
	CPU  string `json:"cpu,omitempty"`
	CUDA string `json:"cuda,omitempty"`
	ROCM string `json:"rocm,omitempty"`
}

RuntimeItem configures the runtime image.

func (RuntimeItem) For

func (r RuntimeItem) For(deviceType device.Type) string

For returns the value for the provided device type.

func (RuntimeItem) ToExpconf

func (r RuntimeItem) ToExpconf() expconf.EnvironmentImageMap

ToExpconf translates old model objects into an expconf object.

func (*RuntimeItem) UnmarshalJSON

func (r *RuntimeItem) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type RuntimeItems

type RuntimeItems struct {
	CPU  []string `json:"cpu,omitempty"`
	CUDA []string `json:"cuda,omitempty"`
	ROCM []string `json:"rocm,omitempty"`
}

RuntimeItems configures the runtime environment variables.

func (*RuntimeItems) For

func (r *RuntimeItems) For(deviceType device.Type) []string

For returns the value for the provided device type.

func (RuntimeItems) ToExpconf

ToExpconf translates old model objects into an expconf object.

func (*RuntimeItems) UnmarshalJSON

func (r *RuntimeItems) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type SCIMEmail

type SCIMEmail struct {
	Type    string `json:"type"`
	SValue  string `json:"value"`
	Primary bool   `json:"primary"`
}

SCIMEmail is an email address in SCIM.

func (*SCIMEmail) Scan

func (e *SCIMEmail) Scan(value interface{}) error

Scan implements sql.Scanner.

type SCIMEmails

type SCIMEmails []SCIMEmail

SCIMEmails is a list of emails in SCIM.

func SCIMEmailsFromJWT

func SCIMEmailsFromJWT(claims *JWT) SCIMEmails

SCIMEmailsFromJWT returns a consistent SCIMEmails struct wrapping the single email in a JWT.

func (*SCIMEmails) Scan

func (e *SCIMEmails) Scan(value interface{}) error

Scan implements sql.Scanner.

type SCIMError

type SCIMError struct {
	Detail   string           `json:"detail,omitempty"`
	Status   int              `json:"status"`
	SCIMType string           `json:"scimType,omitempty"`
	Schemas  SCIMErrorSchemas `json:"schemas"`
}

SCIMError is an error in SCIM.

type SCIMErrorSchemas

type SCIMErrorSchemas struct{}

SCIMErrorSchemas is the constant schemas field for errors.

func (SCIMErrorSchemas) MarshalJSON

func (s SCIMErrorSchemas) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*SCIMErrorSchemas) UnmarshalJSON

func (s *SCIMErrorSchemas) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler.

type SCIMGroup

type SCIMGroup struct {
	ID          UUID        `json:"id"`
	DisplayName string      `json:"displayName"`
	Members     []*SCIMUser `json:"members"`

	Schemas SCIMGroupSchemas `json:"schemas"`
	Meta    *SCIMGroupMeta   `json:"meta"`
}

SCIMGroup is a group in SCIM.

type SCIMGroupMeta

// SCIMGroupMeta is the metadata for a group in SCIM.
//
// It only carries the resource type, serialized under "resourceType".
type SCIMGroupMeta struct {
	ResourceType SCIMGroupResourceType `json:"resourceType"`
}

SCIMGroupMeta is the metadata for a group in SCIM.

type SCIMGroupResourceType

type SCIMGroupResourceType struct{}

SCIMGroupResourceType is the constant resource type field for groups.

func (SCIMGroupResourceType) MarshalJSON

func (s SCIMGroupResourceType) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*SCIMGroupResourceType) UnmarshalJSON

func (s *SCIMGroupResourceType) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler.

type SCIMGroupSchemas

type SCIMGroupSchemas struct{}

SCIMGroupSchemas is the constant schemas field for a group.

func (SCIMGroupSchemas) MarshalJSON

func (s SCIMGroupSchemas) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*SCIMGroupSchemas) UnmarshalJSON

func (s *SCIMGroupSchemas) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler.

type SCIMGroups

// SCIMGroups is a list of groups in SCIM.
//
// The group entries live in Resources, which is serialized under the
// capitalized JSON key "Resources" (the other keys are lowerCamelCase).
// TotalResults, StartIndex and ItemsPerPage are plain integer fields.
type SCIMGroups struct {
	TotalResults int          `json:"totalResults"`
	StartIndex   int          `json:"startIndex"`
	Resources    []*SCIMGroup `json:"Resources"`

	ItemsPerPage int             `json:"itemsPerPage"`
	Schemas      SCIMListSchemas `json:"schemas"`
}

SCIMGroups is a list of groups in SCIM.

func (*SCIMGroups) SetSCIMFields

func (g *SCIMGroups) SetSCIMFields(serverRoot *url.URL) error

SetSCIMFields sets the location field for all users given the URL of the master.

type SCIMListSchemas

type SCIMListSchemas struct{}

SCIMListSchemas is the constant schemas field for lists.

func (SCIMListSchemas) MarshalJSON

func (s SCIMListSchemas) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*SCIMListSchemas) UnmarshalJSON

func (s *SCIMListSchemas) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler.

type SCIMName

// SCIMName is a name in SCIM, split into a given name and a family name
// (JSON keys "givenName" and "familyName").
type SCIMName struct {
	GivenName  string `json:"givenName"`
	FamilyName string `json:"familyName"`
}

SCIMName is a name in SCIM.

func SCIMNameFromJWT

func SCIMNameFromJWT(claims *JWT) SCIMName

SCIMNameFromJWT returns a consistent SCIMName struct wrapping the single name in a JWT.

func (*SCIMName) Scan

func (e *SCIMName) Scan(value interface{}) error

Scan implements sql.Scanner.

type SCIMUser

// SCIMUser is a user in SCIM.
//
// Field groups, as declared by the struct tags:
//   - ID through Active carry both a bun column name and a JSON key.
//   - PasswordHash has a bun column and is emitted in JSON as "password_hash"
//     only when non-empty (omitempty).
//   - Password, Schemas and Meta carry JSON tags only (no bun tag).
//   - UserID and RawAttributes have bun columns but are excluded from JSON
//     via `json:"-"`.
type SCIMUser struct {
	ID          UUID        `bun:"id" json:"id"`
	Username    string      `bun:"username" json:"userName"`
	ExternalID  string      `bun:"external_id" json:"externalId"`
	Name        SCIMName    `bun:"name" json:"name"`
	DisplayName null.String `bun:"display_name" json:"displayName"`
	Emails      SCIMEmails  `bun:"emails" json:"emails"`
	Active      bool        `bun:"active" json:"active"`

	PasswordHash null.String `bun:"password_hash" json:"password_hash,omitempty"`

	Password string          `json:"password,omitempty"`
	Schemas  SCIMUserSchemas `json:"schemas"`
	Meta     *SCIMUserMeta   `json:"meta"`

	UserID        UserID                 `bun:"user_id" json:"-"`
	RawAttributes map[string]interface{} `bun:"raw_attributes" json:"-"`
}

SCIMUser is a user in SCIM.

func (*SCIMUser) Sanitize

func (u *SCIMUser) Sanitize()

Sanitize sanitizes the user of external data that could be provided, but should always be ignored. See https://tools.ietf.org/html/rfc7643#section-3.1 for why meta must be cleared.

func (*SCIMUser) SetSCIMFields

func (u *SCIMUser) SetSCIMFields(serverRoot *url.URL) error

SetSCIMFields sets the location field for a user given the URL of the master and makes other changes, such as removing password fields from the model.

func (*SCIMUser) UpdatePasswordHash

func (u *SCIMUser) UpdatePasswordHash(password string) error

UpdatePasswordHash updates the SCIMUser's password hash.

func (SCIMUser) Validate

func (u SCIMUser) Validate() []error

Validate checks that external data satisfies the expected invariants.

func (SCIMUser) ValidateChanges

func (u SCIMUser) ValidateChanges() error

ValidateChanges checks that a patch for a user satisfies the expected invariants.

type SCIMUserMeta

// SCIMUserMeta is the metadata for a user in SCIM: the resource type and a
// location string (JSON keys "resourceType" and "location").
type SCIMUserMeta struct {
	ResourceType SCIMUserResourceType `json:"resourceType"`
	Location     string               `json:"location"`
}

SCIMUserMeta is the metadata for a user in SCIM.

type SCIMUserResourceType

type SCIMUserResourceType struct{}

SCIMUserResourceType is the constant resource type field for users.

func (SCIMUserResourceType) MarshalJSON

func (s SCIMUserResourceType) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*SCIMUserResourceType) UnmarshalJSON

func (s *SCIMUserResourceType) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler.

type SCIMUserSchemas

type SCIMUserSchemas struct{}

SCIMUserSchemas is the constant schemas field for a user.

func (SCIMUserSchemas) MarshalJSON

func (s SCIMUserSchemas) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*SCIMUserSchemas) UnmarshalJSON

func (s *SCIMUserSchemas) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler.

type SCIMUsers

// SCIMUsers is a list of users in SCIM.
//
// The user entries live in Resources, which is serialized under the
// capitalized JSON key "Resources" (the other keys are lowerCamelCase).
// TotalResults, StartIndex and ItemsPerPage are plain integer fields.
type SCIMUsers struct {
	TotalResults int         `json:"totalResults"`
	StartIndex   int         `json:"startIndex"`
	Resources    []*SCIMUser `json:"Resources"`

	ItemsPerPage int             `json:"itemsPerPage"`
	Schemas      SCIMListSchemas `json:"schemas"`
}

SCIMUsers is a list of users in SCIM.

func (*SCIMUsers) SetSCIMFields

func (u *SCIMUsers) SetSCIMFields(serverRoot *url.URL) error

SetSCIMFields sets the location field for all users given the URL of the master.

type SessionID

type SessionID int

SessionID is the type for user session IDs.

type SlotSummary

// SlotSummary summarizes the state of a slot.
//
// Container is a pointer and may be nil (presumably when nothing is running
// on the slot — confirm against the producer of this value).
type SlotSummary struct {
	ID        string            `json:"id"`
	Device    device.Device     `json:"device"`
	Enabled   bool              `json:"enabled"`
	Container *cproto.Container `json:"container"`
	Draining  bool              `json:"draining"`
}

SlotSummary summarizes the state of a slot.

func (SlotSummary) ToProto

func (s SlotSummary) ToProto() *agentv1.Slot

ToProto converts a SlotSummary to its protobuf representation.

type SlotsSummary

type SlotsSummary map[string]SlotSummary

SlotsSummary contains a summary for a number of slots.

type Snapshotter

// Snapshotter is any object that implements how to save and restore its state.
//
// Snapshot returns the state as a json.RawMessage (or an error); Restore
// accepts a json.RawMessage and returns an error on failure.
type Snapshotter interface {
	Snapshot() (json.RawMessage, error)
	Restore(json.RawMessage) error
}

Snapshotter is any object that implements how to save and restore its state.

type State

type State string

State is the run state of an experiment / trial / step / etc.

func StateFromProto

func StateFromProto(state experimentv1.State) State

StateFromProto maps experimentv1.State to State.

type StateWithReason

// StateWithReason is the run state of an experiment with an informational
// reason used for logging purposes.
type StateWithReason struct {
	State               State
	InformationalReason string
}

StateWithReason is the run state of an experiment with an informational reason used for logging purposes.

type StorageBackendID

type StorageBackendID int

StorageBackendID is the ID for the storage backend. Storage backend ID isn't backfilled so checkpoints older than 0.27.1 won't have this. There are also some cases where a user can create a checkpoint without this so don't rely on this always being set.

type StorageSize

type StorageSize int64

StorageSize is a named type for custom marshaling behavior for shm_size.

func (*StorageSize) UnmarshalJSON

func (d *StorageSize) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type TLSClientConfig

// TLSClientConfig configures how to make a TLS connection.
//
// CertificatePath is read from the JSON key "certificate" and CertificateName
// from "certificate_name". CertBytes carries no struct tag.
// NOTE(review): CertBytes is presumably populated by Resolve from
// CertificatePath — confirm against the Resolve implementation.
type TLSClientConfig struct {
	Enabled         bool   `json:"enabled"`
	SkipVerify      bool   `json:"skip_verify"`
	CertificatePath string `json:"certificate"`
	CertificateName string `json:"certificate_name"`
	CertBytes       []byte
}

TLSClientConfig configures how to make a TLS connection.

func MakeTLSConfig

func MakeTLSConfig(cert *tls.Certificate) (TLSClientConfig, error)

MakeTLSConfig constructs a TLSClientConfig to use the provided tls.Certificate.

func (*TLSClientConfig) Resolve

func (t *TLSClientConfig) Resolve() error

Resolve resolves the configuration.

func (TLSClientConfig) Validate

func (t TLSClientConfig) Validate() []error

Validate implements the check.Validatable interface.

type Task

// Task is the model for a task in the database (bun table "tasks").
//
// TaskID is the bun primary key. Pointer-typed fields (JobID, EndTime,
// ParentID, ForkedFrom, State, Config, NoPause, LogPolicyMatched) may be nil.
type Task struct {
	bun.BaseModel `bun:"table:tasks"`

	TaskID    TaskID     `db:"task_id" bun:"task_id,pk"`
	JobID     *JobID     `db:"job_id"`
	TaskType  TaskType   `db:"task_type"`
	StartTime time.Time  `db:"start_time"`
	EndTime   *time.Time `db:"end_time"`
	// LogVersion indicates how the logs were stored.
	LogVersion TaskLogVersion `db:"log_version"`

	// Relations.
	// Job is a bun belongs-to relation joined on job_id=job_id.
	Job        *Job    `bun:"rel:belongs-to,join:job_id=job_id"`
	ParentID   *TaskID `db:"parent_id"`
	ForkedFrom *string `db:"forked_from"`

	State *TaskState `db:"task_state" bun:"task_state"`

	Config *string `db:"config"`

	NoPause          *bool   `db:"no_pause"`
	LogPolicyMatched *string `db:"log_policy_matched"`
}

Task is the model for a task in the database.

type TaskConfigPolicies

// TaskConfigPolicies is the bun model of a task config policy
// (table "task_config_policies").
//
// WorkloadType, LastUpdatedBy and LastUpdatedTime are tagged notnull.
// WorkspaceID, InvariantConfig and Constraints are pointers and may be nil.
type TaskConfigPolicies struct {
	bun.BaseModel   `bun:"table:task_config_policies"`
	WorkspaceID     *int      `bun:"workspace_id"`
	WorkloadType    string    `bun:"workload_type,notnull"`
	LastUpdatedBy   UserID    `bun:"last_updated_by,notnull"`
	LastUpdatedTime time.Time `bun:"last_updated_time,notnull"`
	InvariantConfig *string   `bun:"invariant_config"`
	Constraints     *string   `bun:"constraints"`
}

TaskConfigPolicies is the bun model of a task config policy.

type TaskContainerDefaultsConfig

// TaskContainerDefaultsConfig configures docker defaults for all containers.
// If you add a field to this, you must update the merge impl.
//
// Fields tagged `omitempty` are dropped from JSON output when they hold their
// zero value; the remaining fields are always emitted.
type TaskContainerDefaultsConfig struct {
	DtrainNetworkInterface string                `json:"dtrain_network_interface,omitempty"`
	NCCLPortRange          string                `json:"nccl_port_range,omitempty"`
	GLOOPortRange          string                `json:"gloo_port_range,omitempty"`
	ShmSizeBytes           int64                 `json:"shm_size_bytes,omitempty"`
	NetworkMode            container.NetworkMode `json:"network_mode,omitempty"`
	// TODO(DET-9855) we should move these over to KubernetesTaskContainerDefaults.
	CPUPodSpec           *k8sV1.Pod           `json:"cpu_pod_spec"`
	GPUPodSpec           *k8sV1.Pod           `json:"gpu_pod_spec"`
	CheckpointGCPodSpec  *k8sV1.Pod           `json:"checkpoint_gc_pod_spec"`
	Image                *RuntimeItem         `json:"image,omitempty"`
	RegistryAuth         *registry.AuthConfig `json:"registry_auth,omitempty"`
	ForcePullImage       bool                 `json:"force_pull_image,omitempty"`
	EnvironmentVariables *RuntimeItems        `json:"environment_variables,omitempty"`

	AddCapabilities  []string      `json:"add_capabilities"`
	DropCapabilities []string      `json:"drop_capabilities"`
	Devices          DevicesConfig `json:"devices"`

	BindMounts BindMountsConfig      `json:"bind_mounts"`
	WorkDir    *string               `json:"work_dir"`
	Slurm      expconf.SlurmConfigV0 `json:"slurm"`
	Pbs        expconf.PbsConfigV0   `json:"pbs"`

	StartupHook string `json:"startup_hook"`

	LogPolicies expconf.LogPoliciesConfig `json:"log_policies"`

	PreemptionTimeout int `json:"preemption_timeout,omitempty"`

	// TODO(DET-9856) we should probably eventually move this to expconf and allow setting
	// on a per task level.
	Kubernetes *KubernetesTaskContainerDefaults `json:"kubernetes"`
}

TaskContainerDefaultsConfig configures docker defaults for all containers. If you add a field to this, you must update the merge impl.

func DefaultTaskContainerDefaults

func DefaultTaskContainerDefaults() *TaskContainerDefaultsConfig

DefaultTaskContainerDefaults returns the default for TaskContainerDefaultsConfig.

func (TaskContainerDefaultsConfig) Merge

Merge merges other into self, preferring other. The result is a deepcopy of self, with deep copies of values taken from other.

func (*TaskContainerDefaultsConfig) MergeIntoExpConfig

func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.ExperimentConfig)

MergeIntoExpConfig sets any unset ExperimentConfig values from TaskContainerDefaults.

func (*TaskContainerDefaultsConfig) UnmarshalJSON

func (c *TaskContainerDefaultsConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface. Setting defaults here is necessary over our usual "Define a default struct and unmarshal into it" strategy because there are places (resource pool configs) where we need to know if the task container defaults were set at all or if they were not; if they were set then that resource pool's task container defaults are used instead of the toplevel master config's settings. To know if the user set them at the resource pool level, the resource pool has to have a nullable pointer, which is not compatible with our usual strategy for defaults.

func (*TaskContainerDefaultsConfig) Validate

func (c *TaskContainerDefaultsConfig) Validate() []error

Validate implements the check.Validatable interface.

type TaskContextDirectory

// TaskContextDirectory represents a row in the database for a task's context
// directory (bun table "task_context_directory"). The directory contents are
// held as raw bytes in ContextDirectory.
type TaskContextDirectory struct {
	bun.BaseModel `bun:"table:task_context_directory"`

	TaskID           TaskID `bun:"task_id"`
	ContextDirectory []byte `bun:"context_directory"`
}

TaskContextDirectory represents a row in the database for a task's context directory. This is currently only used for notebooks, tensorboards, and commands. Trials aren't in it because they are stored on experiments.model_def. In addition, trials can have many tasks but currently can only have one model_def. We would end up duplicating a lot of data migrating experiments' model_def over to this table. Also, that migration would be pretty painful.

type TaskID

type TaskID string

TaskID is the unique ID of a task among all tasks.

func NewTaskID

func NewTaskID() TaskID

NewTaskID returns a random, globally unique task ID.

func (TaskID) String

func (a TaskID) String() string

type TaskLog

// TaskLog represents a structured log emitted by an allocation.
//
// Only TaskID and Log are value fields; every other field is a pointer and
// may be nil. Fields tagged `omitempty` are dropped from JSON when nil.
type TaskLog struct {
	// A task log should have one of these IDs after being persisted. All should be unique.
	ID *int `db:"id" bun:"id,pk,autoincrement" json:"id,omitempty"`
	// The body of an Elasticsearch log response will look something like
	// { _id: ..., _source: { ... }} where _source is the rest of this struct.
	// StringID doesn't have serialization tags because it is not part of
	// _source and populated from _id.
	StringID     *string `json:"-"`
	TaskID       string  `db:"task_id" json:"task_id"`
	AllocationID *string `db:"allocation_id" json:"allocation_id"`
	AgentID      *string `db:"agent_id" json:"agent_id,omitempty"`
	// In the case of k8s, container_id is a pod name instead.
	ContainerID *string    `db:"container_id" json:"container_id,omitempty"`
	RankID      *int       `db:"rank_id" json:"rank_id,omitempty"`
	Timestamp   *time.Time `db:"timestamp" json:"timestamp"`
	Level       *string    `db:"level" json:"level"`
	Log         string     `db:"log" json:"log"`
	Source      *string    `db:"source" json:"source,omitempty"`
	StdType     *string    `db:"stdtype" json:"stdtype,omitempty"`
}

TaskLog represents a structured log emitted by an allocation.

func TaskLogFromProto

func TaskLogFromProto(in *taskv1.TaskLog) *TaskLog

TaskLogFromProto converts a proto task log to a model task log.

func (*TaskLog) Message

func (t *TaskLog) Message() string

Message resolves the flat version of the log that UIs have shown historically. TODO(task-unif): Should we just.. stop doing this? And send the log as is and let the UIs handle display (yes, IMO).

func (TaskLog) Proto

func (t TaskLog) Proto() (*apiv1.TaskLogsResponse, error)

Proto converts a task log to its protobuf representation.

type TaskLogBatch

type TaskLogBatch []*TaskLog

TaskLogBatch represents a batch of model.TaskLog.

func (TaskLogBatch) ForEach

func (t TaskLogBatch) ForEach(f func(interface{}) error) error

ForEach implements logs.Batch.

func (TaskLogBatch) Size

func (t TaskLogBatch) Size() int

Size implements logs.Batch.

type TaskLogVersion

type TaskLogVersion int32

TaskLogVersion is the version for our log-storing scheme. Useful because changing designs would involve either a really costly migration or versioning schemes and we pick the latter.

// Versions of the task log storage scheme. TaskLogVersion0 and
// TaskLogVersion1 are the two known schemes; CurrentTaskLogVersion is
// defined as TaskLogVersion1 and names the scheme currently in use.
const (
	TaskLogVersion0       TaskLogVersion = 0
	TaskLogVersion1       TaskLogVersion = 1
	CurrentTaskLogVersion                = TaskLogVersion1
)

CurrentTaskLogVersion describes the current scheme in which we store task logs. To avoid a migration that in some cases would be extremely costly, we record the log version so that we can read old logs the old way and handle new logs however we please.

type TaskState

type TaskState string

TaskState represents the state of a generic task.

// TaskState values. The STOPPING_* values denote a task that is still in the
// process of reaching the corresponding paused, canceled, completed or error
// state.
const (
	// TaskStateActive denotes that task is running.
	TaskStateActive TaskState = "ACTIVE"
	// TaskStateCanceled denotes that task is killed.
	TaskStateCanceled TaskState = "CANCELED"
	// TaskStateCompleted denotes that task has finished running.
	TaskStateCompleted TaskState = "COMPLETED"
	// TaskStateError denotes that task has exited with an error.
	TaskStateError TaskState = "ERROR"
	// TaskStatePaused denotes that task has been paused.
	TaskStatePaused TaskState = "PAUSED"
	// TaskStateStoppingPaused denotes that the task is in the process of being paused.
	TaskStateStoppingPaused TaskState = "STOPPING_PAUSED"
	// TaskStateStoppingCanceled denotes that the task is in the process of being canceled.
	TaskStateStoppingCanceled TaskState = "STOPPING_CANCELED"
	// TaskStateStoppingCompleted denotes that the task is in the process of being completed.
	TaskStateStoppingCompleted TaskState = "STOPPING_COMPLETED"
	// TaskStateStoppingError denotes that the task is in the process of returning an error.
	TaskStateStoppingError TaskState = "STOPPING_ERROR"
)

type TaskStats

// TaskStats is the model for task stats in the database.
//
// ContainerID, StartTime and EndTime are pointers and may be nil.
type TaskStats struct {
	AllocationID AllocationID
	EventType    string
	// ContainerID is sent by the agent. This won't always be present in the database.
	// This is a weird table since sometimes it is one row per allocation
	// (like in record queued stats) and sometimes it is many per allocation like in
	// pulled time.
	ContainerID *cproto.ID
	StartTime   *time.Time
	EndTime     *time.Time
}

TaskStats is the model for task stats in the database.

type TaskType

type TaskType string

TaskType is the type of a task.

type Template

// Template represents a row from the `templates` table.
//
// Config holds the template configuration as raw bytes; it is the only field
// that also carries an explicit bun column tag.
type Template struct {
	Name        string `db:"name" json:"name"`
	Config      []byte `db:"config" json:"config" bun:"config"`
	WorkspaceID int    `db:"workspace_id" json:"workspace_id"`
}

Template represents a row from the `templates` table.

type TokenID

type TokenID int

TokenID is the type for token IDs.

type TokenType

type TokenType string

TokenType is the type of a token.

// TokenType values. Each string matches a label of the public.token_type
// enum in Postgres.
const (
	// TokenTypeUserSession is the "USER_SESSION" token type for the enum public.token_type in Postgres.
	TokenTypeUserSession TokenType = "USER_SESSION"
	// TokenTypeAccessToken is the "ACCESS_TOKEN" token type for the enum public.token_type in Postgres.
	TokenTypeAccessToken TokenType = "ACCESS_TOKEN"
)

func TokenTypeFromProto

func TokenTypeFromProto(t userv1.TokenType) TokenType

TokenTypeFromProto maps a userv1.TokenType to TokenType.

func (TokenType) Proto

func (tt TokenType) Proto() userv1.TokenType

Proto returns the proto representation of the token type.

type Trial

// Trial represents a row from the `trials` table.
//
// ID is the auto-incrementing bun primary key. HParams and Metadata are
// free-form maps with explicit bun column names. Pointer-typed fields
// (RequestID, EndTime, WarmStartCheckpointID, ExternalTrialID, LastActivity,
// LogRetentionDays) may be nil.
type Trial struct {
	bun.BaseModel `bun:"table:trials"`

	ID                    int            `db:"id" bun:",pk,autoincrement"`
	RequestID             *RequestID     `db:"request_id"`
	ExperimentID          int            `db:"experiment_id"`
	State                 State          `db:"state"`
	StartTime             time.Time      `db:"start_time"`
	EndTime               *time.Time     `db:"end_time"`
	HParams               map[string]any `db:"hparams" bun:"hparams"`
	WarmStartCheckpointID *int           `db:"warm_start_checkpoint_id"`
	Seed                  int64          `db:"seed"`
	TotalBatches          int            `db:"total_batches"`
	ExternalTrialID       *string        `db:"external_trial_id"`
	RunID                 int            `db:"run_id"` // run_id as in restart_id not "runs" id.
	Restarts              int            `db:"restarts"`
	RunnerState           string         `db:"runner_state"`
	LastActivity          *time.Time     `db:"last_activity"`
	LogRetentionDays      *int16         `db:"log_retention_days"`
	Metadata              map[string]any `db:"metadata" bun:"metadata"`
}

Trial represents a row from the `trials` table.

func NewTrial

func NewTrial(
	state State,
	requestID RequestID,
	experimentID int,
	hparams JSONObj,
	warmStartCheckpoint *Checkpoint,
	trialSeed int64,
	logRetentionDays *int16,
) *Trial

NewTrial creates a new trial in the specified state. Note that the trial ID will not be set.

func (*Trial) ToRunAndTrialV2

func (t *Trial) ToRunAndTrialV2(experimentsProjectID int) (*Run, *TrialV2)

ToRunAndTrialV2 converts a trial to a Run and its corresponding TrialV2.

type TrialLog

// TrialLog represents a row from the `trial_logs` table.
//
// Message is the legacy flat form of the log; the Resolve method fills it in
// from the other fields. Apart from TrialID and Message, every field is a
// pointer and may be nil.
type TrialLog struct {
	// A trial log should have one of these IDs. All should be unique.
	// TODO(Brad): This must be int64.
	ID *int `db:"id" json:"id,omitempty"`
	// The body of an Elasticsearch log response will look something like
	// { _id: ..., _source: { ... }} where _source is the rest of this struct.
	// StringID doesn't have serialization tags because it is not part of
	// _source and populated from _id.
	StringID *string `json:"-"`

	TrialID int    `db:"trial_id" json:"trial_id"`
	Message string `db:"message" json:"message,omitempty"`

	AgentID *string `db:"agent_id" json:"agent_id,omitempty"`
	// In the case of k8s, container_id is a pod name instead.
	ContainerID *string    `db:"container_id" json:"container_id,omitempty"`
	RankID      *int       `db:"rank_id" json:"rank_id,omitempty"`
	Timestamp   *time.Time `db:"timestamp" json:"timestamp"`
	Level       *string    `db:"level" json:"level"`
	Log         *string    `db:"log" json:"log"`
	Source      *string    `db:"source" json:"source,omitempty"`
	StdType     *string    `db:"stdtype" json:"stdtype,omitempty"`
}

TrialLog represents a row from the `trial_logs` table.

func (TrialLog) Proto

func (t TrialLog) Proto() (*apiv1.TrialLogsResponse, error)

Proto converts a trial log to its protobuf representation.

func (*TrialLog) Resolve

func (t *TrialLog) Resolve()

Resolve resolves the legacy Message field from the others provided.

type TrialLogBatch

type TrialLogBatch []*TrialLog

TrialLogBatch represents a batch of model.TrialLog.

func (TrialLogBatch) ForEach

func (t TrialLogBatch) ForEach(f func(interface{}) error) error

ForEach implements logs.Batch.

func (TrialLogBatch) Size

func (t TrialLogBatch) Size() int

Size implements logs.Batch.

type TrialMetrics

// TrialMetrics represents a row from the `steps` or `validations` table.
//
// TrialRunID is read from the "trial_run_id" column but excluded from JSON
// via `json:"-"`. EndTime is a pointer and may be nil.
type TrialMetrics struct {
	ID           int        `db:"id" json:"id"`
	TrialID      int        `db:"trial_id" json:"trial_id"`
	TrialRunID   int        `db:"trial_run_id" json:"-"`
	TotalBatches int        `db:"total_batches" json:"total_batches"`
	EndTime      *time.Time `db:"end_time" json:"end_time"`
	Metrics      JSONObj    `db:"metrics" json:"metrics"`
}

TrialMetrics represents a row from the `steps` or `validations` table.

type TrialProfilerMetricsBatch

// TrialProfilerMetricsBatch represents a row from the `trial_profiler_metrics`
// table.
//
// Values, Batches and Timestamps are Postgres array columns (float4, int4 and
// timestamptz respectively); Labels is stored as raw bytes.
// NOTE(review): the three arrays are presumably parallel (same length,
// index-aligned) — confirm against ToProto.
type TrialProfilerMetricsBatch struct {
	Values     pgtype.Float4Array      `db:"values"`
	Batches    pgtype.Int4Array        `db:"batches"`
	Timestamps pgtype.TimestamptzArray `db:"timestamps"`
	Labels     []byte                  `db:"labels"`
}

TrialProfilerMetricsBatch represents a row from the `trial_profiler_metrics` table.

func (*TrialProfilerMetricsBatch) ToProto

ToProto converts a TrialProfilerMetricsBatch to its protobuf representation.

type TrialProfilerMetricsBatchBatch

type TrialProfilerMetricsBatchBatch []*trialv1.TrialProfilerMetricsBatch

TrialProfilerMetricsBatchBatch represents a batch of trialv1.TrialProfilerMetricsBatch.

func (TrialProfilerMetricsBatchBatch) ForEach

func (t TrialProfilerMetricsBatchBatch) ForEach(f func(interface{}) error) error

ForEach implements logs.Batch.

func (TrialProfilerMetricsBatchBatch) Size

Size implements logs.Batch.

type TrialV2

// TrialV2 represents a row from the `trials_v2` table.
//
// RequestID is a pointer and may be nil.
type TrialV2 struct {
	bun.BaseModel `bun:"table:trials_v2"`

	RunID     int        `bun:"run_id"`
	RequestID *RequestID `bun:"request_id"`
	Seed      int64      `bun:"seed"`
}

TrialV2 represents a row from the `trials_v2` table.

type UUID

// UUID is a UUID that converts to a nullable string in SQL queries.
//
// NOTE(review): Valid presumably distinguishes a set UUID from a null one
// (mirroring the sql.Null* types) — confirm against Scan/Value.
type UUID struct {
	UUID  uuid.UUID
	Valid bool
}

UUID is a UUID that converts to a nullable string in SQL queries.

func NewUUID

func NewUUID() UUID

NewUUID creates a new, non-null and random UUID.

func ParseUUID

func ParseUUID(s string) (UUID, error)

ParseUUID initializes a non-null UUID from a string. It returns an error if the string does not follow the format of a UUID.

func (UUID) MarshalJSON

func (u UUID) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (*UUID) Scan

func (u *UUID) Scan(value interface{}) error

Scan implements the sql.Scanner interface.

func (UUID) String

func (u UUID) String() string

String returns the string representation of the UUID. If this UUID is null, return the empty string.

func (*UUID) UnmarshalJSON

func (u *UUID) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

func (UUID) Value

func (u UUID) Value() (driver.Value, error)

Value implements the driver.Valuer interface.

type User

// User corresponds to a row in the "users" DB table.
//
// ID is the auto-incrementing bun primary key. PasswordHash is excluded from
// JSON output via `json:"-"`. LastAuthAt is a pointer and may be nil.
type User struct {
	bun.BaseModel `bun:"table:users"`
	ID            UserID      `db:"id" bun:"id,pk,autoincrement" json:"id"`
	Username      string      `db:"username" json:"username"`
	PasswordHash  null.String `db:"password_hash" json:"-"`
	DisplayName   null.String `db:"display_name" json:"display_name"`
	Admin         bool        `db:"admin" json:"admin"`
	Active        bool        `db:"active" json:"active"`
	ModifiedAt    time.Time   `db:"modified_at" json:"modified_at"`
	Remote        bool        `db:"remote" json:"remote"`
	LastAuthAt    *time.Time  `db:"last_auth_at" json:"last_auth_at"`
}

User corresponds to a row in the "users" DB table.

func (*User) Proto

func (user *User) Proto() *userv1.User

Proto converts a user to its protobuf representation.

func (*User) UpdatePasswordHash

func (user *User) UpdatePasswordHash(password string) error

UpdatePasswordHash updates the model's password hash employing necessary cryptographic techniques.

func (User) ValidatePassword

func (user User) ValidatePassword(password string) bool

ValidatePassword checks that the supplied password is correct.

type UserActivity

// UserActivity is a record of user activity (bun table "activity").
//
// Each record ties a user to an activity type performed on an entity
// (identified by EntityType and EntityID) at ActivityTime.
type UserActivity struct {
	bun.BaseModel `bun:"table:activity"`
	UserID        UserID       `db:"user_id" json:"user_id"`
	ActivityType  ActivityType `db:"activity_type" json:"activity_type"`
	EntityType    EntityType   `db:"entity_type" json:"entity_type"`
	EntityID      int32        `db:"entity_id" json:"entity_id"`
	ActivityTime  time.Time    `db:"activity_time" json:"activity_time"`
}

UserActivity is a record of user activity.

func UserActivityFromProto

func UserActivityFromProto(
	a userv1.ActivityType,
	e userv1.EntityType,
	entityID int32,
	userID int32,
	timestamp time.Time,
) *UserActivity

UserActivityFromProto returns a model UserActivity from a proto definition.

type UserID

type UserID int

UserID is the type for user IDs.

type UserSession

// UserSession corresponds to a row in the "user_sessions" DB table.
//
// RevokedAt and Description are nullable (null.Time / null.String).
// InheritedClaims is tagged `bun:"-"`, so bun does not map it to a column.
type UserSession struct {
	bun.BaseModel   `bun:"table:user_sessions"`
	ID              SessionID         `db:"id" json:"id"`
	UserID          UserID            `db:"user_id" json:"user_id"`
	Expiry          time.Time         `db:"expiry" json:"expiry"`
	CreatedAt       time.Time         `db:"created_at" json:"created_at"`
	TokenType       TokenType         `db:"token_type" json:"token_type"`
	RevokedAt       null.Time         `db:"revoked_at" json:"revoked_at"`
	Description     null.String       `db:"description" json:"description"`
	InheritedClaims map[string]string `bun:"-"` // InheritedClaims contains the OIDC raw ID token when OIDC is enabled
}

UserSession corresponds to a row in the "user_sessions" DB table.

func (UserSession) Proto

func (s UserSession) Proto() *userv1.TokenInfo

Proto returns the protobuf representation of a row in the user_sessions table.

type UserWebSetting

// UserWebSetting is a record of user web setting: a Key/Value pair belonging
// to UserID, together with a StoragePath string.
type UserWebSetting struct {
	UserID      UserID
	Key         string
	Value       string
	StoragePath string
}

UserWebSetting is a record of user web setting.

func (UserWebSetting) Proto

Proto returns the protobuf representation.

type Users

type Users []User

Users is a slice of User objects—primarily useful for its methods.

func (Users) Proto

func (users Users) Proto() []*userv1.User

Proto converts a slice of users to its protobuf representation.

type WorkloadManagerType

type WorkloadManagerType string

WorkloadManagerType indicates which type of workloads the harness should prepare to receive.

type WorkloadSequencerType

type WorkloadSequencerType string

WorkloadSequencerType is the type of sequencer that a trial actor should use.

type Workspace

// Workspace is the bun model of a workspace (table "workspaces").
//
// ID is the auto-incrementing primary key. CreatedAt is tagged scanonly, so
// bun reads it from query results but does not write it. AgentUser and
// AgentGroup map to the columns "user_" and "group_" (with trailing
// underscores); AgentUID and AgentGID map to "uid" and "gid". Pointer-typed
// fields may be nil.
type Workspace struct {
	bun.BaseModel            `bun:"table:workspaces"`
	ID                       int                              `bun:"id,pk,autoincrement"`
	Name                     string                           `bun:"name"`
	Archived                 bool                             `bun:"archived"`
	CreatedAt                time.Time                        `bun:"created_at,scanonly"`
	UserID                   UserID                           `bun:"user_id"`
	Immutable                bool                             `bun:"immutable"`
	State                    *WorkspaceState                  `bun:"state"`
	AgentUID                 *int32                           `bun:"uid"`
	AgentUser                *string                          `bun:"user_"`
	AgentGID                 *int32                           `bun:"gid"`
	AgentGroup               *string                          `bun:"group_"`
	CheckpointStorageConfig  *expconf.CheckpointStorageConfig `bun:"checkpoint_storage_config"`
	DefaultComputePool       string                           `bun:"default_compute_pool"`
	DefaultAuxPool           string                           `bun:"default_aux_pool"`
	AutoCreatedNamespaceName *string                          `bun:"auto_created_namespace_name"`
}

Workspace is the bun model of a workspace.

func (*Workspace) ToProto

func (w *Workspace) ToProto() (*workspacev1.Workspace, error)

ToProto converts a bun model of a workspace to a proto object. Some fields like username and pinned are not included since they are not on the bun model.

type WorkspaceNamespace

// WorkspaceNamespace is the bun model of a workspace-namespace binding
// (table "workspace_namespace_bindings"). Each row associates a workspace ID
// with a namespace and a cluster name.
type WorkspaceNamespace struct {
	bun.BaseModel       `bun:"table:workspace_namespace_bindings"`
	WorkspaceID         int    `bun:"workspace_id"`
	Namespace           string `bun:"namespace"`
	ClusterName         string `bun:"cluster_name"`
	AutoCreateNamespace bool   `bun:"auto_create_namespace"`
}

WorkspaceNamespace is the bun model of a workspace-namespace binding.

func (WorkspaceNamespace) ToProto

ToProto converts a WorkspaceNamespace (a workspace-namespace binding) to its protobuf representation.

type WorkspacePin

// WorkspacePin is the bun model of a workspace pin (table "workspace_pins"):
// a pairing of a workspace ID with a user ID.
type WorkspacePin struct {
	bun.BaseModel `bun:"table:workspace_pins"`
	WorkspaceID   int    `bun:"workspace_id"`
	UserID        UserID `bun:"user_id"`
}

WorkspacePin is the bun model of a workspace pin.

type WorkspaceState

type WorkspaceState string

WorkspaceState is the state of a workspace with regard to being deleted.

// WorkspaceState values, all of which relate to workspace deletion.
const (
	// WorkspaceStateDeleting constant.
	WorkspaceStateDeleting WorkspaceState = "DELETING"
	// WorkspaceStateDeleteFailed constant.
	WorkspaceStateDeleteFailed WorkspaceState = "DELETE_FAILED"
	// WorkspaceStateDeleted constant.
	WorkspaceStateDeleted WorkspaceState = "DELETED"
)

func (*WorkspaceState) ToProto

ToProto converts a WorkspaceState to a proto workspacev1.Workspace state.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL