db

package

v0.0.0-...-3511abf Latest Latest Go to latest Published: Nov 2, 2023 License: Apache-2.0 Imports: 49 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/determined-ai/determined

Documentation ¶

Index ¶

Constants
Variables
func ActiveLogPolicies(ctx context.Context, id int) (expconf.LogPoliciesConfig, error)
func AddAllocationExitStatus(ctx context.Context, a *model.Allocation) error
func AddCheckpointMetadata(ctx context.Context, m *model.CheckpointV2) error
func AddExperiment(ctx context.Context, experiment *model.Experiment, ...) (err error)
func AddExperimentTx(ctx context.Context, idb bun.IDB, experiment *model.Experiment, ...) (err error)
func AddProjectHyperparameters(ctx context.Context, idb bun.IDB, projectID int32, experimentIDs []int32) error
func AddRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, ...) error
func AddTask(ctx context.Context, t *model.Task) error
func AddTaskTx(ctx context.Context, idb bun.IDB, t *model.Task) error
func AddTrial(ctx context.Context, trial *model.Trial, taskID model.TaskID) error
func ApplyDoubleFieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.DoubleFieldFilter) (*bun.SelectQuery, error)
func ApplyInt32FieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.Int32FieldFilter) (*bun.SelectQuery, error)
func ApplyPolymorphicFilter(q *bun.SelectQuery, column string, filter *commonv1.PolymorphicFilter) (*bun.SelectQuery, error)
func ApplyTimestampFieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.TimestampFieldFilter) (*bun.SelectQuery, error)
func Bun() *bun.DB
func BunSelectMetricGroupNames() *bun.SelectQuery
func BunSelectMetricsQuery(mGroup model.MetricGroup, inclArchived bool) *bun.SelectQuery
func CheckIfRPUnbound(poolName string) error
func EndAgentStats(a *model.AgentStats) error
func ExperimentBestSearcherValidation(ctx context.Context, id int) (float32, error)
func ExperimentByExternalIDTx(ctx context.Context, idb bun.IDB, externalExperimentID string) (*model.Experiment, error)
func ExperimentByID(ctx context.Context, expID int) (*model.Experiment, error)
func ExperimentByTaskID(ctx context.Context, taskID model.TaskID) (*model.Experiment, error)
func ExperimentByTrialID(ctx context.Context, trialID int) (*model.Experiment, error)
func ExperimentsTrialAndTaskIDs(ctx context.Context, idb bun.IDB, expIDs []int) ([]int, []model.TaskID, error)
func GetCommandOwnerID(ctx context.Context, taskID model.TaskID) (model.UserID, error)
func GetDefaultPoolsForWorkspace(ctx context.Context, workspaceID int) (computePool, auxPool string, err error)
func GetMetrics(ctx context.Context, trialID, afterBatches, limit int, mGroup *string) ([]*trialv1.MetricsReport, error)
func GetModelIDsAssociatedWithCheckpoint(ctx context.Context, ckptUUID uuid.UUID) ([]int32, error)
func GetNonTerminalExperimentCount(ctx context.Context, experimentIDs []int32) (count int, err error)
func GetTokenKeys() *model.AuthTokenKeypair
func GetUnboundRPs(ctx context.Context, resourcePools []string) ([]string, error)
func HackAddUser(ctx context.Context, user *model.User) (model.UserID, error)
func InitAuthKeys() error
func MarkCheckpointsDeleted(ctx context.Context, deleteCheckpoints []uuid.UUID) error
func MatchSentinelError(err error) error
func MetricBatches(experimentID int, metricName string, startTime time.Time, ...) (batches []int32, endTime time.Time, err error)
func MustHaveAffectedRows(result sql.Result, err error) error
func NonExperimentTasksContextDirectory(ctx context.Context, tID model.TaskID) ([]byte, error)
func OrderByToSQL(order apiv1.OrderBy) string
func OverwriteRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, ...) error
func PaginateBun(query *bun.SelectQuery, orderColumn string, direction SortDirection, ...) *bun.SelectQuery
func PaginateBunUnsafe(query *bun.SelectQuery, orderColumn string, direction SortDirection, ...) *bun.SelectQuery
func ParseMapToProto(dest map[string]interface{}, val interface{}) error
func ReadRPsAvailableToWorkspace(ctx context.Context, workspaceID int32, offset int32, limit int32, ...) ([]string, *apiv1.Pagination, error)
func RecordTaskEndStatsBun(stats *model.TaskStats) error
func RecordTaskStatsBun(stats *model.TaskStats) error
func RegisterModel(m interface{})
func RemoveProjectHyperparameters(ctx context.Context, idb bun.IDB, experimentIDs []int32) error
func RemoveRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string) error
func SetClause(fields []string) string
func SetTokenKeys(tk *model.AuthTokenKeypair)
func TaskByID(ctx context.Context, tID model.TaskID) (*model.Task, error)
func TopTrialsByMetric(ctx context.Context, experimentID int, maxTrials int, metric string, ...) ([]int32, error)
func TrialByExperimentAndRequestID(ctx context.Context, experimentID int, requestID model.RequestID) (*model.Trial, error)
func TrialByID(ctx context.Context, id int) (*model.Trial, error)
func TrialByTaskID(ctx context.Context, taskID model.TaskID) (*model.Trial, error)
func TrialTaskIDsByTrialID(ctx context.Context, trialID int) ([]*model.TrialTaskID, error)
func UpdateAllocationPorts(a model.Allocation) error
func UpdateCheckpointSizeTx(ctx context.Context, idb bun.IDB, checkpoints []uuid.UUID) error
func UpsertTrialByExternalIDTx(ctx context.Context, tx bun.Tx, trial *model.Trial, taskID model.TaskID) error
func ValidateDoubleFieldFilterComparison(filter *commonv1.DoubleFieldFilter) error
func ValidateInt32FieldFilterComparison(filter *commonv1.Int32FieldFilter) error
func ValidatePolymorphicFilter(filter *commonv1.PolymorphicFilter) error
func ValidateTimeStampFieldFilterComparison(filter *commonv1.TimestampFieldFilter) error
type DB
type ExperimentCheckpointGrouping
type FilterComparison
type MetricMeasurements
type MetricPartitionType
type PgDB
- func Connect(opts *config.DBConfig) (*PgDB, error)
- func ConnectPostgres(url string) (*PgDB, error)
- func Setup(opts *config.DBConfig) (*PgDB, error)
- func SingleDB() *PgDB
- func (db *PgDB) ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error)
- func (db *PgDB) AddAllocation(a *model.Allocation) error
- func (db *PgDB) AddExperiment(experiment *model.Experiment, activeConfig expconf.ExperimentConfig) (err error)
- func (db *PgDB) AddJob(j *model.Job) error
- func (db *PgDB) AddTask(t *model.Task) error
- func (db *PgDB) AddTaskLogs(logs []*model.TaskLog) error
- func (db *PgDB) AddTrainingMetrics(ctx context.Context, m *trialv1.TrialMetrics) error
- func (db *PgDB) AddTrialMetrics(ctx context.Context, m *trialv1.TrialMetrics, mGroup model.MetricGroup) error
- func (db *PgDB) AddValidationMetrics(ctx context.Context, m *trialv1.TrialMetrics) error
- func (db *PgDB) AllocationByID(aID model.AllocationID) (*model.Allocation, error)
- func (db *PgDB) CheckExperimentExists(id int) (bool, error)
- func (db *PgDB) CheckTaskExists(id model.TaskID) (bool, error)
- func (db *PgDB) CheckTrialExists(id int) (bool, error)
- func (db *PgDB) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error)
- func (db *PgDB) CheckpointByUUID(id uuid.UUID) (*model.Checkpoint, error)
- func (db *PgDB) CheckpointByUUIDs(ckptUUIDs []uuid.UUID) ([]model.Checkpoint, error)
- func (db *PgDB) Close() error
- func (db *PgDB) CloseOpenAllocations(exclude []model.AllocationID) error
- func (db *PgDB) CompleteAllocation(a *model.Allocation) error
- func (db *PgDB) CompleteAllocationTelemetry(aID model.AllocationID) ([]byte, error)
- func (db *PgDB) CompleteTask(tID model.TaskID, endTime time.Time) error
- func (db *PgDB) DeleteAllocationSession(allocationID model.AllocationID) error
- func (db *PgDB) DeleteExperiments(ctx context.Context, ids []int) error
- func (db *PgDB) DeleteSnapshotsForExperiment(experimentID int) error
- func (db *PgDB) DeleteSnapshotsForExperiments(experimentIDs []int) func(ctx context.Context, tx *bun.Tx) error
- func (db *PgDB) DeleteSnapshotsForTerminalExperiments() error
- func (db *PgDB) DeleteTaskLogs(ids []model.TaskID) error
- func (db *PgDB) DeleteTrialLogs(ids []int) error
- func (db *PgDB) EndAllAgentStats() error
- func (db *PgDB) EndAllInstanceStats() error
- func (db *PgDB) EndAllTaskStats() error
- func (db *PgDB) EndInstanceStats(a *model.InstanceStats) error
- func (db *PgDB) ExperimentCheckpointsToGCRaw(id int, experimentBest, trialBest, trialLatest int) ([]uuid.UUID, error)
- func (db *PgDB) ExperimentConfigRaw(id int) ([]byte, error)
- func (db *PgDB) ExperimentHasCheckpointsInRegistry(id int) (bool, error)
- func (db *PgDB) ExperimentIDByTrialID(trialID int) (int, error)
- func (db *PgDB) ExperimentLabelUsage(projectID int32) (labelUsage map[string]int, err error)
- func (db *PgDB) ExperimentModelDefinitionRaw(id int) ([]byte, error)
- func (db *PgDB) ExperimentNumSteps(id int) (int64, error)
- func (db *PgDB) ExperimentNumTrials(id int) (int64, error)
- func (db *PgDB) ExperimentSnapshot(experimentID int) ([]byte, int, error)
- func (db *PgDB) ExperimentTotalStepTime(id int) (float64, error)
- func (db *PgDB) ExperimentTrialIDs(expID int) ([]int, error)
- func (db *PgDB) FailDeletingExperiment() error
- func (db *PgDB) GetExperimentStatus(experimentID int) (state model.State, progress float64, err error)
- func (db *PgDB) GetOrCreateClusterID(telemetryID string) (string, error)
- func (db *PgDB) GetRegisteredCheckpoints(checkpoints []uuid.UUID) (map[uuid.UUID]bool, error)
- func (db *PgDB) GetTrialProfilerMetricsBatches(labelsJSON []byte, offset, limit int) (model.TrialProfilerMetricsBatchBatch, error)
- func (db *PgDB) GroupCheckpointUUIDsByExperimentID(checkpoints []uuid.UUID) ([]*ExperimentCheckpointGrouping, error)
- func (db *PgDB) InsertTrialProfilerMetricsBatch(values []float32, batches []int32, timestamps []time.Time, labels []byte) error
- func (db *PgDB) JobByID(jID model.JobID) (*model.Job, error)
- func (db *PgDB) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error)
- func (db *PgDB) LegacyExperimentConfigByID(id int) (expconf.LegacyConfig, error)
- func (db *PgDB) MaxTerminationDelay() time.Duration
- func (db *PgDB) MetricNames(ctx context.Context, experimentIDs []int) (map[model.MetricGroup][]string, error)
- func (db *PgDB) Migrate(migrationURL string, actions []string) error
- func (db *PgDB) NonTerminalExperiments() ([]*model.Experiment, error)
- func (db *PgDB) PeriodicTelemetryInfo() ([]byte, error)
- func (db *PgDB) ProjectByName(workspaceName string, projectName string) (int, error)
- func (db *PgDB) ProjectExperiments(id int) (experiments []*model.Experiment, err error)
- func (db *PgDB) Query(queryName string, v interface{}, params ...interface{}) error
- func (db *PgDB) QueryF(queryName string, args []interface{}, v interface{}, params ...interface{}) error
- func (db *PgDB) QueryProto(queryName string, v interface{}, args ...interface{}) error
- func (db *PgDB) QueryProtof(queryName string, args []interface{}, v interface{}, params ...interface{}) error
- func (db *PgDB) RawQuery(queryName string, params ...interface{}) ([]byte, error)
- func (db *PgDB) RecordAgentStats(a *model.AgentStats) error
- func (db *PgDB) RecordInstanceStats(a *model.InstanceStats) error
- func (db *PgDB) RecordTaskEndStats(stats *model.TaskStats) error
- func (db *PgDB) RecordTaskStats(stats *model.TaskStats) error
- func (db *PgDB) SaveExperimentArchiveStatus(experiment *model.Experiment) error
- func (db *PgDB) SaveExperimentConfig(id int, config expconf.ExperimentConfig) error
- func (db *PgDB) SaveExperimentProgress(id int, progress *float64) error
- func (db *PgDB) SaveExperimentState(experiment *model.Experiment) error
- func (db *PgDB) SaveSnapshot(experimentID int, version int, experimentSnapshot []byte) error
- func (db *PgDB) StartAllocationSession(allocationID model.AllocationID, owner *model.User) (string, error)
- func (db *PgDB) TaskLogs(taskID model.TaskID, limit int, fs []api.Filter, order apiv1.OrderBy, ...) ([]*model.TaskLog, interface{}, error)
- func (db *PgDB) TaskLogsCount(taskID model.TaskID, fs []api.Filter) (int, error)
- func (db *PgDB) TaskLogsFields(taskID model.TaskID) (*apiv1.TaskLogsFieldsResponse, error)
- func (db *PgDB) TerminateExperimentInRestart(id int, state model.State) error
- func (db *PgDB) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string, smallerIsBetter bool) (trials []int32, err error)
- func (db *PgDB) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) (batches []int32, endTime time.Time, err error)
- func (db *PgDB) TrialExperimentAndRequestID(id int) (int, model.RequestID, error)
- func (db *PgDB) TrialLogs(trialID, limit int, fs []api.Filter, order apiv1.OrderBy, ...) ([]*model.TrialLog, interface{}, error)
- func (db *PgDB) TrialLogsCount(trialID int, fs []api.Filter) (int, error)
- func (db *PgDB) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error)
- func (db *PgDB) TrialRunIDAndRestarts(trialID int) (int, int, error)
- func (db *PgDB) TrialState(trialID int) (model.State, error)
- func (db *PgDB) TrialStatus(trialID int) (model.State, *time.Time, error)
- func (db *PgDB) TrialsSnapshot(experimentID int, minBatches int, maxBatches int, metricName string, ...) (trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error)
- func (db *PgDB) TrySaveExperimentState(experiment *model.Experiment) error
- func (db *PgDB) UpdateAllocationProxyAddress(a model.Allocation) error
- func (db *PgDB) UpdateAllocationStartTime(a model.Allocation) error
- func (db *PgDB) UpdateAllocationState(a model.Allocation) error
- func (db *PgDB) UpdateClusterHeartBeat(currentClusterHeartbeat time.Time) error
- func (db *PgDB) UpdateJobPosition(jobID model.JobID, position decimal.Decimal) error
- func (db *PgDB) UpdateResourceAllocationAggregation() error
- func (db *PgDB) UpdateTrial(id int, newState model.State) error
- func (db *PgDB) UpdateTrialRestarts(id, restartCount int) error
- func (db *PgDB) UpdateTrialRunID(id, runID int) error
- func (db *PgDB) UpdateTrialRunnerMetadata(id int, md *trialv1.TrialRunnerMetadata) error
- func (db *PgDB) UpdateTrialRunnerState(id int, state string) error
- func (db *PgDB) ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error)
- func (db *PgDB) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) (batches []int32, endTime time.Time, err error)
type RPWorkspaceBinding
- func GetAllBindings(ctx context.Context) ([]*RPWorkspaceBinding, error)
- func ReadWorkspacesBoundToRP(ctx context.Context, poolName string, offset, limit int32, ...) ([]*RPWorkspaceBinding, *apiv1.Pagination, error)
type SortDirection
type StaticQueryMap
- func (q *StaticQueryMap) GetOrLoad(queryName string) string
type TaskMetadata
- func IdentifyTask(ctx context.Context, taskID model.TaskID) (TaskMetadata, error)

Constants ¶

View Source

// Postgres error codes (SQLSTATE values) for the constraint violations this package
// distinguishes. They are plain strings so they can be compared directly against the
// code reported on a Postgres error.
const (
	// CodeUniqueViolation is the error code that Postgres uses to indicate that an attempted
	// insert/update violates a uniqueness constraint.  Obtained from:
	// https://www.postgresql.org/docs/10/errcodes-appendix.html
	CodeUniqueViolation = "23505"
	// CodeForeignKeyViolation is the error code that Postgres uses to indicate that an attempted
	// insert/update violates a foreign key constraint.  Obtained from:
	// https://www.postgresql.org/docs/10/errcodes-appendix.html
	CodeForeignKeyViolation = "23503"
)

View Source

// String spellings of non-finite floats as stored in JSONB, followed by the names of the
// summary metric types.
const (
	// InfPostgresString is how we store infinity in JSONB in postgres.
	InfPostgresString = "Infinity"
	// NegInfPostgresString is how we store -infinity in JSONB in postgres.
	NegInfPostgresString = "-Infinity"
	// NaNPostgresString is how we store NaN in JSONB in postgres.
	NaNPostgresString = "NaN"

	// MetricTypeString is the summary metric type for string or mixed types.
	MetricTypeString = "string"
	// MetricTypeNumber is the summary metric type for floats or ints.
	MetricTypeNumber = "number"
	// MetricTypeBool is the summary metric type for boolean.
	MetricTypeBool = "boolean"
	// MetricTypeDate is the summary metric type for date metrics.
	MetricTypeDate = "date"
	// MetricTypeObject is the summary metric type for object types.
	MetricTypeObject = "object"
	// MetricTypeArray is the summary metric type for array types.
	MetricTypeArray = "array"
	// MetricTypeNull is the summary metric type for null values.
	MetricTypeNull = "null"
)

Variables ¶

View Source

// Sentinel errors exposed by this package. Callers should test for them with errors.Is
// rather than by comparing message text.
var (
	// ErrNotFound is returned if nothing is found.
	ErrNotFound = errors.New("not found")

	// ErrTooManyRowsAffected is returned if too many rows are affected.
	ErrTooManyRowsAffected = errors.New("too many rows are affected")

	// ErrDuplicateRecord is returned when trying to create a row that already exists.
	ErrDuplicateRecord = errors.New("row already exists")

	// ErrInvalidInput is returned when the data passed to a function is invalid for semantic or
	// syntactic reasons.
	ErrInvalidInput = errors.New("invalid input")
)

Functions ¶

func ActiveLogPolicies ¶

func ActiveLogPolicies(
	ctx context.Context, id int,
) (expconf.LogPoliciesConfig, error)

ActiveLogPolicies returns log pattern policies for an experiment ID. This should only be called on a running experiment.

func AddAllocationExitStatus ¶

func AddAllocationExitStatus(ctx context.Context, a *model.Allocation) error

AddAllocationExitStatus adds the allocation exit status to the allocations table.

func AddCheckpointMetadata ¶

func AddCheckpointMetadata(ctx context.Context, m *model.CheckpointV2) error

AddCheckpointMetadata persists metadata for a completed checkpoint to the database.

func AddExperiment ¶

func AddExperiment(
	ctx context.Context, experiment *model.Experiment, activeConfig expconf.ExperimentConfig,
) (err error)

AddExperiment adds the experiment to the database and sets its ID.

func AddExperimentTx ¶

func AddExperimentTx(
	ctx context.Context, idb bun.IDB,
	experiment *model.Experiment, activeConfig expconf.ExperimentConfig,
	upsert bool,
) (err error)

AddExperimentTx adds the experiment to the database and sets its ID.

func AddProjectHyperparameters ¶

func AddProjectHyperparameters(
	ctx context.Context, idb bun.IDB, projectID int32, experimentIDs []int32,
) error

AddProjectHyperparameters takes a list of experiment IDs and combines their hyperparameters with the existing ones of the given project.

func AddRPWorkspaceBindings ¶

func AddRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string,
	resourcePools []config.ResourcePoolConfig,
) error

AddRPWorkspaceBindings inserts new bindings between workspaceIds and poolName.

func AddTask ¶

func AddTask(ctx context.Context, t *model.Task) error

AddTask UPSERTs the existence of a task.

func AddTaskTx ¶

func AddTaskTx(ctx context.Context, idb bun.IDB, t *model.Task) error

AddTaskTx UPSERTs the existence of a task in a tx.

func AddTrial ¶

func AddTrial(ctx context.Context, trial *model.Trial, taskID model.TaskID) error

AddTrial adds the trial to the database and sets its ID.

func ApplyDoubleFieldFilter ¶

func ApplyDoubleFieldFilter[T string | schema.Ident](
	q *bun.SelectQuery,
	column T,
	filter *commonv1.DoubleFieldFilter,
) (*bun.SelectQuery, error)

ApplyDoubleFieldFilter applies filtering on a bun query for double field.

func ApplyInt32FieldFilter ¶

func ApplyInt32FieldFilter[T string | schema.Ident](
	q *bun.SelectQuery,
	column T,
	filter *commonv1.Int32FieldFilter,
) (*bun.SelectQuery, error)

ApplyInt32FieldFilter applies filtering on a bun query for int32 field.

func ApplyPolymorphicFilter ¶

func ApplyPolymorphicFilter(
	q *bun.SelectQuery,
	column string,
	filter *commonv1.PolymorphicFilter,
) (*bun.SelectQuery, error)

ApplyPolymorphicFilter applies filtering on a bun query for a polymorphic filter.

func ApplyTimestampFieldFilter ¶

func ApplyTimestampFieldFilter[T string | schema.Ident](
	q *bun.SelectQuery,
	column T,
	filter *commonv1.TimestampFieldFilter,
) (*bun.SelectQuery, error)

ApplyTimestampFieldFilter applies filtering on a bun query for timestamp field.

func Bun ¶

func Bun() *bun.DB

Bun returns the singleton database connection through the bun library. bun is the database library we have decided to use for new code in the future due to its superior composability over bare SQL, and its superior flexibility over e.g. gorm. New code should not use the old bare SQL tooling.

func BunSelectMetricGroupNames ¶

func BunSelectMetricGroupNames() *bun.SelectQuery

BunSelectMetricGroupNames sets up a bun select query for getting all the metric group and names.

func BunSelectMetricsQuery ¶

func BunSelectMetricsQuery(mGroup model.MetricGroup, inclArchived bool) *bun.SelectQuery

BunSelectMetricsQuery sets up a bun select query based on the new metrics table, simplifying some weirdness we set up for pg10 support.

func CheckIfRPUnbound ¶

func CheckIfRPUnbound(poolName string) error

CheckIfRPUnbound checks to make sure the specified resource pool is not bound to any workspace and returns an error if it is.

func EndAgentStats ¶

func EndAgentStats(a *model.AgentStats) error

EndAgentStats updates the end time of an agent's stats record.

func ExperimentBestSearcherValidation ¶

func ExperimentBestSearcherValidation(ctx context.Context, id int) (float32, error)

ExperimentBestSearcherValidation returns the best searcher validation for an experiment.

func ExperimentByExternalIDTx ¶

func ExperimentByExternalIDTx(ctx context.Context, idb bun.IDB, externalExperimentID string) (
	*model.Experiment, error,
)

ExperimentByExternalIDTx looks up an experiment by a given external experiment id.

func ExperimentByID ¶

func ExperimentByID(ctx context.Context, expID int) (*model.Experiment, error)

ExperimentByID looks up an experiment by ID in a database, returning an error if none exists.

func ExperimentByTaskID ¶

func ExperimentByTaskID(
	ctx context.Context, taskID model.TaskID,
) (*model.Experiment, error)

ExperimentByTaskID looks up an experiment by a given taskID, returning an error if none exists.

func ExperimentByTrialID ¶

func ExperimentByTrialID(ctx context.Context, trialID int) (*model.Experiment, error)

ExperimentByTrialID looks up an experiment by a given trialID, returning an error if none exists.

func ExperimentsTrialAndTaskIDs ¶

func ExperimentsTrialAndTaskIDs(ctx context.Context, idb bun.IDB, expIDs []int) ([]int,
	[]model.TaskID, error,
)

ExperimentsTrialAndTaskIDs returns the trial and task IDs for one or more experiments.

func GetCommandOwnerID ¶

func GetCommandOwnerID(ctx context.Context, taskID model.TaskID) (model.UserID, error)

GetCommandOwnerID gets a command's ownerID from a taskID. Uses persisted command state. Returns db.ErrNotFound if a command with given taskID does not exist.

func GetDefaultPoolsForWorkspace ¶

func GetDefaultPoolsForWorkspace(ctx context.Context, workspaceID int,
) (computePool, auxPool string, err error)

GetDefaultPoolsForWorkspace returns the default compute and aux pools for a workspace.

func GetMetrics ¶

func GetMetrics(ctx context.Context, trialID, afterBatches, limit int,
	mGroup *string,
) ([]*trialv1.MetricsReport, error)

GetMetrics returns a subset of the metrics of the requested type for the given trial ID.

func GetModelIDsAssociatedWithCheckpoint ¶

func GetModelIDsAssociatedWithCheckpoint(ctx context.Context, ckptUUID uuid.UUID) ([]int32, error)

GetModelIDsAssociatedWithCheckpoint returns the model ids associated with a checkpoint, returning nil if error.

func GetNonTerminalExperimentCount ¶

func GetNonTerminalExperimentCount(ctx context.Context,
	experimentIDs []int32,
) (count int, err error)

GetNonTerminalExperimentCount returns the number of non terminal experiments.

func GetTokenKeys ¶

func GetTokenKeys() *model.AuthTokenKeypair

GetTokenKeys returns tokenKeys.

func GetUnboundRPs ¶

func GetUnboundRPs(
	ctx context.Context, resourcePools []string,
) ([]string, error)

GetUnboundRPs gets the unbound resource pools.

func HackAddUser ¶

func HackAddUser(
	ctx context.Context,
	user *model.User,
) (model.UserID, error)

HackAddUser is used to prevent an import cycle in postgres_test_utils & postgres_scim (EE).

func InitAuthKeys ¶

func InitAuthKeys() error

InitAuthKeys initializes auth token keypairs.

func MarkCheckpointsDeleted ¶

func MarkCheckpointsDeleted(ctx context.Context, deleteCheckpoints []uuid.UUID) error

MarkCheckpointsDeleted updates the provided delete checkpoints to DELETED state.

func MatchSentinelError ¶

func MatchSentinelError(err error) error

MatchSentinelError checks if the error belongs to specific families of errors and ensures that the returned error has the proper type and text.

func MetricBatches ¶

func MetricBatches(
	experimentID int, metricName string, startTime time.Time, metricGroup model.MetricGroup,
) (
	batches []int32, endTime time.Time, err error,
)

MetricBatches returns the milestones (in batches processed) at which a specific metric was recorded.

func MustHaveAffectedRows ¶

func MustHaveAffectedRows(result sql.Result, err error) error

MustHaveAffectedRows checks if bun has affected rows in a table or not. Returns ErrNotFound if no rows were affected and returns the provided error otherwise.

func NonExperimentTasksContextDirectory ¶

func NonExperimentTasksContextDirectory(ctx context.Context, tID model.TaskID) ([]byte, error)

NonExperimentTasksContextDirectory returns a non-experiment task's context directory.

func OrderByToSQL ¶

func OrderByToSQL(order apiv1.OrderBy) string

OrderByToSQL computes the SQL keyword corresponding to the given ordering type.

func OverwriteRPWorkspaceBindings ¶

func OverwriteRPWorkspaceBindings(ctx context.Context,
	workspaceIds []int32, poolName string, resourcePools []config.ResourcePoolConfig,
) error

OverwriteRPWorkspaceBindings overwrites the bindings between workspaceIds and poolName.

func PaginateBun ¶

func PaginateBun(
	query *bun.SelectQuery,
	orderColumn string,
	direction SortDirection,
	offset,
	limit int,
) *bun.SelectQuery

PaginateBun adds sorting and pagination to the provided bun query, defaulting to certain values if they are not specified. By default, we order by ascending on the id column, with no limit.

func PaginateBunUnsafe ¶

func PaginateBunUnsafe(
	query *bun.SelectQuery,
	orderColumn string,
	direction SortDirection,
	offset,
	limit int,
) *bun.SelectQuery

PaginateBunUnsafe is a version of PaginateBun that allows an arbitrary order expression like `metrics->>'loss'`.

func ParseMapToProto ¶

func ParseMapToProto(dest map[string]interface{}, val interface{}) error

ParseMapToProto converts sqlx/bun-scanned map to a proto object.

func ReadRPsAvailableToWorkspace ¶

func ReadRPsAvailableToWorkspace(
	ctx context.Context,
	workspaceID int32,
	offset int32,
	limit int32,
	resourcePoolConfig []config.ResourcePoolConfig,
) ([]string, *apiv1.Pagination, error)

ReadRPsAvailableToWorkspace returns the names of resource pools available to a workspace.

func RecordTaskEndStatsBun ¶

func RecordTaskEndStatsBun(stats *model.TaskStats) error

RecordTaskEndStatsBun records end stats for tasks with bun.

func RecordTaskStatsBun ¶

func RecordTaskStatsBun(stats *model.TaskStats) error

RecordTaskStatsBun records stats for tasks with bun.

func RegisterModel ¶

func RegisterModel(m interface{})

RegisterModel registers a model in Bun or, if theOneBun is not yet initialized, sets it up to be registered once initialized. It's generally best to pass a nil pointer of your model's type as argument m.

func RemoveProjectHyperparameters ¶

func RemoveProjectHyperparameters(ctx context.Context, idb bun.IDB, experimentIDs []int32) error

RemoveProjectHyperparameters takes a list of experiment IDs and recalculates the hyperparameters of their respective projects.

func RemoveRPWorkspaceBindings ¶

func RemoveRPWorkspaceBindings(ctx context.Context,
	workspaceIds []int32, poolName string,
) error

RemoveRPWorkspaceBindings removes the bindings between workspaceIds and poolName.

func SetClause ¶

func SetClause(fields []string) string

SetClause returns a SET subquery.

func SetTokenKeys ¶

func SetTokenKeys(tk *model.AuthTokenKeypair)

SetTokenKeys sets tokenKeys.

func TaskByID ¶

func TaskByID(ctx context.Context, tID model.TaskID) (*model.Task, error)

TaskByID returns a task by its ID.

func TopTrialsByMetric ¶

func TopTrialsByMetric(
	ctx context.Context, experimentID int, maxTrials int, metric string, smallerIsBetter bool,
) ([]int32, error)

TopTrialsByMetric chooses the subset of trials from an experiment that recorded the best values for the specified metric at any point during the trial.

func TrialByExperimentAndRequestID ¶

func TrialByExperimentAndRequestID(
	ctx context.Context, experimentID int, requestID model.RequestID,
) (*model.Trial, error)

TrialByExperimentAndRequestID looks up a trial, returning an error if none exists.

func TrialByID ¶

func TrialByID(ctx context.Context, id int) (*model.Trial, error)

TrialByID looks up a trial by ID, returning an error if none exists.

func TrialByTaskID ¶

func TrialByTaskID(ctx context.Context, taskID model.TaskID) (*model.Trial, error)

TrialByTaskID looks up a trial by taskID, returning an error if none exists. This errors if you call it with a non-trial taskID.

func TrialTaskIDsByTrialID ¶

func TrialTaskIDsByTrialID(ctx context.Context, trialID int) ([]*model.TrialTaskID, error)

TrialTaskIDsByTrialID returns the trial/task ID pairs for the given trial ID, sorted by task run ID.

func UpdateAllocationPorts ¶

func UpdateAllocationPorts(a model.Allocation) error

UpdateAllocationPorts stores the latest task state and readiness.

func UpdateCheckpointSizeTx ¶

func UpdateCheckpointSizeTx(ctx context.Context, idb bun.IDB, checkpoints []uuid.UUID) error

UpdateCheckpointSizeTx updates checkpoint size and count to experiment and trial.

func UpsertTrialByExternalIDTx ¶

func UpsertTrialByExternalIDTx(
	ctx context.Context, tx bun.Tx, trial *model.Trial, taskID model.TaskID,
) error

UpsertTrialByExternalIDTx UPSERTs the trial with respect to the external_trial_id.

func ValidateDoubleFieldFilterComparison ¶

func ValidateDoubleFieldFilterComparison(
	filter *commonv1.DoubleFieldFilter,
) error

ValidateDoubleFieldFilterComparison validates the min and max values in the range.

func ValidateInt32FieldFilterComparison ¶

func ValidateInt32FieldFilterComparison(
	filter *commonv1.Int32FieldFilter,
) error

ValidateInt32FieldFilterComparison validates the min and max values in the range.

func ValidatePolymorphicFilter ¶

func ValidatePolymorphicFilter(
	filter *commonv1.PolymorphicFilter,
) error

ValidatePolymorphicFilter ensures that a Polymorphic filter contains at most one valid range.

func ValidateTimeStampFieldFilterComparison ¶

func ValidateTimeStampFieldFilterComparison(
	filter *commonv1.TimestampFieldFilter,
) error

ValidateTimeStampFieldFilterComparison validates the min and max timestamps in the range.

Types ¶

type DB ¶

type DB interface {
	Migrate(migrationURL string, actions []string) error
	Close() error
	GetOrCreateClusterID(telemetryID string) (string, error)
	CheckExperimentExists(id int) (bool, error)
	CheckTrialExists(id int) (bool, error)
	TrialExperimentAndRequestID(id int) (int, model.RequestID, error)
	AddExperiment(experiment *model.Experiment, activeConfig expconf.ExperimentConfig) error
	ExperimentIDByTrialID(trialID int) (int, error)
	NonTerminalExperiments() ([]*model.Experiment, error)
	TerminateExperimentInRestart(id int, state model.State) error
	SaveExperimentConfig(id int, config expconf.ExperimentConfig) error
	SaveExperimentState(experiment *model.Experiment) error
	SaveExperimentArchiveStatus(experiment *model.Experiment) error
	DeleteExperiments(ctx context.Context, ids []int) error
	ExperimentHasCheckpointsInRegistry(id int) (bool, error)
	SaveExperimentProgress(id int, progress *float64) error
	ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error)
	ExperimentTotalStepTime(id int) (float64, error)
	ExperimentNumTrials(id int) (int64, error)
	ExperimentTrialIDs(expID int) ([]int, error)
	ExperimentNumSteps(id int) (int64, error)
	ExperimentModelDefinitionRaw(id int) ([]byte, error)
	ExperimentCheckpointsToGCRaw(
		id int,
		experimentBest, trialBest, trialLatest int,
	) ([]uuid.UUID, error)
	AddTask(t *model.Task) error
	UpdateTrial(id int, newState model.State) error
	UpdateTrialRunnerState(id int, state string) error
	UpdateTrialRunnerMetadata(id int, md *trialv1.TrialRunnerMetadata) error
	AddAllocation(a *model.Allocation) error
	CompleteAllocation(a *model.Allocation) error
	CompleteAllocationTelemetry(aID model.AllocationID) ([]byte, error)
	TrialRunIDAndRestarts(trialID int) (int, int, error)
	UpdateTrialRunID(id, runID int) error
	UpdateTrialRestarts(id, restarts int) error
	AddTrainingMetrics(ctx context.Context, m *trialv1.TrialMetrics) error
	AddValidationMetrics(
		ctx context.Context, m *trialv1.TrialMetrics,
	) error
	ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error)
	CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error)
	CheckpointByUUID(id uuid.UUID) (*model.Checkpoint, error)
	LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error)
	PeriodicTelemetryInfo() ([]byte, error)
	TrialState(trialID int) (model.State, error)
	TrialStatus(trialID int) (model.State, *time.Time, error)
	Query(queryName string, v interface{}, params ...interface{}) error
	QueryF(
		queryName string, args []interface{}, v interface{}, params ...interface{}) error
	RawQuery(queryName string, params ...interface{}) ([]byte, error)
	UpdateResourceAllocationAggregation() error
	InsertTrialProfilerMetricsBatch(
		values []float32, batches []int32, timestamps []time.Time, labels []byte,
	) error
	GetTrialProfilerMetricsBatches(
		labelsJSON []byte, offset, limit int,
	) (model.TrialProfilerMetricsBatchBatch, error)
	ProjectByName(workspaceName string, projectName string) (projectID int, err error)
	ProjectExperiments(id int) (experiments []*model.Experiment, err error)
	ExperimentLabelUsage(projectID int32) (labelUsage map[string]int, err error)
	GetExperimentStatus(experimentID int) (state model.State, progress float64,
		err error)
	TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) (
		batches []int32, endTime time.Time, err error)
	ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) (
		batches []int32, endTime time.Time, err error)
	TrialsSnapshot(experimentID int, minBatches int, maxBatches int,
		metricName string, startTime time.Time, metricGroup model.MetricGroup) (
		trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error)
	TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string,
		smallerIsBetter bool) (trials []int32, err error)
	StartAllocationSession(allocationID model.AllocationID, owner *model.User) (string, error)
	DeleteAllocationSession(allocationID model.AllocationID) error
	UpdateAllocationState(allocation model.Allocation) error
	UpdateAllocationStartTime(allocation model.Allocation) error
	UpdateAllocationProxyAddress(allocation model.Allocation) error
	ExperimentSnapshot(experimentID int) ([]byte, int, error)
	SaveSnapshot(
		experimentID int, version int, experimentSnapshot []byte,
	) error
	DeleteSnapshotsForExperiment(experimentID int) error
	DeleteSnapshotsForExperiments(experimentIDs []int) func(ctx context.Context,
		tx *bun.Tx) error
	DeleteSnapshotsForTerminalExperiments() error
	QueryProto(queryName string, v interface{}, args ...interface{}) error
	QueryProtof(
		queryName string, args []interface{}, v interface{}, params ...interface{}) error
	TrialLogs(
		trialID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{},
	) ([]*model.TrialLog, interface{}, error)
	DeleteTrialLogs(ids []int) error
	TrialLogsCount(trialID int, fs []api.Filter) (int, error)
	TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error)
	RecordAgentStats(a *model.AgentStats) error
	EndAllAgentStats() error
	RecordInstanceStats(a *model.InstanceStats) error
	EndInstanceStats(a *model.InstanceStats) error
	EndAllInstanceStats() error
	EndAllTaskStats() error
	RecordTaskEndStats(stats *model.TaskStats) error
	RecordTaskStats(stats *model.TaskStats) error
	UpdateJobPosition(jobID model.JobID, position decimal.Decimal) error
}

DB is an interface for _all_ the functionality packed into the DB.

type ExperimentCheckpointGrouping ¶

type ExperimentCheckpointGrouping struct {
	ExperimentID       int
	CheckpointUUIDSStr string
}

ExperimentCheckpointGrouping represents a mapping of checkpoint uuids to experiment id.

type FilterComparison ¶

type FilterComparison[T any] struct {
	Gt  *T
	Gte *T
	Lt  *T
	Lte *T
}

FilterComparison makes you wish for properties in generic structs/interfaces.

type MetricMeasurements ¶

type MetricMeasurements struct {
	Values  map[string]interface{}
	Batches uint
	Time    time.Time
	Epoch   *float64 `json:"epoch,omitempty"`
	TrialID int32
}

MetricMeasurements represents a metric measured by all possible independent variables.

type MetricPartitionType ¶

type MetricPartitionType string

MetricPartitionType denotes what type the metric is. This is planned to be deprecated once we upgrade to pg11 and can use DEFAULT partitioning.

const (
	// TrainingMetric designates metrics from training steps.
	TrainingMetric MetricPartitionType = "TRAINING"
	// ValidationMetric designates metrics from validation steps.
	ValidationMetric MetricPartitionType = "VALIDATION"
	// GenericMetric designates metrics from other sources.
	GenericMetric MetricPartitionType = "GENERIC"
)

type PgDB ¶

type PgDB struct {
	// contains filtered or unexported fields
}

PgDB represents a Postgres database connection. The type definition is needed to define methods.

func Connect ¶

func Connect(opts *config.DBConfig) (*PgDB, error)

Connect connects to the database, but doesn't run migrations & inits.

func ConnectPostgres ¶

func ConnectPostgres(url string) (*PgDB, error)

ConnectPostgres connects to a Postgres database.

func Setup ¶

func Setup(opts *config.DBConfig) (*PgDB, error)

Setup connects to the database and runs any necessary migrations.

func SingleDB ¶

func SingleDB() *PgDB

SingleDB returns a singleton database client. Bun() should be preferred over this for all new queries.

func (*PgDB) ActiveExperimentConfig ¶

func (db *PgDB) ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error)

ActiveExperimentConfig returns the full config object for an experiment.

func (*PgDB) AddAllocation ¶

func (db *PgDB) AddAllocation(a *model.Allocation) error

AddAllocation upserts the existence of an allocation. Allocation IDs may conflict in the event the master restarts and the trial run ID increment is not persisted, but it is the same allocation so this is OK.

func (*PgDB) AddExperiment ¶

func (db *PgDB) AddExperiment(
	experiment *model.Experiment, activeConfig expconf.ExperimentConfig,
) (err error)

AddExperiment adds the experiment to the database and sets its ID.

TODO(ilia): deprecate and use module function instead.

func (*PgDB) AddJob ¶

func (db *PgDB) AddJob(j *model.Job) error

AddJob persists the existence of a job.

func (*PgDB) AddTask ¶

func (db *PgDB) AddTask(t *model.Task) error

AddTask UPSERTs the existence of a task.

TODO(ilia): deprecate and use module function instead.

func (*PgDB) AddTaskLogs ¶

func (db *PgDB) AddTaskLogs(logs []*model.TaskLog) error

AddTaskLogs adds a list of *model.TaskLog objects to the database with automatic IDs.

func (*PgDB) AddTrainingMetrics ¶

func (db *PgDB) AddTrainingMetrics(ctx context.Context, m *trialv1.TrialMetrics) error

AddTrainingMetrics [DEPRECATED] adds a completed step to the database with the given training metrics. If these training metrics occur before any others, a rollback is assumed and later training and validation metrics are cleaned up.

func (*PgDB) AddTrialMetrics ¶

func (db *PgDB) AddTrialMetrics(
	ctx context.Context, m *trialv1.TrialMetrics, mGroup model.MetricGroup,
) error

AddTrialMetrics persists the given trial metrics to the database.

func (*PgDB) AddValidationMetrics ¶

func (db *PgDB) AddValidationMetrics(
	ctx context.Context, m *trialv1.TrialMetrics,
) error

AddValidationMetrics [DEPRECATED] adds a completed validation to the database with the given validation metrics. If these validation metrics occur before any others, a rollback is assumed and later metrics are cleaned up from the database.

func (*PgDB) AllocationByID ¶

func (db *PgDB) AllocationByID(aID model.AllocationID) (*model.Allocation, error)

AllocationByID retrieves an allocation by its ID.

func (*PgDB) CheckExperimentExists ¶

func (db *PgDB) CheckExperimentExists(id int) (bool, error)

CheckExperimentExists checks if the experiment exists.

func (*PgDB) CheckTaskExists ¶

func (db *PgDB) CheckTaskExists(id model.TaskID) (bool, error)

CheckTaskExists checks if the task exists.

func (*PgDB) CheckTrialExists ¶

func (db *PgDB) CheckTrialExists(id int) (bool, error)

CheckTrialExists checks if the trial exists.

func (*PgDB) CheckpointByTotalBatches ¶

func (db *PgDB) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error)

CheckpointByTotalBatches looks up a checkpoint by trial and total batch, returning nil if none exists.

func (*PgDB) CheckpointByUUID ¶

func (db *PgDB) CheckpointByUUID(id uuid.UUID) (*model.Checkpoint, error)

CheckpointByUUID looks up a checkpoint by UUID, returning nil if none exists.

func (*PgDB) CheckpointByUUIDs ¶

func (db *PgDB) CheckpointByUUIDs(ckptUUIDs []uuid.UUID) ([]model.Checkpoint, error)

CheckpointByUUIDs looks up checkpoints by a list of UUIDs, returning nil on error.

func (*PgDB) Close ¶

func (db *PgDB) Close() error

Close closes the underlying pq connection.

func (*PgDB) CloseOpenAllocations ¶

func (db *PgDB) CloseOpenAllocations(exclude []model.AllocationID) error

CloseOpenAllocations finds all allocations that were open when the master crashed and adds an end time.

func (*PgDB) CompleteAllocation ¶

func (db *PgDB) CompleteAllocation(a *model.Allocation) error

CompleteAllocation persists the end of an allocation lifetime.

func (*PgDB) CompleteAllocationTelemetry ¶

func (db *PgDB) CompleteAllocationTelemetry(aID model.AllocationID) ([]byte, error)

CompleteAllocationTelemetry returns the analytics of an allocation for the telemetry.

func (*PgDB) CompleteTask ¶

func (db *PgDB) CompleteTask(tID model.TaskID, endTime time.Time) error

CompleteTask persists the completion of a task.

func (*PgDB) DeleteAllocationSession ¶

func (db *PgDB) DeleteAllocationSession(allocationID model.AllocationID) error

DeleteAllocationSession deletes the task session with the given AllocationID.

func (*PgDB) DeleteExperiments ¶

func (db *PgDB) DeleteExperiments(ctx context.Context, ids []int) error

DeleteExperiments deletes zero or more experiments.

func (*PgDB) DeleteSnapshotsForExperiment ¶

func (db *PgDB) DeleteSnapshotsForExperiment(experimentID int) error

DeleteSnapshotsForExperiment deletes all snapshots for one given experiment.

func (*PgDB) DeleteSnapshotsForExperiments ¶

func (db *PgDB) DeleteSnapshotsForExperiments(experimentIDs []int) func(ctx context.Context,
	tx *bun.Tx) error

DeleteSnapshotsForExperiments deletes all snapshots for multiple given experiments.

func (*PgDB) DeleteSnapshotsForTerminalExperiments ¶

func (db *PgDB) DeleteSnapshotsForTerminalExperiments() error

DeleteSnapshotsForTerminalExperiments deletes all snapshots for terminal state experiments from the database.

func (*PgDB) DeleteTaskLogs ¶

func (db *PgDB) DeleteTaskLogs(ids []model.TaskID) error

DeleteTaskLogs deletes the logs for the given tasks.

func (*PgDB) DeleteTrialLogs ¶

func (db *PgDB) DeleteTrialLogs(ids []int) error

DeleteTrialLogs deletes the logs for the given trial IDs.

func (*PgDB) EndAllAgentStats ¶

func (db *PgDB) EndAllAgentStats() error

EndAllAgentStats is called when the master starts, in case the master previously crashed. If the master stops, statistics would treat “live” agents as live until the master restarts.

func (*PgDB) EndAllInstanceStats ¶

func (db *PgDB) EndAllInstanceStats() error

EndAllInstanceStats is called when the master starts, in case the master previously crashed. If the master stops, statistics would treat “live” instances as live until the master restarts.

func (*PgDB) EndAllTaskStats ¶

func (db *PgDB) EndAllTaskStats() error

EndAllTaskStats is called when the master starts, in case the master previously crashed.

func (*PgDB) EndInstanceStats ¶

func (db *PgDB) EndInstanceStats(a *model.InstanceStats) error

EndInstanceStats updates the end time of an instance.

func (*PgDB) ExperimentCheckpointsToGCRaw ¶

func (db *PgDB) ExperimentCheckpointsToGCRaw(
	id int,
	experimentBest, trialBest, trialLatest int,
) ([]uuid.UUID, error)

ExperimentCheckpointsToGCRaw returns the UUIDs of the checkpoints that should be GCed according to the given GC policy parameters (experimentBest, trialBest, trialLatest).

func (*PgDB) ExperimentConfigRaw ¶

func (db *PgDB) ExperimentConfigRaw(id int) ([]byte, error)

ExperimentConfigRaw returns the full config object for an experiment as a JSON string.

func (*PgDB) ExperimentHasCheckpointsInRegistry ¶

func (db *PgDB) ExperimentHasCheckpointsInRegistry(id int) (bool, error)

ExperimentHasCheckpointsInRegistry checks if the experiment has any checkpoints in the registry.

func (*PgDB) ExperimentIDByTrialID ¶

func (db *PgDB) ExperimentIDByTrialID(trialID int) (int, error)

ExperimentIDByTrialID looks up an experiment ID by a trial ID.

func (*PgDB) ExperimentLabelUsage ¶

func (db *PgDB) ExperimentLabelUsage(projectID int32) (labelUsage map[string]int, err error)

ExperimentLabelUsage returns a flattened and deduplicated list of all the labels in use across all experiments.

func (*PgDB) ExperimentModelDefinitionRaw ¶

func (db *PgDB) ExperimentModelDefinitionRaw(id int) ([]byte, error)

ExperimentModelDefinitionRaw returns the zipped model definition for an experiment as a byte array.

func (*PgDB) ExperimentNumSteps ¶

func (db *PgDB) ExperimentNumSteps(id int) (int64, error)

ExperimentNumSteps returns the total number of steps for all trials of the experiment.

func (*PgDB) ExperimentNumTrials ¶

func (db *PgDB) ExperimentNumTrials(id int) (int64, error)

ExperimentNumTrials returns the total number of trials for the experiment.

func (*PgDB) ExperimentSnapshot ¶

func (db *PgDB) ExperimentSnapshot(experimentID int) ([]byte, int, error)

ExperimentSnapshot returns the snapshot for the specified experiment.

func (*PgDB) ExperimentTotalStepTime ¶

func (db *PgDB) ExperimentTotalStepTime(id int) (float64, error)

ExperimentTotalStepTime returns the total elapsed time for all allocations of the experiment with the given ID. Any step with a NULL end_time does not contribute. Elapsed time is expressed as a floating point number of seconds.

func (*PgDB) ExperimentTrialIDs ¶

func (db *PgDB) ExperimentTrialIDs(expID int) ([]int, error)

ExperimentTrialIDs returns the trial IDs for the experiment.

func (*PgDB) FailDeletingExperiment ¶

func (db *PgDB) FailDeletingExperiment() error

FailDeletingExperiment finds all experiments that were deleting when the master crashed and moves them to DELETE_FAILED.

func (*PgDB) GetExperimentStatus ¶

func (db *PgDB) GetExperimentStatus(experimentID int) (state model.State, progress float64,
	err error,
)

GetExperimentStatus returns the current state of the experiment.

func (*PgDB) GetOrCreateClusterID ¶

func (db *PgDB) GetOrCreateClusterID(telemetryID string) (string, error)

GetOrCreateClusterID queries the master uuid in the database, adding one if it doesn't exist. If a nonempty telemetryID is provided, it will be the one added, otherwise a uuid is generated.

func (*PgDB) GetRegisteredCheckpoints ¶

func (db *PgDB) GetRegisteredCheckpoints(checkpoints []uuid.UUID) (map[uuid.UUID]bool, error)

GetRegisteredCheckpoints gets the checkpoints in the model registry from the list of checkpoints provided.

func (*PgDB) GetTrialProfilerMetricsBatches ¶

func (db *PgDB) GetTrialProfilerMetricsBatches(
	labelsJSON []byte, offset, limit int,
) (model.TrialProfilerMetricsBatchBatch, error)

GetTrialProfilerMetricsBatches gets a batch of profiler metric batches from the database.

func (*PgDB) GroupCheckpointUUIDsByExperimentID ¶

func (db *PgDB) GroupCheckpointUUIDsByExperimentID(checkpoints []uuid.UUID) (
	[]*ExperimentCheckpointGrouping, error,
)

GroupCheckpointUUIDsByExperimentID creates the mapping of checkpoint UUIDs to experiment ID. The checkpoint UUIDs grouped together are comma-separated.

func (*PgDB) InsertTrialProfilerMetricsBatch ¶

func (db *PgDB) InsertTrialProfilerMetricsBatch(
	values []float32, batches []int32, timestamps []time.Time, labels []byte,
) error

InsertTrialProfilerMetricsBatch inserts a batch of metrics into the database.

func (*PgDB) JobByID ¶

func (db *PgDB) JobByID(jID model.JobID) (*model.Job, error)

JobByID retrieves a job by ID.

func (*PgDB) LatestCheckpointForTrial ¶

func (db *PgDB) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error)

LatestCheckpointForTrial finds the latest completed checkpoint for a trial, returning nil if none exists.

func (*PgDB) LegacyExperimentConfigByID ¶

func (db *PgDB) LegacyExperimentConfigByID(
	id int,
) (expconf.LegacyConfig, error)

LegacyExperimentConfigByID parses very old configs, returning a LegacyConfig which exposes a select subset of fields in a type-safe way.

func (*PgDB) MaxTerminationDelay ¶

func (db *PgDB) MaxTerminationDelay() time.Duration

MaxTerminationDelay is the max delay before a consumer can be sure all logs have been received. For Postgres, we don't need to wait very long at all; this was a hypothetical cap on fluent to DB latency prior to fluent's deprecation.

func (*PgDB) MetricNames ¶

func (db *PgDB) MetricNames(ctx context.Context, experimentIDs []int) (
	map[model.MetricGroup][]string, error,
)

MetricNames returns a list of metric names for the given experiment IDs.

func (*PgDB) Migrate ¶

func (db *PgDB) Migrate(migrationURL string, actions []string) error

Migrate runs the migrations from the specified directory URL.

func (*PgDB) NonTerminalExperiments ¶

func (db *PgDB) NonTerminalExperiments() ([]*model.Experiment, error)

NonTerminalExperiments finds all experiments in the database whose states are not terminal.

func (*PgDB) PeriodicTelemetryInfo ¶

func (db *PgDB) PeriodicTelemetryInfo() ([]byte, error)

PeriodicTelemetryInfo returns anonymous information about the usage of the current Determined cluster.

func (*PgDB) ProjectByName ¶

func (db *PgDB) ProjectByName(workspaceName string, projectName string) (int, error)

ProjectByName returns a project's ID if it exists in the given workspace.

func (*PgDB) ProjectExperiments ¶

func (db *PgDB) ProjectExperiments(id int) (experiments []*model.Experiment, err error)

ProjectExperiments returns a list of experiments within a project.

func (*PgDB) Query ¶

func (db *PgDB) Query(queryName string, v interface{}, params ...interface{}) error

Query returns the result of the query. Any placeholder parameters are replaced with supplied params.

func (*PgDB) QueryF ¶

func (db *PgDB) QueryF(
	queryName string, args []interface{}, v interface{}, params ...interface{},
) error

QueryF returns the result of the formatted query. Any placeholder parameters are replaced with supplied params.

func (*PgDB) QueryProto ¶

func (db *PgDB) QueryProto(queryName string, v interface{}, args ...interface{}) error

QueryProto returns the result of the query. Any placeholder parameters are replaced with supplied args. Enum values must be the full name of the enum.

func (*PgDB) QueryProtof ¶

func (db *PgDB) QueryProtof(
	queryName string, args []interface{}, v interface{}, params ...interface{},
) error

QueryProtof returns the result of the formatted query. Any placeholder parameters are replaced with supplied params.

func (*PgDB) RawQuery ¶

func (db *PgDB) RawQuery(queryName string, params ...interface{}) ([]byte, error)

RawQuery returns the result of the query as a raw byte string. Any placeholder parameters are replaced with supplied params.

func (*PgDB) RecordAgentStats ¶

func (db *PgDB) RecordAgentStats(a *model.AgentStats) error

RecordAgentStats inserts a record of the agent start time if the agent has not been started or has already ended.

func (*PgDB) RecordInstanceStats ¶

func (db *PgDB) RecordInstanceStats(a *model.InstanceStats) error

RecordInstanceStats inserts a record of the instance start time if the instance has not been started or has already ended.

func (*PgDB) RecordTaskEndStats ¶

func (db *PgDB) RecordTaskEndStats(stats *model.TaskStats) error

RecordTaskEndStats records end stats for tasks.

func (*PgDB) RecordTaskStats ¶

func (db *PgDB) RecordTaskStats(stats *model.TaskStats) error

RecordTaskStats records stats for tasks.

func (*PgDB) SaveExperimentArchiveStatus ¶

func (db *PgDB) SaveExperimentArchiveStatus(experiment *model.Experiment) error

SaveExperimentArchiveStatus saves the current experiment archive status to the database.

func (*PgDB) SaveExperimentConfig ¶

func (db *PgDB) SaveExperimentConfig(id int, config expconf.ExperimentConfig) error

SaveExperimentConfig saves the current experiment config to the database.

func (*PgDB) SaveExperimentProgress ¶

func (db *PgDB) SaveExperimentProgress(id int, progress *float64) error

SaveExperimentProgress stores the progress for an experiment in the database.

func (*PgDB) SaveExperimentState ¶

func (db *PgDB) SaveExperimentState(experiment *model.Experiment) error

SaveExperimentState saves the current experiment state to the database.

func (*PgDB) SaveSnapshot ¶

func (db *PgDB) SaveSnapshot(
	experimentID int, version int, experimentSnapshot []byte,
) error

SaveSnapshot saves a searcher and trial snapshot together.

func (*PgDB) StartAllocationSession ¶

func (db *PgDB) StartAllocationSession(
	allocationID model.AllocationID, owner *model.User,
) (string, error)

StartAllocationSession creates a row in the allocation_sessions table.

func (*PgDB) TaskLogs ¶

func (db *PgDB) TaskLogs(
	taskID model.TaskID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{},
) ([]*model.TaskLog, interface{}, error)

TaskLogs takes a task ID and log offset, limit and filters and returns matching logs.

func (*PgDB) TaskLogsCount ¶

func (db *PgDB) TaskLogsCount(taskID model.TaskID, fs []api.Filter) (int, error)

TaskLogsCount returns the number of logs in postgres for the given task.

func (*PgDB) TaskLogsFields ¶

func (db *PgDB) TaskLogsFields(taskID model.TaskID) (*apiv1.TaskLogsFieldsResponse, error)

TaskLogsFields returns the unique fields that can be filtered on for the given task.

func (*PgDB) TerminateExperimentInRestart ¶

func (db *PgDB) TerminateExperimentInRestart(id int, state model.State) error

TerminateExperimentInRestart is used during master restart to properly terminate an experiment which was either in the process of stopping or which is not restorable for some reason, such as an invalid experiment config after a version upgrade.

func (*PgDB) TopTrialsByTrainingLength ¶

func (db *PgDB) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string,
	smallerIsBetter bool,
) (trials []int32, err error)

TopTrialsByTrainingLength chooses the subset of trials that has been training for the highest number of batches, using the specified metric as a tie breaker.

func (*PgDB) TrainingMetricBatches ¶

func (db *PgDB) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) (
	batches []int32, endTime time.Time, err error,
)

TrainingMetricBatches returns the milestones (in batches processed) at which a specific training metric was recorded.

func (*PgDB) TrialExperimentAndRequestID ¶

func (db *PgDB) TrialExperimentAndRequestID(id int) (int, model.RequestID, error)

TrialExperimentAndRequestID returns the trial's experiment and request ID.

func (*PgDB) TrialLogs ¶

func (db *PgDB) TrialLogs(
	trialID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{},
) ([]*model.TrialLog, interface{}, error)

TrialLogs takes a trial ID and log offset, limit and filters and returns matching trial logs.

func (*PgDB) TrialLogsCount ¶

func (db *PgDB) TrialLogsCount(trialID int, fs []api.Filter) (int, error)

TrialLogsCount returns the number of logs in postgres for the given trial.

func (*PgDB) TrialLogsFields ¶

func (db *PgDB) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error)

TrialLogsFields returns the unique fields that can be filtered on for the given trial.

func (*PgDB) TrialRunIDAndRestarts ¶

func (db *PgDB) TrialRunIDAndRestarts(trialID int) (int, int, error)

TrialRunIDAndRestarts returns the run id and restart count for a trial.

func (*PgDB) TrialState ¶

func (db *PgDB) TrialState(trialID int) (model.State, error)

TrialState returns the current state of the given trial.

func (*PgDB) TrialStatus ¶

func (db *PgDB) TrialStatus(trialID int) (model.State, *time.Time, error)

TrialStatus returns the current status of the given trial, including the end time without returning all its hparams and other unneeded details. Called in paths hotter than TrialByID allows.

func (*PgDB) TrialsSnapshot ¶

func (db *PgDB) TrialsSnapshot(experimentID int, minBatches int, maxBatches int,
	metricName string, startTime time.Time, metricGroup model.MetricGroup,
) (trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error)

TrialsSnapshot returns metrics across each trial in an experiment at a specific point of progress, for metric groups other than training and validation.

func (*PgDB) TrySaveExperimentState ¶

func (db *PgDB) TrySaveExperimentState(experiment *model.Experiment) error

TrySaveExperimentState saves the current experiment state to the database and returns if we successfully changed the state or not.

func (*PgDB) UpdateAllocationProxyAddress ¶

func (db *PgDB) UpdateAllocationProxyAddress(a model.Allocation) error

UpdateAllocationProxyAddress stores the proxy address.

func (*PgDB) UpdateAllocationStartTime ¶

func (db *PgDB) UpdateAllocationStartTime(a model.Allocation) error

UpdateAllocationStartTime stores the latest start time.

func (*PgDB) UpdateAllocationState ¶

func (db *PgDB) UpdateAllocationState(a model.Allocation) error

UpdateAllocationState stores the latest task state and readiness.

func (*PgDB) UpdateClusterHeartBeat ¶

func (db *PgDB) UpdateClusterHeartBeat(currentClusterHeartbeat time.Time) error

UpdateClusterHeartBeat updates the clusterheartbeat column in the cluster_id table.

func (*PgDB) UpdateJobPosition ¶

func (db *PgDB) UpdateJobPosition(jobID model.JobID, position decimal.Decimal) error

UpdateJobPosition propagates the new queue position to the job.

func (*PgDB) UpdateResourceAllocationAggregation ¶

func (db *PgDB) UpdateResourceAllocationAggregation() error

UpdateResourceAllocationAggregation updates the aggregated resource allocation table.

func (*PgDB) UpdateTrial ¶

func (db *PgDB) UpdateTrial(id int, newState model.State) error

UpdateTrial updates an existing trial. Fields that are nil or zero are not updated. end_time is set if the trial moves to a terminal state.

func (*PgDB) UpdateTrialRestarts ¶

func (db *PgDB) UpdateTrialRestarts(id, restartCount int) error

UpdateTrialRestarts sets the trial's restart count.

func (*PgDB) UpdateTrialRunID ¶

func (db *PgDB) UpdateTrialRunID(id, runID int) error

UpdateTrialRunID sets the trial's run ID.

func (*PgDB) UpdateTrialRunnerMetadata ¶

func (db *PgDB) UpdateTrialRunnerMetadata(id int, md *trialv1.TrialRunnerMetadata) error

UpdateTrialRunnerMetadata updates a trial's metadata about its runner.

func (*PgDB) UpdateTrialRunnerState ¶

func (db *PgDB) UpdateTrialRunnerState(id int, state string) error

UpdateTrialRunnerState updates a trial runner's state.

func (*PgDB) ValidationByTotalBatches ¶

func (db *PgDB) ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error)

ValidationByTotalBatches looks up a validation by trial and total batches, returning nil if none exists.

func (*PgDB) ValidationMetricBatches ¶

func (db *PgDB) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) (
	batches []int32, endTime time.Time, err error,
)

ValidationMetricBatches returns the milestones (in batches processed) at which a specific validation metric was recorded.

type RPWorkspaceBinding ¶

type RPWorkspaceBinding struct {
	bun.BaseModel `bun:"table:rp_workspace_bindings"`

	WorkspaceID int    `bun:"workspace_id"`
	PoolName    string `bun:"pool_name"`
	Valid       bool   `bun:"valid"`
}

RPWorkspaceBinding is a struct reflecting the db table rp_workspace_bindings.

func GetAllBindings ¶

func GetAllBindings(
	ctx context.Context,
) ([]*RPWorkspaceBinding, error)

GetAllBindings gets all valid rp-workspace bindings.

func ReadWorkspacesBoundToRP ¶

func ReadWorkspacesBoundToRP(
	ctx context.Context, poolName string, offset, limit int32,
	resourcePools []config.ResourcePoolConfig,
) ([]*RPWorkspaceBinding, *apiv1.Pagination, error)

ReadWorkspacesBoundToRP gets the bindings between workspace IDs and the requested resource pool.

type SortDirection ¶

type SortDirection string

SortDirection represents the order by in a query.

const (
	// SortDirectionAsc represents ordering by ascending.
	SortDirectionAsc SortDirection = "ASC"
	// SortDirectionDesc represents ordering by descending.
	SortDirectionDesc SortDirection = "DESC"
	// SortDirectionAscNullsFirst represents ordering by ascending with nulls first.
	SortDirectionAscNullsFirst SortDirection = "ASC NULLS FIRST"
	// SortDirectionDescNullsLast represents ordering by descending with nulls last.
	SortDirectionDescNullsLast SortDirection = "DESC NULLS LAST"
)

type StaticQueryMap ¶

type StaticQueryMap struct {
	sync.Mutex
	// contains filtered or unexported fields
}

StaticQueryMap caches static sql files.

func (*StaticQueryMap) GetOrLoad ¶

func (q *StaticQueryMap) GetOrLoad(queryName string) string

GetOrLoad fetches static sql from the cache or loads them from disk.

type TaskMetadata ¶

type TaskMetadata struct {
	bun.BaseModel `bun:"table:command_state"`
	WorkspaceID   model.AccessScopeID `bun:"workspace_id"`
	TaskType      model.TaskType      `bun:"task_type"`
	ExperimentIDs []int32             `bun:"experiment_ids"`
	TrialIDs      []int32             `bun:"trial_ids"`
}

TaskMetadata captures minimal metadata about a task.

func IdentifyTask ¶

func IdentifyTask(ctx context.Context, taskID model.TaskID) (TaskMetadata, error)

IdentifyTask returns the task metadata for a given task ID. Returns db.ErrNotFound if a command with given taskID does not exist.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
bunutils

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL