Documentation ¶
Index ¶
- Constants
- Variables
- func ActiveLogPolicies(ctx context.Context, id int) (expconf.LogPoliciesConfig, error)
- func AddAllocationExitStatus(ctx context.Context, a *model.Allocation) error
- func AddCheckpointMetadata(ctx context.Context, m *model.CheckpointV2) error
- func AddExperiment(ctx context.Context, experiment *model.Experiment, ...) (err error)
- func AddExperimentTx(ctx context.Context, idb bun.IDB, experiment *model.Experiment, ...) (err error)
- func AddProjectHyperparameters(ctx context.Context, idb bun.IDB, projectID int32, experimentIDs []int32) error
- func AddRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, ...) error
- func AddTask(ctx context.Context, t *model.Task) error
- func AddTaskTx(ctx context.Context, idb bun.IDB, t *model.Task) error
- func AddTrial(ctx context.Context, trial *model.Trial, taskID model.TaskID) error
- func ApplyDoubleFieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.DoubleFieldFilter) (*bun.SelectQuery, error)
- func ApplyInt32FieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.Int32FieldFilter) (*bun.SelectQuery, error)
- func ApplyPolymorphicFilter(q *bun.SelectQuery, column string, filter *commonv1.PolymorphicFilter) (*bun.SelectQuery, error)
- func ApplyTimestampFieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.TimestampFieldFilter) (*bun.SelectQuery, error)
- func Bun() *bun.DB
- func BunSelectMetricGroupNames() *bun.SelectQuery
- func BunSelectMetricsQuery(mGroup model.MetricGroup, inclArchived bool) *bun.SelectQuery
- func CheckIfRPUnbound(poolName string) error
- func EndAgentStats(a *model.AgentStats) error
- func ExperimentBestSearcherValidation(ctx context.Context, id int) (float32, error)
- func ExperimentByExternalIDTx(ctx context.Context, idb bun.IDB, externalExperimentID string) (*model.Experiment, error)
- func ExperimentByID(ctx context.Context, expID int) (*model.Experiment, error)
- func ExperimentByTaskID(ctx context.Context, taskID model.TaskID) (*model.Experiment, error)
- func ExperimentByTrialID(ctx context.Context, trialID int) (*model.Experiment, error)
- func ExperimentsTrialAndTaskIDs(ctx context.Context, idb bun.IDB, expIDs []int) ([]int, []model.TaskID, error)
- func GetCommandOwnerID(ctx context.Context, taskID model.TaskID) (model.UserID, error)
- func GetDefaultPoolsForWorkspace(ctx context.Context, workspaceID int) (computePool, auxPool string, err error)
- func GetMetrics(ctx context.Context, trialID, afterBatches, limit int, mGroup *string) ([]*trialv1.MetricsReport, error)
- func GetModelIDsAssociatedWithCheckpoint(ctx context.Context, ckptUUID uuid.UUID) ([]int32, error)
- func GetNonTerminalExperimentCount(ctx context.Context, experimentIDs []int32) (count int, err error)
- func GetTokenKeys() *model.AuthTokenKeypair
- func GetUnboundRPs(ctx context.Context, resourcePools []string) ([]string, error)
- func HackAddUser(ctx context.Context, user *model.User) (model.UserID, error)
- func InitAuthKeys() error
- func MarkCheckpointsDeleted(ctx context.Context, deleteCheckpoints []uuid.UUID) error
- func MatchSentinelError(err error) error
- func MetricBatches(experimentID int, metricName string, startTime time.Time, ...) (batches []int32, endTime time.Time, err error)
- func MustHaveAffectedRows(result sql.Result, err error) error
- func NonExperimentTasksContextDirectory(ctx context.Context, tID model.TaskID) ([]byte, error)
- func OrderByToSQL(order apiv1.OrderBy) string
- func OverwriteRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, ...) error
- func PaginateBun(query *bun.SelectQuery, orderColumn string, direction SortDirection, ...) *bun.SelectQuery
- func PaginateBunUnsafe(query *bun.SelectQuery, orderColumn string, direction SortDirection, ...) *bun.SelectQuery
- func ParseMapToProto(dest map[string]interface{}, val interface{}) error
- func ReadRPsAvailableToWorkspace(ctx context.Context, workspaceID int32, offset int32, limit int32, ...) ([]string, *apiv1.Pagination, error)
- func RecordTaskEndStatsBun(stats *model.TaskStats) error
- func RecordTaskStatsBun(stats *model.TaskStats) error
- func RegisterModel(m interface{})
- func RemoveProjectHyperparameters(ctx context.Context, idb bun.IDB, experimentIDs []int32) error
- func RemoveRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string) error
- func SetClause(fields []string) string
- func SetTokenKeys(tk *model.AuthTokenKeypair)
- func TaskByID(ctx context.Context, tID model.TaskID) (*model.Task, error)
- func TopTrialsByMetric(ctx context.Context, experimentID int, maxTrials int, metric string, ...) ([]int32, error)
- func TrialByExperimentAndRequestID(ctx context.Context, experimentID int, requestID model.RequestID) (*model.Trial, error)
- func TrialByID(ctx context.Context, id int) (*model.Trial, error)
- func TrialByTaskID(ctx context.Context, taskID model.TaskID) (*model.Trial, error)
- func TrialTaskIDsByTrialID(ctx context.Context, trialID int) ([]*model.TrialTaskID, error)
- func UpdateAllocationPorts(a model.Allocation) error
- func UpdateCheckpointSizeTx(ctx context.Context, idb bun.IDB, checkpoints []uuid.UUID) error
- func UpsertTrialByExternalIDTx(ctx context.Context, tx bun.Tx, trial *model.Trial, taskID model.TaskID) error
- func ValidateDoubleFieldFilterComparison(filter *commonv1.DoubleFieldFilter) error
- func ValidateInt32FieldFilterComparison(filter *commonv1.Int32FieldFilter) error
- func ValidatePolymorphicFilter(filter *commonv1.PolymorphicFilter) error
- func ValidateTimeStampFieldFilterComparison(filter *commonv1.TimestampFieldFilter) error
- type DB
- type ExperimentCheckpointGrouping
- type FilterComparison
- type MetricMeasurements
- type MetricPartitionType
- type PgDB
- func (db *PgDB) ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error)
- func (db *PgDB) AddAllocation(a *model.Allocation) error
- func (db *PgDB) AddExperiment(experiment *model.Experiment, activeConfig expconf.ExperimentConfig) (err error)
- func (db *PgDB) AddJob(j *model.Job) error
- func (db *PgDB) AddTask(t *model.Task) error
- func (db *PgDB) AddTaskLogs(logs []*model.TaskLog) error
- func (db *PgDB) AddTrainingMetrics(ctx context.Context, m *trialv1.TrialMetrics) error
- func (db *PgDB) AddTrialMetrics(ctx context.Context, m *trialv1.TrialMetrics, mGroup model.MetricGroup) error
- func (db *PgDB) AddValidationMetrics(ctx context.Context, m *trialv1.TrialMetrics) error
- func (db *PgDB) AllocationByID(aID model.AllocationID) (*model.Allocation, error)
- func (db *PgDB) CheckExperimentExists(id int) (bool, error)
- func (db *PgDB) CheckTaskExists(id model.TaskID) (bool, error)
- func (db *PgDB) CheckTrialExists(id int) (bool, error)
- func (db *PgDB) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error)
- func (db *PgDB) CheckpointByUUID(id uuid.UUID) (*model.Checkpoint, error)
- func (db *PgDB) CheckpointByUUIDs(ckptUUIDs []uuid.UUID) ([]model.Checkpoint, error)
- func (db *PgDB) Close() error
- func (db *PgDB) CloseOpenAllocations(exclude []model.AllocationID) error
- func (db *PgDB) CompleteAllocation(a *model.Allocation) error
- func (db *PgDB) CompleteAllocationTelemetry(aID model.AllocationID) ([]byte, error)
- func (db *PgDB) CompleteTask(tID model.TaskID, endTime time.Time) error
- func (db *PgDB) DeleteAllocationSession(allocationID model.AllocationID) error
- func (db *PgDB) DeleteExperiments(ctx context.Context, ids []int) error
- func (db *PgDB) DeleteSnapshotsForExperiment(experimentID int) error
- func (db *PgDB) DeleteSnapshotsForExperiments(experimentIDs []int) func(ctx context.Context, tx *bun.Tx) error
- func (db *PgDB) DeleteSnapshotsForTerminalExperiments() error
- func (db *PgDB) DeleteTaskLogs(ids []model.TaskID) error
- func (db *PgDB) DeleteTrialLogs(ids []int) error
- func (db *PgDB) EndAllAgentStats() error
- func (db *PgDB) EndAllInstanceStats() error
- func (db *PgDB) EndAllTaskStats() error
- func (db *PgDB) EndInstanceStats(a *model.InstanceStats) error
- func (db *PgDB) ExperimentCheckpointsToGCRaw(id int, experimentBest, trialBest, trialLatest int) ([]uuid.UUID, error)
- func (db *PgDB) ExperimentConfigRaw(id int) ([]byte, error)
- func (db *PgDB) ExperimentHasCheckpointsInRegistry(id int) (bool, error)
- func (db *PgDB) ExperimentIDByTrialID(trialID int) (int, error)
- func (db *PgDB) ExperimentLabelUsage(projectID int32) (labelUsage map[string]int, err error)
- func (db *PgDB) ExperimentModelDefinitionRaw(id int) ([]byte, error)
- func (db *PgDB) ExperimentNumSteps(id int) (int64, error)
- func (db *PgDB) ExperimentNumTrials(id int) (int64, error)
- func (db *PgDB) ExperimentSnapshot(experimentID int) ([]byte, int, error)
- func (db *PgDB) ExperimentTotalStepTime(id int) (float64, error)
- func (db *PgDB) ExperimentTrialIDs(expID int) ([]int, error)
- func (db *PgDB) FailDeletingExperiment() error
- func (db *PgDB) GetExperimentStatus(experimentID int) (state model.State, progress float64, err error)
- func (db *PgDB) GetOrCreateClusterID(telemetryID string) (string, error)
- func (db *PgDB) GetRegisteredCheckpoints(checkpoints []uuid.UUID) (map[uuid.UUID]bool, error)
- func (db *PgDB) GetTrialProfilerMetricsBatches(labelsJSON []byte, offset, limit int) (model.TrialProfilerMetricsBatchBatch, error)
- func (db *PgDB) GroupCheckpointUUIDsByExperimentID(checkpoints []uuid.UUID) ([]*ExperimentCheckpointGrouping, error)
- func (db *PgDB) InsertTrialProfilerMetricsBatch(values []float32, batches []int32, timestamps []time.Time, labels []byte) error
- func (db *PgDB) JobByID(jID model.JobID) (*model.Job, error)
- func (db *PgDB) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error)
- func (db *PgDB) LegacyExperimentConfigByID(id int) (expconf.LegacyConfig, error)
- func (db *PgDB) MaxTerminationDelay() time.Duration
- func (db *PgDB) MetricNames(ctx context.Context, experimentIDs []int) (map[model.MetricGroup][]string, error)
- func (db *PgDB) Migrate(migrationURL string, actions []string) error
- func (db *PgDB) NonTerminalExperiments() ([]*model.Experiment, error)
- func (db *PgDB) PeriodicTelemetryInfo() ([]byte, error)
- func (db *PgDB) ProjectByName(workspaceName string, projectName string) (int, error)
- func (db *PgDB) ProjectExperiments(id int) (experiments []*model.Experiment, err error)
- func (db *PgDB) Query(queryName string, v interface{}, params ...interface{}) error
- func (db *PgDB) QueryF(queryName string, args []interface{}, v interface{}, params ...interface{}) error
- func (db *PgDB) QueryProto(queryName string, v interface{}, args ...interface{}) error
- func (db *PgDB) QueryProtof(queryName string, args []interface{}, v interface{}, params ...interface{}) error
- func (db *PgDB) RawQuery(queryName string, params ...interface{}) ([]byte, error)
- func (db *PgDB) RecordAgentStats(a *model.AgentStats) error
- func (db *PgDB) RecordInstanceStats(a *model.InstanceStats) error
- func (db *PgDB) RecordTaskEndStats(stats *model.TaskStats) error
- func (db *PgDB) RecordTaskStats(stats *model.TaskStats) error
- func (db *PgDB) SaveExperimentArchiveStatus(experiment *model.Experiment) error
- func (db *PgDB) SaveExperimentConfig(id int, config expconf.ExperimentConfig) error
- func (db *PgDB) SaveExperimentProgress(id int, progress *float64) error
- func (db *PgDB) SaveExperimentState(experiment *model.Experiment) error
- func (db *PgDB) SaveSnapshot(experimentID int, version int, experimentSnapshot []byte) error
- func (db *PgDB) StartAllocationSession(allocationID model.AllocationID, owner *model.User) (string, error)
- func (db *PgDB) TaskLogs(taskID model.TaskID, limit int, fs []api.Filter, order apiv1.OrderBy, ...) ([]*model.TaskLog, interface{}, error)
- func (db *PgDB) TaskLogsCount(taskID model.TaskID, fs []api.Filter) (int, error)
- func (db *PgDB) TaskLogsFields(taskID model.TaskID) (*apiv1.TaskLogsFieldsResponse, error)
- func (db *PgDB) TerminateExperimentInRestart(id int, state model.State) error
- func (db *PgDB) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string, smallerIsBetter bool) (trials []int32, err error)
- func (db *PgDB) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) (batches []int32, endTime time.Time, err error)
- func (db *PgDB) TrialExperimentAndRequestID(id int) (int, model.RequestID, error)
- func (db *PgDB) TrialLogs(trialID, limit int, fs []api.Filter, order apiv1.OrderBy, ...) ([]*model.TrialLog, interface{}, error)
- func (db *PgDB) TrialLogsCount(trialID int, fs []api.Filter) (int, error)
- func (db *PgDB) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error)
- func (db *PgDB) TrialRunIDAndRestarts(trialID int) (int, int, error)
- func (db *PgDB) TrialState(trialID int) (model.State, error)
- func (db *PgDB) TrialStatus(trialID int) (model.State, *time.Time, error)
- func (db *PgDB) TrialsSnapshot(experimentID int, minBatches int, maxBatches int, metricName string, ...) (trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error)
- func (db *PgDB) TrySaveExperimentState(experiment *model.Experiment) error
- func (db *PgDB) UpdateAllocationProxyAddress(a model.Allocation) error
- func (db *PgDB) UpdateAllocationStartTime(a model.Allocation) error
- func (db *PgDB) UpdateAllocationState(a model.Allocation) error
- func (db *PgDB) UpdateClusterHeartBeat(currentClusterHeartbeat time.Time) error
- func (db *PgDB) UpdateJobPosition(jobID model.JobID, position decimal.Decimal) error
- func (db *PgDB) UpdateResourceAllocationAggregation() error
- func (db *PgDB) UpdateTrial(id int, newState model.State) error
- func (db *PgDB) UpdateTrialRestarts(id, restartCount int) error
- func (db *PgDB) UpdateTrialRunID(id, runID int) error
- func (db *PgDB) UpdateTrialRunnerMetadata(id int, md *trialv1.TrialRunnerMetadata) error
- func (db *PgDB) UpdateTrialRunnerState(id int, state string) error
- func (db *PgDB) ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error)
- func (db *PgDB) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) (batches []int32, endTime time.Time, err error)
- type RPWorkspaceBinding
- type SortDirection
- type StaticQueryMap
- type TaskMetadata
Constants ¶
const ( // CodeUniqueViolation is the error code that Postgres uses to indicate that an attempted // insert/update violates a uniqueness constraint. Obtained from: // https://www.postgresql.org/docs/10/errcodes-appendix.html CodeUniqueViolation = "23505" // CodeForeignKeyViolation is the error code that Postgres uses to indicate that an attempted // insert/update violates a foreign key constraint. Obtained from: // https://www.postgresql.org/docs/10/errcodes-appendix.html CodeForeignKeyViolation = "23503" )
const ( // InfPostgresString is how we store infinity in JSONB in postgres. InfPostgresString = "Infinity" // NegInfPostgresString is how we store -infinity in JSONB in postgres. NegInfPostgresString = "-Infinity" // NaNPostgresString is how we store NaN in JSONB in postgres. NaNPostgresString = "NaN" // MetricTypeString is the summary metric type for string or mixed types. MetricTypeString = "string" // MetricTypeNumber is the summary metric type for floats or ints. MetricTypeNumber = "number" // MetricTypeBool is the summary metric type for boolean. MetricTypeBool = "boolean" // MetricTypeDate is the summary metric type for date metrics. MetricTypeDate = "date" // MetricTypeObject is the summary metric type for object types. MetricTypeObject = "object" // MetricTypeArray is the summary metric type for array types. MetricTypeArray = "array" // MetricTypeNull is the summary metric type for null types. MetricTypeNull = "null" )
Variables ¶
var ( // ErrNotFound is returned if nothing is found. ErrNotFound = errors.New("not found") // ErrTooManyRowsAffected is returned if too many rows are affected. ErrTooManyRowsAffected = errors.New("too many rows are affected") // ErrDuplicateRecord is returned when trying to create a row that already exists. ErrDuplicateRecord = errors.New("row already exists") // ErrInvalidInput is returned when the data passed to a function is invalid for semantic or // syntactic reasons. ErrInvalidInput = errors.New("invalid input") )
Functions ¶
func ActiveLogPolicies ¶
ActiveLogPolicies returns log pattern policies for an experiment ID. This should only be called on a running experiment.
func AddAllocationExitStatus ¶
func AddAllocationExitStatus(ctx context.Context, a *model.Allocation) error
AddAllocationExitStatus adds the allocation exit status to the allocations table.
func AddCheckpointMetadata ¶
func AddCheckpointMetadata(ctx context.Context, m *model.CheckpointV2) error
AddCheckpointMetadata persists metadata for a completed checkpoint to the database.
func AddExperiment ¶
func AddExperiment( ctx context.Context, experiment *model.Experiment, activeConfig expconf.ExperimentConfig, ) (err error)
AddExperiment adds the experiment to the database and sets its ID.
func AddExperimentTx ¶
func AddExperimentTx( ctx context.Context, idb bun.IDB, experiment *model.Experiment, activeConfig expconf.ExperimentConfig, upsert bool, ) (err error)
AddExperimentTx adds the experiment to the database and sets its ID.
func AddProjectHyperparameters ¶
func AddProjectHyperparameters( ctx context.Context, idb bun.IDB, projectID int32, experimentIDs []int32, ) error
AddProjectHyperparameters takes a list of experiment ids and combines their hyperparameters with the project's existing ones.
func AddRPWorkspaceBindings ¶
func AddRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, resourcePools []config.ResourcePoolConfig, ) error
AddRPWorkspaceBindings inserts new bindings between workspaceIds and poolName.
func ApplyDoubleFieldFilter ¶
func ApplyDoubleFieldFilter[T string | schema.Ident]( q *bun.SelectQuery, column T, filter *commonv1.DoubleFieldFilter, ) (*bun.SelectQuery, error)
ApplyDoubleFieldFilter applies filtering on a bun query for double field.
func ApplyInt32FieldFilter ¶
func ApplyInt32FieldFilter[T string | schema.Ident]( q *bun.SelectQuery, column T, filter *commonv1.Int32FieldFilter, ) (*bun.SelectQuery, error)
ApplyInt32FieldFilter applies filtering on a bun query for int32 field.
func ApplyPolymorphicFilter ¶
func ApplyPolymorphicFilter( q *bun.SelectQuery, column string, filter *commonv1.PolymorphicFilter, ) (*bun.SelectQuery, error)
ApplyPolymorphicFilter applies filtering on a bun query for a polymorphic filter.
func ApplyTimestampFieldFilter ¶
func ApplyTimestampFieldFilter[T string | schema.Ident]( q *bun.SelectQuery, column T, filter *commonv1.TimestampFieldFilter, ) (*bun.SelectQuery, error)
ApplyTimestampFieldFilter applies filtering on a bun query for timestamp field.
func Bun ¶
Bun returns the singleton database connection through the bun library. bun is the database library we have decided to use for new code in the future due to its superior composability over bare SQL, and its superior flexibility over e.g. gorm. New code should not use the old bare SQL tooling.
func BunSelectMetricGroupNames ¶
func BunSelectMetricGroupNames() *bun.SelectQuery
BunSelectMetricGroupNames sets up a bun select query for getting all the metric group and names.
func BunSelectMetricsQuery ¶
func BunSelectMetricsQuery(mGroup model.MetricGroup, inclArchived bool) *bun.SelectQuery
BunSelectMetricsQuery sets up a bun select query based on the new metrics table, simplifying some weirdness we set up for pg10 support.
func CheckIfRPUnbound ¶
CheckIfRPUnbound checks to make sure the specified resource pool is not bound to any workspace and returns an error if it is.
func EndAgentStats ¶
func EndAgentStats(a *model.AgentStats) error
EndAgentStats updates the end time of an instance.
func ExperimentBestSearcherValidation ¶
ExperimentBestSearcherValidation returns the best searcher validation for an experiment.
func ExperimentByExternalIDTx ¶
func ExperimentByExternalIDTx(ctx context.Context, idb bun.IDB, externalExperimentID string) ( *model.Experiment, error, )
ExperimentByExternalIDTx looks up an experiment by a given external experiment id.
func ExperimentByID ¶
ExperimentByID looks up an experiment by ID in a database, returning an error if none exists.
func ExperimentByTaskID ¶
ExperimentByTaskID looks up an experiment by a given taskID, returning an error if none exists.
func ExperimentByTrialID ¶
ExperimentByTrialID looks up an experiment by a given trialID, returning an error if none exists.
func ExperimentsTrialAndTaskIDs ¶
func ExperimentsTrialAndTaskIDs(ctx context.Context, idb bun.IDB, expIDs []int) ([]int, []model.TaskID, error, )
ExperimentsTrialAndTaskIDs returns the trial and task IDs for one or more experiments.
func GetCommandOwnerID ¶
GetCommandOwnerID gets a command's ownerID from a taskID. Uses persisted command state. Returns db.ErrNotFound if a command with given taskID does not exist.
func GetDefaultPoolsForWorkspace ¶
func GetDefaultPoolsForWorkspace(ctx context.Context, workspaceID int, ) (computePool, auxPool string, err error)
GetDefaultPoolsForWorkspace returns the default compute and aux pools for a workspace.
func GetMetrics ¶
func GetMetrics(ctx context.Context, trialID, afterBatches, limit int, mGroup *string, ) ([]*trialv1.MetricsReport, error)
GetMetrics returns a subset of metrics of the requested type for the given trial ID.
func GetModelIDsAssociatedWithCheckpoint ¶
GetModelIDsAssociatedWithCheckpoint returns the model ids associated with a checkpoint, returning nil if error.
func GetNonTerminalExperimentCount ¶
func GetNonTerminalExperimentCount(ctx context.Context, experimentIDs []int32, ) (count int, err error)
GetNonTerminalExperimentCount returns the number of non terminal experiments.
func GetUnboundRPs ¶
GetUnboundRPs gets unbound resource pools.
func HackAddUser ¶
HackAddUser is used to prevent an import cycle in postgres_test_utils & postgres_scim (EE).
func MarkCheckpointsDeleted ¶
MarkCheckpointsDeleted updates the provided checkpoints to the DELETED state.
func MatchSentinelError ¶
MatchSentinelError checks if the error belongs to specific families of errors and ensures that the returned error has the proper type and text.
func MetricBatches ¶
func MetricBatches( experimentID int, metricName string, startTime time.Time, metricGroup model.MetricGroup, ) ( batches []int32, endTime time.Time, err error, )
MetricBatches returns the milestones (in batches processed) at which a specific metric was recorded.
func MustHaveAffectedRows ¶
MustHaveAffectedRows checks if bun has affected rows in a table or not. Returns ErrNotFound if no rows were affected and returns the provided error otherwise.
func NonExperimentTasksContextDirectory ¶
NonExperimentTasksContextDirectory returns a non-experiment task's context directory.
func OrderByToSQL ¶
OrderByToSQL computes the SQL keyword corresponding to the given ordering type.
func OverwriteRPWorkspaceBindings ¶
func OverwriteRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, resourcePools []config.ResourcePoolConfig, ) error
OverwriteRPWorkspaceBindings overwrites the bindings between workspaceIds and poolName.
func PaginateBun ¶
func PaginateBun( query *bun.SelectQuery, orderColumn string, direction SortDirection, offset, limit int, ) *bun.SelectQuery
PaginateBun adds sorting and pagination to the provided bun query, defaulting to certain values if they are not specified. By default, we order by ascending on the id column, with no limit.
func PaginateBunUnsafe ¶
func PaginateBunUnsafe( query *bun.SelectQuery, orderColumn string, direction SortDirection, offset, limit int, ) *bun.SelectQuery
PaginateBunUnsafe is a version of PaginateBun that allows an arbitrary order expression like `metrics->>'loss'`.
func ParseMapToProto ¶
ParseMapToProto converts sqlx/bun-scanned map to a proto object.
func ReadRPsAvailableToWorkspace ¶
func ReadRPsAvailableToWorkspace( ctx context.Context, workspaceID int32, offset int32, limit int32, resourcePoolConfig []config.ResourcePoolConfig, ) ([]string, *apiv1.Pagination, error)
ReadRPsAvailableToWorkspace returns the names of resource pools available to a workspace.
func RecordTaskEndStatsBun ¶
RecordTaskEndStatsBun records end stats for tasks with bun.
func RecordTaskStatsBun ¶
RecordTaskStatsBun records stats for tasks with bun.
func RegisterModel ¶
func RegisterModel(m interface{})
RegisterModel registers a model in Bun or, if theOneBun is not yet initialized, sets it up to be registered once initialized. It's generally best to pass a nil pointer of your model's type as argument m.
func RemoveProjectHyperparameters ¶
RemoveProjectHyperparameters takes a list of experiment ids and recalculates their respective project hyperparameters.
func RemoveRPWorkspaceBindings ¶
RemoveRPWorkspaceBindings removes the bindings between workspaceIds and poolName.
func TopTrialsByMetric ¶
func TopTrialsByMetric( ctx context.Context, experimentID int, maxTrials int, metric string, smallerIsBetter bool, ) ([]int32, error)
TopTrialsByMetric chooses the subset of trials from an experiment that recorded the best values for the specified metric at any point during the trial.
func TrialByExperimentAndRequestID ¶
func TrialByExperimentAndRequestID( ctx context.Context, experimentID int, requestID model.RequestID, ) (*model.Trial, error)
TrialByExperimentAndRequestID looks up a trial, returning an error if none exists.
func TrialByTaskID ¶
TrialByTaskID looks up a trial by taskID, returning an error if none exists. This errors if you called it with a non trial taskID.
func TrialTaskIDsByTrialID ¶
TrialTaskIDsByTrialID returns the trial task IDs for a given trial ID, sorted by task run ID.
func UpdateAllocationPorts ¶
func UpdateAllocationPorts(a model.Allocation) error
UpdateAllocationPorts stores the latest task state and readiness.
func UpdateCheckpointSizeTx ¶
UpdateCheckpointSizeTx updates the checkpoint size and count for the experiment and trial.
func UpsertTrialByExternalIDTx ¶
func UpsertTrialByExternalIDTx( ctx context.Context, tx bun.Tx, trial *model.Trial, taskID model.TaskID, ) error
UpsertTrialByExternalIDTx UPSERTs the trial with respect to the external_trial_id.
func ValidateDoubleFieldFilterComparison ¶
func ValidateDoubleFieldFilterComparison( filter *commonv1.DoubleFieldFilter, ) error
ValidateDoubleFieldFilterComparison validates the min and max values in the range.
func ValidateInt32FieldFilterComparison ¶
func ValidateInt32FieldFilterComparison( filter *commonv1.Int32FieldFilter, ) error
ValidateInt32FieldFilterComparison validates the min and max values in the range.
func ValidatePolymorphicFilter ¶
func ValidatePolymorphicFilter( filter *commonv1.PolymorphicFilter, ) error
ValidatePolymorphicFilter ensures that a Polymorphic filter contains at most one valid range.
func ValidateTimeStampFieldFilterComparison ¶
func ValidateTimeStampFieldFilterComparison( filter *commonv1.TimestampFieldFilter, ) error
ValidateTimeStampFieldFilterComparison validates the min and max timestamps in the range.
Types ¶
type DB ¶
type DB interface { Migrate(migrationURL string, actions []string) error Close() error GetOrCreateClusterID(telemetryID string) (string, error) CheckExperimentExists(id int) (bool, error) CheckTrialExists(id int) (bool, error) TrialExperimentAndRequestID(id int) (int, model.RequestID, error) AddExperiment(experiment *model.Experiment, activeConfig expconf.ExperimentConfig) error ExperimentIDByTrialID(trialID int) (int, error) NonTerminalExperiments() ([]*model.Experiment, error) TerminateExperimentInRestart(id int, state model.State) error SaveExperimentConfig(id int, config expconf.ExperimentConfig) error SaveExperimentState(experiment *model.Experiment) error SaveExperimentArchiveStatus(experiment *model.Experiment) error DeleteExperiments(ctx context.Context, ids []int) error ExperimentHasCheckpointsInRegistry(id int) (bool, error) SaveExperimentProgress(id int, progress *float64) error ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error) ExperimentTotalStepTime(id int) (float64, error) ExperimentNumTrials(id int) (int64, error) ExperimentTrialIDs(expID int) ([]int, error) ExperimentNumSteps(id int) (int64, error) ExperimentModelDefinitionRaw(id int) ([]byte, error) ExperimentCheckpointsToGCRaw( id int, experimentBest, trialBest, trialLatest int, ) ([]uuid.UUID, error) AddTask(t *model.Task) error UpdateTrial(id int, newState model.State) error UpdateTrialRunnerState(id int, state string) error UpdateTrialRunnerMetadata(id int, md *trialv1.TrialRunnerMetadata) error AddAllocation(a *model.Allocation) error CompleteAllocation(a *model.Allocation) error CompleteAllocationTelemetry(aID model.AllocationID) ([]byte, error) TrialRunIDAndRestarts(trialID int) (int, int, error) UpdateTrialRunID(id, runID int) error UpdateTrialRestarts(id, restarts int) error AddTrainingMetrics(ctx context.Context, m *trialv1.TrialMetrics) error AddValidationMetrics( ctx context.Context, m *trialv1.TrialMetrics, ) error ValidationByTotalBatches(trialID, totalBatches int) 
(*model.TrialMetrics, error) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error) CheckpointByUUID(id uuid.UUID) (*model.Checkpoint, error) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error) PeriodicTelemetryInfo() ([]byte, error) TrialState(trialID int) (model.State, error) TrialStatus(trialID int) (model.State, *time.Time, error) Query(queryName string, v interface{}, params ...interface{}) error QueryF( queryName string, args []interface{}, v interface{}, params ...interface{}) error RawQuery(queryName string, params ...interface{}) ([]byte, error) UpdateResourceAllocationAggregation() error InsertTrialProfilerMetricsBatch( values []float32, batches []int32, timestamps []time.Time, labels []byte, ) error GetTrialProfilerMetricsBatches( labelsJSON []byte, offset, limit int, ) (model.TrialProfilerMetricsBatchBatch, error) ProjectByName(workspaceName string, projectName string) (projectID int, err error) ProjectExperiments(id int) (experiments []*model.Experiment, err error) ExperimentLabelUsage(projectID int32) (labelUsage map[string]int, err error) GetExperimentStatus(experimentID int) (state model.State, progress float64, err error) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error) TrialsSnapshot(experimentID int, minBatches int, maxBatches int, metricName string, startTime time.Time, metricGroup model.MetricGroup) ( trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string, smallerIsBetter bool) (trials []int32, err error) StartAllocationSession(allocationID model.AllocationID, owner *model.User) (string, error) DeleteAllocationSession(allocationID model.AllocationID) error UpdateAllocationState(allocation model.Allocation) 
error UpdateAllocationStartTime(allocation model.Allocation) error UpdateAllocationProxyAddress(allocation model.Allocation) error ExperimentSnapshot(experimentID int) ([]byte, int, error) SaveSnapshot( experimentID int, version int, experimentSnapshot []byte, ) error DeleteSnapshotsForExperiment(experimentID int) error DeleteSnapshotsForExperiments(experimentIDs []int) func(ctx context.Context, tx *bun.Tx) error DeleteSnapshotsForTerminalExperiments() error QueryProto(queryName string, v interface{}, args ...interface{}) error QueryProtof( queryName string, args []interface{}, v interface{}, params ...interface{}) error TrialLogs( trialID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{}, ) ([]*model.TrialLog, interface{}, error) DeleteTrialLogs(ids []int) error TrialLogsCount(trialID int, fs []api.Filter) (int, error) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error) RecordAgentStats(a *model.AgentStats) error EndAllAgentStats() error RecordInstanceStats(a *model.InstanceStats) error EndInstanceStats(a *model.InstanceStats) error EndAllInstanceStats() error EndAllTaskStats() error RecordTaskEndStats(stats *model.TaskStats) error RecordTaskStats(stats *model.TaskStats) error UpdateJobPosition(jobID model.JobID, position decimal.Decimal) error }
DB is an interface for _all_ the functionality packed into the DB.
type ExperimentCheckpointGrouping ¶
ExperimentCheckpointGrouping represents a mapping of checkpoint uuids to experiment id.
type FilterComparison ¶
type FilterComparison[T any] struct { Gt *T Gte *T Lt *T Lte *T }
FilterComparison makes you wish for properties in generic structs/interfaces.
type MetricMeasurements ¶
type MetricMeasurements struct { Values map[string]interface{} Batches uint Time time.Time Epoch *float64 `json:"epoch,omitempty"` TrialID int32 }
MetricMeasurements represents a metric measured by all possible independent variables.
type MetricPartitionType ¶
type MetricPartitionType string
MetricPartitionType denotes what type the metric is. This is planned to be deprecated once we upgrade to pg11 and can use DEFAULT partitioning.
const ( // TrainingMetric designates metrics from training steps. TrainingMetric MetricPartitionType = "TRAINING" // ValidationMetric designates metrics from validation steps. ValidationMetric MetricPartitionType = "VALIDATION" // GenericMetric designates metrics from other sources. GenericMetric MetricPartitionType = "GENERIC" )
type PgDB ¶
type PgDB struct {
// contains filtered or unexported fields
}
PgDB represents a Postgres database connection. The type definition is needed to define methods.
func ConnectPostgres ¶
ConnectPostgres connects to a Postgres database.
func SingleDB ¶
func SingleDB() *PgDB
SingleDB returns a singleton database client. Bun() should be preferred over this for all new queries.
func (*PgDB) ActiveExperimentConfig ¶
func (db *PgDB) ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error)
ActiveExperimentConfig returns the full config object for an experiment.
func (*PgDB) AddAllocation ¶
func (db *PgDB) AddAllocation(a *model.Allocation) error
AddAllocation upserts the existence of an allocation. Allocation IDs may conflict in the event the master restarts and the trial run ID increment is not persisted, but it is the same allocation so this is OK.
func (*PgDB) AddExperiment ¶
func (db *PgDB) AddExperiment( experiment *model.Experiment, activeConfig expconf.ExperimentConfig, ) (err error)
AddExperiment adds the experiment to the database and sets its ID.
TODO(ilia): deprecate and use module function instead.
func (*PgDB) AddTask ¶
AddTask upserts the existence of a task.
TODO(ilia): deprecate and use module function instead.
func (*PgDB) AddTaskLogs ¶
AddTaskLogs adds a list of *model.TaskLog objects to the database with automatic IDs.
func (*PgDB) AddTrainingMetrics ¶
AddTrainingMetrics [DEPRECATED] adds a completed step to the database with the given training metrics. If these training metrics occur before any others, a rollback is assumed and later training and validation metrics are cleaned up.
func (*PgDB) AddTrialMetrics ¶
func (db *PgDB) AddTrialMetrics( ctx context.Context, m *trialv1.TrialMetrics, mGroup model.MetricGroup, ) error
AddTrialMetrics persists the given trial metrics to the database.
func (*PgDB) AddValidationMetrics ¶
AddValidationMetrics [DEPRECATED] adds a completed validation to the database with the given validation metrics. If these validation metrics occur before any others, a rollback is assumed and later metrics are cleaned up from the database.
func (*PgDB) AllocationByID ¶
func (db *PgDB) AllocationByID(aID model.AllocationID) (*model.Allocation, error)
AllocationByID retrieves an allocation by its ID.
func (*PgDB) CheckExperimentExists ¶
CheckExperimentExists checks if the experiment exists.
func (*PgDB) CheckTaskExists ¶
CheckTaskExists checks if the task exists.
func (*PgDB) CheckTrialExists ¶
CheckTrialExists checks if the trial exists.
func (*PgDB) CheckpointByTotalBatches ¶
func (db *PgDB) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error)
CheckpointByTotalBatches looks up a checkpoint by trial and total batch, returning nil if none exists.
func (*PgDB) CheckpointByUUID ¶
CheckpointByUUID looks up a checkpoint by UUID, returning nil if none exists.
func (*PgDB) CheckpointByUUIDs ¶
CheckpointByUUIDs looks up checkpoints by a list of UUIDs, returning nil on error.
func (*PgDB) CloseOpenAllocations ¶
func (db *PgDB) CloseOpenAllocations(exclude []model.AllocationID) error
CloseOpenAllocations finds all allocations that were open when the master crashed and adds an end time.
func (*PgDB) CompleteAllocation ¶
func (db *PgDB) CompleteAllocation(a *model.Allocation) error
CompleteAllocation persists the end of an allocation lifetime.
func (*PgDB) CompleteAllocationTelemetry ¶
func (db *PgDB) CompleteAllocationTelemetry(aID model.AllocationID) ([]byte, error)
CompleteAllocationTelemetry returns the analytics of an allocation for the telemetry.
func (*PgDB) CompleteTask ¶
CompleteTask persists the completion of a task.
func (*PgDB) DeleteAllocationSession ¶
func (db *PgDB) DeleteAllocationSession(allocationID model.AllocationID) error
DeleteAllocationSession deletes the task session with the given AllocationID.
func (*PgDB) DeleteExperiments ¶
DeleteExperiments deletes zero or more experiments.
func (*PgDB) DeleteSnapshotsForExperiment ¶
DeleteSnapshotsForExperiment deletes all snapshots for one given experiment.
func (*PgDB) DeleteSnapshotsForExperiments ¶
func (db *PgDB) DeleteSnapshotsForExperiments(experimentIDs []int) func(ctx context.Context, tx *bun.Tx) error
DeleteSnapshotsForExperiments deletes all snapshots for multiple given experiments.
func (*PgDB) DeleteSnapshotsForTerminalExperiments ¶
DeleteSnapshotsForTerminalExperiments deletes all snapshots for terminal state experiments from the database.
func (*PgDB) DeleteTaskLogs ¶
DeleteTaskLogs deletes the logs for the given tasks.
func (*PgDB) DeleteTrialLogs ¶
DeleteTrialLogs deletes the logs for the given trial IDs.
func (*PgDB) EndAllAgentStats ¶
EndAllAgentStats is called when the master starts, in case the master previously crashed. If the master stops, statistics would treat “live” agents as live until the master restarts.
func (*PgDB) EndAllInstanceStats ¶
EndAllInstanceStats is called when the master starts, in case the master previously crashed. If the master stops, statistics would treat “live” instances as live until the master restarts.
func (*PgDB) EndAllTaskStats ¶
EndAllTaskStats called at master starts, in case master previously crashed.
func (*PgDB) EndInstanceStats ¶
func (db *PgDB) EndInstanceStats(a *model.InstanceStats) error
EndInstanceStats updates the end time of an instance.
func (*PgDB) ExperimentCheckpointsToGCRaw ¶
func (db *PgDB) ExperimentCheckpointsToGCRaw( id int, experimentBest, trialBest, trialLatest int, ) ([]uuid.UUID, error)
ExperimentCheckpointsToGCRaw returns the UUIDs of the checkpoints that should be GCed according to the given GC policy parameters.
func (*PgDB) ExperimentConfigRaw ¶
ExperimentConfigRaw returns the full config object for an experiment as a JSON string.
func (*PgDB) ExperimentHasCheckpointsInRegistry ¶
ExperimentHasCheckpointsInRegistry checks if the experiment has any checkpoints in the registry.
func (*PgDB) ExperimentIDByTrialID ¶
ExperimentIDByTrialID looks up an experiment ID by a trial ID.
func (*PgDB) ExperimentLabelUsage ¶
ExperimentLabelUsage returns a flattened and deduplicated list of all the labels in use across all experiments.
func (*PgDB) ExperimentModelDefinitionRaw ¶
ExperimentModelDefinitionRaw returns the zipped model definition for an experiment as a byte array.
func (*PgDB) ExperimentNumSteps ¶
ExperimentNumSteps returns the total number of steps for all trials of the experiment.
func (*PgDB) ExperimentNumTrials ¶
ExperimentNumTrials returns the total number of trials for the experiment.
func (*PgDB) ExperimentSnapshot ¶
ExperimentSnapshot returns the snapshot for the specified experiment.
func (*PgDB) ExperimentTotalStepTime ¶
ExperimentTotalStepTime returns the total elapsed time for all allocations of the experiment with the given ID. Any step with a NULL end_time does not contribute. Elapsed time is expressed as a floating point number of seconds.
func (*PgDB) ExperimentTrialIDs ¶
ExperimentTrialIDs returns the trial IDs for the experiment.
func (*PgDB) FailDeletingExperiment ¶
FailDeletingExperiment finds all experiments that were deleting when the master crashed and moves them to DELETE_FAILED.
func (*PgDB) GetExperimentStatus ¶
func (db *PgDB) GetExperimentStatus(experimentID int) (state model.State, progress float64, err error, )
GetExperimentStatus returns the current state of the experiment.
func (*PgDB) GetOrCreateClusterID ¶
GetOrCreateClusterID queries the master uuid in the database, adding one if it doesn't exist. If a nonempty telemetryID is provided, it will be the one added, otherwise a uuid is generated.
func (*PgDB) GetRegisteredCheckpoints ¶
GetRegisteredCheckpoints gets the checkpoints in the model registry from the list of checkpoints provided.
func (*PgDB) GetTrialProfilerMetricsBatches ¶
func (db *PgDB) GetTrialProfilerMetricsBatches( labelsJSON []byte, offset, limit int, ) (model.TrialProfilerMetricsBatchBatch, error)
GetTrialProfilerMetricsBatches gets a batch of profiler metric batches from the database.
func (*PgDB) GroupCheckpointUUIDsByExperimentID ¶
func (db *PgDB) GroupCheckpointUUIDsByExperimentID(checkpoints []uuid.UUID) ( []*ExperimentCheckpointGrouping, error, )
GroupCheckpointUUIDsByExperimentID creates the mapping of checkpoint UUIDs to experiment ID. The checkpoint UUIDs grouped together are comma separated.
func (*PgDB) InsertTrialProfilerMetricsBatch ¶
func (db *PgDB) InsertTrialProfilerMetricsBatch( values []float32, batches []int32, timestamps []time.Time, labels []byte, ) error
InsertTrialProfilerMetricsBatch inserts a batch of metrics into the database.
func (*PgDB) LatestCheckpointForTrial ¶
func (db *PgDB) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error)
LatestCheckpointForTrial finds the latest completed checkpoint for a trial, returning nil if none exists.
func (*PgDB) LegacyExperimentConfigByID ¶
func (db *PgDB) LegacyExperimentConfigByID( id int, ) (expconf.LegacyConfig, error)
LegacyExperimentConfigByID parses very old configs, returning a LegacyConfig which exposes a select subset of fields in a type-safe way.
func (*PgDB) MaxTerminationDelay ¶
MaxTerminationDelay is the max delay before a consumer can be sure all logs have been received. For Postgres, we don't need to wait very long at all; this was a hypothetical cap on fluent to DB latency prior to fluent's deprecation.
func (*PgDB) MetricNames ¶
func (db *PgDB) MetricNames(ctx context.Context, experimentIDs []int) ( map[model.MetricGroup][]string, error, )
MetricNames returns a list of metric names for the given experiment IDs.
func (*PgDB) NonTerminalExperiments ¶
func (db *PgDB) NonTerminalExperiments() ([]*model.Experiment, error)
NonTerminalExperiments finds all experiments in the database whose states are not terminal.
func (*PgDB) PeriodicTelemetryInfo ¶
PeriodicTelemetryInfo returns anonymous information about the usage of the current Determined cluster.
func (*PgDB) ProjectByName ¶
ProjectByName returns a project's ID if it exists in the given workspace.
func (*PgDB) ProjectExperiments ¶
func (db *PgDB) ProjectExperiments(id int) (experiments []*model.Experiment, err error)
ProjectExperiments returns a list of experiments within a project.
func (*PgDB) Query ¶
Query returns the result of the query. Any placeholder parameters are replaced with supplied params.
func (*PgDB) QueryF ¶
func (db *PgDB) QueryF( queryName string, args []interface{}, v interface{}, params ...interface{}, ) error
QueryF returns the result of the formatted query. Any placeholder parameters are replaced with supplied params.
func (*PgDB) QueryProto ¶
QueryProto returns the result of the query. Any placeholder parameters are replaced with supplied args. Enum values must be the full name of the enum.
func (*PgDB) QueryProtof ¶
func (db *PgDB) QueryProtof( queryName string, args []interface{}, v interface{}, params ...interface{}, ) error
QueryProtof returns the result of the formatted query. Any placeholder parameters are replaced with supplied params.
func (*PgDB) RawQuery ¶
RawQuery returns the result of the query as a raw byte string. Any placeholder parameters are replaced with supplied params.
func (*PgDB) RecordAgentStats ¶
func (db *PgDB) RecordAgentStats(a *model.AgentStats) error
RecordAgentStats inserts a record of agent start time if the agent has not been started or has already ended.
func (*PgDB) RecordInstanceStats ¶
func (db *PgDB) RecordInstanceStats(a *model.InstanceStats) error
RecordInstanceStats inserts a record of instance start time if the instance has not been started or has already ended.
func (*PgDB) RecordTaskEndStats ¶
RecordTaskEndStats records end stats for tasks.
func (*PgDB) RecordTaskStats ¶
RecordTaskStats records stats for tasks.
func (*PgDB) SaveExperimentArchiveStatus ¶
func (db *PgDB) SaveExperimentArchiveStatus(experiment *model.Experiment) error
SaveExperimentArchiveStatus saves the current experiment archive status to the database.
func (*PgDB) SaveExperimentConfig ¶
func (db *PgDB) SaveExperimentConfig(id int, config expconf.ExperimentConfig) error
SaveExperimentConfig saves the current experiment config to the database.
func (*PgDB) SaveExperimentProgress ¶
SaveExperimentProgress stores the progress for an experiment in the database.
func (*PgDB) SaveExperimentState ¶
func (db *PgDB) SaveExperimentState(experiment *model.Experiment) error
SaveExperimentState saves the current experiment state to the database.
func (*PgDB) SaveSnapshot ¶
SaveSnapshot saves a searcher and trial snapshot together.
func (*PgDB) StartAllocationSession ¶
func (db *PgDB) StartAllocationSession( allocationID model.AllocationID, owner *model.User, ) (string, error)
StartAllocationSession creates a row in the allocation_sessions table.
func (*PgDB) TaskLogs ¶
func (db *PgDB) TaskLogs( taskID model.TaskID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{}, ) ([]*model.TaskLog, interface{}, error)
TaskLogs takes a task ID and log offset, limit and filters and returns matching logs.
func (*PgDB) TaskLogsCount ¶
TaskLogsCount returns the number of logs in postgres for the given task.
func (*PgDB) TaskLogsFields ¶
TaskLogsFields returns the unique fields that can be filtered on for the given task.
func (*PgDB) TerminateExperimentInRestart ¶
TerminateExperimentInRestart is used during master restart to properly terminate an experiment which was either in the process of stopping or which is not restorable for some reason, such as an invalid experiment config after a version upgrade.
func (*PgDB) TopTrialsByTrainingLength ¶
func (db *PgDB) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string, smallerIsBetter bool, ) (trials []int32, err error)
TopTrialsByTrainingLength chooses the subset of trials that have been training for the highest number of batches, using the specified metric as a tie breaker.
func (*PgDB) TrainingMetricBatches ¶
func (db *PgDB) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error, )
TrainingMetricBatches returns the milestones (in batches processed) at which a specific training metric was recorded.
func (*PgDB) TrialExperimentAndRequestID ¶
TrialExperimentAndRequestID returns the trial's experiment and request ID.
func (*PgDB) TrialLogs ¶
func (db *PgDB) TrialLogs( trialID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{}, ) ([]*model.TrialLog, interface{}, error)
TrialLogs takes a trial ID and log offset, limit and filters and returns matching trial logs.
func (*PgDB) TrialLogsCount ¶
TrialLogsCount returns the number of logs in postgres for the given trial.
func (*PgDB) TrialLogsFields ¶
func (db *PgDB) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error)
TrialLogsFields returns the unique fields that can be filtered on for the given trial.
func (*PgDB) TrialRunIDAndRestarts ¶
TrialRunIDAndRestarts returns the run id and restart count for a trial.
func (*PgDB) TrialState ¶
TrialState returns the current state of the given trial.
func (*PgDB) TrialStatus ¶
TrialStatus returns the current status of the given trial, including the end time without returning all its hparams and other unneeded details. Called in paths hotter than TrialByID allows.
func (*PgDB) TrialsSnapshot ¶
func (db *PgDB) TrialsSnapshot(experimentID int, minBatches int, maxBatches int, metricName string, startTime time.Time, metricGroup model.MetricGroup, ) (trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error)
TrialsSnapshot returns metrics across each trial in an experiment at a specific point of progress, for metric groups other than training and validation.
func (*PgDB) TrySaveExperimentState ¶
func (db *PgDB) TrySaveExperimentState(experiment *model.Experiment) error
TrySaveExperimentState saves the current experiment state to the database and returns if we successfully changed the state or not.
func (*PgDB) UpdateAllocationProxyAddress ¶
func (db *PgDB) UpdateAllocationProxyAddress(a model.Allocation) error
UpdateAllocationProxyAddress stores the proxy address.
func (*PgDB) UpdateAllocationStartTime ¶
func (db *PgDB) UpdateAllocationStartTime(a model.Allocation) error
UpdateAllocationStartTime stores the latest start time.
func (*PgDB) UpdateAllocationState ¶
func (db *PgDB) UpdateAllocationState(a model.Allocation) error
UpdateAllocationState stores the latest task state and readiness.
func (*PgDB) UpdateClusterHeartBeat ¶
UpdateClusterHeartBeat updates the clusterheartbeat column in the cluster_id table.
func (*PgDB) UpdateJobPosition ¶
UpdateJobPosition propagates the new queue position to the job.
func (*PgDB) UpdateResourceAllocationAggregation ¶
UpdateResourceAllocationAggregation updates the aggregated resource allocation table.
func (*PgDB) UpdateTrial ¶
UpdateTrial updates an existing trial. Fields that are nil or zero are not updated. end_time is set if the trial moves to a terminal state.
func (*PgDB) UpdateTrialRestarts ¶
UpdateTrialRestarts sets the trial's restart count.
func (*PgDB) UpdateTrialRunID ¶
UpdateTrialRunID sets the trial's run ID.
func (*PgDB) UpdateTrialRunnerMetadata ¶
func (db *PgDB) UpdateTrialRunnerMetadata(id int, md *trialv1.TrialRunnerMetadata) error
UpdateTrialRunnerMetadata updates a trial's metadata about its runner.
func (*PgDB) UpdateTrialRunnerState ¶
UpdateTrialRunnerState updates a trial runner's state.
func (*PgDB) ValidationByTotalBatches ¶
func (db *PgDB) ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error)
ValidationByTotalBatches looks up a validation by trial and total batches, returning nil if none exists.
func (*PgDB) ValidationMetricBatches ¶
func (db *PgDB) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error, )
ValidationMetricBatches returns the milestones (in batches processed) at which a specific validation metric was recorded.
type RPWorkspaceBinding ¶
type RPWorkspaceBinding struct { bun.BaseModel `bun:"table:rp_workspace_bindings"` WorkspaceID int `bun:"workspace_id"` PoolName string `bun:"pool_name"` Valid bool `bun:"valid"` }
RPWorkspaceBinding is a struct reflecting the db table rp_workspace_bindings.
func GetAllBindings ¶
func GetAllBindings( ctx context.Context, ) ([]*RPWorkspaceBinding, error)
GetAllBindings gets all valid rp-workspace bindings.
func ReadWorkspacesBoundToRP ¶
func ReadWorkspacesBoundToRP( ctx context.Context, poolName string, offset, limit int32, resourcePools []config.ResourcePoolConfig, ) ([]*RPWorkspaceBinding, *apiv1.Pagination, error)
ReadWorkspacesBoundToRP gets the bindings between workspace IDs and the requested resource pool.
type SortDirection ¶
type SortDirection string
SortDirection represents the order by in a query.
const ( // SortDirectionAsc represents ordering by ascending. SortDirectionAsc SortDirection = "ASC" // SortDirectionDesc represents ordering by descending. SortDirectionDesc SortDirection = "DESC" // SortDirectionAscNullsFirst represents ordering by ascending with nulls first. SortDirectionAscNullsFirst SortDirection = "ASC NULLS FIRST" // SortDirectionDescNullsLast represents ordering by descending with nulls last. SortDirectionDescNullsLast SortDirection = "DESC NULLS LAST" )
type StaticQueryMap ¶
StaticQueryMap caches static sql files.
func (*StaticQueryMap) GetOrLoad ¶
func (q *StaticQueryMap) GetOrLoad(queryName string) string
GetOrLoad fetches static sql from the cache or loads them from disk.
type TaskMetadata ¶
type TaskMetadata struct { bun.BaseModel `bun:"table:command_state"` WorkspaceID model.AccessScopeID `bun:"workspace_id"` TaskType model.TaskType `bun:"task_type"` ExperimentIDs []int32 `bun:"experiment_ids"` TrialIDs []int32 `bun:"trial_ids"` }
TaskMetadata captures minimal metadata about a task.
func IdentifyTask ¶
IdentifyTask returns the task metadata for a given task ID. Returns db.ErrNotFound if a command with given taskID does not exist.
Source Files ¶
- bun_filters.go
- database.go
- migrations.go
- postgres.go
- postgres_agent.go
- postgres_checkpoints.go
- postgres_cluster.go
- postgres_command.go
- postgres_experiments.go
- postgres_filters.go
- postgres_instance.go
- postgres_jobs.go
- postgres_proto.go
- postgres_rp_workspace_bindings.go
- postgres_snapshots.go
- postgres_tasks.go
- postgres_trial.go
- postgres_trial_logs.go
- postgres_trial_metrics.go
- postgres_trial_profiler_metrics.go
- setup.go
- static_query_map.go
- utils.go