Documentation ¶
Index ¶
- Constants
- Variables
- func ActiveLogPolicies(ctx context.Context, id int) (expconf.LogPoliciesConfig, error)
- func AddAllocation(ctx context.Context, a *model.Allocation) error
- func AddAllocationExitStatus(ctx context.Context, a *model.Allocation) error
- func AddCheckpointMetadata(ctx context.Context, m *model.CheckpointV2, runID int) error
- func AddExperiment(ctx context.Context, experiment *model.Experiment, modelDef []byte, ...) (err error)
- func AddExperimentTx(ctx context.Context, idb bun.IDB, experiment *model.Experiment, ...) (err error)
- func AddJob(j *model.Job) error
- func AddJobTx(ctx context.Context, idb bun.IDB, j *model.Job) error
- func AddNonExperimentTasksContextDirectory(ctx context.Context, tID model.TaskID, bytes []byte) error
- func AddProjectHparams(ctx context.Context, tx bun.Tx, projectID int, runIDs []int32) error
- func AddRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, ...) error
- func AddTask(ctx context.Context, t *model.Task) error
- func AddTaskTx(ctx context.Context, idb bun.IDB, t *model.Task) error
- func AddTrial(ctx context.Context, trial *model.Trial, taskID model.TaskID) error
- func AllocationByID(ctx context.Context, aID model.AllocationID) (*model.Allocation, error)
- func ApplyDoubleFieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.DoubleFieldFilter) (*bun.SelectQuery, error)
- func ApplyInt32FieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.Int32FieldFilter) (*bun.SelectQuery, error)
- func ApplyPolymorphicFilter(q *bun.SelectQuery, column string, filter *commonv1.PolymorphicFilter) (*bun.SelectQuery, error)
- func ApplyTimestampFieldFilter[T string | schema.Ident](q *bun.SelectQuery, column T, filter *commonv1.TimestampFieldFilter) (*bun.SelectQuery, error)
- func BuildRunHParams(runID int, projectID int, hparams map[string]any, parentName string) ([]model.RunHparam, []model.ProjectHparam, error)
- func Bun() *bun.DB
- func BunSelectMetricGroupNames() *bun.SelectQuery
- func BunSelectMetricsQuery(mGroup model.MetricGroup, inclArchived bool) *bun.SelectQuery
- func CheckIfRPUnbound(poolName string) error
- func ClearClusterMessage(ctx context.Context, db *bun.DB) error
- func CloseOpenAllocations(ctx context.Context, exclude []model.AllocationID) error
- func CompleteAllocation(ctx context.Context, a *model.Allocation) error
- func CompleteAllocationTelemetry(ctx context.Context, aID model.AllocationID) ([]byte, error)
- func CompleteGenericTask(tID model.TaskID, endTime time.Time) error
- func CompleteTask(ctx context.Context, tID model.TaskID, endTime time.Time) error
- func DeleteAllocationSession(ctx context.Context, allocationID model.AllocationID) error
- func DeleteDispatch(ctx context.Context, id string) (int64, error)
- func DeleteDispatches(ctx context.Context, opts func(*bun.DeleteQuery) *bun.DeleteQuery) (int64, error)
- func DeleteNotebookSessionByTask(ctx context.Context, taskID model.TaskID) error
- func DoPermissionsExist(ctx context.Context, curUserID model.UserID, ...) error
- func DoesPermissionMatch(ctx context.Context, curUserID model.UserID, workspaceID *int32, ...) error
- func DoesPermissionMatchAll(ctx context.Context, curUserID model.UserID, ...) error
- func EndAgentStats(a *model.AgentStats) error
- func EndAllTaskStats(ctx context.Context) error
- func ExperimentBestSearcherValidation(ctx context.Context, id int) (float32, error)
- func ExperimentByExternalIDTx(ctx context.Context, idb bun.IDB, externalExperimentID string) (*model.Experiment, error)
- func ExperimentByID(ctx context.Context, expID int) (*model.Experiment, error)
- func ExperimentByTaskID(ctx context.Context, taskID model.TaskID) (*model.Experiment, error)
- func ExperimentByTrialID(ctx context.Context, trialID int) (*model.Experiment, error)
- func ExperimentIDsToWorkspaceIDs(ctx context.Context, experimentIDs []int32) ([]model.AccessScopeID, error)
- func ExperimentNumSteps(ctx context.Context, id int) (int64, error)
- func ExperimentTotalStepTime(ctx context.Context, id int) (float64, error)
- func ExperimentsByTrialID(ctx context.Context, trialIDs []int) ([]*model.Experiment, error)
- func ExperimentsTrialAndTaskIDs(ctx context.Context, idb bun.IDB, expIDs []int) ([]int, []model.TaskID, error)
- func GenerateNotebookSessionToken(userID model.UserID, taskID model.TaskID) (string, error)
- func GetActiveClusterMessage(ctx context.Context, db *bun.DB) (model.ClusterMessage, error)
- func GetCheckpoint(ctx context.Context, checkpointUUID string) (*checkpointv1.Checkpoint, error)
- func GetClusterMessage(ctx context.Context, db *bun.DB) (model.ClusterMessage, error)
- func GetDefaultPoolsForWorkspace(ctx context.Context, workspaceID int) (computePool, auxPool string, err error)
- func GetMetrics(ctx context.Context, trialID, afterBatches, limit int, mGroup *string) ([]*trialv1.MetricsReport, error)
- func GetNonGlobalWorkspacesWithPermission(ctx context.Context, curUserID model.UserID, ...) ([]int, error)
- func GetNonTerminalExperimentCount(ctx context.Context, experimentIDs []int32) (count int, err error)
- func GetRunMetadata(ctx context.Context, runID int) (map[string]any, error)
- func GetTokenKeys() *model.AuthTokenKeypair
- func GetTrialProfilerAvailableSeries(ctx context.Context, trialID int32) ([]*trialv1.TrialProfilerMetricLabels, error)
- func GetUnboundRPs(ctx context.Context, resourcePools []string) ([]string, error)
- func HackAddUser(ctx context.Context, user *model.User) (model.UserID, error)
- func InitAuthKeys() error
- func InsertDispatch(ctx context.Context, r *Dispatch) error
- func InsertModel(ctx context.Context, name string, description string, metadata []byte, ...) (*modelv1.Model, error)
- func InsertModelVersion(ctx context.Context, id int32, ckptID string, name string, comment string, ...) (*modelv1.ModelVersion, error)
- func IsPaused(ctx context.Context, tID model.TaskID) (bool, error)
- func JobByID(ctx context.Context, jobID model.JobID) (*model.Job, error)
- func KillGenericTask(tID model.TaskID, endTime time.Time) error
- func MatchSentinelError(err error) error
- func MetricBatches(experimentID int, metricName string, startTime time.Time, ...) (batches []int32, endTime time.Time, err error)
- func MustHaveAffectedRows(result sql.Result, err error) error
- func NonExperimentTasksContextDirectory(ctx context.Context, tID model.TaskID) ([]byte, error)
- func OrderByToSQL(order apiv1.OrderBy) string
- func OverwriteRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, ...) error
- func PaginateBun(query *bun.SelectQuery, orderColumn string, direction SortDirection, ...) *bun.SelectQuery
- func PaginateBunUnsafe(query *bun.SelectQuery, orderColumn string, direction SortDirection, ...) *bun.SelectQuery
- func ParseMapToProto(dest map[string]interface{}, val interface{}) error
- func ProjectExperiments(ctx context.Context, pID int) (experiments []*model.Experiment, err error)
- func ReadRPsAvailableToWorkspace(ctx context.Context, workspaceID int32, offset int32, limit int32, ...) ([]string, *apiv1.Pagination, error)
- func RecordTaskEndStats(ctx context.Context, stats *model.TaskStats) error
- func RecordTaskStats(ctx context.Context, stats *model.TaskStats) error
- func RegisterModel(m interface{})
- func RemoveOutdatedProjectHparams(ctx context.Context, tx bun.Tx, projectID int) error
- func RemoveRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string) error
- func SetClause(fields []string) string
- func SetClusterMessage(ctx context.Context, db *bun.DB, msg model.ClusterMessage) error
- func SetErrorState(taskID model.TaskID, endTime time.Time) error
- func SetPausedState(taskID model.TaskID, endTime time.Time) error
- func SetTokenKeys(tk *model.AuthTokenKeypair)
- func StartAllocationSession(ctx context.Context, allocationID model.AllocationID, owner *model.User) (string, error)
- func StartNotebookSession(ctx context.Context, userID model.UserID, taskID model.TaskID) error
- func TaskByID(ctx context.Context, tID model.TaskID) (*model.Task, error)
- func TaskCompleted(ctx context.Context, tID model.TaskID) (bool, error)
- func TopTrialsByMetric(ctx context.Context, experimentID int, maxTrials int, metric string, ...) ([]int32, error)
- func TrialByExperimentAndRequestID(ctx context.Context, experimentID int, requestID model.RequestID) (*model.Trial, error)
- func TrialByID(ctx context.Context, id int) (*model.Trial, error)
- func TrialByTaskID(ctx context.Context, taskID model.TaskID) (*model.Trial, error)
- func TrialIDsToWorkspaceIDs(ctx context.Context, trialIDs []int32) ([]model.AccessScopeID, error)
- func TrialTaskIDsByTrialID(ctx context.Context, trialID int) ([]*model.RunTaskID, error)
- func UpdateAllocationPorts(ctx context.Context, a model.Allocation) error
- func UpdateAllocationProxyAddress(ctx context.Context, a model.Allocation) error
- func UpdateAllocationStartTime(ctx context.Context, a model.Allocation) error
- func UpdateAllocationState(ctx context.Context, a model.Allocation) error
- func UpdateCheckpointSizeTx(ctx context.Context, idb bun.IDB, checkpoints []uuid.UUID) error
- func UpdateRunMetadata(ctx context.Context, runID int, rawMetadata map[string]any, ...) (result map[string]any, err error)
- func UpdateTrial(ctx context.Context, id int, newState model.State) error
- func UpsertTrialByExternalIDTx(ctx context.Context, tx bun.Tx, trial *model.Trial, taskID model.TaskID) error
- func ValidateDoubleFieldFilterComparison(filter *commonv1.DoubleFieldFilter) error
- func ValidateInt32FieldFilterComparison(filter *commonv1.Int32FieldFilter) error
- func ValidatePolymorphicFilter(filter *commonv1.PolymorphicFilter) error
- func ValidateTimeStampFieldFilterComparison(filter *commonv1.TimestampFieldFilter) error
- type ClientStore
- type DB
- type Dispatch
- func DispatchByID(ctx context.Context, id string) (*Dispatch, error)
- func ListAllDispatches(ctx context.Context) ([]*Dispatch, error)
- func ListDispatches(ctx context.Context, opts func(*bun.SelectQuery) (*bun.SelectQuery, error)) ([]*Dispatch, error)
- func ListDispatchesByAllocationID(ctx context.Context, id model.AllocationID) ([]*Dispatch, error)
- func ListDispatchesByJobID(ctx context.Context, jobID string) ([]*Dispatch, error)
- type FilterComparison
- type MetricMeasurements
- type MetricPartitionType
- type PgDB
- func (db *PgDB) ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error)
- func (db *PgDB) AddExperiment(experiment *model.Experiment, modelDef []byte, ...) (err error)
- func (db *PgDB) AddTaskLogs(logs []*model.TaskLog) error
- func (db *PgDB) AddTrainingMetrics(ctx context.Context, m *trialv1.TrialMetrics) error
- func (db *PgDB) AddTrialMetrics(ctx context.Context, m *trialv1.TrialMetrics, mGroup model.MetricGroup) error
- func (db *PgDB) AddValidationMetrics(ctx context.Context, m *trialv1.TrialMetrics) error
- func (db *PgDB) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error)
- func (db *PgDB) ClientStore() *ClientStore
- func (db *PgDB) Close() error
- func (db *PgDB) DeleteExperiments(ctx context.Context, ids []int) error
- func (db *PgDB) DeleteSnapshotsForExperiment(experimentID int) error
- func (db *PgDB) DeleteSnapshotsForTerminalExperiments() error
- func (db *PgDB) DeleteTaskLogs(ids []model.TaskID) error
- func (db *PgDB) DeleteTrialLogs(ids []int) error
- func (db *PgDB) EndAllAgentStats() error
- func (db *PgDB) EndAllInstanceStats() error
- func (db *PgDB) EndInstanceStats(a *model.InstanceStats) error
- func (db *PgDB) ExperimentConfigRaw(id int) ([]byte, error)
- func (db *PgDB) ExperimentHasCheckpointsInRegistry(id int) (bool, error)
- func (db *PgDB) ExperimentIDByTrialID(trialID int) (int, error)
- func (db *PgDB) ExperimentLabelUsage(projectID int32) (labelUsage map[string]int, err error)
- func (db *PgDB) ExperimentModelDefinitionRaw(id int) ([]byte, error)
- func (db *PgDB) ExperimentNumTrials(id int) (int64, error)
- func (db *PgDB) ExperimentSnapshot(experimentID int) ([]byte, int, error)
- func (db *PgDB) ExperimentTrialIDs(expID int) ([]int, error)
- func (db *PgDB) FailDeletingExperiment() error
- func (db *PgDB) GetExperimentStatus(experimentID int) (state model.State, progress float64, err error)
- func (db *PgDB) GetOrCreateClusterID(telemetryID string) (string, error)
- func (db *PgDB) GetTrialProfilerMetricsBatches(labels *trialv1.TrialProfilerMetricLabels, offset, limit int) (model.TrialProfilerMetricsBatchBatch, error)
- func (db *PgDB) InsertTrialProfilerMetricsBatch(values []float32, batches []int32, timestamps []time.Time, labels []byte) error
- func (db *PgDB) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error)
- func (db *PgDB) LegacyExperimentConfigByID(id int) (expconf.LegacyConfig, error)
- func (db *PgDB) MaxTerminationDelay() time.Duration
- func (db *PgDB) MetricNames(ctx context.Context, experimentIDs []int) (map[model.MetricGroup][]string, error)
- func (db *PgDB) Migrate(migrationURL string, dbCodeDir string, actions []string) error
- func (db *PgDB) NonTerminalExperiments() ([]*model.Experiment, error)
- func (db *PgDB) PeriodicTelemetryInfo() ([]byte, error)
- func (db *PgDB) Query(queryName string, v interface{}, params ...interface{}) error
- func (db *PgDB) QueryF(queryName string, args []interface{}, v interface{}, params ...interface{}) error
- func (db *PgDB) QueryProto(queryName string, v interface{}, args ...interface{}) error
- func (db *PgDB) QueryProtof(queryName string, args []interface{}, v interface{}, params ...interface{}) error
- func (db *PgDB) RawQuery(queryName string, params ...interface{}) ([]byte, error)
- func (db *PgDB) RecordAgentStats(a *model.AgentStats) error
- func (db *PgDB) RecordInstanceStats(a *model.InstanceStats) error
- func (db *PgDB) SaveExperimentArchiveStatus(experiment *model.Experiment) error
- func (db *PgDB) SaveExperimentConfig(id int, config expconf.ExperimentConfig) error
- func (db *PgDB) SaveExperimentProgress(id int, progress *float64) error
- func (db *PgDB) SaveExperimentState(experiment *model.Experiment) error
- func (db *PgDB) SaveSnapshot(experimentID int, version int, experimentSnapshot []byte) error
- func (db *PgDB) TaskLogs(taskID model.TaskID, limit int, fs []api.Filter, order apiv1.OrderBy, ...) ([]*model.TaskLog, interface{}, error)
- func (db *PgDB) TaskLogsCount(taskID model.TaskID, fs []api.Filter) (int, error)
- func (db *PgDB) TaskLogsFields(taskID model.TaskID) (*apiv1.TaskLogsFieldsResponse, error)
- func (db *PgDB) TerminateExperimentInRestart(id int, state model.State) error
- func (db *PgDB) TokenStore() *TokenStore
- func (db *PgDB) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string, smallerIsBetter bool) (trials []int32, err error)
- func (db *PgDB) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) (batches []int32, endTime time.Time, err error)
- func (db *PgDB) TrialExperimentAndRequestID(id int) (int, model.RequestID, error)
- func (db *PgDB) TrialLogs(trialID, limit int, fs []api.Filter, order apiv1.OrderBy, ...) ([]*model.TrialLog, interface{}, error)
- func (db *PgDB) TrialLogsCount(trialID int, fs []api.Filter) (int, error)
- func (db *PgDB) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error)
- func (db *PgDB) TrialRunIDAndRestarts(trialID int) (runID int, restart int, err error)
- func (db *PgDB) TrialState(trialID int) (model.State, error)
- func (db *PgDB) TrialStatus(trialID int) (model.State, *time.Time, error)
- func (db *PgDB) TrialsSnapshot(experimentID int, minBatches int, maxBatches int, metricName string, ...) (trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error)
- func (db *PgDB) TrySaveExperimentState(experiment *model.Experiment) error
- func (db *PgDB) UpdateClusterHeartBeat(currentClusterHeartbeat time.Time) error
- func (db *PgDB) UpdateResourceAllocationAggregation() error
- func (db *PgDB) UpdateTrialFields(id int, newRunnerMetadata *trialv1.TrialRunnerMetadata, ...) error
- func (db *PgDB) ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error)
- func (db *PgDB) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) (batches []int32, endTime time.Time, err error)
- type RPWorkspaceBinding
- type SortDirection
- type StaticQueryMap
- type TokenStore
- func (s *TokenStore) Create(info oauth2.TokenInfo) error
- func (s *TokenStore) GetByAccess(access string) (oauth2.TokenInfo, error)
- func (s *TokenStore) GetByCode(code string) (oauth2.TokenInfo, error)
- func (s *TokenStore) GetByRefresh(refresh string) (oauth2.TokenInfo, error)
- func (s *TokenStore) RemoveByAccess(access string) error
- func (s *TokenStore) RemoveByCode(code string) error
- func (s *TokenStore) RemoveByRefresh(refresh string) error
Constants ¶
const ( // CodeUniqueViolation is the error code that Postgres uses to indicate that an attempted // insert/update violates a uniqueness constraint. Obtained from: // https://www.postgresql.org/docs/10/errcodes-appendix.html CodeUniqueViolation = "23505" // CodeForeignKeyViolation is the error code that Postgres uses to indicate that an attempted // insert/update violates a foreign key constraint. Obtained from: // https://www.postgresql.org/docs/10/errcodes-appendix.html CodeForeignKeyViolation = "23503" // CodeSerializationFailure is the error code that Postgres uses to indicate that a transaction // failed due to a serialization failure. Obtained from: // https://www.postgresql.org/docs/10/errcodes-appendix.html CodeSerializationFailure = "40001" )
const ( // InfPostgresString is how we store infinity in JSONB in postgres. InfPostgresString = "Infinity" // NegInfPostgresString is how we store -infinity in JSONB in postgres. NegInfPostgresString = "-Infinity" // NaNPostgresString is how we store NaN in JSONB in postgres. NaNPostgresString = "NaN" // MetricTypeString is the summary metric type for string or mixed types. MetricTypeString = "string" // MetricTypeNumber is the summary metric type for floats or ints. MetricTypeNumber = "number" // MetricTypeBool is the summary metric type for boolean. MetricTypeBool = "boolean" // MetricTypeDate is the summary metric type for date metrics. MetricTypeDate = "date" // MetricTypeObject is the summary metric type for object types. MetricTypeObject = "object" // MetricTypeArray is the summary metric type for array types. MetricTypeArray = "array" // MetricTypeNull is the summary metric type for null types. MetricTypeNull = "null" )
const ClusterMessageMaxLength = 250
ClusterMessageMaxLength caps the length of a cluster-wide message.
Variables ¶
var ( // ErrNotFound is returned if nothing is found. ErrNotFound = errors.New("not found") // ErrTooManyRowsAffected is returned if too many rows are affected. ErrTooManyRowsAffected = errors.New("too many rows are affected") // ErrDuplicateRecord is returned when trying to create a row that already exists. ErrDuplicateRecord = errors.New("row already exists") // ErrInvalidInput is returned when the data passed to a function is invalid for semantic or // syntactic reasons. ErrInvalidInput = errors.New("invalid input") // ErrDeleteDefaultBinding is returned when trying to delete a workspace bound to its default // namespace. ErrDeleteDefaultBinding = errors.New("cannot delete the default namespace binding") )
Functions ¶
func ActiveLogPolicies ¶
ActiveLogPolicies returns log pattern policies for an experiment ID. This should only be called on a running experiment.
func AddAllocation ¶
func AddAllocation(ctx context.Context, a *model.Allocation) error
AddAllocation upserts the existence of an allocation. Allocation IDs may conflict in the event the master restarts and the trial run ID increment is not persisted, but it is the same allocation so this is OK.
func AddAllocationExitStatus ¶
func AddAllocationExitStatus(ctx context.Context, a *model.Allocation) error
AddAllocationExitStatus adds the allocation exit status to the allocations table.
func AddCheckpointMetadata ¶
AddCheckpointMetadata persists metadata for a completed checkpoint to the database.
func AddExperiment ¶
func AddExperiment( ctx context.Context, experiment *model.Experiment, modelDef []byte, activeConfig expconf.ExperimentConfig, ) (err error)
AddExperiment adds the experiment to the database and sets its ID.
func AddExperimentTx ¶
func AddExperimentTx( ctx context.Context, idb bun.IDB, experiment *model.Experiment, modelDef []byte, activeConfig expconf.ExperimentConfig, upsert bool, ) (err error)
AddExperimentTx adds the experiment to the database and sets its ID.
func AddNonExperimentTasksContextDirectory ¶
func AddNonExperimentTasksContextDirectory(ctx context.Context, tID model.TaskID, bytes []byte) error
AddNonExperimentTasksContextDirectory adds a context directory for a non experiment task.
func AddProjectHparams ¶
AddProjectHparams adds project hyperparams from provided runs to provided project.
func AddRPWorkspaceBindings ¶
func AddRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, resourcePools []config.ResourcePoolConfig, ) error
AddRPWorkspaceBindings inserts new bindings between workspaceIds and poolName.
func AllocationByID ¶
func AllocationByID(ctx context.Context, aID model.AllocationID) (*model.Allocation, error)
AllocationByID retrieves an allocation by its ID.
func ApplyDoubleFieldFilter ¶
func ApplyDoubleFieldFilter[T string | schema.Ident]( q *bun.SelectQuery, column T, filter *commonv1.DoubleFieldFilter, ) (*bun.SelectQuery, error)
ApplyDoubleFieldFilter applies filtering on a bun query for double field.
func ApplyInt32FieldFilter ¶
func ApplyInt32FieldFilter[T string | schema.Ident]( q *bun.SelectQuery, column T, filter *commonv1.Int32FieldFilter, ) (*bun.SelectQuery, error)
ApplyInt32FieldFilter applies filtering on a bun query for int32 field.
func ApplyPolymorphicFilter ¶
func ApplyPolymorphicFilter( q *bun.SelectQuery, column string, filter *commonv1.PolymorphicFilter, ) (*bun.SelectQuery, error)
ApplyPolymorphicFilter applies filtering on a bun query for a polymorphic filter.
func ApplyTimestampFieldFilter ¶
func ApplyTimestampFieldFilter[T string | schema.Ident]( q *bun.SelectQuery, column T, filter *commonv1.TimestampFieldFilter, ) (*bun.SelectQuery, error)
ApplyTimestampFieldFilter applies filtering on a bun query for timestamp field.
func BuildRunHParams ¶
func BuildRunHParams(runID int, projectID int, hparams map[string]any, parentName string, ) ([]model.RunHparam, []model.ProjectHparam, error)
BuildRunHParams builds hyperparameters objects to add into the `run_hparams` & `project_hparams` table.
func Bun ¶
Bun returns the singleton database connection through the bun library. bun is the database library we have decided to use for new code in the future due to its superior composability over bare SQL, and its superior flexibility over e.g. gorm. New code should not use the old bare SQL tooling.
func BunSelectMetricGroupNames ¶
func BunSelectMetricGroupNames() *bun.SelectQuery
BunSelectMetricGroupNames sets up a bun select query for getting all the metric groups and names.
func BunSelectMetricsQuery ¶
func BunSelectMetricsQuery(mGroup model.MetricGroup, inclArchived bool) *bun.SelectQuery
BunSelectMetricsQuery sets up a bun select query based on the new metrics table, simplifying some weirdness we set up for pg10 support.
func CheckIfRPUnbound ¶
CheckIfRPUnbound checks to make sure the specified resource pool is not bound to any workspace and returns an error if it is.
func ClearClusterMessage ¶
ClearClusterMessage clears the active cluster message.
func CloseOpenAllocations ¶
func CloseOpenAllocations(ctx context.Context, exclude []model.AllocationID) error
CloseOpenAllocations finds all allocations that were open when the master crashed and adds an end time.
func CompleteAllocation ¶
func CompleteAllocation(ctx context.Context, a *model.Allocation) error
CompleteAllocation persists the end of an allocation lifetime.
func CompleteAllocationTelemetry ¶
CompleteAllocationTelemetry returns the analytics of an allocation for the telemetry.
func CompleteGenericTask ¶
CompleteGenericTask persists the completion of a task of type GENERIC.
func CompleteTask ¶
CompleteTask persists the completion of a task.
func DeleteAllocationSession ¶
func DeleteAllocationSession(ctx context.Context, allocationID model.AllocationID) error
DeleteAllocationSession deletes the task session with the given AllocationID.
func DeleteDispatch ¶
DeleteDispatch deletes the specified dispatch and returns the number deleted.
func DeleteDispatches ¶
func DeleteDispatches( ctx context.Context, opts func(*bun.DeleteQuery) *bun.DeleteQuery, ) (int64, error)
DeleteDispatches deletes all dispatches for the specified query and returns the number deleted.
func DeleteNotebookSessionByTask ¶
DeleteNotebookSessionByTask deletes the notebook session associated with the task.
func DoPermissionsExist ¶
func DoPermissionsExist(ctx context.Context, curUserID model.UserID, permissionIDs ...rbacv1.PermissionType, ) error
DoPermissionsExist checks for the existence of a permission in any workspace.
func DoesPermissionMatch ¶
func DoesPermissionMatch(ctx context.Context, curUserID model.UserID, workspaceID *int32, permissionID rbacv1.PermissionType, ) error
DoesPermissionMatch checks for the existence of a permission in a workspace.
func DoesPermissionMatchAll ¶
func DoesPermissionMatchAll(ctx context.Context, curUserID model.UserID, permissionID rbacv1.PermissionType, workspaceIds ...int32, ) error
DoesPermissionMatchAll checks for the existence of a permission in all specified workspaces.
func EndAgentStats ¶
func EndAgentStats(a *model.AgentStats) error
EndAgentStats updates the end time of an agent.
func EndAllTaskStats ¶
EndAllTaskStats is called when the master starts, in case the master previously crashed.
func ExperimentBestSearcherValidation ¶
ExperimentBestSearcherValidation returns the best searcher validation for an experiment.
func ExperimentByExternalIDTx ¶
func ExperimentByExternalIDTx(ctx context.Context, idb bun.IDB, externalExperimentID string) ( *model.Experiment, error, )
ExperimentByExternalIDTx looks up an experiment by a given external experiment id.
func ExperimentByID ¶
ExperimentByID looks up an experiment by ID in a database, returning an error if none exists.
func ExperimentByTaskID ¶
ExperimentByTaskID looks up an experiment by a given taskID, returning an error if none exists.
func ExperimentByTrialID ¶
ExperimentByTrialID looks up an experiment by a given trialID, returning an error if none exists.
func ExperimentIDsToWorkspaceIDs ¶
func ExperimentIDsToWorkspaceIDs(ctx context.Context, experimentIDs []int32) ( []model.AccessScopeID, error, )
ExperimentIDsToWorkspaceIDs returns a slice of workspaces that the given experiments belong to.
func ExperimentNumSteps ¶
ExperimentNumSteps returns the total number of steps for all trials of the experiment.
func ExperimentTotalStepTime ¶
ExperimentTotalStepTime returns the total elapsed time for all allocations of the experiment with the given ID. Any step with a NULL end_time does not contribute. Elapsed time is expressed as a floating point number of seconds.
func ExperimentsByTrialID ¶
ExperimentsByTrialID looks up experiments by a given list of trialIDs, returning an error if none exists.
func ExperimentsTrialAndTaskIDs ¶
func ExperimentsTrialAndTaskIDs(ctx context.Context, idb bun.IDB, expIDs []int) ( []int, []model.TaskID, error, )
ExperimentsTrialAndTaskIDs returns the trial and task IDs for one or more experiments.
func GenerateNotebookSessionToken ¶
GenerateNotebookSessionToken generates a token for a notebook session.
func GetActiveClusterMessage ¶
GetActiveClusterMessage returns the active cluster message if one is set and active, or ErrNotFound if not.
func GetCheckpoint ¶
func GetCheckpoint(ctx context.Context, checkpointUUID string) (*checkpointv1.Checkpoint, error)
GetCheckpoint gets checkpointv1.Checkpoint from the database by UUID. Can be moved to master/internal/checkpoints once db/postgres_model_intg_test is bunified. WARNING: Function does not account for "NaN", "Infinity", or "-Infinity" due to Bun unmarshalling.
func GetClusterMessage ¶
GetClusterMessage returns the cluster message even if it's not yet active, or ErrNotFound if all cluster messages have expired.
func GetDefaultPoolsForWorkspace ¶
func GetDefaultPoolsForWorkspace(ctx context.Context, workspaceID int, ) (computePool, auxPool string, err error)
GetDefaultPoolsForWorkspace returns the default compute and aux pools for a workspace.
func GetMetrics ¶
func GetMetrics(ctx context.Context, trialID, afterBatches, limit int, mGroup *string, ) ([]*trialv1.MetricsReport, error)
GetMetrics returns a subset of metrics of the requested type for the given trial ID.
func GetNonGlobalWorkspacesWithPermission ¶
func GetNonGlobalWorkspacesWithPermission(ctx context.Context, curUserID model.UserID, permissionID rbacv1.PermissionType, ) ([]int, error)
GetNonGlobalWorkspacesWithPermission returns all workspaces the user has permissionID on. This does not check for permissions granted on scopes higher than workspace level (eg cluster).
func GetNonTerminalExperimentCount ¶
func GetNonTerminalExperimentCount(ctx context.Context, experimentIDs []int32, ) (count int, err error)
GetNonTerminalExperimentCount returns the number of non terminal experiments.
func GetRunMetadata ¶
GetRunMetadata returns the metadata of a run from the database. If the run does not have any metadata, it returns an empty map.
func GetTrialProfilerAvailableSeries ¶
func GetTrialProfilerAvailableSeries( ctx context.Context, trialID int32, ) ([]*trialv1.TrialProfilerMetricLabels, error)
GetTrialProfilerAvailableSeries returns all available system profiling metric names. This method is to be deprecated in the future in favor of generic metrics APIs.
func GetUnboundRPs ¶
GetUnboundRPs gets unbound resource pools.
func HackAddUser ¶
HackAddUser is used to prevent an import cycle in postgres_test_utils & postgres_scim (EE).
func InsertDispatch ¶
InsertDispatch persists the existence of a dispatch.
func InsertModel ¶
func InsertModel(ctx context.Context, name string, description string, metadata []byte, labels string, notes string, userID model.UserID, workspaceID int, ) (*modelv1.Model, error)
InsertModel inserts the model into the database.
func InsertModelVersion ¶
func InsertModelVersion(ctx context.Context, id int32, ckptID string, name string, comment string, metadata []byte, labels string, notes string, userID model.UserID, ) (*modelv1.ModelVersion, error)
InsertModelVersion inserts the model version into the database.
func KillGenericTask ¶
KillGenericTask persists the termination of a task of type GENERIC.
func MatchSentinelError ¶
MatchSentinelError checks if the error belongs to specific families of errors and ensures that the returned error has the proper type and text.
func MetricBatches ¶
func MetricBatches( experimentID int, metricName string, startTime time.Time, metricGroup model.MetricGroup, ) ( batches []int32, endTime time.Time, err error, )
MetricBatches returns the milestones (in batches processed) at which a specific metric was recorded.
func MustHaveAffectedRows ¶
MustHaveAffectedRows checks if bun has affected rows in a table or not. Returns ErrNotFound if no rows were affected and returns the provided error otherwise.
func NonExperimentTasksContextDirectory ¶
NonExperimentTasksContextDirectory returns the context directory for a non-experiment task.
func OrderByToSQL ¶
OrderByToSQL computes the SQL keyword corresponding to the given ordering type.
func OverwriteRPWorkspaceBindings ¶
func OverwriteRPWorkspaceBindings(ctx context.Context, workspaceIds []int32, poolName string, resourcePools []config.ResourcePoolConfig, ) error
OverwriteRPWorkspaceBindings overwrites the bindings between workspaceIds and poolName.
func PaginateBun ¶
func PaginateBun( query *bun.SelectQuery, orderColumn string, direction SortDirection, offset, limit int, ) *bun.SelectQuery
PaginateBun adds sorting and pagination to the provided bun query, defaulting to certain values if they are not specified. By default, we order by ascending on the id column, with no limit.
func PaginateBunUnsafe ¶
func PaginateBunUnsafe( query *bun.SelectQuery, orderColumn string, direction SortDirection, offset, limit int, ) *bun.SelectQuery
PaginateBunUnsafe is a version of PaginateBun that allows an arbitrary order expression like `metrics->>'loss'`.
func ParseMapToProto ¶
ParseMapToProto converts sqlx/bun-scanned map to a proto object.
func ProjectExperiments ¶
ProjectExperiments returns a list of experiments within a project.
func ReadRPsAvailableToWorkspace ¶
func ReadRPsAvailableToWorkspace( ctx context.Context, workspaceID int32, offset int32, limit int32, resourcePoolConfig []config.ResourcePoolConfig, ) ([]string, *apiv1.Pagination, error)
ReadRPsAvailableToWorkspace returns the names of resource pools available to a workspace.
func RecordTaskEndStats ¶
RecordTaskEndStats records end stats for tasks.
func RecordTaskStats ¶
RecordTaskStats records stats for tasks.
func RegisterModel ¶
func RegisterModel(m interface{})
RegisterModel registers a model in Bun or, if theOneBun is not yet initialized, sets it up to be registered once initialized. It's generally best to pass a nil pointer of your model's type as argument m.
func RemoveOutdatedProjectHparams ¶
RemoveOutdatedProjectHparams removes outdated project hyperparams from provided project.
func RemoveRPWorkspaceBindings ¶
RemoveRPWorkspaceBindings removes the bindings between workspaceIds and poolName.
func SetClusterMessage ¶
SetClusterMessage sets the cluster-wide message. Any existing message will be expired because only one cluster message is allowed at any time. Messages may be at most ClusterMessageMaxLength characters long. Returns a wrapped ErrInvalidInput when input is invalid.
func SetErrorState ¶
SetErrorState sets the given task to an ERROR state.
func SetPausedState ¶
SetPausedState sets the given task to a PAUSED state.
func StartAllocationSession ¶
func StartAllocationSession( ctx context.Context, allocationID model.AllocationID, owner *model.User, ) (string, error)
StartAllocationSession creates a row in the allocation_sessions table.
func StartNotebookSession ¶
StartNotebookSession persists a new notebook session row into the database.
func TaskCompleted ¶
TaskCompleted checks if the end time exists for a task, if so, the task has completed.
func TopTrialsByMetric ¶
func TopTrialsByMetric( ctx context.Context, experimentID int, maxTrials int, metric string, smallerIsBetter bool, ) ([]int32, error)
TopTrialsByMetric chooses the subset of trials from an experiment that recorded the best values for the specified metric at any point during the trial.
func TrialByExperimentAndRequestID ¶
func TrialByExperimentAndRequestID( ctx context.Context, experimentID int, requestID model.RequestID, ) (*model.Trial, error)
TrialByExperimentAndRequestID looks up a trial, returning an error if none exists.
func TrialByTaskID ¶
TrialByTaskID looks up a trial by task ID, returning an error if none exists. It also returns an error if called with a non-trial task ID.
func TrialIDsToWorkspaceIDs ¶
func TrialIDsToWorkspaceIDs(ctx context.Context, trialIDs []int32) ( []model.AccessScopeID, error, )
TrialIDsToWorkspaceIDs returns a slice of workspaces that the given trials belong to.
func TrialTaskIDsByTrialID ¶
TrialTaskIDsByTrialID returns the task IDs of a trial, looked up by trial ID and sorted by start time.
func UpdateAllocationPorts ¶
func UpdateAllocationPorts(ctx context.Context, a model.Allocation) error
UpdateAllocationPorts stores the latest task state and readiness.
func UpdateAllocationProxyAddress ¶
func UpdateAllocationProxyAddress(ctx context.Context, a model.Allocation) error
UpdateAllocationProxyAddress stores the proxy address.
func UpdateAllocationStartTime ¶
func UpdateAllocationStartTime(ctx context.Context, a model.Allocation) error
UpdateAllocationStartTime stores the latest start time.
func UpdateAllocationState ¶
func UpdateAllocationState(ctx context.Context, a model.Allocation) error
UpdateAllocationState stores the latest task state and readiness.
func UpdateCheckpointSizeTx ¶
UpdateCheckpointSizeTx, which updates the checkpoint size and count for the experiment and trial, is duplicated here. Remove from this file when bunifying. Original is in master/internal/checkpoints/postgres_checkpoints.go.
func UpdateRunMetadata ¶
func UpdateRunMetadata( ctx context.Context, runID int, rawMetadata map[string]any, flatMetadata []model.RunMetadataIndex, ) (result map[string]any, err error)
UpdateRunMetadata updates the metadata of a run, including the metadata indexes.
func UpdateTrial ¶
UpdateTrial updates the state of an existing trial. end_time is set if the trial moves to a terminal state.
func UpsertTrialByExternalIDTx ¶
func UpsertTrialByExternalIDTx( ctx context.Context, tx bun.Tx, trial *model.Trial, taskID model.TaskID, ) error
UpsertTrialByExternalIDTx UPSERTs the trial with respect to the external_trial_id.
func ValidateDoubleFieldFilterComparison ¶
func ValidateDoubleFieldFilterComparison( filter *commonv1.DoubleFieldFilter, ) error
ValidateDoubleFieldFilterComparison validates the min and max values in the range.
func ValidateInt32FieldFilterComparison ¶
func ValidateInt32FieldFilterComparison( filter *commonv1.Int32FieldFilter, ) error
ValidateInt32FieldFilterComparison validates the min and max values in the range.
func ValidatePolymorphicFilter ¶
func ValidatePolymorphicFilter( filter *commonv1.PolymorphicFilter, ) error
ValidatePolymorphicFilter ensures that a Polymorphic filter contains at most one valid range.
func ValidateTimeStampFieldFilterComparison ¶
func ValidateTimeStampFieldFilterComparison( filter *commonv1.TimestampFieldFilter, ) error
ValidateTimeStampFieldFilterComparison validates the min and max timestamps in the range.
Types ¶
type ClientStore ¶
type ClientStore struct {
// contains filtered or unexported fields
}
ClientStore is a store for OAuth clients. It is separate from PgDB so we can implement an interface of the external OAuth library without polluting PgDB's method set.
func (*ClientStore) Create ¶
func (s *ClientStore) Create(c model.OAuthClient) error
Create adds a new client to the database.
func (*ClientStore) GetByID ¶
func (s *ClientStore) GetByID(id string) (oauth2.ClientInfo, error)
GetByID returns a client given its ID, including the secret. It implements the gopkg.in/oauth2.v3#ClientStore interface, so it returns an external interface type; the returned object is always actually of type model.OAuthClient.
func (*ClientStore) List ¶
func (s *ClientStore) List() ([]model.OAuthClient, error)
List returns all OAuth clients in the database. The secrets are not included.
func (*ClientStore) RemoveByID ¶
func (s *ClientStore) RemoveByID(id string) error
RemoveByID removes the client with the given client ID.
type DB ¶
type DB interface { Migrate(migrationURL, codeURL string, actions []string) error Close() error GetOrCreateClusterID(telemetryID string) (string, error) TrialExperimentAndRequestID(id int) (int, model.RequestID, error) AddExperiment(experiment *model.Experiment, modelDef []byte, activeConfig expconf.ExperimentConfig) error ExperimentIDByTrialID(trialID int) (int, error) NonTerminalExperiments() ([]*model.Experiment, error) TerminateExperimentInRestart(id int, state model.State) error SaveExperimentConfig(id int, config expconf.ExperimentConfig) error SaveExperimentState(experiment *model.Experiment) error SaveExperimentArchiveStatus(experiment *model.Experiment) error DeleteExperiments(ctx context.Context, ids []int) error ExperimentHasCheckpointsInRegistry(id int) (bool, error) SaveExperimentProgress(id int, progress *float64) error ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error) ExperimentNumTrials(id int) (int64, error) ExperimentTrialIDs(expID int) ([]int, error) ExperimentModelDefinitionRaw(id int) ([]byte, error) UpdateTrialFields(id int, newRunnerMetadata *trialv1.TrialRunnerMetadata, newRunID, newRestarts int) error TrialRunIDAndRestarts(trialID int) (int, int, error) AddTrainingMetrics(ctx context.Context, m *trialv1.TrialMetrics) error AddValidationMetrics( ctx context.Context, m *trialv1.TrialMetrics, ) error ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error) PeriodicTelemetryInfo() ([]byte, error) TrialState(trialID int) (model.State, error) TrialStatus(trialID int) (model.State, *time.Time, error) Query(queryName string, v interface{}, params ...interface{}) error QueryF( queryName string, args []interface{}, v interface{}, params ...interface{}) error RawQuery(queryName string, params ...interface{}) ([]byte, error) UpdateResourceAllocationAggregation() error 
InsertTrialProfilerMetricsBatch( values []float32, batches []int32, timestamps []time.Time, labels []byte, ) error GetTrialProfilerMetricsBatches( labels *trialv1.TrialProfilerMetricLabels, offset, limit int, ) (model.TrialProfilerMetricsBatchBatch, error) ExperimentLabelUsage(projectID int32) (labelUsage map[string]int, err error) GetExperimentStatus(experimentID int) (state model.State, progress float64, err error) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error) TrialsSnapshot(experimentID int, minBatches int, maxBatches int, metricName string, startTime time.Time, metricGroup model.MetricGroup) ( trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string, smallerIsBetter bool) (trials []int32, err error) ExperimentSnapshot(experimentID int) ([]byte, int, error) SaveSnapshot( experimentID int, version int, experimentSnapshot []byte, ) error DeleteSnapshotsForExperiment(experimentID int) error DeleteSnapshotsForTerminalExperiments() error QueryProto(queryName string, v interface{}, args ...interface{}) error QueryProtof( queryName string, args []interface{}, v interface{}, params ...interface{}) error TrialLogs( trialID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{}, ) ([]*model.TrialLog, interface{}, error) DeleteTrialLogs(ids []int) error TrialLogsCount(trialID int, fs []api.Filter) (int, error) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error) RecordAgentStats(a *model.AgentStats) error EndAllAgentStats() error RecordInstanceStats(a *model.InstanceStats) error EndInstanceStats(a *model.InstanceStats) error EndAllInstanceStats() error }
DB is an interface for _all_ the functionality packed into the DB.
type Dispatch ¶
type Dispatch struct { bun.BaseModel `bun:"table:resourcemanagers_dispatcher_dispatches"` DispatchID string `bun:"dispatch_id"` ResourceID sproto.ResourcesID `bun:"resource_id"` AllocationID model.AllocationID `bun:"allocation_id"` ImpersonatedUser string `bun:"impersonated_user"` }
Dispatch is the Determined-persisted representation for dispatch existence.
func DispatchByID ¶
DispatchByID retrieves a dispatch by its ID.
func ListAllDispatches ¶
ListAllDispatches lists all dispatches in the DB.
func ListDispatches ¶
func ListDispatches( ctx context.Context, opts func(*bun.SelectQuery) (*bun.SelectQuery, error), ) ([]*Dispatch, error)
ListDispatches lists all dispatches according to the options provided.
func ListDispatchesByAllocationID ¶
func ListDispatchesByAllocationID( ctx context.Context, id model.AllocationID, ) ([]*Dispatch, error)
ListDispatchesByAllocationID lists all dispatches for an allocation ID.
type FilterComparison ¶
type FilterComparison[T any] struct { Gt *T Gte *T Lt *T Lte *T }
FilterComparison makes you wish for properties in generic structs/interfaces.
type MetricMeasurements ¶
type MetricMeasurements struct { Values map[string]interface{} Batches uint Time time.Time Epoch *float64 `json:"epoch,omitempty"` TrialID int32 }
MetricMeasurements represents a metric measured by all possible independent variables.
type MetricPartitionType ¶
type MetricPartitionType string
MetricPartitionType denotes what type the metric is. This is planned to be deprecated once we upgrade to pg11 and can use DEFAULT partitioning.
const ( // TrainingMetric designates metrics from training steps. TrainingMetric MetricPartitionType = "TRAINING" // ValidationMetric designates metrics from validation steps. ValidationMetric MetricPartitionType = "VALIDATION" // ProfilingMetric designates metrics from profiling steps. ProfilingMetric MetricPartitionType = "PROFILING" // GenericMetric designates metrics from other sources. GenericMetric MetricPartitionType = "GENERIC" )
type PgDB ¶
type PgDB struct { URL string // contains filtered or unexported fields }
PgDB represents a Postgres database connection. The type definition is needed to define methods.
func ConnectPostgres ¶
ConnectPostgres connects to a Postgres database.
func SingleDB ¶
func SingleDB() *PgDB
SingleDB returns a singleton database client. Bun() should be preferred over this for all new queries.
func (*PgDB) ActiveExperimentConfig ¶
func (db *PgDB) ActiveExperimentConfig(id int) (expconf.ExperimentConfig, error)
ActiveExperimentConfig returns the full config object for an experiment.
func (*PgDB) AddExperiment ¶
func (db *PgDB) AddExperiment( experiment *model.Experiment, modelDef []byte, activeConfig expconf.ExperimentConfig, ) (err error)
AddExperiment adds the experiment to the database and sets its ID.
TODO(ilia): deprecate and use module function instead.
func (*PgDB) AddTaskLogs ¶
AddTaskLogs bulk-inserts a list of *model.TaskLog objects to the database with automatic IDs.
func (*PgDB) AddTrainingMetrics ¶
AddTrainingMetrics [DEPRECATED] adds a completed step to the database with the given training metrics. If these training metrics occur before any others, a rollback is assumed and later training and validation metrics are cleaned up.
func (*PgDB) AddTrialMetrics ¶
func (db *PgDB) AddTrialMetrics( ctx context.Context, m *trialv1.TrialMetrics, mGroup model.MetricGroup, ) error
AddTrialMetrics persists the given trial metrics to the database.
func (*PgDB) AddValidationMetrics ¶
AddValidationMetrics [DEPRECATED] adds a completed validation to the database with the given validation metrics. If these validation metrics occur before any others, a rollback is assumed and later metrics are cleaned up from the database.
func (*PgDB) CheckpointByTotalBatches ¶
func (db *PgDB) CheckpointByTotalBatches(trialID, totalBatches int) (*model.Checkpoint, error)
CheckpointByTotalBatches looks up a checkpoint by trial and total batch, returning nil if none exists.
func (*PgDB) ClientStore ¶
func (db *PgDB) ClientStore() *ClientStore
ClientStore returns a store for OAuth clients backed by this database.
func (*PgDB) DeleteExperiments ¶
DeleteExperiments deletes zero or more experiments.
func (*PgDB) DeleteSnapshotsForExperiment ¶
DeleteSnapshotsForExperiment deletes all snapshots for one given experiment.
func (*PgDB) DeleteSnapshotsForTerminalExperiments ¶
DeleteSnapshotsForTerminalExperiments deletes all snapshots for terminal state experiments from the database.
func (*PgDB) DeleteTaskLogs ¶
DeleteTaskLogs deletes the logs for the given tasks.
func (*PgDB) DeleteTrialLogs ¶
DeleteTrialLogs deletes the logs for the given trial IDs.
func (*PgDB) EndAllAgentStats ¶
EndAllAgentStats is called when the master starts, in case the master previously crashed. If the master stops, statistics would treat “live” agents as live until the master restarts.
func (*PgDB) EndAllInstanceStats ¶
EndAllInstanceStats is called when the master starts, in case the master previously crashed. If the master stops, statistics would treat “live” instances as live until the master restarts.
func (*PgDB) EndInstanceStats ¶
func (db *PgDB) EndInstanceStats(a *model.InstanceStats) error
EndInstanceStats updates the end time of an instance.
func (*PgDB) ExperimentConfigRaw ¶
ExperimentConfigRaw returns the full config object for an experiment as a JSON string.
func (*PgDB) ExperimentHasCheckpointsInRegistry ¶
ExperimentHasCheckpointsInRegistry checks if the experiment has any checkpoints in the registry.
func (*PgDB) ExperimentIDByTrialID ¶
ExperimentIDByTrialID looks up an experiment ID by a trial ID.
func (*PgDB) ExperimentLabelUsage ¶
ExperimentLabelUsage returns a flattened and deduplicated list of all the labels in use across all experiments.
func (*PgDB) ExperimentModelDefinitionRaw ¶
ExperimentModelDefinitionRaw returns the zipped model definition for an experiment as a byte array.
func (*PgDB) ExperimentNumTrials ¶
ExperimentNumTrials returns the total number of trials for the experiment.
func (*PgDB) ExperimentSnapshot ¶
ExperimentSnapshot returns the snapshot for the specified experiment.
func (*PgDB) ExperimentTrialIDs ¶
ExperimentTrialIDs returns the trial IDs for the experiment.
func (*PgDB) FailDeletingExperiment ¶
FailDeletingExperiment finds all experiments that were deleting when the master crashed and moves them to DELETE_FAILED.
func (*PgDB) GetExperimentStatus ¶
func (db *PgDB) GetExperimentStatus(experimentID int) (state model.State, progress float64, err error, )
GetExperimentStatus returns the current state of the experiment.
func (*PgDB) GetOrCreateClusterID ¶
GetOrCreateClusterID queries the master uuid in the database, adding one if it doesn't exist. If a nonempty telemetryID is provided, it will be the one added, otherwise a uuid is generated.
func (*PgDB) GetTrialProfilerMetricsBatches ¶
func (db *PgDB) GetTrialProfilerMetricsBatches( labels *trialv1.TrialProfilerMetricLabels, offset, limit int, ) (model.TrialProfilerMetricsBatchBatch, error)
GetTrialProfilerMetricsBatches gets a batch of profiler metric batches from the database. This method is for backwards compatibility and should be deprecated in the future in favor of generics metrics APIs.
Profiler metrics are stored in the metrics table as a nested JSON mapping of labels to values. All profiler metrics are associated with an agent ID, but certain metrics (i.e. gpu_util) may be associated with other labels. For example:
{ "agent-ID-1": { "GPU-UUID-1": { "gpu_util": 0.12, "gpu_free_memory": 0.34, } } }
func (*PgDB) InsertTrialProfilerMetricsBatch ¶
func (db *PgDB) InsertTrialProfilerMetricsBatch( values []float32, batches []int32, timestamps []time.Time, labels []byte, ) error
InsertTrialProfilerMetricsBatch inserts a batch of metrics into the database.
func (*PgDB) LatestCheckpointForTrial ¶
func (db *PgDB) LatestCheckpointForTrial(trialID int) (*model.Checkpoint, error)
LatestCheckpointForTrial finds the latest completed checkpoint for a trial, returning nil if none exists.
func (*PgDB) LegacyExperimentConfigByID ¶
func (db *PgDB) LegacyExperimentConfigByID( id int, ) (expconf.LegacyConfig, error)
LegacyExperimentConfigByID parses very old configs, returning a LegacyConfig which exposes a select subset of fields in a type-safe way.
func (*PgDB) MaxTerminationDelay ¶
MaxTerminationDelay is the max delay before a consumer can be sure all logs have been received. For Postgres, we don't need to wait very long at all; this was a hypothetical cap on fluent to DB latency prior to fluent's deprecation.
func (*PgDB) MetricNames ¶
func (db *PgDB) MetricNames(ctx context.Context, experimentIDs []int) ( map[model.MetricGroup][]string, error, )
MetricNames returns a list of metric names for the given experiment IDs.
func (*PgDB) NonTerminalExperiments ¶
func (db *PgDB) NonTerminalExperiments() ([]*model.Experiment, error)
NonTerminalExperiments finds all experiments in the database whose states are not terminal.
func (*PgDB) PeriodicTelemetryInfo ¶
PeriodicTelemetryInfo returns anonymous information about the usage of the current Determined cluster.
func (*PgDB) Query ¶
Query returns the result of the query. Any placeholder parameters are replaced with supplied params.
func (*PgDB) QueryF ¶
func (db *PgDB) QueryF( queryName string, args []interface{}, v interface{}, params ...interface{}, ) error
QueryF returns the result of the formatted query. Any placeholder parameters are replaced with supplied params.
func (*PgDB) QueryProto ¶
QueryProto returns the result of the query. Any placeholder parameters are replaced with supplied args. Enum values must be the full name of the enum.
func (*PgDB) QueryProtof ¶
func (db *PgDB) QueryProtof( queryName string, args []interface{}, v interface{}, params ...interface{}, ) error
QueryProtof returns the result of the formatted query. Any placeholder parameters are replaced with supplied params.
func (*PgDB) RawQuery ¶
RawQuery returns the result of the query as a raw byte string. Any placeholder parameters are replaced with supplied params.
func (*PgDB) RecordAgentStats ¶
func (db *PgDB) RecordAgentStats(a *model.AgentStats) error
RecordAgentStats inserts a record of instance start time if the instance has not been started or has already ended.
func (*PgDB) RecordInstanceStats ¶
func (db *PgDB) RecordInstanceStats(a *model.InstanceStats) error
RecordInstanceStats inserts a record of instance start time if the instance has not been started or has already ended.
func (*PgDB) SaveExperimentArchiveStatus ¶
func (db *PgDB) SaveExperimentArchiveStatus(experiment *model.Experiment) error
SaveExperimentArchiveStatus saves the current experiment archive status to the database.
func (*PgDB) SaveExperimentConfig ¶
func (db *PgDB) SaveExperimentConfig(id int, config expconf.ExperimentConfig) error
SaveExperimentConfig saves the current experiment config to the database.
func (*PgDB) SaveExperimentProgress ¶
SaveExperimentProgress stores the progress for an experiment in the database.
func (*PgDB) SaveExperimentState ¶
func (db *PgDB) SaveExperimentState(experiment *model.Experiment) error
SaveExperimentState saves the current experiment state to the database.
func (*PgDB) SaveSnapshot ¶
SaveSnapshot saves a searcher and trial snapshot together.
func (*PgDB) TaskLogs ¶
func (db *PgDB) TaskLogs( taskID model.TaskID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{}, ) ([]*model.TaskLog, interface{}, error)
TaskLogs takes a task ID and log offset, limit and filters and returns matching logs.
func (*PgDB) TaskLogsCount ¶
TaskLogsCount returns the number of logs in postgres for the given task.
func (*PgDB) TaskLogsFields ¶
TaskLogsFields returns the unique fields that can be filtered on for the given task.
func (*PgDB) TerminateExperimentInRestart ¶
TerminateExperimentInRestart is used during master restart to properly terminate an experiment which was either in the process of stopping or which is not restorable for some reason, such as an invalid experiment config after a version upgrade.
func (*PgDB) TokenStore ¶
func (db *PgDB) TokenStore() *TokenStore
TokenStore returns a store for OAuth tokens backed by this database.
func (*PgDB) TopTrialsByTrainingLength ¶
func (db *PgDB) TopTrialsByTrainingLength(experimentID int, maxTrials int, metric string, smallerIsBetter bool, ) (trials []int32, err error)
TopTrialsByTrainingLength chooses the subset of trials that has been training for the highest number of batches, using the specified metric as a tie breaker.
func (*PgDB) TrainingMetricBatches ¶
func (db *PgDB) TrainingMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error, )
TrainingMetricBatches returns the milestones (in batches processed) at which a specific training metric was recorded.
func (*PgDB) TrialExperimentAndRequestID ¶
TrialExperimentAndRequestID returns the trial's experiment and request ID.
func (*PgDB) TrialLogs ¶
func (db *PgDB) TrialLogs( trialID, limit int, fs []api.Filter, order apiv1.OrderBy, followState interface{}, ) ([]*model.TrialLog, interface{}, error)
TrialLogs takes a trial ID and log offset, limit and filters and returns matching trial logs.
func (*PgDB) TrialLogsCount ¶
TrialLogsCount returns the number of logs in postgres for the given trial.
func (*PgDB) TrialLogsFields ¶
func (db *PgDB) TrialLogsFields(trialID int) (*apiv1.TrialLogsFieldsResponse, error)
TrialLogsFields returns the unique fields that can be filtered on for the given trial.
func (*PgDB) TrialRunIDAndRestarts ¶
TrialRunIDAndRestarts returns the run id and restart count for a trial.
func (*PgDB) TrialState ¶
TrialState returns the current state of the given trial.
func (*PgDB) TrialStatus ¶
TrialStatus returns the current status of the given trial, including the end time without returning all its hparams and other unneeded details. Called in paths hotter than TrialByID allows.
func (*PgDB) TrialsSnapshot ¶
func (db *PgDB) TrialsSnapshot(experimentID int, minBatches int, maxBatches int, metricName string, startTime time.Time, metricGroup model.MetricGroup, ) (trials []*apiv1.TrialsSnapshotResponse_Trial, endTime time.Time, err error)
TrialsSnapshot returns metrics across each trial in an experiment at a specific point of progress, for metric groups other than training and validation.
func (*PgDB) TrySaveExperimentState ¶
func (db *PgDB) TrySaveExperimentState(experiment *model.Experiment) error
TrySaveExperimentState saves the current experiment state to the database and returns if we successfully changed the state or not.
func (*PgDB) UpdateClusterHeartBeat ¶
UpdateClusterHeartBeat updates the clusterheartbeat column in the cluster_id table.
func (*PgDB) UpdateResourceAllocationAggregation ¶
UpdateResourceAllocationAggregation updates the aggregated resource allocation table.
func (*PgDB) UpdateTrialFields ¶
func (db *PgDB) UpdateTrialFields(id int, newRunnerMetadata *trialv1.TrialRunnerMetadata, newRunID, newRestarts int, ) error
UpdateTrialFields updates the specified fields of trial with ID id. Fields that are nil or zero are not updated.
func (*PgDB) ValidationByTotalBatches ¶
func (db *PgDB) ValidationByTotalBatches(trialID, totalBatches int) (*model.TrialMetrics, error)
ValidationByTotalBatches looks up a validation by trial and total batches, returning nil if none exists.
func (*PgDB) ValidationMetricBatches ¶
func (db *PgDB) ValidationMetricBatches(experimentID int, metricName string, startTime time.Time) ( batches []int32, endTime time.Time, err error, )
ValidationMetricBatches returns the milestones (in batches processed) at which a specific validation metric was recorded.
type RPWorkspaceBinding ¶
type RPWorkspaceBinding struct { bun.BaseModel `bun:"table:rp_workspace_bindings"` WorkspaceID int `bun:"workspace_id"` PoolName string `bun:"pool_name"` Valid bool `bun:"valid"` }
RPWorkspaceBinding is a struct reflecting the db table rp_workspace_bindings.
func GetAllBindings ¶
func GetAllBindings( ctx context.Context, ) ([]*RPWorkspaceBinding, error)
GetAllBindings gets all valid rp-workspace bindings.
func ReadWorkspacesBoundToRP ¶
func ReadWorkspacesBoundToRP( ctx context.Context, poolName string, offset, limit int32, resourcePools []config.ResourcePoolConfig, ) ([]*RPWorkspaceBinding, *apiv1.Pagination, error)
ReadWorkspacesBoundToRP gets the bindings between workspaceIds and the requested resource pool.
type SortDirection ¶
type SortDirection string
SortDirection represents the order by in a query.
const ( // SortDirectionAsc represents ordering by ascending. SortDirectionAsc SortDirection = "ASC" // SortDirectionDesc represents ordering by descending. SortDirectionDesc SortDirection = "DESC" // SortDirectionAscNullsFirst represents ordering by ascending with nulls first. SortDirectionAscNullsFirst SortDirection = "ASC NULLS FIRST" // SortDirectionDescNullsLast represents ordering by descending with nulls last. SortDirectionDescNullsLast SortDirection = "DESC NULLS LAST" )
type StaticQueryMap ¶
StaticQueryMap caches static sql files.
func (*StaticQueryMap) GetOrLoad ¶
func (q *StaticQueryMap) GetOrLoad(queryName string) string
GetOrLoad fetches static sql from the cache or loads them from disk.
type TokenStore ¶
type TokenStore struct {
// contains filtered or unexported fields
}
TokenStore is a store for OAuth tokens. It is separate from PgDB so we can implement an interface of the external OAuth library without polluting PgDB's method set.
func (*TokenStore) Create ¶
func (s *TokenStore) Create(info oauth2.TokenInfo) error
Create adds a new token to the database.
func (*TokenStore) GetByAccess ¶
func (s *TokenStore) GetByAccess(access string) (oauth2.TokenInfo, error)
GetByAccess gets the token with the given access token value.
func (*TokenStore) GetByCode ¶
func (s *TokenStore) GetByCode(code string) (oauth2.TokenInfo, error)
GetByCode gets the token with the given authorization code.
func (*TokenStore) GetByRefresh ¶
func (s *TokenStore) GetByRefresh(refresh string) (oauth2.TokenInfo, error)
GetByRefresh gets the token with the given refresh token value.
func (*TokenStore) RemoveByAccess ¶
func (s *TokenStore) RemoveByAccess(access string) error
RemoveByAccess deletes any tokens with the given access token value.
func (*TokenStore) RemoveByCode ¶
func (s *TokenStore) RemoveByCode(code string) error
RemoveByCode deletes any tokens with the given authorization code.
func (*TokenStore) RemoveByRefresh ¶
func (s *TokenStore) RemoveByRefresh(refresh string) error
RemoveByRefresh deletes any tokens with the given refresh token value.
Source Files ¶
- bun_filters.go
- database.go
- migrations.go
- postgres.go
- postgres_agent.go
- postgres_cluster.go
- postgres_clustermessage.go
- postgres_experiments.go
- postgres_filters.go
- postgres_instance.go
- postgres_jobs.go
- postgres_notebook_sessions.go
- postgres_oauth.go
- postgres_proto.go
- postgres_rbac.go
- postgres_resource_managers_dispatcher.go
- postgres_rp_workspace_bindings.go
- postgres_runs.go
- postgres_snapshots.go
- postgres_tasks.go
- postgres_trial.go
- postgres_trial_logs.go
- postgres_trial_metrics.go
- postgres_trial_profiler_metrics.go
- setup.go
- static_query_map.go
- utils.go