Documentation ¶
Index ¶
- Constants
- Variables
- func AddAllocationResources(summary sproto.ResourcesSummary, containerStarted *sproto.ResourcesStarted)
- func AssociateAllocationContainer(aID model.AllocationID, cID cproto.ID)
- func AssociateAllocationTask(aID model.AllocationID, tID model.TaskID, name string, jID model.JobID)
- func AssociateContainerGPU(cID cproto.ID, d device.Device)
- func AssociateContainerRuntimeID(cID cproto.ID, dcID string)
- func AssociateExperimentIDLabels(eID string, labels []string)
- func AssociateJobExperiment(jID model.JobID, eID string, labels expconf.Labels)
- func DisassociateAllocationContainer(aID model.AllocationID, cID cproto.ID)
- func DisassociateAllocationTask(aID model.AllocationID, tID model.TaskID, name string, jID model.JobID)
- func DisassociateContainerGPU(cID cproto.ID, d device.Device)
- func DisassociateExperimentIDLabels(eID string, labels []string)
- func DisassociateJobExperiment(jID model.JobID, eID string, labels expconf.Labels)
- func ErrCount(counter prometheus.Counter, err *error)
- func RemoveAllocationResources(summary sproto.ResourcesSummary)
- func Time(obs prometheus.Observer) (end func())
- type TargetSDConfig
Examples ¶
Constants ¶
// Default exporter ports and the label keys shared by Determined metrics.
const (
	// CAdvisorPort is the default port for cAdvisor, written in ":port" form.
	CAdvisorPort = ":8080"
	// DcgmPort is the default port for DCGM, written in ":port" form.
	DcgmPort = ":9400"
	// DetAgentIDLabel is the label key for the internal ID of the Determined agent.
	DetAgentIDLabel = "det_agent_id"
	// DetResourcePoolLabel is the label key for the resource pool name.
	DetResourcePoolLabel = "det_resource_pool"
)
// DeterminedNamespace is the Prometheus namespace for Determined metrics; the
// package examples pass it as the Namespace option when constructing a metric.
const DeterminedNamespace = "determined"
DeterminedNamespace is the prometheus namespace for Determined metrics.
Variables ¶
var ( // DetStateMetrics is a prometheus registry containing all exported user-facing metrics. DetStateMetrics = prometheus.NewRegistry() )
Functions ¶
func AddAllocationResources ¶
func AddAllocationResources(summary sproto.ResourcesSummary, containerStarted *sproto.ResourcesStarted, )
AddAllocationResources associates the allocation with its container, and the container with its GPUs.
func AssociateAllocationContainer ¶
func AssociateAllocationContainer(aID model.AllocationID, cID cproto.ID)
AssociateAllocationContainer associates an allocation with its container ID.
func AssociateAllocationTask ¶
func AssociateAllocationTask(aID model.AllocationID, tID model.TaskID, name string, jID model.JobID, )
AssociateAllocationTask associates an allocation ID with its task/job info.
func AssociateContainerGPU ¶
AssociateContainerGPU associates a container ID with a GPU device ID.
func AssociateContainerRuntimeID ¶
AssociateContainerRuntimeID associates a Determined container ID with the runtime container ID.
func AssociateExperimentIDLabels ¶
AssociateExperimentIDLabels associates an experiment ID with a list of labels.
func AssociateJobExperiment ¶
AssociateJobExperiment associates a job ID with experiment info.
func DisassociateAllocationContainer ¶
func DisassociateAllocationContainer(aID model.AllocationID, cID cproto.ID)
DisassociateAllocationContainer disassociates an allocation ID from its container ID.
func DisassociateAllocationTask ¶
func DisassociateAllocationTask(aID model.AllocationID, tID model.TaskID, name string, jID model.JobID, )
DisassociateAllocationTask disassociates an allocation ID from its task/job info.
func DisassociateContainerGPU ¶
DisassociateContainerGPU removes the association between a container ID and a GPU device ID.
func DisassociateExperimentIDLabels ¶
DisassociateExperimentIDLabels disassociates an experiment ID from a list of labels.
func DisassociateJobExperiment ¶
DisassociateJobExperiment disassociates a job ID from its experiment info.
func ErrCount ¶
func ErrCount(counter prometheus.Counter, err *error)
ErrCount increments the counter if the err is non-nil. If Prometheus is disabled, it does nothing.
Example ¶
package main import ( "strconv" "github.com/prometheus/client_golang/prometheus" "github.com/determined-ai/determined/master/internal/prom" ) var labels = []string{"method"} var counter = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: prom.DeterminedNamespace, Subsystem: "my-subsystem", Name: "errors", }, labels) func main() { var err error defer prom.ErrCount(counter.WithLabelValues("GET"), &err) // do some stuff that may cause error to be non-nil _, err = strconv.Atoi("abc") }
Output:
func RemoveAllocationResources ¶
func RemoveAllocationResources(summary sproto.ResourcesSummary)
RemoveAllocationResources disassociates the allocation from its container, and the container from its GPUs.
func Time ¶
func Time(obs prometheus.Observer) (end func())
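Time times the duration between calling Time and calling the func() it returns, and observes the result using the prometheus.Observer. It can be used to time a function call, e.g. `defer prom.Time(obs)()` — the trailing () matters, because it defers the returned func rather than Time itself. If Prometheus is disabled, it does nothing.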
Example ¶
package main import ( "time" "github.com/prometheus/client_golang/prometheus" "github.com/determined-ai/determined/master/internal/prom" ) var ( labels = []string{"method"} histogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: prom.DeterminedNamespace, Subsystem: "my-subsystem", Name: "seconds", Buckets: prometheus.DefBuckets, }, labels) ) func main() { defer prom.Time(histogram.WithLabelValues("GET")) // do thing you want to time. time.Sleep(time.Millisecond) }
Output:
Types ¶
type TargetSDConfig ¶
// TargetSDConfig is the format for specifying targets for Prometheus service
// discovery. Both fields are always serialized (no omitempty).
type TargetSDConfig struct {
	// Targets lists the targets of this entry, serialized under the JSON key
	// "targets". NOTE(review): presumably host:port addresses — confirm against callers.
	Targets []string `json:"targets"`
	// Labels holds label name/value pairs, serialized under the JSON key
	// "labels". NOTE(review): presumably applied to every target above — confirm.
	Labels map[string]string `json:"labels"`
}
TargetSDConfig is the format for specifying targets for prometheus service discovery.