prom

package
v0.750.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 19, 2024 License: Apache-2.0 Imports: 9 Imported by: 0

Documentation

Index

Examples

Constants

View Source
const (
	// CAdvisorPort is the default port for cAdvisor.
	// The value is written in ":port" form (leading colon, no host part).
	CAdvisorPort = ":8080"

	// DcgmPort is the default port for DCGM.
	// Like CAdvisorPort, the value is written in ":port" form.
	DcgmPort = ":9400"

	// DetAgentIDLabel is the internal ID for the Determined agent.
	// NOTE(review): judging by the name, this is the label key under which the
	// agent ID is published rather than an ID value itself — confirm against callers.
	DetAgentIDLabel = "det_agent_id"

	// DetResourcePoolLabel is the resource pool name.
	// NOTE(review): presumably the label key that carries the resource pool
	// name — confirm against callers.
	DetResourcePoolLabel = "det_resource_pool"
)
View Source
// DeterminedNamespace is the Prometheus namespace for Determined metrics.
// It is meant to be passed as the Namespace field of metric options such as
// prometheus.CounterOpts or prometheus.HistogramOpts.
const DeterminedNamespace = "determined"

DeterminedNamespace is the prometheus namespace for Determined metrics.

Variables

View Source
var (

	// DetStateMetrics is a prometheus registry containing all exported user-facing metrics.
	// It is created with prometheus.NewRegistry, i.e. it is a dedicated registry
	// that starts out with no collectors pre-registered.
	DetStateMetrics = prometheus.NewRegistry()
)

Functions

func AddAllocationResources

func AddAllocationResources(summary sproto.ResourcesSummary,
	containerStarted *sproto.ResourcesStarted,
)

AddAllocationResources associates the allocation with its container, and the container with its GPUs.

func AssociateAllocationContainer

func AssociateAllocationContainer(aID model.AllocationID, cID cproto.ID)

AssociateAllocationContainer associates an allocation with its container ID.

func AssociateAllocationTask

func AssociateAllocationTask(aID model.AllocationID,
	tID model.TaskID,
	name string,
	jID model.JobID,
)

AssociateAllocationTask associates an allocation ID with its task/job info.

func AssociateContainerGPU

func AssociateContainerGPU(cID cproto.ID, d device.Device)

AssociateContainerGPU associates container ID with GPU device ID.

func AssociateContainerRuntimeID

func AssociateContainerRuntimeID(cID cproto.ID, dcID string)

AssociateContainerRuntimeID associates a Determined container ID with the runtime container ID.

func AssociateExperimentIDLabels

func AssociateExperimentIDLabels(eID string, labels []string)

AssociateExperimentIDLabels associates experiment ID with a list of labels.

func AssociateJobExperiment

func AssociateJobExperiment(jID model.JobID, eID string, labels expconf.Labels)

AssociateJobExperiment associates a job ID with experiment info.

func DisassociateAllocationContainer

func DisassociateAllocationContainer(aID model.AllocationID, cID cproto.ID)

DisassociateAllocationContainer disassociates an allocation ID from its container ID.

func DisassociateAllocationTask

func DisassociateAllocationTask(aID model.AllocationID, tID model.TaskID, name string,
	jID model.JobID,
)

DisassociateAllocationTask disassociates an allocation ID from its task/job info.

func DisassociateContainerGPU

func DisassociateContainerGPU(cID cproto.ID, d device.Device)

DisassociateContainerGPU removes association between container ID and device ID.

func DisassociateExperimentIDLabels

func DisassociateExperimentIDLabels(eID string, labels []string)

DisassociateExperimentIDLabels disassociates an experiment ID from a list of labels.

func DisassociateJobExperiment

func DisassociateJobExperiment(jID model.JobID, eID string, labels expconf.Labels)

DisassociateJobExperiment disassociates a job ID from its experiment info.

func ErrCount

func ErrCount(counter prometheus.Counter, err *error)

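ErrCount increments the counter if the error pointed to by err is non-nil. Because err is a pointer, ErrCount can be deferred before the error is assigned. If Prometheus is disabled, it does nothing.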

Example
package main

import (
	"strconv"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/determined-ai/determined/master/internal/prom"
)

// labels lists the variable label names attached to every counter in the vector.
var labels = []string{"method"}

// counter counts errors, partitioned by the "method" label.
//
// The fully-qualified metric name is assembled from Namespace, Subsystem and
// Name, so every part has to stay within the classic Prometheus metric-name
// character set ([a-zA-Z0-9_:]). That is why the subsystem is spelled with an
// underscore rather than a hyphen.
var counter = prometheus.NewCounterVec(prometheus.CounterOpts{
	Namespace: prom.DeterminedNamespace,
	Subsystem: "my_subsystem",
	Name:      "errors",
}, labels)

func main() {
	var err error
	defer prom.ErrCount(counter.WithLabelValues("GET"), &err)

	// do some stuff that may cause error to be non-nil
	_, err = strconv.Atoi("abc")
}
Output:

func RemoveAllocationResources

func RemoveAllocationResources(summary sproto.ResourcesSummary)

RemoveAllocationResources disassociates the allocation from its container, and the container from its GPUs.

func Time

func Time(obs prometheus.Observer) (end func())

Time times the duration between calling Time and calling the func() it returns, and observes the result using the prometheus.Observer. It can be used to time a function call; note that the returned func must be invoked for anything to be observed, e.g. `defer prom.Time(obs)()`. If Prometheus is disabled, it does nothing.

Example
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/determined-ai/determined/master/internal/prom"
)

var (
	// labels lists the variable label names attached to every histogram in the vector.
	labels = []string{"method"}

	// histogram records durations, partitioned by the "method" label, using
	// the client library's default bucket boundaries.
	//
	// The fully-qualified metric name is assembled from Namespace, Subsystem
	// and Name, so every part has to stay within the classic Prometheus
	// metric-name character set ([a-zA-Z0-9_:]). That is why the subsystem is
	// spelled with an underscore rather than a hyphen.
	histogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Namespace: prom.DeterminedNamespace,
		Subsystem: "my_subsystem",
		Name:      "seconds",
		Buckets:   prometheus.DefBuckets,
	}, labels)
)

// main demonstrates timing a whole function body with prom.Time.
func main() {
	// prom.Time starts the measurement when it is called and returns the func
	// that ends it and records the observation. The trailing () matters:
	// prom.Time(...) is evaluated right here, and only the returned func is
	// deferred. Without the (), prom.Time itself would be deferred, its result
	// discarded, and no duration would ever be observed.
	defer prom.Time(histogram.WithLabelValues("GET"))()

	// do thing you want to time.
	time.Sleep(time.Millisecond)
}
Output:

Types

type TargetSDConfig

// TargetSDConfig is the format for specifying targets for Prometheus service
// discovery. It is encoded to JSON as an object with "targets" and "labels" keys.
type TargetSDConfig struct {
	// Targets is encoded under the "targets" key.
	// NOTE(review): presumably host:port scrape addresses — confirm against callers.
	Targets []string          `json:"targets"`
	// Labels is encoded under the "labels" key.
	// NOTE(review): presumably labels applied to every entry in Targets — confirm.
	Labels  map[string]string `json:"labels"`
}

TargetSDConfig is the format for specifying targets for Prometheus service discovery.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL