Documentation ¶
Index ¶
- Constants
- Variables
- func GetAnnotationsFromRuntimeInfo(info interface{}) map[string]string
- func GetQueryLabelsFromPrometheus(query string) map[string]string
- func InitMetrics()
- func StartMetricsService(port int, queueFunc ListQueueFunc, jobFunc ListJobFunc) string
- type Info
- type JobMetricCollector
- type JobStageTimeRecorder
- type JobStatus
- type JobTimePoint
- type ListJobFunc
- type ListQueueFunc
- type MetricRunCollector
- type QueueMetricCollector
- type RunRecorderManager
- func (m *RunRecorderManager) AddJobStageTimeRecord(runID, stepName, jobID string, status schema.JobStatus, stage stageTimeType, ...)
- func (m *RunRecorderManager) AddRunStageTimeRecord(runID, requestID, status string, stage stageTimeType, timestamp time.Time)
- func (m *RunRecorderManager) AddStepStageTimeRecord(runID, stepName string, stage stageTimeType, timestamp time.Time)
- type RunStageTimeRecorder
- type StageTimeRecorder
- type Status
- type StepStageTimeRecorder
- type TimePoint
- type TimePointManager
- type Timestamps
Constants ¶
View Source
const ( MetricJobTime = "pf_metric_job_time" MetricQueueInfo = "pf_metric_queue_info" MetricJobGPUInfo = "pf_metric_job_gpu_info" MetricApiDuration = "pf_metric_api_duration_millisecond" MetricRunDuration = "pf_metric_run_duration_millisecond" MetricRunJobDuration = "pf_metric_runJob_duration_millisecond" )
View Source
const ( JobIDLabel = "jobID" GpuIdxLabel = "id" StatusLabel = "status" QueueIDLabel = "queueID" FinishedStatusLabel = "finishedStatus" QueueNameLabel = "queueName" UserNameLabel = "userName" ResourceLabel = "resource" TypeLabel = "type" BaiduGpuIndexLabel = "baidu_com_gpu_idx" ApiNameLabel = "apiName" RequestMethodLabel = "method" ResponseCodeLabel = "code" RunIDLabel = "runID" RunStageLabel = "runStage" RunStepNameLabel = "runStepName" RunStepStageLabel = "runStepStage" RunJobStageLabel = "runJobStage" RequestIDLabel = "requestID" RunJobIDLabel = "runJobID" )
View Source
const ( MinTimePoint = T1 MinStatus = StatusDBInserting MaxTimePoint = T8 MaxStatus = StatusRunning )
View Source
const ( MaxNum = 10000 Timeout = time.Hour ZeroDuration = time.Duration(0) )
View Source
const ( QueueTypeMaxResource = "maxResource" QueueTypeMinResource = "minResource" QueueTypeScalarResource = "scalarResource" )
View Source
const ( // 开始创建run的时间 StageRunStartTime stageTimeType = "run start" // run处于终态的时间 StageRunEndTime stageTimeType = "run end" // 开始解析 runyaml 的时间 StageRunParseStartTime stageTimeType = "run parse start" // 完成 runyaml 解析的时间 StageRunParseEndTime stageTimeType = "run parse end" // 对runyaml 以及相关参数进行校验的开始时间 StageRunValidateStartTime stageTimeType = "run validate start" // 完成对runyaml 以及相关参数进行校验的结束时间 StageRunValidateEndTime stageTimeType = "run validate end" // 开始进行Run后处理的时间:即Run检测到处于终态的时间 StageRunAftertreatmentStartTime stageTimeType = "run aftertreatment start" // 开始进行Step的调度时间:即确定Step可以运行的时间点 StageStepScheduleStartTime stageTimeType = "step schedule start" // Job 开始调度的时间,等价于StageStepScheduleStartTime // 这里需要再次进行记录是因为循环结构中,不同的job的调度结束时间不一致,为了方便计算每个job的调度时间,所以会在每一个jobTimeRecorder单独记录一次 // 在StepTimeRecorder中记录该时间点,是因为在Job阶段无法获取到该信息,因此需要在StepTimeRecorder进行记录,然后在jobTimeRecorder进行拷贝操作。 StageJobScheduleStartTime stageTimeType = "job schedule start" // 完成job调度的时间:即在调用Job模块的Create前的时间 StageJobScheduleEndTime stageTimeType = "job schedule end" // 完成Job创建的时间 StageJobCreateEndTime stageTimeType = "job create end" // 开始进行 Job 后处理的时间:也即Job处于终态的时间点 StageJobAftertreatmentStartTime stageTimeType = "job aftertreatment start" // 完成Job 后处理的时间:也即 Job的终态写入数据库的时间 StageJobAftertreatmentEndTime stageTimeType = "job aftertreatment end" )
View Source
const ( // run stage StageRunExecuteDuration = "execution" StageRunParseDuration = "parse" StageRunValidateDuration = "validate" StageRunAftertreatmentDuration = "aftertreatment" // job stage StageRunJobScheduleDuration = "job schedule" StageRunJobCreateDuration = "job create" StageRunJobAftertreatmentDuration = "job aftertreatment" )
View Source
const ( PromQLQueryPodAnnotations = "kube_pod_annotations{pod~=\"%s\"}" PromQLQueryPodLabels = "kube_pod_labels{pod~=\"%s\"}" )
View Source
const (
DefaultMetricPort = 8231
)
View Source
const (
QueryTimeout = time.Second * 1
)
Variables ¶
View Source
var ( RunMetricManger *RunRecorderManager Job TimePointManager PromAPIClient prom_v1.API )
View Source
var APiDurationSummary = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: MetricApiDuration, Help: toHelp(MetricApiDuration), Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001, 1: 0}, }, []string{ApiNameLabel, RequestMethodLabel, ResponseCodeLabel})
View Source
var (
ZeroTime = time.Time{}
)
Functions ¶
func GetAnnotationsFromRuntimeInfo ¶ added in v0.14.5
GetAnnotationsFromRuntimeInfo gets the annotations from the info map.
func GetQueryLabelsFromPrometheus ¶ added in v0.14.5
GetQueryLabelsFromPrometheus returns the query labels from Prometheus. Deprecated.
func InitMetrics ¶
func InitMetrics()
func StartMetricsService ¶
func StartMetricsService(port int, queueFunc ListQueueFunc, jobFunc ListJobFunc) string
Types ¶
type JobMetricCollector ¶
type JobMetricCollector struct {
// contains filtered or unexported fields
}
func NewJobMetricsCollector ¶
func NewJobMetricsCollector(manager TimePointManager, listJob ListJobFunc) *JobMetricCollector
func (*JobMetricCollector) Collect ¶
func (j *JobMetricCollector) Collect(metrics chan<- prometheus.Metric)
func (*JobMetricCollector) Describe ¶
func (j *JobMetricCollector) Describe(descs chan<- *prometheus.Desc)
type JobStageTimeRecorder ¶ added in v0.14.6
type JobStageTimeRecorder struct { *StageTimeRecorder JobID string StepName string RunID string Status schema.JobStatus }
func NewJobStageTimeRecorder ¶ added in v0.14.6
func NewJobStageTimeRecorder(jobID string, stepName string, runID string) *JobStageTimeRecorder
type JobTimePoint ¶
type JobTimePoint int
const ( // T1 api query time T1 JobTimePoint = iota // T2 db insert time T2 // T3 enqueue time T3 // T4 dequeue time T4 // T5 submit time T5 // T6 scheduled time // TODO: T6 is not supported yet T6 // T7 run time T7 // T8 finish(success/fail) time T8 )
func (JobTimePoint) Index ¶
func (t JobTimePoint) Index() int
func (JobTimePoint) Status ¶
func (t JobTimePoint) Status() Status
type ListJobFunc ¶ added in v0.14.5
type ListQueueFunc ¶
type MetricRunCollector ¶ added in v0.14.6
type MetricRunCollector struct {
// contains filtered or unexported fields
}
func NewMetricRunCollector ¶ added in v0.14.6
func NewMetricRunCollector() *MetricRunCollector
func (*MetricRunCollector) Collect ¶ added in v0.14.6
func (rm *MetricRunCollector) Collect(ch chan<- prometheus.Metric)
func (*MetricRunCollector) Describe ¶ added in v0.14.6
func (rm *MetricRunCollector) Describe(descs chan<- *prometheus.Desc)
type QueueMetricCollector ¶
type QueueMetricCollector struct {
// contains filtered or unexported fields
}
func NewQueueMetricsCollector ¶
func NewQueueMetricsCollector(queueFunc ListQueueFunc) *QueueMetricCollector
func (*QueueMetricCollector) Collect ¶
func (q *QueueMetricCollector) Collect(metrics chan<- prometheus.Metric)
func (*QueueMetricCollector) Describe ¶
func (q *QueueMetricCollector) Describe(descs chan<- *prometheus.Desc)
type RunRecorderManager ¶ added in v0.14.6
func NewRunRecorderManager ¶ added in v0.14.6
func NewRunRecorderManager() *RunRecorderManager
func (*RunRecorderManager) AddJobStageTimeRecord ¶ added in v0.14.6
func (*RunRecorderManager) AddRunStageTimeRecord ¶ added in v0.14.6
func (m *RunRecorderManager) AddRunStageTimeRecord(runID, requestID, status string, stage stageTimeType, timestamp time.Time)
func (*RunRecorderManager) AddStepStageTimeRecord ¶ added in v0.14.6
func (m *RunRecorderManager) AddStepStageTimeRecord(runID, stepName string, stage stageTimeType, timestamp time.Time)
type RunStageTimeRecorder ¶ added in v0.14.6
type RunStageTimeRecorder struct { *StageTimeRecorder RunID string RequestID string StepStages sync.Map Status string }
func NewRunStageTimeRecorder ¶ added in v0.14.6
func NewRunStageTimeRecorder(runID, reqID string) *RunStageTimeRecorder
type StageTimeRecorder ¶ added in v0.14.6
type StageTimeRecorder struct { // 用于进行日志标识 LoggerMeta string StageTime sync.Map Support []stageTimeType }
func NewStageTimeRecorder ¶ added in v0.14.6
func NewStageTimeRecorder(suppport []stageTimeType, loggerMeta string) *StageTimeRecorder
type StepStageTimeRecorder ¶ added in v0.14.6
type StepStageTimeRecorder struct { *StageTimeRecorder StepName string RunID string JobStages sync.Map }
func NewStepStageTimeRecorder ¶ added in v0.14.6
func NewStepStageTimeRecorder(stepName string, runID string) *StepStageTimeRecorder
type TimePointManager ¶
type TimePointManager interface { AddTimestamp(key string, timePoint TimePoint, timestamp time.Time, extraInfos ...Info) GetStatusTime(key string, status Status) (time.Duration, bool) GetTimestamp(key string, timePoint TimePoint) (time.Time, bool) GetTimestampsCache() map[string]Timestamps GetStatusCount(status Status) int64 GetInfo(key string) (Info, bool) }
func NewJobMetricTimePointManager ¶
func NewJobMetricTimePointManager() TimePointManager
NewJobMetricTimePointManager returns the default implementation of the job metric manager.
Click to show internal directories.
Click to hide internal directories.