metrics

package
v0.0.0-...-2b72f5a Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 28, 2025 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation

Overview

Package metrics provides a framework for cluster-based metrics.

Index

Constants

View Source
// MetricIDPattern is a regular expression fragment describing a valid metric
// identifier: a lowercase letter, optionally followed by up to 61 lowercase
// letters, digits or hyphens and then one final lowercase letter or digit
// (1 to 63 characters in total; a hyphen can never be the last character).
//
// The fragment carries no ^ or $ anchors, so it must be wrapped with anchors
// by whoever compiles it if a whole-string match is required.
const MetricIDPattern = `[a-z](?:[a-z0-9-]{0,61}[a-z0-9])?`

MetricIDPattern defines the valid format for metric identifiers. Metric identifiers must be valid google.aip.dev/122 resource ID segments.

Variables

View Source
// Standard metrics. Each metric is declared through a metricBuilder literal
// and finalised with Build(); ComputedMetrics at the end of the block lists
// all of them.
var (
	// The number of distinct developer changelists that failed at least one
	// presubmit (CQ) run because of failure(s) in this cluster. Excludes
	// changelists authored by automation.
	HumanClsFailedPresubmit = metricBuilder{
		ID:                "human-cls-failed-presubmit",
		HumanReadableName: "User Cls Failed Presubmit",
		Description:       "The number of distinct developer changelists that failed at least one presubmit (CQ) run because of failure(s) in a cluster.",
		DefaultConfig: Configuration{
			SortPriority: 400,
			IsDefault:    true,
		},

		// Only un-exonerated failures that blocked the ingested invocation on a
		// failed, critical build in a user-owned, failed FULL_RUN presubmit run.
		FilterSQL: `f.is_ingested_invocation_blocked AND COALESCE(ARRAY_LENGTH(f.exonerations) = 0, TRUE) AND f.build_status = 'FAILURE' ` +
			`AND f.build_critical AND f.presubmit_run_mode = 'FULL_RUN' AND f.presubmit_run_owner='user' AND f.presubmit_run_status = 'FAILED'`,

		// Distinct items are keyed by "<host>/<change>" of the first changelist.
		CountSQL: `CONCAT(f.sources.changelists[SAFE_OFFSET(0)].host, '/', f.sources.changelists[SAFE_OFFSET(0)].change)`,
	}.Build()

	// The number of verdicts on test variants which were configured to be
	// presubmit-blocking, which were exonerated (i.e. did not actually block
	// presubmit) because infrastructure determined the test variant to be
	// failing or too flaky at tip-of-tree. If this number is non-zero, it
	// means a test variant which was configured to be presubmit-blocking is
	// not stable enough to do so, and should be fixed or made non-blocking.
	CriticalFailuresExonerated = metricBuilder{
		ID:                "critical-failures-exonerated",
		HumanReadableName: "Presubmit-blocking Verdicts Exonerated",
		Description:       "The number of presubmit-blocking test verdicts which were exonerated (i.e. did not actually block presubmit) because infrastructure determined the test variant to be failing or too flaky at tip-of-tree.",
		DefaultConfig: Configuration{
			SortPriority: 500,
			IsDefault:    true,
		},

		// Critical builds with at least one exoneration whose reason is
		// OCCURS_ON_OTHER_CLS.
		FilterSQL: `f.build_critical AND (EXISTS (SELECT TRUE FROM UNNEST(f.exonerations) e WHERE e.Reason = 'OCCURS_ON_OTHER_CLS'))`,

		// Distinct items are keyed by invocation, test ID and variant hash.
		CountSQL: `CONCAT(f.ingested_invocation_id, '/', f.test_id, '/', f.variant_hash)`,
	}.Build()

	// The number of test runs that failed. Test runs are generally
	// equivalent to swarming tasks.
	TestRunsFailed = metricBuilder{
		ID:                "test-runs-failed",
		HumanReadableName: "Test Runs Failed",
		Description:       "The number of distinct test runs (i.e. swarming tasks or builds) failed due to failures in a cluster.",
		DefaultConfig: Configuration{
			SortPriority: 200,
		},
		FilterSQL: `f.is_test_run_blocked`,
		CountSQL:  `f.test_run_id`,
	}.Build()

	// The total number of test results in this cluster. LUCI Analysis only
	// clusters test results which are unexpected and have a status of crash,
	// abort or fail, so by definition the only test results counted here
	// will be an unexpected fail/crash/abort.
	//
	// FilterSQL and CountSQL are both left unset for this metric.
	// NOTE(review): per the BaseDefinition field docs, blank values presumably
	// mean "no filtering" and "count failures rather than distinct items" --
	// confirm against metricBuilder.Build.
	Failures = metricBuilder{
		ID:                "failures",
		HumanReadableName: "Test Results Failed",
		Description:       "The total number of test results in a cluster. LUCI Analysis only clusters test results which are unexpected and have a status of crash, abort or fail.",
		DefaultConfig: Configuration{
			SortPriority: 100,
			IsDefault:    true,
		},
	}.Build()

	// The number of builds monitored by a gardener rotation which had failing
	// test runs because of flaky test variants. The filter requires a
	// non-empty build_gardener_rotations array, a blocked test run, no
	// exonerations, and a positive flaky_verdicts_24h on the test variant
	// branch. Distinct ingested invocation IDs are counted.
	BuildsWithTestRunsFailedDueToFlakyTests = metricBuilder{

		ID:                "builds-failed-due-to-flaky-tests",
		HumanReadableName: "Builds with Test Runs Failed by Flaky Test Variants",
		Description: "The number of builds monitored by a gardener rotation which had failing test runs " +
			"(i.e. all attempts of a test variant within a single swarming task failed) because of flaky test variants. " +
			" To be considered flaky, the test variant must have seen at least one flaky verdict on the same branch in the last 24 hours.",
		DefaultConfig: Configuration{
			SortPriority: 300,
		},

		FilterSQL: "ARRAY_LENGTH(f.build_gardener_rotations) > 0 AND " +
			"f.is_test_run_blocked AND " +
			"(f.exonerations IS NULL OR ARRAY_LENGTH(f.exonerations) = 0) AND " +
			"f.test_variant_branch.flaky_verdicts_24h > 0",

		CountSQL: "f.ingested_invocation_id",
	}.Build()

	// The number of failures in this cluster that have filtered test runs
	// attributed to them. RequireAttrs is set because the filter reads
	// attrs.attributed_filtered_run_count. CountSQL is left unset.
	FailuresWithAttributedFilteredTestRuns = metricBuilder{
		ID:                "failures-with-attributed-filtered-test-runs",
		HumanReadableName: "Failures with Attributed Filtered Test Runs",
		Description: "The number of failures in this cluster that has filtered test runs being attributed to them," +
			" which means those failures caused some tests to be filtered out in the test scheduler.",
		DefaultConfig: Configuration{
			SortPriority: 350,
		},
		RequireAttrs: true,
		FilterSQL:    "attrs.attributed_filtered_run_count > 0",
	}.Build()

	// The number of builds that had flakes in presubmit in this cluster.
	BuildsWithFlakesInPresubmit = metricBuilder{
		ID:                "builds-with-flakes-in-presubmit",
		HumanReadableName: "Builds with Flakes in Presubmit",
		Description:       "The total number of builds with at least one flaky test verdict in presubmit, due to this cluster.",
		DefaultConfig: Configuration{
			SortPriority: 250,
		},

		// Failures that did not block the ingested invocation and that have a
		// presubmit run ID.
		FilterSQL: `not f.is_ingested_invocation_blocked AND f.presubmit_run_id.id is not null`,

		CountSQL: `f.ingested_invocation_id`,
	}.Build()

	// ComputedMetrics is the set of metrics computed for each cluster and
	// stored on the cluster summaries table.
	ComputedMetrics = []BaseDefinition{
		HumanClsFailedPresubmit,
		CriticalFailuresExonerated,
		TestRunsFailed,
		Failures,
		BuildsWithTestRunsFailedDueToFlakyTests,
		FailuresWithAttributedFilteredTestRuns,
		BuildsWithFlakesInPresubmit,
	}
)

Standard metrics.

View Source
// OneDayNominalBasis is the calculation basis covering one day of data with
// Residual set to false.
var OneDayNominalBasis = CalculationBasis{Residual: false, IntervalDays: 1}
View Source
// OneDayResidualBasis is the calculation basis covering one day of data with
// Residual set to true.
var OneDayResidualBasis = CalculationBasis{Residual: true, IntervalDays: 1}
View Source
// SevenDayNominalBasis is the calculation basis covering seven days of data
// with Residual set to false.
var SevenDayNominalBasis = CalculationBasis{Residual: false, IntervalDays: 7}
View Source
// SevenDayResidualBasis is the calculation basis covering seven days of data
// with Residual set to true.
var SevenDayResidualBasis = CalculationBasis{Residual: true, IntervalDays: 7}
View Source
// ThreeDayNominalBasis is the calculation basis covering three days of data
// with Residual set to false.
var ThreeDayNominalBasis = CalculationBasis{Residual: false, IntervalDays: 3}
View Source
// ThreeDayResidualBasis is the calculation basis covering three days of data
// with Residual set to true.
var ThreeDayResidualBasis = CalculationBasis{Residual: true, IntervalDays: 3}

Functions

This section is empty.

Types

type BaseDefinition

// BaseDefinition represents the built-in definition of a metric. It does not
// include the parts of the metric definition which can be overridden by
// individual LUCI Projects.
type BaseDefinition struct {
	// ID is the identifier of the metric, for example "human-cls-failed-presubmit".
	// This is the same as the AIP-122 resource name of the metric,
	// excluding "metrics/".
	ID ID

	// A human readable name for the metric. Appears on the user interface.
	HumanReadableName string

	// A human readable description of the metric. Appears on the user interface
	// behind a help tooltip.
	Description string

	// BaseColumnName is the name of the metric to use in SQL column names.
	// It is the same as the identifier ID, but with hyphens (-) replaced with
	// underscores.
	BaseColumnName string

	// RequireAttrs indicates whether calculating this metric requires joining the
	// failure_attributes table. When set to true, fields on the
	// failure_attributes table may be accessed via the prefix "attrs." by the
	// metric definition SQLs.
	RequireAttrs bool

	// The predicate on failures, that defines when an item is eligible to be
	// counted. Fields on the clustered_failures table may be accessed via the
	// prefix "f.". If `RequireAttrs` is set to true, fields on the
	// failure_attributes table may be accessed via the prefix "attrs.".
	//
	// For example, to count over failures on critical builds, use:
	// "f.build_critical".
	//
	// If no filtering is desired, this is left blank.
	FilterSQL string

	// An expression that defines the distinct items to count. Fields on the
	// clustered_failures table may be accessed via the prefix "f.". If
	// `RequireAttrs` is set to true, fields on the failure_attributes table may
	// be accessed via the prefix "attrs.".
	//
	// For example, to count distinct changelists, use:
	// `IF(ARRAY_LENGTH(f.sources.changelists)>0,
	//	  CONCAT(f.sources.changelists[OFFSET(0)].host, f.sources.changelists[OFFSET(0)].change),
	// NULL)`
	// While this may return NULL for items not to be counted, it is generally
	// preferred to use FilterSQL for that purpose.
	//
	// If failures are to be counted instead of distinct items, this is left blank.
	CountSQL string

	// DefaultConfig represents the default configuration for the metric.
	DefaultConfig Configuration
}

BaseDefinition represents the built-in definition of a metric. It does not include the parts of the metric definition which can be overridden by individual LUCI Projects.

func ByID

func ByID(id ID) (BaseDefinition, error)

ByID returns the metric with the given ID, if any.

func MustByID

func MustByID(id ID) BaseDefinition

MustByID returns the metric with the given ID and panics if no metric with that ID exists.

func (BaseDefinition) AdaptToProject

func (m BaseDefinition) AdaptToProject(project string, cfg *configpb.Metrics) Definition

AdaptToProject completes the definition of a built-in metric, by attaching LUCI Project-specific configuration.

func (BaseDefinition) ColumnName

func (m BaseDefinition) ColumnName(suffix string) string

ColumnName returns a column name to use for the metric in queries, with metric_ prefix used to namespace the column name to avoid name collisions with other predefined columns, and the specified suffix used to namespace the column name within other columns for the same metric.

type CalculationBasis

// CalculationBasis defines a way of calculating a metric: which failures are
// included (Residual) and over how many days of data (IntervalDays).
type CalculationBasis struct {
	// Residual is whether failures that are included in bug clusters should
	// be counted in suggested clusters as well.
	//
	// NOTE(review): the Counts type describes the residual value as the one
	// that *excludes* failures already counted under higher-priority (e.g.
	// bug) clusters, which reads as the opposite of the sentence above --
	// confirm which polarity Residual == true selects.
	Residual bool
	// IntervalDays is the number of days of data to include in the metric.
	IntervalDays int
}

CalculationBasis defines a way of calculating a metric.

func (CalculationBasis) ColumnSuffix

func (c CalculationBasis) ColumnSuffix() string

ColumnSuffix returns a column suffix that can be used to distinguish this calculation basis from all other calculation bases.

type Configuration

// Configuration represents properties of a metric that can be overridden by a
// LUCI project.
type Configuration struct {
	// SortPriority is a number that defines the order by which metrics are
	// sorted by default. The metric with the highest sort priority
	// will define the primary sort order, followed by the metric with the
	// second highest sort priority, and so on.
	SortPriority int

	// IsDefault indicates whether this metric is shown by default in the UI.
	IsDefault bool
}

Configuration represents properties of a metric that can be overridden by a LUCI project.

type Counts

// Counts captures the values of an integer-valued metric in different
// calculation bases (nominal and residual). Both fields carry JSON tags.
type Counts struct {
	// Nominal is the value of the metric, as calculated over all failures
	// in the cluster.
	Nominal int64 `json:"nominal"`
	// Residual is the value of the metric excluding failures already counted
	// under other higher-priority clusters (e.g. bug clusters.)
	// For bug clusters, this is the same as the nominal metric value.
	// For suggested clusters, this may be less than the nominal metric
	// value if some of the failures are also in a bug cluster.
	Residual int64 `json:"residual"`
}

Counts captures the values of an integer-valued metric in different calculation bases.

type Definition

// Definition represents the complete definition of a metric for a LUCI
// Project. It includes the values of metric properties that the project can
// override.
type Definition struct {
	// BaseDefinition is embedded, so the built-in metric fields (ID,
	// FilterSQL, CountSQL, ...) are promoted onto Definition.
	BaseDefinition

	// The AIP-122 resource name of the metric.
	// E.g. "projects/chromium/metrics/human-cls-failed-presubmit"
	Name string

	// Config represents the configuration of the metric for the LUCI Project.
	Config Configuration
}

Definition represents the complete definition of a metric for a LUCI Project. It includes the values of metric properties that the project can override.

type ID

// ID is an identifier of a metric. For example, "human-cls-failed-presubmit".
// It should be a valid AIP-122 Resource ID Segment.
type ID string

ID is an identifier of a metric. For example, "human-cls-failed-presubmit". It should be a valid AIP-122 Resource ID Segment.

func (ID) String

func (i ID) String() string

type TimewiseCounts

// TimewiseCounts captures the value of a metric over multiple time periods
// (one, three and seven days), each as a Counts value.
type TimewiseCounts struct {
	// OneDay is the value of the metric for the last day.
	OneDay Counts
	// ThreeDay is the value of the metric for the last three days.
	ThreeDay Counts
	// SevenDay is the value of the metric for the last seven days.
	SevenDay Counts
}

TimewiseCounts captures the value of a metric over multiple time periods.

func (*TimewiseCounts) PutValue

func (tc *TimewiseCounts) PutValue(value int64, basis CalculationBasis)

PutValue stores the metric value obtained for the given calculation basis in TimewiseCounts.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL