Documentation ¶
Index ¶
- Constants
- Variables
- func RecordAPICallDuration(ctx context.Context, operation, status, kind string, startTime time.Time)
- func RecordApplyDuration(ctx context.Context, status, commit string, startTime time.Time)
- func RecordApplyOperation(ctx context.Context, controller, operation, status, kind string)
- func RecordDeclaredResources(ctx context.Context, commit string, numResources int)
- func RecordInternalError(ctx context.Context, source string)
- func RecordLastSync(ctx context.Context, status, commit string, timestamp time.Time)
- func RecordParserDuration(ctx context.Context, trigger, source, status string, startTime time.Time)
- func RecordPipelineError(ctx context.Context, reconcilerType, component string, errLen int)
- func RecordReconcileDuration(ctx context.Context, status string, startTime time.Time)
- func RecordReconcilerErrors(ctx context.Context, component string, errs []v1beta1.ConfigSyncError)
- func RecordRemediateDuration(ctx context.Context, status, kind string, startTime time.Time)
- func RecordResourceConflict(ctx context.Context, _, commit string)
- func RecordResourceFight(ctx context.Context, _, _ string)
- func RegisterOCAgentExporter(containerName string) (*ocagent.Exporter, error)
- func RegisterReconcilerManagerMetricsViews() error
- func RegisterReconcilerMetricsViews() error
- func StatusTagKey(err error) string
- func StatusTagValueFromSummary(summary *v1beta1.ErrorSummary) string
Constants ¶
const ( // OpenTelemetry is the app label for all otel resources. OpenTelemetry = "opentelemetry" // OtelAgentName is the name of the OpenTelemetry Agent. OtelAgentName = "otel-agent" // OtelCollectorName is the name of the OpenTelemetry Collector. OtelCollectorName = "otel-collector" // OtelCollectorGooglecloud is the name of the OpenTelemetry Collector ConfigMap that contains Googlecloud exporter. OtelCollectorGooglecloud = "otel-collector-googlecloud" // OtelCollectorCustomCM is the name of the custom OpenTelemetry Collector ConfigMap. OtelCollectorCustomCM = "otel-collector-custom" // MonitoringNamespace is the Namespace used for OpenTelemetry Collector deployment. MonitoringNamespace = "config-management-monitoring" // CollectorConfigGooglecloud is the OpenTelemetry Collector configuration with // the googlecloud exporter. CollectorConfigGooglecloud = `` /* 6970-byte string literal not displayed */ )
const ( // StatusSuccess is the string value for the status key indicating success StatusSuccess = "success" // StatusError is the string value for the status key indicating failure/errors StatusError = "error" // CommitNone is the string value for the commit key indicating that no // commit has been synced. CommitNone = "NONE" // ApplierController is the string value for the applier controller in the multi-repo mode ApplierController = "applier" // RemediatorController is the string value for the remediator controller in the multi-repo mode RemediatorController = "remediator" )
Variables ¶
var ( // APICallDuration metric measures the latency of API server calls. APICallDuration = stats.Float64( "api_duration_seconds", "The duration of API server calls in seconds", stats.UnitSeconds) // ReconcilerErrors metric measures the number of errors in the reconciler. ReconcilerErrors = stats.Int64( "reconciler_errors", "The number of errors in the reconciler", stats.UnitDimensionless) // PipelineError metric measures the error by components when syncing a commit. // Definition here must exactly match the definition in the resource-group // controller, or the Prometheus exporter will error. b/247516388 // https://github.com/GoogleContainerTools/kpt-resource-group/blob/main/controllers/metrics/metrics.go#L88 PipelineError = stats.Int64( "pipeline_error_observed", "A boolean value indicates if error happened at readiness stage when syncing a commit", stats.UnitDimensionless) // ReconcileDuration metric measures the latency of reconcile events. ReconcileDuration = stats.Float64( "reconcile_duration_seconds", "The duration of reconcile events in seconds", stats.UnitSeconds) // ParserDuration metric measures the latency of the parse-apply-watch loop. ParserDuration = stats.Float64( "parser_duration_seconds", "The duration of the parse-apply-watch loop in seconds", stats.UnitSeconds) // LastSync metric measures the timestamp of the latest Git sync. LastSync = stats.Int64( "last_sync_timestamp", "The timestamp of the most recent sync from Git", stats.UnitDimensionless) // DeclaredResources metric measures the number of declared resources parsed from Git. DeclaredResources = stats.Int64( "declared_resources", "The number of declared resources parsed from Git", stats.UnitDimensionless) // ApplyOperations metric measures the number of applier apply events. 
ApplyOperations = stats.Int64( "apply_operations", "The number of operations that have been performed to sync resources to source of truth", stats.UnitDimensionless) // ApplyDuration metric measures the latency of applier apply events. ApplyDuration = stats.Float64( "apply_duration_seconds", "The duration of applier events in seconds", stats.UnitSeconds) // ResourceFights metric measures the number of resource fights. ResourceFights = stats.Int64( "resource_fights", "The number of resources that are being synced too frequently", stats.UnitDimensionless) // RemediateDuration metric measures the latency of remediator reconciliation events. RemediateDuration = stats.Float64( "remediate_duration_seconds", "The duration of remediator reconciliation events", stats.UnitSeconds) // LastApply metric measures the timestamp of the most recent applier apply event. LastApply = stats.Int64( "last_apply_timestamp", "The timestamp of the most recent applier event", stats.UnitDimensionless) // ResourceConflicts metric measures the number of resource conflicts. ResourceConflicts = stats.Int64( "resource_conflicts", "The number of resource conflicts resulting from a mismatch between the cached resources and cluster resources", stats.UnitDimensionless) // InternalErrors metric measures the number of unexpected internal errors triggered by defensive checks in Config Sync. InternalErrors = stats.Int64( "internal_errors", "The number of internal errors triggered by Config Sync", stats.UnitDimensionless) )
var (
// KeyName groups metrics by the reconciler name. Possible values: root-reconciler, ns-reconciler-<namespace>
// TODO b/208316928 remove this key from pipeline_error_observed metric once same metric in Resource Group Controller has this tag removed
KeyName, _ = tag.NewKey("name")
// KeyReconcilerType groups metrics by the reconciler type. Possible values: root, namespace.
// TODO: replace with configsync.sync.kind resource attribute
KeyReconcilerType, _ = tag.NewKey("reconciler")
// KeyOperation groups metrics by their operation. Possible values: create, patch, update, delete.
KeyOperation, _ = tag.NewKey("operation")
// KeyController groups metrics by their controller. Possible values: applier, remediator.
KeyController, _ = tag.NewKey("controller")
// KeyComponent groups metrics by their component. Possible values: source, sync, rendering, readiness (from Resource Group Controller).
KeyComponent, _ = tag.NewKey("component")
// KeyExportedComponent groups metrics by their component.
// The "component" metric tag overlaps with a resource tag exported by
// resource_to_telemetry_conversion when using Prometheus. So it's renamed
// "exported_component" when exported to Prometheus.
// TODO: Fix this naming overlap by renaming the "component" metric tag.
// Possible values: source, sync, rendering, readiness (from Resource Group Controller).
KeyExportedComponent, _ = tag.NewKey("exported_component")
// KeyErrorClass groups metrics by their error code.
KeyErrorClass, _ = tag.NewKey("errorclass")
// KeyStatus groups metrics by their status. Possible values: success, error.
KeyStatus, _ = tag.NewKey("status")
// KeyType groups metrics by their resource Kind.
KeyType, _ = tag.NewKey("type")
// KeyInternalErrorSource groups the InternalError metrics by their source. Possible values: parser, differ, remediator.
KeyInternalErrorSource, _ = tag.NewKey("source")
// KeyParserSource groups the metrics for the parser by their source. Possible values: read, parse, update.
KeyParserSource, _ = tag.NewKey("source")
// KeyTrigger groups metrics by their trigger. Possible values: retry, watchUpdate, managementConflict, resync, reimport.
KeyTrigger, _ = tag.NewKey("trigger")
// KeyCommit groups metrics by their git commit. Even though this tag has a high cardinality,
// it is only used by the `last_sync_timestamp` and `last_apply_timestamp` metrics.
// These are both aggregated as LastValue metrics so the number of recorded values will always be
// at most 1 per git commit.
KeyCommit, _ = tag.NewKey("commit")
// KeyContainer groups metrics by their container names. Possible values: reconciler, git-sync.
// TODO: replace with k8s.container.name resource attribute
KeyContainer, _ = tag.NewKey("container")
// KeyResourceType groups metrics by their resource types. Possible values: cpu, memory.
KeyResourceType, _ = tag.NewKey("resource")
)
var (
// ResourceKeySyncKind groups metrics by the Sync kind. Possible values: RootSync, RepoSync.
ResourceKeySyncKind, _ = tag.NewKey("configsync_sync_kind")
// ResourceKeySyncName groups metrics by the Sync name.
ResourceKeySyncName, _ = tag.NewKey("configsync_sync_name")
// ResourceKeySyncNamespace groups metrics by the Sync namespace.
ResourceKeySyncNamespace, _ = tag.NewKey("configsync_sync_namespace")
// ResourceKeyDeploymentName groups metrics by k8s deployment name.
ResourceKeyDeploymentName, _ = tag.NewKey("k8s_deployment_name")
// ResourceKeyPodName groups metrics by k8s pod name.
ResourceKeyPodName, _ = tag.NewKey("k8s_pod_name")
)
The ResourceKey* metric tag keys declared above are available from the otel-collector Prometheus exporter. They are created from resource attributes using the resource_to_telemetry_conversion feature.
var ( // APICallDurationView aggregates the APICallDuration metric measurements. APICallDurationView = &view.View{ Name: APICallDuration.Name(), Measure: APICallDuration, Description: "The latency distribution of API server calls", TagKeys: []tag.Key{KeyOperation, KeyType, KeyStatus}, Aggregation: view.Distribution(distributionBounds...), } // ReconcilerErrorsView aggregates the ReconcilerErrors metric measurements. ReconcilerErrorsView = &view.View{ Name: ReconcilerErrors.Name(), Measure: ReconcilerErrors, Description: "The current number of errors in the RootSync and RepoSync reconcilers", TagKeys: []tag.Key{KeyComponent, KeyErrorClass}, Aggregation: view.LastValue(), } // PipelineErrorView aggregates the PipelineError metric measurements. // Definition here must exactly match the definition in the resource-group // controller, or the Prometheus exporter will error. b/247516388 // https://github.com/GoogleContainerTools/kpt-resource-group/blob/main/controllers/metrics/views.go#L123 PipelineErrorView = &view.View{ Name: PipelineError.Name(), Measure: PipelineError, Description: "A boolean value indicates if error happened from different stages when syncing a commit", TagKeys: []tag.Key{KeyName, KeyReconcilerType, KeyComponent}, Aggregation: view.LastValue(), } // ReconcileDurationView aggregates the ReconcileDuration metric measurements. ReconcileDurationView = &view.View{ Name: ReconcileDuration.Name(), Measure: ReconcileDuration, Description: "The latency distribution of RootSync and RepoSync reconcile events", TagKeys: []tag.Key{KeyStatus}, Aggregation: view.Distribution(distributionBounds...), } // ParserDurationView aggregates the ParserDuration metric measurements. 
ParserDurationView = &view.View{ Name: ParserDuration.Name(), Measure: ParserDuration, Description: "The latency distribution of the parse-apply-watch loop", TagKeys: []tag.Key{KeyStatus, KeyTrigger, KeyParserSource}, Aggregation: view.Distribution(longDistributionBounds...), } // LastSyncTimestampView aggregates the LastSyncTimestamp metric measurements. LastSyncTimestampView = &view.View{ Name: LastSync.Name(), Measure: LastSync, Description: "The timestamp of the most recent sync from Git", TagKeys: []tag.Key{KeyCommit, KeyStatus}, Aggregation: view.LastValue(), } // DeclaredResourcesView aggregates the DeclaredResources metric measurements. DeclaredResourcesView = &view.View{ Name: DeclaredResources.Name(), Measure: DeclaredResources, Description: "The current number of declared resources parsed from Git", TagKeys: []tag.Key{KeyCommit}, Aggregation: view.LastValue(), } // ApplyOperationsView aggregates the ApplyOps metric measurements. ApplyOperationsView = &view.View{ Name: ApplyOperations.Name() + "_total", Measure: ApplyOperations, Description: "The total number of operations that have been performed to sync resources to source of truth", TagKeys: []tag.Key{KeyController, KeyOperation, KeyType, KeyStatus}, Aggregation: view.Count(), } // ApplyDurationView aggregates the ApplyDuration metric measurements. ApplyDurationView = &view.View{ Name: ApplyDuration.Name(), Measure: ApplyDuration, Description: "The latency distribution of applier resource sync events", TagKeys: []tag.Key{KeyCommit, KeyStatus}, Aggregation: view.Distribution(longDistributionBounds...), } // LastApplyTimestampView aggregates the LastApplyTimestamp metric measurements. LastApplyTimestampView = &view.View{ Name: LastApply.Name(), Measure: LastApply, Description: "The timestamp of the most recent applier resource sync event", TagKeys: []tag.Key{KeyCommit, KeyStatus}, Aggregation: view.LastValue(), } // ResourceFightsView aggregates the ResourceFights metric measurements. 
ResourceFightsView = &view.View{ Name: ResourceFights.Name() + "_total", Measure: ResourceFights, Description: "The total number of resources that are being synced too frequently", Aggregation: view.Count(), } // RemediateDurationView aggregates the RemediateDuration metric measurements. RemediateDurationView = &view.View{ Name: RemediateDuration.Name(), Measure: RemediateDuration, Description: "The latency distribution of remediator reconciliation events", TagKeys: []tag.Key{KeyType, KeyStatus}, Aggregation: view.Distribution(distributionBounds...), } // ResourceConflictsView aggregates the ResourceConflicts metric measurements. ResourceConflictsView = &view.View{ Name: ResourceConflicts.Name() + "_total", Measure: ResourceConflicts, Description: "The total number of resource conflicts resulting from a mismatch between the cached resources and cluster resources", TagKeys: []tag.Key{KeyCommit}, Aggregation: view.Count(), } // InternalErrorsView aggregates the InternalErrors metric measurements. InternalErrorsView = &view.View{ Name: InternalErrors.Name() + "_total", Measure: InternalErrors, Description: "The total number of internal errors triggered by Config Sync", TagKeys: []tag.Key{KeyInternalErrorSource}, Aggregation: view.Count(), } )
Functions ¶
func RecordAPICallDuration ¶
func RecordAPICallDuration(ctx context.Context, operation, status, kind string, startTime time.Time)
RecordAPICallDuration produces a measurement for the APICallDuration view.
func RecordApplyDuration ¶
RecordApplyDuration produces measurements for the ApplyDuration and LastApplyTimestamp views.
func RecordApplyOperation ¶
RecordApplyOperation produces a measurement for the ApplyOperations view.
func RecordDeclaredResources ¶
RecordDeclaredResources produces a measurement for the DeclaredResources view.
func RecordInternalError ¶
RecordInternalError produces measurements for the InternalErrors view.
func RecordLastSync ¶
RecordLastSync produces a measurement for the LastSync view.
func RecordParserDuration ¶
RecordParserDuration produces a measurement for the ParserDuration view.
func RecordPipelineError ¶
RecordPipelineError produces a measurement for the PipelineError view.
func RecordReconcileDuration ¶
RecordReconcileDuration produces a measurement for the ReconcileDuration view.
func RecordReconcilerErrors ¶
func RecordReconcilerErrors(ctx context.Context, component string, errs []v1beta1.ConfigSyncError)
RecordReconcilerErrors produces a measurement for the ReconcilerErrors view.
func RecordRemediateDuration ¶
RecordRemediateDuration produces measurements for the RemediateDuration view.
func RecordResourceConflict ¶
RecordResourceConflict produces measurements for the ResourceConflicts view.
func RecordResourceFight ¶
RecordResourceFight produces measurements for the ResourceFights view.
func RegisterOCAgentExporter ¶
RegisterOCAgentExporter creates the OC Agent metrics exporter.
func RegisterReconcilerManagerMetricsViews ¶
func RegisterReconcilerManagerMetricsViews() error
RegisterReconcilerManagerMetricsViews registers the views so that recorded metrics can be exported in the reconciler manager.
func RegisterReconcilerMetricsViews ¶
func RegisterReconcilerMetricsViews() error
RegisterReconcilerMetricsViews registers the views so that recorded metrics can be exported in the reconcilers.
func StatusTagKey ¶
StatusTagKey returns a string representation of the error, if it exists, otherwise success.
func StatusTagValueFromSummary ¶ added in v1.15.1
func StatusTagValueFromSummary(summary *v1beta1.ErrorSummary) string
StatusTagValueFromSummary returns error if the summary indicates at least 1 error, otherwise success.
Types ¶
This section is empty.