metriccache

package
v1.4.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 9, 2024 License: Apache-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

View Source
// String keys identifying node-level information entries.
// NOTE(review): presumably used as keys with KVStorage Get/Set — confirm against callers.
const (
	NodeCPUInfoKey          = "node_cpu_info"
	NodeNUMAInfoKey         = "node_numa_info"
	NodeLocalStorageInfoKey = "node_local_storage_info"
)

Variables

View Source
// Package-level metric resources. Each one is created from
// defaultMetricFactory for a single MetricKind; where withPropertySchema is
// chained, the listed MetricProperty names form the schema of that metric.
var (

	// Define all kinds of MetricResource.
	NodeCPUUsageMetric                 = defaultMetricFactory.New(NodeMetricCPUUsage)
	NodeMemoryUsageMetric              = defaultMetricFactory.New(NodeMetricMemoryUsage)
	NodeMemoryUsageWithPageCacheMetric = defaultMetricFactory.New(NodeMemoryWithPageCacheUsage)
	NodeGPUCoreUsageMetric             = defaultMetricFactory.New(NodeMetricGPUCoreUsage).withPropertySchema(MetricPropertyGPUMinor, MetricPropertyGPUDeviceUUID)
	NodeGPUMemUsageMetric              = defaultMetricFactory.New(NodeMetricGPUMemUsage).withPropertySchema(MetricPropertyGPUMinor, MetricPropertyGPUDeviceUUID)
	NodeGPUMemTotalMetric              = defaultMetricFactory.New(NodeMetricGPUMemTotal).withPropertySchema(MetricPropertyGPUMinor, MetricPropertyGPUDeviceUUID)

	// System resource usage is defined as an independent metric. Although it
	// can be calculated as node - sum(pod), the time series of different
	// metric types are unaligned, which makes them hard to aggregate.
	SystemCPUUsageMetric    = defaultMetricFactory.New(SysMetricCPUUsage)
	SystemMemoryUsageMetric = defaultMetricFactory.New(SysMetricMemoryUsage)

	// Pod usage metrics, keyed by pod UID.
	PodCPUUsageMetric                 = defaultMetricFactory.New(PodMetricCPUUsage).withPropertySchema(MetricPropertyPodUID)
	PodMemUsageMetric                 = defaultMetricFactory.New(PodMetricMemoryUsage).withPropertySchema(MetricPropertyPodUID)
	PodMemoryUsageWithPageCacheMetric = defaultMetricFactory.New(PodMemoryWithPageCacheUsage).withPropertySchema(MetricPropertyPodUID)

	// Pod CPU throttling and per-GPU usage metrics.
	PodCPUThrottledMetric = defaultMetricFactory.New(PodMetricCPUThrottled).withPropertySchema(MetricPropertyPodUID)
	PodGPUCoreUsageMetric = defaultMetricFactory.New(PodMetricGPUCoreUsage).withPropertySchema(MetricPropertyPodUID, MetricPropertyGPUMinor, MetricPropertyGPUDeviceUUID)
	PodGPUMemUsageMetric  = defaultMetricFactory.New(PodMetricGPUMemUsage).withPropertySchema(MetricPropertyPodUID, MetricPropertyGPUMinor, MetricPropertyGPUDeviceUUID)

	// Container metrics, keyed by container ID (plus GPU minor and device UUID for the GPU ones).
	ContainerCPUUsageMetric                 = defaultMetricFactory.New(ContainerMetricCPUUsage).withPropertySchema(MetricPropertyContainerID)
	ContainerMemUsageMetric                 = defaultMetricFactory.New(ContainerMetricMemoryUsage).withPropertySchema(MetricPropertyContainerID)
	ContainerMemoryUsageWithPageCacheMetric = defaultMetricFactory.New(ContainerMemoryWithPageCacheUsage).withPropertySchema(MetricPropertyContainerID)
	ContainerGPUCoreUsageMetric             = defaultMetricFactory.New(ContainerMetricGPUCoreUsage).withPropertySchema(MetricPropertyContainerID, MetricPropertyGPUMinor, MetricPropertyGPUDeviceUUID)
	ContainerGPUMemUsageMetric              = defaultMetricFactory.New(ContainerMetricGPUMemUsage).withPropertySchema(MetricPropertyContainerID, MetricPropertyGPUMinor, MetricPropertyGPUDeviceUUID)
	ContainerCPUThrottledMetric             = defaultMetricFactory.New(ContainerMetricCPUThrottled).withPropertySchema(MetricPropertyContainerID)
	// Cold memory metrics: memory usage with hot pages and cold page size,
	// at node, pod, container and host application level.
	NodeMemoryWithHotPageUsageMetric      = defaultMetricFactory.New(NodeMemoryWithHotPageUsage)
	PodMemoryWithHotPageUsageMetric       = defaultMetricFactory.New(PodMemoryWithHotPageUsage).withPropertySchema(MetricPropertyPodUID)
	ContainerMemoryWithHotPageUsageMetric = defaultMetricFactory.New(ContainerMemoryWithHotPageUsage).withPropertySchema(MetricPropertyContainerID)
	HostAppMemoryWithHotPageUsageMetric   = defaultMetricFactory.New(HostAppMemoryWithHotPageUsage).withPropertySchema(MetricPropertyHostAppName)
	NodeMemoryColdPageSizeMetric          = defaultMetricFactory.New(NodeMemoryColdPageSize)
	PodMemoryColdPageSizeMetric           = defaultMetricFactory.New(PodMemoryColdPageSize).withPropertySchema(MetricPropertyPodUID)
	HostAppMemoryColdPageSizeMetric       = defaultMetricFactory.New(HostAppMemoryColdPageSize).withPropertySchema(MetricPropertyHostAppName)
	ContainerMemoryColdPageSizeMetric     = defaultMetricFactory.New(ContainerMemoryColdPageSize).withPropertySchema(MetricPropertyContainerID)

	// CPI
	ContainerCPI = defaultMetricFactory.New(ContainerMetricCPI).withPropertySchema(MetricPropertyPodUID, MetricPropertyContainerID, MetricPropertyCPIResource)

	// PSI
	ContainerPSIMetric                 = defaultMetricFactory.New(ContainerMetricPSI).withPropertySchema(MetricPropertyPodUID, MetricPropertyContainerID, MetricPropertyPSIResource, MetricPropertyPSIPrecision, MetricPropertyPSIDegree)
	ContainerPSICPUFullSupportedMetric = defaultMetricFactory.New(ContainerMetricPSICPUFullSupported).withPropertySchema(MetricPropertyPodUID, MetricPropertyContainerID)
	PodPSIMetric                       = defaultMetricFactory.New(PodMetricPSI).withPropertySchema(MetricPropertyPodUID, MetricPropertyPSIResource, MetricPropertyPSIPrecision, MetricPropertyPSIDegree)
	PodPSICPUFullSupportedMetric       = defaultMetricFactory.New(PodMetricPSICPUFullSupported).withPropertySchema(MetricPropertyPodUID)

	// BE
	NodeBEMetric = defaultMetricFactory.New(NodeMetricBE).withPropertySchema(MetricPropertyBEResource, MetricPropertyBEAllocation)

	// Host Application
	HostAppCPUUsageMetric                 = defaultMetricFactory.New(HostAppCPUUsage).withPropertySchema(MetricPropertyHostAppName)
	HostAppMemoryUsageMetric              = defaultMetricFactory.New(HostAppMemoryUsage).withPropertySchema(MetricPropertyHostAppName)
	HostAppMemoryUsageWithPageCacheMetric = defaultMetricFactory.New(HostAppMemoryWithPageCacheUsage).withPropertySchema(MetricPropertyHostAppName)
)
View Source
// MetricPropertiesFunc bundles the constructors of metric property maps.
// Every field receives the property values as plain strings, in the order of
// its parameters, and returns a fresh map[MetricProperty]string holding them
// under the matching MetricProperty keys.
var MetricPropertiesFunc = struct {
	Pod                 func(string) map[MetricProperty]string
	Container           func(string) map[MetricProperty]string
	GPU                 func(string, string) map[MetricProperty]string
	PSICPUFullSupported func(string, string) map[MetricProperty]string
	ContainerCPI        func(string, string, string) map[MetricProperty]string
	PodPSI              func(string, string, string, string) map[MetricProperty]string
	ContainerPSI        func(string, string, string, string, string) map[MetricProperty]string
	PodGPU              func(string, string, string) map[MetricProperty]string
	ContainerGPU        func(string, string, string) map[MetricProperty]string
	NodeBE              func(string, string) map[MetricProperty]string
	HostApplication     func(string) map[MetricProperty]string
}{
	// Single-property builders.
	Pod: func(uid string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyPodUID: uid,
		}
	},
	Container: func(id string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyContainerID: id,
		}
	},
	HostApplication: func(name string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyHostAppName: name,
		}
	},

	// GPU builders: device minor and UUID, optionally scoped to a pod or container.
	GPU: func(gpuMinor, gpuUUID string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyGPUMinor:      gpuMinor,
			MetricPropertyGPUDeviceUUID: gpuUUID,
		}
	},
	PodGPU: func(uid, gpuMinor, gpuUUID string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyPodUID:        uid,
			MetricPropertyGPUMinor:      gpuMinor,
			MetricPropertyGPUDeviceUUID: gpuUUID,
		}
	},
	ContainerGPU: func(id, gpuMinor, gpuUUID string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyContainerID:   id,
			MetricPropertyGPUMinor:      gpuMinor,
			MetricPropertyGPUDeviceUUID: gpuUUID,
		}
	},

	// CPI builder.
	ContainerCPI: func(uid, id, resource string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyPodUID:      uid,
			MetricPropertyContainerID: id,
			MetricPropertyCPIResource: resource,
		}
	},

	// PSI builders.
	PSICPUFullSupported: func(uid, id string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyPodUID:      uid,
			MetricPropertyContainerID: id,
		}
	},
	PodPSI: func(uid, resource, precision, degree string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyPodUID:       uid,
			MetricPropertyPSIResource:  resource,
			MetricPropertyPSIPrecision: precision,
			MetricPropertyPSIDegree:    degree,
		}
	},
	ContainerPSI: func(uid, id, resource, precision, degree string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyPodUID:       uid,
			MetricPropertyContainerID:  id,
			MetricPropertyPSIResource:  resource,
			MetricPropertyPSIPrecision: precision,
			MetricPropertyPSIDegree:    degree,
		}
	},

	// BE builder: resource name and allocation kind.
	NodeBE: func(resource, allocation string) map[MetricProperty]string {
		return map[MetricProperty]string{
			MetricPropertyBEResource:   resource,
			MetricPropertyBEAllocation: allocation,
		}
	},
}

MetricPropertiesFunc is a collection of functions that generate metric property key-value maps, used for metric sample generation and queries.

Functions

This section is empty.

Types

type AggregateInfo

// AggregateInfo holds two optional (nil-able) timestamps and a count.
// NOTE(review): presumably the time range and number of the aggregated
// metrics — confirm against the producer of this struct.
type AggregateInfo struct {
	// TODO only support node resource metric now
	MetricStart *time.Time
	MetricEnd   *time.Time

	MetricsCount int64
}

func (*AggregateInfo) TimeRangeDuration added in v1.1.0

func (a *AggregateInfo) TimeRangeDuration() time.Duration

type AggregateParam

// AggregateParam defines the field names of the value and the time in a
// series struct.
type AggregateParam struct {
	// ValueFieldName is the name of the value field.
	ValueFieldName string
	// TimeFieldName is the name of the time field.
	TimeFieldName string
}

AggregateParam defines the field name of value and time in series struct

type AggregateResult added in v1.3.0

// AggregateResult embeds MetricResult and can additionally produce a value
// for a given AggregationType.
type AggregateResult interface {
	MetricResult
	// Count returns an int.
	// NOTE(review): presumably the number of collected points — confirm.
	Count() int
	// Value returns the float64 value for aggregation type t, or an error.
	Value(t AggregationType) (float64, error)
	// TimeRangeDuration returns a time.Duration.
	// NOTE(review): presumably the time span covered by the result — confirm.
	TimeRangeDuration() time.Duration
}

AggregateResult inherits MetricResult, and can also generate a value according to the given AggregationType

type AggregateResultFactory added in v1.3.0

// AggregateResultFactory creates AggregateResult values from a MetricMeta.
type AggregateResultFactory interface {
	// New returns an AggregateResult for the given meta.
	New(meta MetricMeta) AggregateResult
}

AggregateResultFactory generates AggregateResult according to MetricMeta

// DefaultAggregateResultFactory is the package-level AggregateResultFactory,
// backed by a pointer to the unexported aggregateResultFactory type.
var DefaultAggregateResultFactory AggregateResultFactory = &aggregateResultFactory{}

type AggregationFunc added in v0.6.0

// AggregationFunc is the signature of an aggregation: it takes an untyped
// input and an AggregateParam and returns a float64 value or an error.
type AggregationFunc func(interface{}, AggregateParam) (float64, error)

AggregationFunc receives a list of series and generates the final value according to AggregateParam

type AggregationType

// AggregationType is the string name of an aggregation.
type AggregationType string

// Supported AggregationType values.
// NOTE(review): the P95 and P90 values are upper-case ("P95", "P90") while
// p99 and p50 are lower-case; callers comparing raw strings must match the
// exact case declared here.
const (
	AggregationTypeAVG   AggregationType = "avg"
	AggregationTypeP99   AggregationType = "p99"
	AggregationTypeP95   AggregationType = "P95"
	AggregationTypeP90   AggregationType = "P90"
	AggregationTypeP50   AggregationType = "p50"
	AggregationTypeLast  AggregationType = "last"
	AggregationTypeCount AggregationType = "count"
)

type Appendable added in v1.3.0

// Appendable allows creating appenders.
type Appendable interface {
	// Appender returns a new appender for the storage.
	Appender() Appender
}

Appendable allows creating appenders.

type Appender added in v1.3.0

// Appender provides batch appends of MetricSample against a storage.
type Appender interface {
	// Append adds a list of MetricSample for the given series.
	Append(s []MetricSample) error
	// Commit submits the MetricSample and purges the batch. If Commit
	// returns a non-nil error, it also rolls back all modifications made in
	// the appender so far, as Rollback would do. In any case, an Appender
	// must not be used anymore after Commit has been called.
	// NOTE(review): this interface declares no Rollback method; the wording
	// above refers to rollback behavior only — confirm with the implementation.
	Commit() error
}

Appender provides batch appends of MetricSample against a storage

type Config

// Config groups the settings of the metric cache: two second-based integer
// settings for metric GC and expiry, followed by TSDB options.
type Config struct {
	MetricGCIntervalSeconds int
	MetricExpireSeconds     int

	// TSDB options.
	TSDBPath              string
	TSDBRetentionDuration time.Duration
	TSDBEnablePromMetrics bool
	TSDBStripeSize        int
	TSDBMaxBytes          int64

	// The options below are not necessary for now, since the TSDB currently
	// lives in an in-memory empty dir.
	TSDBWALSegmentSize            int
	TSDBMaxBlockChunkSegmentSize  int64
	TSDBMinBlockDuration          time.Duration
	TSDBMaxBlockDuration          time.Duration
	TSDBHeadChunksWriteBufferSize int
}

func NewDefaultConfig

func NewDefaultConfig() *Config

func (*Config) InitFlags

func (c *Config) InitFlags(fs *flag.FlagSet)

type Devices added in v1.3.0

// Devices is a named type whose underlying type is util.Devices.
type Devices util.Devices

type InterferenceMetricName added in v1.0.0

// InterferenceMetricName is a string-based name type.
// NOTE(review): presumably names an interference metric — confirm usage.
type InterferenceMetricName string

type KVStorage added in v1.3.0

// KVStorage is a key-value store with untyped keys and values.
type KVStorage interface {
	// Get returns the value for key and a bool.
	// NOTE(review): presumably the bool reports whether the key was found — confirm.
	Get(key interface{}) (interface{}, bool)
	// Set takes a key and a value.
	// NOTE(review): presumably stores value under key — confirm.
	Set(key, value interface{})
}

func NewMemoryStorage added in v1.3.0

func NewMemoryStorage() KVStorage

type MetricCache

// MetricCache combines a TSDBStorage and a KVStorage with a Run method.
type MetricCache interface {
	// Run takes a stop channel and returns an error.
	// NOTE(review): presumably runs until stopCh is closed — confirm.
	Run(stopCh <-chan struct{}) error
	TSDBStorage
	KVStorage
}

func NewMetricCache

func NewMetricCache(cfg *Config) (MetricCache, error)

type MetricFactory added in v1.3.0

// MetricFactory generates a MetricResource for a specified kind.
type MetricFactory interface {
	// New generates a MetricResource for the given kind.
	New(metricKind MetricKind) MetricResource
}

MetricFactory generates MetricResource by specified kind

func NewMetricFactory added in v1.3.0

func NewMetricFactory() MetricFactory

type MetricKind added in v1.3.0

// MetricKind is the string name of a kind of metric.
type MetricKind string

MetricKind represents all kinds of metrics

// MetricKind values declared by this package, grouped by scope
// (node, system, pod, container, host application) and by feature.
const (
	// Node
	NodeMetricCPUUsage           MetricKind = "node_cpu_usage"
	NodeMetricMemoryUsage        MetricKind = "node_memory_usage"
	NodeMemoryWithPageCacheUsage MetricKind = "node_memory_usage_with_page_cache"
	NodeMetricGPUCoreUsage       MetricKind = "node_gpu_core_usage"
	NodeMetricGPUMemUsage        MetricKind = "node_gpu_memory_usage"
	NodeMetricGPUMemTotal        MetricKind = "node_gpu_memory_total"

	// System
	SysMetricCPUUsage    MetricKind = "sys_cpu_usage"
	SysMetricMemoryUsage MetricKind = "sys_memory_usage"

	// NodeBE
	NodeMetricBE MetricKind = "node_be"

	// Pod
	PodMetricCPUUsage           MetricKind = "pod_cpu_usage"
	PodMetricMemoryUsage        MetricKind = "pod_memory_usage"
	PodMemoryWithPageCacheUsage MetricKind = "pod_memory_usage_with_page_cache"
	PodMetricGPUCoreUsage       MetricKind = "pod_gpu_core_usage"
	PodMetricGPUMemUsage        MetricKind = "pod_gpu_memory_usage"

	// Container
	ContainerMetricCPUUsage           MetricKind = "container_cpu_usage"
	ContainerMetricMemoryUsage        MetricKind = "container_memory_usage"
	ContainerMemoryWithPageCacheUsage MetricKind = "container_memory_usage_with_page_cache"
	ContainerMetricGPUCoreUsage       MetricKind = "container_gpu_core_usage"
	ContainerMetricGPUMemUsage        MetricKind = "container_gpu_memory_usage"

	// CPU throttling
	PodMetricCPUThrottled       MetricKind = "pod_cpu_throttled"
	ContainerMetricCPUThrottled MetricKind = "container_cpu_throttled"

	// Host application
	HostAppCPUUsage                 MetricKind = "host_application_cpu_usage"
	HostAppMemoryUsage              MetricKind = "host_application_memory_usage"
	HostAppMemoryWithPageCacheUsage MetricKind = "host_application_memory_usage_with_page_cache"

	// CPI
	ContainerMetricCPI MetricKind = "container_cpi"

	// PSI
	ContainerMetricPSI                 MetricKind = "container_psi"
	ContainerMetricPSICPUFullSupported MetricKind = "container_psi_cpu_full_supported"
	PodMetricPSI                       MetricKind = "pod_psi"
	PodMetricPSICPUFullSupported       MetricKind = "pod_psi_cpu_full_supported"

	// Cold memory metrics: memory usage with hot pages and cold page size.
	NodeMemoryWithHotPageUsage      MetricKind = "node_memory_with_hot_page_usage"
	PodMemoryWithHotPageUsage       MetricKind = "pod_memory_with_hot_page_usage"
	ContainerMemoryWithHotPageUsage MetricKind = "container_memory_with_hot_page_usage"
	HostAppMemoryWithHotPageUsage   MetricKind = "host_application_memory_with_hot_page_usage"
	NodeMemoryColdPageSize          MetricKind = "node_memory_cold_page_size"
	HostAppMemoryColdPageSize       MetricKind = "host_application_memory_cold_page_size"
	PodMemoryColdPageSize           MetricKind = "pod_memory_cold_page_size"
	ContainerMemoryColdPageSize     MetricKind = "container_memory_cold_page_size"
)

type MetricMeta added in v1.3.0

// MetricMeta is the meta info of a metric: its kind and its properties.
type MetricMeta interface {
	// GetKind should return the metric kind, like pod_cpu_usage or pod_cpu_throttled.
	GetKind() string
	// GetProperties should return the properties of the metric, like pod_uid, container_id or gpu_device_uuid.
	GetProperties() map[string]string
}

MetricMeta is the meta info of metric

type MetricProperty added in v1.3.0

// MetricProperty is the string name of a metric property.
type MetricProperty string

MetricProperty is the property of metric

// MetricProperty names declared by this package.
const (
	// Identity and GPU device properties.
	MetricPropertyPodUID        MetricProperty = "pod_uid"
	MetricPropertyContainerID   MetricProperty = "container_id"
	MetricPropertyPriorityClass MetricProperty = "priority_class"
	MetricPropertyGPUMinor      MetricProperty = "gpu_minor"
	MetricPropertyGPUDeviceUUID MetricProperty = "gpu_device_uuid"

	// CPI
	MetricPropertyCPIResource MetricProperty = "cpi_resource"

	// PSI
	MetricPropertyPSIResource  MetricProperty = "psi_resource"
	MetricPropertyPSIPrecision MetricProperty = "psi_precision"
	MetricPropertyPSIDegree    MetricProperty = "psi_degree"

	// BE
	MetricPropertyBEResource   MetricProperty = "be_resource"
	MetricPropertyBEAllocation MetricProperty = "be_allocation"

	// Host application
	MetricPropertyHostAppName MetricProperty = "host_app_name"
)

type MetricPropertyValue added in v1.3.0

// MetricPropertyValue is the string value of a metric property.
type MetricPropertyValue string

MetricPropertyValue is the property value

// MetricPropertyValue values declared by this package, grouped by the
// feature their names refer to (CPI, PSI, BE).
const (
	// CPI resources.
	CPIResourceCycle       MetricPropertyValue = "cycle"
	CPIResourceInstruction MetricPropertyValue = "instruction"

	// PSI resources, precisions and degrees.
	PSIResourceCPU  MetricPropertyValue = "cpu"
	PSIResourceMem  MetricPropertyValue = "mem"
	PSIResourceIO   MetricPropertyValue = "io"
	PSIPrecision10  MetricPropertyValue = "10"
	PSIPrecision60  MetricPropertyValue = "60"
	PSIPrecision300 MetricPropertyValue = "300"
	PSIDegreeFull   MetricPropertyValue = "full"
	PSIDegreeSome   MetricPropertyValue = "some"

	// BE resource and allocation kinds.
	BEResourceCPU                 MetricPropertyValue = "cpu"
	BEResourceAllocationUsage     MetricPropertyValue = "usage"
	BEResourceAllocationRealLimit MetricPropertyValue = "real-limit"
	BEResourceAllocationRequest   MetricPropertyValue = "request"
)

type MetricResource added in v1.3.0

// MetricResource is the abstraction of a metric (for example the node CPU
// usage metric) from which samples and query metas are built.
type MetricResource interface {

	// GenerateSample produces the detailed sample from the specified k-v properties, timestamp and value.
	// All properties in the schema of the MetricResource must be set, otherwise it returns an error.
	GenerateSample(properties map[MetricProperty]string, t time.Time, val float64) (MetricSample, error)

	// BuildQueryMeta produces the MetricMeta for a query. The properties used for the query must already be
	// registered in the schema, otherwise it returns an error.
	BuildQueryMeta(properties map[MetricProperty]string) (MetricMeta, error)
	// contains filtered or unexported methods
}

MetricResource represents a metric such as NodeCPUUsage; it is an abstract type used to generate samples or build query meta. All MetricResources should be initialized before use.

type MetricResult added in v1.3.0

// MetricResult embeds MetricMeta and collects the series of a query.
type MetricResult interface {
	MetricMeta
	// AddSeries receives a series and saves it in the MetricResult, so it can be used for generating the final value.
	AddSeries(promstorage.Series) error
}

MetricResult contains a set of series; it can also produce a final result such as an aggregation value

type MetricSample added in v1.3.0

// MetricSample is a sample of a specified metric. It embeds MetricMeta; its
// remaining methods are unexported.
type MetricSample interface {
	MetricMeta
	// contains filtered or unexported methods
}

MetricSample is a sample of specified metric, e.g. '{__name__: node_cpu_usage} = <2023-04-18:20:00:00, 4.1 core>'

type NodeCPUInfo

// NodeCPUInfo is a named type whose underlying type is util.LocalCPUInfo.
type NodeCPUInfo util.LocalCPUInfo

type NodeLocalStorageInfo added in v1.3.0

// NodeLocalStorageInfo is a named type whose underlying type is util.LocalStorageInfo.
type NodeLocalStorageInfo util.LocalStorageInfo

type Point added in v1.3.0

// Point describes a single metric data point: a timestamp and a float64 value.
type Point struct {
	Timestamp time.Time
	Value     float64
}

Point is the struct that describes a single metric data point

type Querier added in v1.3.0

// Querier provides querying access over time series data of a fixed time range.
type Querier interface {
	// Query adds the series that match the given meta to the MetricResult.
	// It allows passing hints that can help in optimizing the select,
	// but it is up to the implementation of MetricResult how to use them.
	Query(meta MetricMeta, hints *QueryHints, result MetricResult) error
}

Querier provides querying access over time series data of a fixed time range.

type QueryHints added in v1.3.0

// QueryHints specifies hints passed for a query. It currently declares no fields.
type QueryHints struct {
}

QueryHints specifies hints passed for a query. It is only an option for implementations of MetricResult to use, e.g. GroupedResult

type QueryParam

// QueryParam holds an aggregation type and an optional (nil-able) start and
// end time.
// NOTE(review): presumably the parameters of a metric query — confirm usage.
type QueryParam struct {
	Aggregate AggregationType
	Start     *time.Time
	End       *time.Time
}

func (*QueryParam) FillDefaultValue

func (q *QueryParam) FillDefaultValue()

type QueryResult

// QueryResult pairs an AggregateInfo pointer with an error value.
// NOTE(review): presumably Error is non-nil when the query failed — confirm.
type QueryResult struct {
	AggregateInfo *AggregateInfo
	Error         error
}

type Queryable added in v1.3.0

// Queryable handles queries against a storage.
type Queryable interface {
	// Querier returns a new querier over the data partition for the given time range.
	Querier(startTime, endTime time.Time) (Querier, error)
}

Queryable handles queries against a storage.

type TSDBStorage added in v1.3.0

// TSDBStorage defines a time-series database, providing insert (Appendable)
// and query (Queryable) interfaces.
type TSDBStorage interface {
	Appendable
	Queryable

	// Close closes the storage and all its underlying resources.
	Close() error
}

TSDBStorage defines time-series type DB, providing insert and query interface

func NewTSDBStorage added in v1.3.0

func NewTSDBStorage(conf *Config) (TSDBStorage, error)

Directories

Path Synopsis
Package mock_metriccache is a generated GoMock package.
Package mock_metriccache is a generated GoMock package.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL