metrics

package
v0.0.0-...-c345e67 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 6, 2024 License: Apache-2.0 Imports: 28 Imported by: 0

Documentation

Index

Constants

View Source
// String-valued status enums declared by this package.
const (
	// Two-state outcome values of type MetricSimpleStatus.
	MetricSucceeded MetricSimpleStatus = "Succeeded"
	MetricFailed    MetricSimpleStatus = "Failed"

	// Outcome values of type DirectorStatResult, one per way a director
	// stat query can end.
	StatSucceeded DirectorStatResult = "Succeeded"
	StatNotFound  DirectorStatResult = "NotFound"
	StatTimeout   DirectorStatResult = "Timeout"
	StatCancelled DirectorStatResult = "Cancelled"
	StatForbidden DirectorStatResult = "Forbidden"
	// NOTE(review): the identifier is spelled "Unkown" while its value is
	// "UnknownErr"; the name is exported, so renaming it would break callers.
	StatUnkownErr DirectorStatResult = "UnknownErr"
)
View Source
// A shift amount (56) and a one-byte mask (0xff), both typed int64.
// NOTE(review): presumably used together as (value >> XROOTD_MON_PIDSHFT) &
// XROOTD_MON_PIDMASK to extract the top byte of a 64-bit XRootD monitoring
// identifier — confirm against the XRootD monitoring specification.
const (
	XROOTD_MON_PIDSHFT = int64(56)
	XROOTD_MON_PIDMASK = int64(0xff)
)

Variables

View Source
// Prometheus metrics exported by the director. Every metric is created through
// promauto, so it is registered with the default registry at package
// initialisation.
var (
	// PelicanDirectorFileTransferTestSuite counts file transfer test suites
	// started by the director, labelled by the server under test.
	PelicanDirectorFileTransferTestSuite = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "pelican_director_total_ftx_test_suite",
		Help: "The total number of file transfer test suite the director issued. A new test suite is a new goroutine started at origin's advertisement to the director and is cancelled when such registration expired in director's TTL cache",
	}, []string{"server_name", "server_web_url", "server_type"})

	// PelicanDirectorActiveFileTransferTestSuite tracks how many file transfer
	// test suites are currently active.
	PelicanDirectorActiveFileTransferTestSuite = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_director_active_ftx_test_suite",
		Help: "The number of active director file transfer test suite. The number of active goroutines that executes test run",
	}, []string{"server_name", "server_web_url", "server_type"})

	// PelicanDirectorFileTransferTestsRuns counts individual test runs, labelled
	// additionally by the run status and the report status.
	PelicanDirectorFileTransferTestsRuns = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "pelican_director_total_ftx_test_runs",
		Help: "The number of file transfer test runs the director issued. A test run is a cycle of upload/download/delete test file, which is executed per 15s per origin (by default)",
	}, []string{"server_name", "server_web_url", "server_type", "status", "report_status"})

	// PelicanDirectorAdvertisementsRecievedTotal counts advertisements received
	// from origin and cache servers. The identifier's "Recieved" spelling is
	// kept because the name is exported.
	PelicanDirectorAdvertisementsRecievedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "pelican_director_advertisements_received_total",
		Help: "The total number of advertisement the director received from the origin and cache servers. Labelled by status_code, server_name, server_type: Origin|Cache, server_web_url, namespace_prefix",
	}, []string{"server_name", "server_web_url", "server_type", "status_code", "namespace_prefix"})

	// PelicanDirectorMapItemsTotal reports the item count of each named map in
	// the director.
	PelicanDirectorMapItemsTotal = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_director_map_items_total",
		Help: "The total number of map items in the director, by the name of the map",
	}, []string{"name"}) // name: healthTestUtils, filteredServers, originStatUtils

	// PelicanDirectorTTLCache reports statistics for each named TTL cache.
	PelicanDirectorTTLCache = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_director_ttl_cache",
		Help: "The statistics of various TTL caches",
	}, []string{"name", "type"}) // name: serverAds, jwks; type: evictions, insertions, hits, misses, total

	// PelicanDirectorStatActive tracks stat queries currently in flight.
	PelicanDirectorStatActive = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_director_stat_active",
		Help: "The active stat queries in the director",
	}, []string{"server_name", "server_url", "server_type"})

	// PelicanDirectorStatTotal reports stat queries by result.
	// NOTE(review): this "_total" metric is declared as a gauge rather than a
	// counter; the type is left unchanged because callers may rely on gauge
	// methods.
	PelicanDirectorStatTotal = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_director_stat_total",
		Help: "The total stat queries the director issues. The result can be Succeeded, NotFound, Timeout, Cancelled, Forbidden, or UnknownErr",
	}, []string{"server_name", "server_url", "server_type", "result", "cached_result"}) // result: see enums for DirectorStatResult

	// PelicanDirectorServerCount reports the servers currently known to the
	// director.
	PelicanDirectorServerCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_director_server_count",
		Help: "The number of servers currently recognized by the Director, delineated by pelican/non-pelican and origin/cache",
	}, []string{"server_name", "server_type", "from_topology"})

	// PelicanDirectorClientVersionTotal counts requests per client version and
	// service.
	PelicanDirectorClientVersionTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "pelican_director_client_version_total",
		Help: "The total number of requests from client versions.",
	}, []string{"version", "service"})

	// PelicanDirectorRedirectionsTotal counts redirections issued by the
	// director.
	PelicanDirectorRedirectionsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "pelican_director_redirections_total",
		Help: "The total number of redirections the director issued.",
	}, []string{"destination", "status_code", "version", "network"})

	// PelicanDirectorGeoIPErrors counts failures to resolve coordinates through
	// the GeoIP database.
	PelicanDirectorGeoIPErrors = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "pelican_director_geoip_errors",
		Help: "The total number of errors encountered trying to resolve coordinates using the GeoIP MaxMind database",
	}, []string{"network", "source", "proj"})
)
View Source
// Component health metrics, each a gauge vector keyed by the "component" label.
var (
	// PelicanHealthStatus holds the health status of each component.
	// NOTE(review): presumably the gauge value is a HealthStatusEnum — confirm
	// against SetComponentHealthStatus.
	PelicanHealthStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_component_health_status",
		Help: "The health status of various components",
	}, []string{"component"})

	// PelicanHealthLastUpdate holds the timestamp of each component's last
	// status update.
	PelicanHealthLastUpdate = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "pelican_component_health_status_last_update",
		Help: "Last update timestamp of components health status",
	}, []string{"component"})
)
View Source
// Prometheus metrics for XRootD monitoring data, all created through promauto.
var (
	// PacketsReceived counts monitoring UDP packets received.
	PacketsReceived = promauto.NewCounter(prometheus.CounterOpts{
		Name: "xrootd_monitoring_packets_received",
		Help: "The total number of monitoring UDP packets received",
	})

	// TransferReadvSegs counts segments in readv operations.
	TransferReadvSegs = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "xrootd_transfer_readv_segments_count",
		Help: "Number of segments in readv operations",
	}, []string{"path", "ap", "dn", "role", "org", "proj", "network"})

	// TransferOps counts transfer operations; it carries the same labels as
	// TransferReadvSegs plus "type".
	TransferOps = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "xrootd_transfer_operations_count",
		Help: "Number of transfer operations performed",
	}, []string{"path", "ap", "dn", "role", "org", "proj", "type", "network"})

	// TransferBytes counts transferred bytes, with the same labels as
	// TransferOps.
	TransferBytes = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "xrootd_transfer_bytes",
		Help: "Bytes of transfers",
	}, []string{"path", "ap", "dn", "role", "org", "proj", "type", "network"})

	// Threads reports scheduler thread counts per "state".
	Threads = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "xrootd_sched_thread_count",
		Help: "Number of scheduler threads",
	}, []string{"state"})

	// Connections counts server connections in aggregate.
	Connections = promauto.NewCounter(prometheus.CounterOpts{
		Name: "xrootd_server_connection_count",
		Help: "Aggregate number of server connections",
	})

	// BytesXfer counts server bytes per "direction".
	// NOTE(review): the help text mentions only bytes read into the server,
	// while the label implies more than one direction — confirm.
	BytesXfer = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "xrootd_server_bytes",
		Help: "Number of bytes read into the server",
	}, []string{"direction"})

	// StorageVolume reports storage volume usage per namespace.
	StorageVolume = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "xrootd_storage_volume_bytes",
		Help: "Storage volume usage on the server",
	}, []string{"ns", "type", "server_type"}) // type: total/free; server_type: origin/cache

	// CacheAccess reports cache access bytes per path and access type.
	CacheAccess = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "xrootd_cache_access_bytes",
		Help: "Number of bytes the data requested is in the cache or not",
	}, []string{"path", "type"}) // type: hit/miss/bypass

	// ServerTotalIO counts storage operations in total.
	ServerTotalIO = promauto.NewCounter(prometheus.CounterOpts{
		Name: "xrootd_server_io_total",
		Help: "Total storage operations in origin/cache server",
	})

	// ServerActiveIO reports storage operations currently in progress.
	ServerActiveIO = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "xrootd_server_io_active",
		Help: "Number of ongoing storage operations in origin/cache server",
	})

	// ServerIOWaitTime accumulates time spent in storage operations.
	// NOTE(review): the unit is not stated here — confirm at the call site.
	ServerIOWaitTime = promauto.NewCounter(prometheus.CounterOpts{
		Name: "xrootd_server_io_wait_time",
		Help: "The aggregate time spent in storage operations in origin/cache server",
	})
)
View Source
// PelicanOSDFInstitutions is an unlabelled gauge holding the number of
// contributing institutions.
var PelicanOSDFInstitutions = promauto.NewGauge(prometheus.GaugeOpts{
	Name: "pelican_osdf_institution_count",
	Help: "Total number of contributing institutions",
})
View Source
// PelicanRegistryFederationNamespaces is a gauge vector, keyed by "status",
// holding the number of federation namespaces in the registry (server
// namespaces excluded, per the help text).
var PelicanRegistryFederationNamespaces = promauto.NewGaugeVec(prometheus.GaugeOpts{
	Name: "pelican_registry_federation_namespaces",
	Help: "The number of federation namespace associated with a public key, excluding server namespaces, in the registry.",
}, []string{"status"})

Functions

func ConfigureMonitoring

func ConfigureMonitoring(ctx context.Context, egrp *errgroup.Group) (int, error)

Set up listening for XRootD monitoring UDP packets and parsing them into Prometheus metrics.

`ctx` is the context used to listen for the server shutdown event so that internal cache eviction can be cleaned up.

func DeleteComponentHealthStatus added in v1.0.4

func DeleteComponentHealthStatus(name HealthStatusComponent)

func GetComponentStatus

func GetComponentStatus(comp HealthStatusComponent) (status string, err error)

Get the current health status of a component. Status can be critical|warning|ok|unknown

func HandlePacket

func HandlePacket(packet []byte) error

func HandleSummaryPacket

func HandleSummaryPacket(packet []byte) error

func LaunchShoveler

func LaunchShoveler(ctx context.Context, egrp *errgroup.Group, metricsPort int) (int, error)

func NullTermToString

func NullTermToString(nullTermBytes []byte) (str string)

func ParseTokenAuth added in v1.0.4

func ParseTokenAuth(tokenauth string) (userId UserId, record UserRecord, err error)

func SetComponentHealthStatus

func SetComponentHealthStatus(name HealthStatusComponent, state HealthStatusEnum, msg string)

Add/update the component health status. If you have a new component to record, please go to metrics/health and register your component as a new constant of type HealthStatusComponent. Also note that StatusUnknown is mostly for internal use only, please try to avoid setting this as your component status

Types

type CacheAccessStat

// CacheAccessStat groups three int64 tallies: Hit, Miss and Bypass.
// NOTE(review): presumably byte counts matching the hit/miss/bypass values of
// the CacheAccess metric's "type" label — confirm.
type CacheAccessStat struct {
	Hit    int64
	Miss   int64
	Bypass int64
}

type CacheGS

// CacheGS is the JSON shape of a cache g-stream record; each field is decoded
// from the key named in its json tag. Field meanings are defined by the XRootD
// monitoring documentation:
// https://xrootd.slac.stanford.edu/doc/dev56/xrd_monitoring.htm#_Toc138968526
type CacheGS struct {
	AccessCnt   uint32 `json:"access_cnt"`
	AttachT     int64  `json:"attach_t"`
	ByteBypass  int64  `json:"b_bypass"`
	ByteHit     int64  `json:"b_hit"`
	ByteMiss    int64  `json:"b_miss"`
	BlkSize     int    `json:"blk_size"`
	DetachT     int64  `json:"detach_t"`
	Event       string `json:"event"`
	Lfn         string `json:"lfn"`
	NBlocks     int    `json:"n_blks"`
	NBlocksDone int    `json:"n_blks_done"`
	NCksErrs    int    `json:"n_cks_errs"`
	Size        int64  `json:"size"`
}

Cache g-stream: https://xrootd.slac.stanford.edu/doc/dev56/xrd_monitoring.htm#_Toc138968526

type ComponentStatus

// ComponentStatus is the JSON representation of one component's health.
// Status is a string rather than a numeric enum; Message is omitted from the
// JSON output when empty.
type ComponentStatus struct {
	Status     string `json:"status"`
	Message    string `json:"message,omitempty"`
	LastUpdate int64  `json:"last_update"`
}

This is for API response so we want to display string representation of status

type DirectorFTXTestStatus

// DirectorFTXTestStatus is a distinct named type defined from
// MetricSimpleStatus.
type DirectorFTXTestStatus MetricSimpleStatus

type DirectorStatResult

// DirectorStatResult is a string type for the result of a director stat query.
type DirectorStatResult string

type FileId

// FileId wraps a 32-bit unsigned file identifier.
// NOTE(review): presumably the XRootD monitoring file ID — confirm.
type FileId struct {
	Id uint32
}

type FileRecord

// FileRecord holds a file's owning UserId and Path together with operation and
// byte tallies for read, readv and write.
// NOTE(review): presumably these are running totals kept per open file —
// confirm where the record is updated.
type FileRecord struct {
	UserId     UserId
	Path       string
	ReadOps    uint32
	ReadvOps   uint32
	WriteOps   uint32
	ReadvSegs  uint64
	ReadBytes  uint64
	ReadvBytes uint64
	WriteBytes uint64
}

type HealthStatus

// HealthStatus is the JSON health report: an overall status string under
// "status" and a per-component map under "components".
type HealthStatus struct {
	OverallStatus   string                                    `json:"status"`
	ComponentStatus map[HealthStatusComponent]ComponentStatus `json:"components"`
}

func GetHealthStatus

func GetHealthStatus() HealthStatus

type HealthStatusComponent added in v1.0.4

// HealthStatusComponent is the string name of a component whose health is
// tracked.
type HealthStatusComponent string

// Known components. Each constant is named <Services>_<Component>, where the
// prefix lists the services in which the component exists.
const (
	OriginCache_XRootD        HealthStatusComponent = "xrootd"
	OriginCache_CMSD          HealthStatusComponent = "cmsd"
	OriginCache_Federation    HealthStatusComponent = "federation" // Advertise to the director
	OriginCache_Director      HealthStatusComponent = "director"   // File transfer tests with director
	OriginCache_Registry      HealthStatusComponent = "registry"   // Register namespace at the registry
	DirectorRegistry_Topology HealthStatusComponent = "topology"   // Fetch data from OSDF topology
	Server_WebUI              HealthStatusComponent = "web-ui"
)

Naming convention for components:

ServiceName1Name2_ComponentName

e.g. "OriginCache_XRootD" means this component is available at both Origin and Cache. Please choose the largest possible scope for the component.

func (HealthStatusComponent) String added in v1.0.4

func (component HealthStatusComponent) String() string

type HealthStatusEnum added in v1.0.4

// HealthStatusEnum is the integer encoding of a component's health status.
type HealthStatusEnum int

// Values start at 1 (iota + 1), so the zero value of HealthStatusEnum matches
// none of these constants: StatusCritical=1, StatusWarning=2, StatusOK=3,
// StatusUnknown=4.
const (
	StatusCritical HealthStatusEnum = iota + 1
	StatusWarning
	StatusOK
	StatusUnknown // Do not abuse this enum. Use others when possible
)

HealthStatusEnum values are stored both as Prometheus metric values and in an internal struct.

func (HealthStatusEnum) String added in v1.0.4

func (status HealthStatusEnum) String() string

Unfortunately we don't have a better way to ensure the enum constants always have matched string representation, so we will return "Error: status string index out of range" as an indicator

type MetricSimpleStatus

// MetricSimpleStatus is a string type for a two-state outcome; its declared
// values are MetricSucceeded and MetricFailed.
type MetricSimpleStatus string

type PathList

// PathList wraps a slice of path strings.
type PathList struct {
	Paths []string
}

type SummaryCacheMemory

// SummaryCacheMemory maps the <size>, <used> and <wq> child elements of an XML
// element onto integers. It is used as the "mem" element of SummaryStat.
// NOTE(review): units are not stated here — confirm against the XRootD
// summary monitoring format.
type SummaryCacheMemory struct {
	Size int `xml:"size"`
	Used int `xml:"used"`
	Wq   int `xml:"wq"`
}

type SummaryCacheStore

// SummaryCacheStore maps the <size>, <used>, <min> and <max> child elements of
// an XML element onto integers. It is used as the "store" element of
// SummaryStat.
// NOTE(review): units are not stated here — confirm against the XRootD
// summary monitoring format.
type SummaryCacheStore struct {
	Size int `xml:"size"`
	Used int `xml:"used"`
	Min  int `xml:"min"`
	Max  int `xml:"max"`
}

type SummaryPath

// SummaryPath maps a <paths> XML element: Idx is parsed from the element's
// character data and Stats collects its <stats> children.
type SummaryPath struct {
	Idx   int               `xml:",chardata"`
	Stats []SummaryPathStat `xml:"stats"`
}

type SummaryPathStat

// SummaryPathStat maps one <stats> child of a paths element: the "id"
// attribute plus the path and its free/total space in kilobytes.
type SummaryPathStat struct {
	Id    string `xml:"id,attr"`
	Lp    string `xml:"lp"`   // The minimally reduced logical file system path i.e. top-level namespace
	Free  int    `xml:"free"` // Kilobytes available
	Total int    `xml:"tot"`  // Kilobytes allocated
}

type SummaryStat

// SummaryStat maps one <stats> element of a summary document. Id comes from
// the "id" attribute; the remaining fields come from child elements of the
// same name as their xml tag.
// NOTE(review): presumably only the fields relevant to a given Id are
// populated and the rest keep their zero values — confirm.
type SummaryStat struct {
	Id      SummaryStatType    `xml:"id,attr"`
	Total   int                `xml:"tot"`
	In      int                `xml:"in"`
	Out     int                `xml:"out"`
	Threads int                `xml:"threads"`
	Idle    int                `xml:"idle"`
	Paths   SummaryPath        `xml:"paths"` // For Oss Summary Data
	Store   SummaryCacheStore  `xml:"store"`
	Memory  SummaryCacheMemory `xml:"mem"`
}

type SummaryStatistics

// SummaryStatistics maps the root element of a summary document: the "ver"
// and "pgm" attributes plus every <stats> child.
type SummaryStatistics struct {
	Version string        `xml:"ver,attr"`
	Program string        `xml:"pgm,attr"`
	Stats   []SummaryStat `xml:"stats"`
}

type ThrottleGS

// ThrottleGS is the JSON shape of a throttle plug-in g-stream record, decoded
// from the keys "io_wait", "io_active" and "io_total".
// NOTE(review): the unit of IOWaitTime is not stated here — confirm.
type ThrottleGS struct {
	IOWaitTime float64 `json:"io_wait"`
	IOActive   int     `json:"io_active"`
	IOTotal    int     `json:"io_total"`
}

Throttle plug-in g-stream

type UserId

// UserId wraps a 32-bit unsigned user identifier.
// NOTE(review): presumably the XRootD monitoring user ID — confirm.
type UserId struct {
	Id uint32
}

type UserRecord

// UserRecord holds identity attributes of a user: authentication protocol,
// user name, DN, role, organisation, groups, project and host.
// NOTE(review): which fields are populated presumably depends on how the user
// authenticated — confirm at the parsing sites.
type UserRecord struct {
	AuthenticationProtocol string
	User                   string
	DN                     string
	Role                   string
	Org                    string
	Groups                 []string
	Project                string
	Host                   string
}

type XrdUserId added in v1.0.4

// XrdUserId is the parsed form of the userid found in the info field of an
// XRootD message: protocol, user name, process ID, session ID and host.
type XrdUserId struct {
	Prot string
	User string
	Pid  int
	Sid  int
	Host string
}

userid as in XRootD message info field

func GetSIDRest

func GetSIDRest(info []byte) (xrdUserId XrdUserId, rest string, err error)

func ParseXrdUserId added in v1.0.4

func ParseXrdUserId(userid string) (xrdUserId XrdUserId, err error)

type XrdXrootdMonFileCLS added in v1.0.4

// XrdXrootdMonFileCLS combines a file header with transfer statistics and
// operation statistics.
// NOTE(review): presumably the XRootD f-stream file-close record — confirm
// against the XRootD monitoring specification.
type XrdXrootdMonFileCLS struct {
	Hdr XrdXrootdMonFileHdr // Always present
	Xfr XrdXrootdMonStatXFR // Always present
	Ops XrdXrootdMonStatOPS // OPTIONAL

}

XrdXrootdMonFileCLS represents a variable length structure and includes other structures that are "Always present" or "OPTIONAL". The OPTIONAL parts are not included here as they require more context.

func (*XrdXrootdMonFileCLS) Serialize added in v1.0.4

func (cls *XrdXrootdMonFileCLS) Serialize() ([]byte, error)

Serialize converts XrdXrootdMonFileCLS to a byte array

type XrdXrootdMonFileHdr

// XrdXrootdMonFileHdr is the header embedded at the start of the file record
// structs in this package (CLS, OPN, TOD, XFR). It carries the record type,
// flags and size, followed by a file ID, a user ID and two 16-bit counts.
// NOTE(review): which of FileId/UserId/NRecs0/NRecs1 is meaningful presumably
// depends on RecType — confirm against the XRootD monitoring specification.
type XrdXrootdMonFileHdr struct {
	RecType recTval
	RecFlag byte
	RecSize int16
	FileId  uint32
	UserId  uint32
	NRecs0  int16
	NRecs1  int16
}

func ParseFileHeader

func ParseFileHeader(packet []byte) (XrdXrootdMonFileHdr, error)

func (*XrdXrootdMonFileHdr) Serialize added in v1.0.4

func (hdr *XrdXrootdMonFileHdr) Serialize() ([]byte, error)

type XrdXrootdMonFileLFN added in v1.0.4

// XrdXrootdMonFileLFN pairs a 32-bit user value with a fixed 1032-byte buffer
// for a logical file name.
// NOTE(review): presumably the name is NUL-terminated inside the buffer —
// confirm.
type XrdXrootdMonFileLFN struct {
	User uint32
	Lfn  [1032]byte
}

func (*XrdXrootdMonFileLFN) Serialize added in v1.0.4

func (lfn *XrdXrootdMonFileLFN) Serialize() ([]byte, error)

type XrdXrootdMonFileOPN added in v1.0.4

// XrdXrootdMonFileOPN combines a file header, a 64-bit size and a
// user/file-name section.
// NOTE(review): presumably the XRootD f-stream file-open record, with Fsz the
// file size at open — confirm.
type XrdXrootdMonFileOPN struct {
	Hdr XrdXrootdMonFileHdr
	Fsz int64
	Ufn XrdXrootdMonFileLFN
}

func (*XrdXrootdMonFileOPN) Serialize added in v1.0.4

func (opn *XrdXrootdMonFileOPN) Serialize() ([]byte, error)

type XrdXrootdMonFileTOD

// XrdXrootdMonFileTOD combines a file header with two 32-bit times and a
// 64-bit SID.
// NOTE(review): presumably the XRootD f-stream time-of-day record, with
// TBeg/TEnd as UNIX timestamps — confirm.
type XrdXrootdMonFileTOD struct {
	Hdr  XrdXrootdMonFileHdr
	TBeg int32
	TEnd int32
	SID  int64
}

func (*XrdXrootdMonFileTOD) Serialize added in v1.0.4

func (ftod *XrdXrootdMonFileTOD) Serialize() ([]byte, error)

type XrdXrootdMonFileXFR added in v1.0.4

// XrdXrootdMonFileXFR is a file transfer record: a header whose record type is
// isXfr, followed by the transfer byte counters.
type XrdXrootdMonFileXFR struct {
	Hdr XrdXrootdMonFileHdr // Header with recType == isXfr
	Xfr XrdXrootdMonStatXFR
}

func (*XrdXrootdMonFileXFR) Serialize added in v1.0.4

func (fileXFR *XrdXrootdMonFileXFR) Serialize() ([]byte, error)

type XrdXrootdMonGS

// XrdXrootdMonGS is a monitoring packet header followed by the time window
// and provider ID of a g-stream packet.
type XrdXrootdMonGS struct {
	Hdr  XrdXrootdMonHeader
	TBeg int   // UNIX time of first entry
	TEnd int   // UNIX time of last entry
	SID  int64 // Provider identification
}

type XrdXrootdMonHeader

// XrdXrootdMonHeader is the common header of a monitoring packet: a one-byte
// packet code, a sequence number, the packet length and the server start time.
type XrdXrootdMonHeader struct {
	Code byte   // = | d | f | g | i | p | r | t | u | x
	Pseq byte   // packet sequence
	Plen uint16 // packet length
	Stod int32  // Unix time at Server start
}

func (*XrdXrootdMonHeader) Serialize added in v1.0.4

func (monHeader *XrdXrootdMonHeader) Serialize() ([]byte, error)

type XrdXrootdMonMap added in v1.0.4

// XrdXrootdMonMap is a monitoring packet header followed by a 32-bit
// dictionary ID and a variable-length info payload.
// NOTE(review): presumably Info holds the userid and path text handled by
// GetSIDRest — confirm.
type XrdXrootdMonMap struct {
	Hdr    XrdXrootdMonHeader
	Dictid uint32
	Info   []byte
}

func (XrdXrootdMonMap) Serialize added in v1.0.4

func (monMap XrdXrootdMonMap) Serialize() ([]byte, error)

type XrdXrootdMonStatOPS added in v1.0.4

// XrdXrootdMonStatOPS holds per-file operation statistics: call counts for
// read, readv and write, plus the minimum and maximum readv segment counts
// and request sizes.
type XrdXrootdMonStatOPS struct {
	Read  int32 // Number of read() calls
	Readv int32 // Number of readv() calls
	Write int32 // Number of write() calls
	RsMin int16 // Smallest readv() segment count
	RsMax int16 // Largest readv() segment count
	Rsegs int64 // Number of readv() segments
	RdMin int32 // Smallest read() request size
	RdMax int32 // Largest read() request size
	RvMin int32 // Smallest readv() request size
	RvMax int32 // Largest readv() request size
	WrMin int32 // Smallest write() request size
	WrMax int32 // Largest write() request size
}

func (*XrdXrootdMonStatOPS) Serialize added in v1.0.4

func (ops *XrdXrootdMonStatOPS) Serialize() ([]byte, error)

type XrdXrootdMonStatXFR added in v1.0.4

// XrdXrootdMonStatXFR holds per-file transfer byte counters, one each for
// read, readv and write.
type XrdXrootdMonStatXFR struct {
	Read  int64 // Bytes read from file using read()
	Readv int64 // Bytes read from file using readv()
	Write int64 // Bytes written to file
}

func (*XrdXrootdMonStatXFR) Serialize added in v1.0.4

func (xfr *XrdXrootdMonStatXFR) Serialize() ([]byte, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL