Documentation ¶
Index ¶
- Constants
- Variables
- func ConfigureMonitoring(ctx context.Context, egrp *errgroup.Group) (int, error)
- func DeleteComponentHealthStatus(name HealthStatusComponent)
- func GetComponentStatus(comp HealthStatusComponent) (status string, err error)
- func HandlePacket(packet []byte) error
- func HandleSummaryPacket(packet []byte) error
- func LaunchShoveler(ctx context.Context, egrp *errgroup.Group, metricsPort int) (int, error)
- func NullTermToString(nullTermBytes []byte) (str string)
- func ParseTokenAuth(tokenauth string) (userId UserId, record UserRecord, err error)
- func SetComponentHealthStatus(name HealthStatusComponent, state HealthStatusEnum, msg string)
- type CacheAccessStat
- type CacheGS
- type ComponentStatus
- type DirectorFTXTestStatus
- type DirectorStatResult
- type FileId
- type FileRecord
- type HealthStatus
- type HealthStatusComponent
- type HealthStatusEnum
- type MetricSimpleStatus
- type PathList
- type SummaryCacheMemory
- type SummaryCacheStore
- type SummaryPath
- type SummaryPathStat
- type SummaryStat
- type SummaryStatType
- type SummaryStatistics
- type ThrottleGS
- type UserId
- type UserRecord
- type XrdUserId
- type XrdXrootdMonFileCLS
- type XrdXrootdMonFileHdr
- type XrdXrootdMonFileLFN
- type XrdXrootdMonFileOPN
- type XrdXrootdMonFileTOD
- type XrdXrootdMonFileXFR
- type XrdXrootdMonGS
- type XrdXrootdMonHeader
- type XrdXrootdMonMap
- type XrdXrootdMonStatOPS
- type XrdXrootdMonStatXFR
Constants ¶
const ( MetricSucceeded MetricSimpleStatus = "Succeeded" MetricFailed MetricSimpleStatus = "Failed" StatSucceeded DirectorStatResult = "Succeeded" StatNotFound DirectorStatResult = "NotFound" StatTimeout DirectorStatResult = "Timeout" StatCancelled DirectorStatResult = "Cancelled" StatForbidden DirectorStatResult = "Forbidden" StatUnkownErr DirectorStatResult = "UnknownErr" )
const ( XROOTD_MON_PIDSHFT = int64(56) XROOTD_MON_PIDMASK = int64(0xff) )
Variables ¶
var ( PelicanDirectorFileTransferTestSuite = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "pelican_director_total_ftx_test_suite", Help: "The total number of file transfer test suite the director issued. A new test suite is a new goroutine started at origin's advertisement to the director and is cancelled when such registration expired in director's TTL cache", }, []string{"server_name", "server_web_url", "server_type"}) PelicanDirectorActiveFileTransferTestSuite = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_director_active_ftx_test_suite", Help: "The number of active director file transfer test suite. The number of active goroutines that executes test run", }, []string{"server_name", "server_web_url", "server_type"}) PelicanDirectorFileTransferTestsRuns = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "pelican_director_total_ftx_test_runs", Help: "The number of file transfer test runs the director issued. A test run is a cycle of upload/download/delete test file, which is executed per 15s per origin (by defult)", }, []string{"server_name", "server_web_url", "server_type", "status", "report_status"}) PelicanDirectorAdvertisementsRecievedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "pelican_director_advertisements_received_total", Help: "The total number of advertisement the director received from the origin and cache servers. 
Labelled by status_code, server_name, serve_type: Origin|Cache, server_web_url", }, []string{"server_name", "server_web_url", "server_type", "status_code", "namespace_prefix"}) PelicanDirectorMapItemsTotal = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_director_map_items_total", Help: "The total number of map items in the director, by the name of the map", }, []string{"name"}) // name: healthTestUtils, filteredServers, originStatUtils PelicanDirectorTTLCache = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_director_ttl_cache", Help: "The statistics of various TTL caches", }, []string{"name", "type"}) // name: serverAds, jwks; type: evictions, insersions, hits, misses, total PelicanDirectorStatActive = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_director_stat_active", Help: "The active stat queries in the director", }, []string{"server_name", "server_url", "server_type"}) PelicanDirectorStatTotal = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_director_stat_total", Help: "The total stat queries the director issues. 
The status can be Succeeded, Cancelled, Timeout, Forbidden, or UnknownErr", }, []string{"server_name", "server_url", "server_type", "result", "cached_result"}) // result: see enums for DirectorStatResult PelicanDirectorServerCount = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_director_server_count", Help: "The number of servers currently recognized by the Director, delineated by pelican/non-pelican and origin/cache", }, []string{"server_name", "server_type", "from_topology"}) PelicanDirectorClientVersionTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "pelican_director_client_version_total", Help: "The total number of requests from client versions.", }, []string{"version", "service"}) PelicanDirectorRedirectionsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "pelican_director_redirections_total", Help: "The total number of redirections the director issued.", }, []string{"destination", "status_code", "version", "network"}) PelicanDirectorGeoIPErrors = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "pelican_director_geoip_errors", Help: "The total number of errors encountered trying to resolve coordinates using the GeoIP MaxMind database", }, []string{"network", "source", "proj"}) )
var ( PelicanHealthStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_component_health_status", Help: "The health status of various components", }, []string{"component"}) PelicanHealthLastUpdate = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_component_health_status_last_update", Help: "Last update timestamp of components health status", }, []string{"component"}) )
var ( PacketsReceived = promauto.NewCounter(prometheus.CounterOpts{ Name: "xrootd_monitoring_packets_received", Help: "The total number of monitoring UDP packets received", }) TransferReadvSegs = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "xrootd_transfer_readv_segments_count", Help: "Number of segments in readv operations", }, []string{"path", "ap", "dn", "role", "org", "proj", "network"}) TransferOps = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "xrootd_transfer_operations_count", Help: "Number of transfer operations performed", }, []string{"path", "ap", "dn", "role", "org", "proj", "type", "network"}) TransferBytes = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "xrootd_transfer_bytes", Help: "Bytes of transfers", }, []string{"path", "ap", "dn", "role", "org", "proj", "type", "network"}) Threads = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "xrootd_sched_thread_count", Help: "Number of scheduler threads", }, []string{"state"}) Connections = promauto.NewCounter(prometheus.CounterOpts{ Name: "xrootd_server_connection_count", Help: "Aggregate number of server connections", }) BytesXfer = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "xrootd_server_bytes", Help: "Number of bytes read into the server", }, []string{"direction"}) StorageVolume = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "xrootd_storage_volume_bytes", Help: "Storage volume usage on the server", }, []string{"ns", "type", "server_type"}) // type: total/free; server_type: origin/cache CacheAccess = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "xrootd_cache_access_bytes", Help: "Number of bytes the data requested is in the cache or not", }, []string{"path", "type"}) // type: hit/miss/bypass ServerTotalIO = promauto.NewCounter(prometheus.CounterOpts{ Name: "xrootd_server_io_total", Help: "Total storage operations in origin/cache server", }) ServerActiveIO = promauto.NewGauge(prometheus.GaugeOpts{ Name: "xrootd_server_io_active", Help: "Number of ongoing 
storage operations in origin/cache server", }) ServerIOWaitTime = promauto.NewCounter(prometheus.CounterOpts{ Name: "xrootd_server_io_wait_time", Help: "The aggregate time spent in storage operations in origin/cache server", }) )
var PelicanOSDFInstitutions = promauto.NewGauge(prometheus.GaugeOpts{
Name: "pelican_osdf_institution_count",
Help: "Total number of contributing institutions",
})
var PelicanRegistryFederationNamespaces = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "pelican_registry_federation_namespaces", Help: "The number of federation namespace associated with a public key, excluding server namespaces, in the registry.", }, []string{"status"})
Functions ¶
func ConfigureMonitoring ¶
Set up listening for and parsing of XRootD monitoring UDP packets into Prometheus
The `ctx` is the context used to listen for the server shutdown event in order to clean up the internal cache eviction
func DeleteComponentHealthStatus ¶ added in v1.0.4
func DeleteComponentHealthStatus(name HealthStatusComponent)
func GetComponentStatus ¶
func GetComponentStatus(comp HealthStatusComponent) (status string, err error)
Get the current health status of a component. The status is one of: critical, warning, ok, or unknown.
func HandlePacket ¶
func HandleSummaryPacket ¶
func LaunchShoveler ¶
func NullTermToString ¶
func ParseTokenAuth ¶ added in v1.0.4
func ParseTokenAuth(tokenauth string) (userId UserId, record UserRecord, err error)
func SetComponentHealthStatus ¶
func SetComponentHealthStatus(name HealthStatusComponent, state HealthStatusEnum, msg string)
Add/update the component health status. If you have a new component to record, please go to metrics/health and register your component as a new constant of type HealthStatusComponent. Also note that StatusUnknown is mostly for internal use; please avoid setting it as your component status.
Types ¶
type CacheAccessStat ¶
type CacheGS ¶
type CacheGS struct { AccessCnt uint32 `json:"access_cnt"` AttachT int64 `json:"attach_t"` ByteBypass int64 `json:"b_bypass"` ByteHit int64 `json:"b_hit"` ByteMiss int64 `json:"b_miss"` BlkSize int `json:"blk_size"` DetachT int64 `json:"detach_t"` Event string `json:"event"` Lfn string `json:"lfn"` NBlocks int `json:"n_blks"` NBlocksDone int `json:"n_blks_done"` NCksErrs int `json:"n_cks_errs"` Size int64 `json:"size"` }
Cache g-stream: https://xrootd.slac.stanford.edu/doc/dev56/xrd_monitoring.htm#_Toc138968526
type ComponentStatus ¶
type ComponentStatus struct { Status string `json:"status"` Message string `json:"message,omitempty"` LastUpdate int64 `json:"last_update"` }
This is for API responses, so we want to display the string representation of the status
type DirectorFTXTestStatus ¶
type DirectorFTXTestStatus MetricSimpleStatus
type DirectorStatResult ¶
type DirectorStatResult string
type FileRecord ¶
type HealthStatus ¶
type HealthStatus struct { OverallStatus string `json:"status"` ComponentStatus map[HealthStatusComponent]ComponentStatus `json:"components"` }
func GetHealthStatus ¶
func GetHealthStatus() HealthStatus
type HealthStatusComponent ¶ added in v1.0.4
type HealthStatusComponent string
const ( OriginCache_XRootD HealthStatusComponent = "xrootd" OriginCache_CMSD HealthStatusComponent = "cmsd" OriginCache_Federation HealthStatusComponent = "federation" // Advertise to the director OriginCache_Director HealthStatusComponent = "director" // File transfer tests with director OriginCache_Registry HealthStatusComponent = "registry" // Register namespace at the registry DirectorRegistry_Topology HealthStatusComponent = "topology" // Fetch data from OSDF topology Server_WebUI HealthStatusComponent = "web-ui" )
Naming convention for components:
ServiceName1Name2_ComponentName
e.g. "OriginCache_XRootD" means this component is available at both Origin and Cache. Please choose the largest possible scope for the component
func (HealthStatusComponent) String ¶ added in v1.0.4
func (component HealthStatusComponent) String() string
type HealthStatusEnum ¶ added in v1.0.4
type HealthStatusEnum int
const ( StatusCritical HealthStatusEnum = iota + 1 StatusWarning StatusOK StatusUnknown // Do not abuse this enum. Use others when possible )
HealthStatusEnum values are stored both as Prometheus values and in an internal struct
func (HealthStatusEnum) String ¶ added in v1.0.4
func (status HealthStatusEnum) String() string
Unfortunately, we don't have a better way to ensure that every enum constant has a matching string representation, so we return "Error: status string index out of range" as an indicator when it does not
type MetricSimpleStatus ¶
type MetricSimpleStatus string
type SummaryCacheMemory ¶
type SummaryCacheStore ¶
type SummaryPath ¶
type SummaryPath struct { Idx int `xml:",chardata"` Stats []SummaryPathStat `xml:"stats"` }
type SummaryPathStat ¶
type SummaryStat ¶
type SummaryStat struct { Id SummaryStatType `xml:"id,attr"` Total int `xml:"tot"` In int `xml:"in"` Out int `xml:"out"` Threads int `xml:"threads"` Idle int `xml:"idle"` Paths SummaryPath `xml:"paths"` // For Oss Summary Data Store SummaryCacheStore `xml:"store"` Memory SummaryCacheMemory `xml:"mem"` }
type SummaryStatType ¶
type SummaryStatType string
const ( LinkStat SummaryStatType = "link" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653739 SchedStat SummaryStatType = "sched" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653745 OssStat SummaryStatType = "oss" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653741 CacheStat SummaryStatType = "cache" // https://xrootd.slac.stanford.edu/doc/dev55/xrd_monitoring.htm#_Toc99653733 )
Summary data types
type SummaryStatistics ¶
type SummaryStatistics struct { Version string `xml:"ver,attr"` Program string `xml:"pgm,attr"` Stats []SummaryStat `xml:"stats"` }
type ThrottleGS ¶
type ThrottleGS struct { IOWaitTime float64 `json:"io_wait"` IOActive int `json:"io_active"` IOTotal int `json:"io_total"` }
Throttle plug-in g-stream
type UserRecord ¶
type XrdUserId ¶ added in v1.0.4
userid as in XRootD message info field
func ParseXrdUserId ¶ added in v1.0.4
type XrdXrootdMonFileCLS ¶ added in v1.0.4
type XrdXrootdMonFileCLS struct { Hdr XrdXrootdMonFileHdr // Always present Xfr XrdXrootdMonStatXFR // Always present Ops XrdXrootdMonStatOPS // OPTIONAL }
XrdXrootdMonFileCLS represents a variable length structure and includes other structures that are "Always present" or "OPTIONAL". The OPTIONAL parts are not included here as they require more context.
func (*XrdXrootdMonFileCLS) Serialize ¶ added in v1.0.4
func (cls *XrdXrootdMonFileCLS) Serialize() ([]byte, error)
Serialize converts XrdXrootdMonFileCLS to a byte array
type XrdXrootdMonFileHdr ¶
type XrdXrootdMonFileHdr struct { RecType recTval RecFlag byte RecSize int16 FileId uint32 UserId uint32 NRecs0 int16 NRecs1 int16 }
func ParseFileHeader ¶
func ParseFileHeader(packet []byte) (XrdXrootdMonFileHdr, error)
func (*XrdXrootdMonFileHdr) Serialize ¶ added in v1.0.4
func (hdr *XrdXrootdMonFileHdr) Serialize() ([]byte, error)
type XrdXrootdMonFileLFN ¶ added in v1.0.4
func (*XrdXrootdMonFileLFN) Serialize ¶ added in v1.0.4
func (lfn *XrdXrootdMonFileLFN) Serialize() ([]byte, error)
type XrdXrootdMonFileOPN ¶ added in v1.0.4
type XrdXrootdMonFileOPN struct { Hdr XrdXrootdMonFileHdr Fsz int64 Ufn XrdXrootdMonFileLFN }
func (*XrdXrootdMonFileOPN) Serialize ¶ added in v1.0.4
func (opn *XrdXrootdMonFileOPN) Serialize() ([]byte, error)
type XrdXrootdMonFileTOD ¶
type XrdXrootdMonFileTOD struct { Hdr XrdXrootdMonFileHdr TBeg int32 TEnd int32 SID int64 }
func (*XrdXrootdMonFileTOD) Serialize ¶ added in v1.0.4
func (ftod *XrdXrootdMonFileTOD) Serialize() ([]byte, error)
type XrdXrootdMonFileXFR ¶ added in v1.0.4
type XrdXrootdMonFileXFR struct { Hdr XrdXrootdMonFileHdr // Header with recType == isXfr Xfr XrdXrootdMonStatXFR }
func (*XrdXrootdMonFileXFR) Serialize ¶ added in v1.0.4
func (fileXFR *XrdXrootdMonFileXFR) Serialize() ([]byte, error)
type XrdXrootdMonGS ¶
type XrdXrootdMonGS struct { Hdr XrdXrootdMonHeader TBeg int // UNIX time of first entry TEnd int // UNIX time of last entry SID int64 // Provider identification }
type XrdXrootdMonHeader ¶
type XrdXrootdMonHeader struct { Code byte // = | d | f | g | i | p | r | t | u | x Pseq byte // packet sequence Plen uint16 // packet length Stod int32 // Unix time at Server start }
func (*XrdXrootdMonHeader) Serialize ¶ added in v1.0.4
func (monHeader *XrdXrootdMonHeader) Serialize() ([]byte, error)
type XrdXrootdMonMap ¶ added in v1.0.4
type XrdXrootdMonMap struct { Hdr XrdXrootdMonHeader Dictid uint32 Info []byte }
func (XrdXrootdMonMap) Serialize ¶ added in v1.0.4
func (monMap XrdXrootdMonMap) Serialize() ([]byte, error)
type XrdXrootdMonStatOPS ¶ added in v1.0.4
type XrdXrootdMonStatOPS struct { Read int32 // Number of read() calls Readv int32 // Number of readv() calls Write int32 // Number of write() calls RsMin int16 // Smallest readv() segment count RsMax int16 // Largest readv() segment count Rsegs int64 // Number of readv() segments RdMin int32 // Smallest read() request size RdMax int32 // Largest read() request size RvMin int32 // Smallest readv() request size RvMax int32 // Largest readv() request size WrMin int32 // Smallest write() request size WrMax int32 // Largest write() request size }
func (*XrdXrootdMonStatOPS) Serialize ¶ added in v1.0.4
func (ops *XrdXrootdMonStatOPS) Serialize() ([]byte, error)
type XrdXrootdMonStatXFR ¶ added in v1.0.4
type XrdXrootdMonStatXFR struct { Read int64 // Bytes read from file using read() Readv int64 // Bytes read from file using readv() Write int64 // Bytes written to file }
func (*XrdXrootdMonStatXFR) Serialize ¶ added in v1.0.4
func (xfr *XrdXrootdMonStatXFR) Serialize() ([]byte, error)