Documentation ¶
Overview ¶
Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. All rights reserved. Use of this source code is governed by a MIT-style license that can be found in the LICENSE file.
Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. All rights reserved. Use of this source code is governed by a MIT-style license that can be found in the LICENSE file.
Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. All rights reserved. Use of this source code is governed by a MIT-style license that can be found in the LICENSE file.
Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. All rights reserved. Use of this source code is governed by a MIT-style license that can be found in the LICENSE file.
Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. All rights reserved. Use of this source code is governed by a MIT-style license that can be found in the LICENSE file.
Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg. All rights reserved. Use of this source code is governed by a MIT-style license that can be found in the LICENSE file.
Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. All rights reserved. Use of this source code is governed by a MIT-style license that can be found in the LICENSE file.
Index ¶
- Constants
- func ConvertFloatToFloat64(s []Float) []float64
- func GetRoleString(roleInt Role) string
- func GetValidRoles(user *User) ([]string, error)
- func GetValidRolesMap(user *User) (map[string]Role, error)
- func IsValidRole(role string) bool
- func Validate(k Kind, r io.Reader) (err error)
- type Accelerator
- type AuthSource
- type AuthType
- type BaseJob
- type Cluster
- type ClusterConfig
- type FilterRanges
- type Float
- type IntRange
- type JWTAuthConfig
- type Job
- type JobData
- type JobLink
- type JobLinkResultList
- type JobMeta
- type JobMetric
- type JobState
- type JobStatistics
- type Kind
- type LdapConfig
- type MetricConfig
- type MetricScope
- type MetricStatistics
- type MetricValue
- type ProgramConfig
- type Resource
- type Retention
- type Role
- type Series
- type StatsSeries
- type SubCluster
- type SubClusterConfig
- type Tag
- type TimeRange
- type Topology
- func (topo *Topology) GetAcceleratorID(id int) (string, error)
- func (topo *Topology) GetAcceleratorIDs() ([]int, error)
- func (topo *Topology) GetCoresFromHWThreads(hwthreads []int) (cores []int, exclusive bool)
- func (topo *Topology) GetMemoryDomainsFromHWThreads(hwthreads []int) (memDoms []int, exclusive bool)
- func (topo *Topology) GetSocketsFromHWThreads(hwthreads []int) (sockets []int, exclusive bool)
- type Unit
- type User
- func (u *User) GetAuthLevel() Role
- func (u *User) HasAllRoles(queryroles []Role) bool
- func (u *User) HasAnyRole(queryroles []Role) bool
- func (u *User) HasNotRoles(queryroles []Role) bool
- func (u *User) HasProject(project string) bool
- func (u *User) HasRole(role Role) bool
- func (u *User) HasValidRole(role string) (hasRole bool, isValid bool)
Constants ¶
const ( MonitoringStatusDisabled int32 = 0 MonitoringStatusRunningOrArchiving int32 = 1 MonitoringStatusArchivingFailed int32 = 2 MonitoringStatusArchivingSuccessful int32 = 3 )
Variables ¶
This section is empty.
Functions ¶
func ConvertFloatToFloat64 ¶
func GetRoleString ¶ added in v1.2.0
func GetValidRoles ¶ added in v1.2.0
Called by API endpoint '/roles/' from frontend: Only required for admin config -> Check Admin Role
func GetValidRolesMap ¶ added in v1.2.0
Called by routerConfig web.page setup in backend: Only requires known user
func IsValidRole ¶ added in v1.2.0
Types ¶
type Accelerator ¶
type AuthSource ¶ added in v1.2.0
type AuthSource int
const ( AuthViaLocalPassword AuthSource = iota AuthViaLDAP AuthViaToken AuthViaAll )
type BaseJob ¶
type BaseJob struct { // The unique identifier of a job JobID int64 `json:"jobId" db:"job_id" example:"123000"` User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster Partition string `json:"partition,omitempty" db:"partition" example:"main"` // The Slurm partition to which the job was submitted ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` // The unique identifier of an array job NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0) // NumCores int32 `json:"numCores" db:"num_cores" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0) NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0) NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0) Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successful SMT int32 `json:"smt,omitempty" db:"smt" example:"4"` // SMT threads used by job State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job Duration int32 
`json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0) Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0) Tags []*Tag `json:"tags,omitempty"` // List of tags RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes] Resources []*Resource `json:"resources"` // Resources used by job RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes] MetaData map[string]string `json:"metaData"` // Additional information about the job ConcurrentJobs JobLinkResultList `json:"concurrentJobs"` }
var JobDefaults BaseJob = BaseJob{ Exclusive: 1, MonitoringStatus: MonitoringStatusRunningOrArchiving, }
type Cluster ¶
type Cluster struct { Name string `json:"name"` MetricConfig []*MetricConfig `json:"metricConfig"` SubClusters []*SubCluster `json:"subClusters"` }
type ClusterConfig ¶
type ClusterConfig struct { Name string `json:"name"` FilterRanges *FilterRanges `json:"filterRanges"` MetricDataRepository json.RawMessage `json:"metricDataRepository"` }
type FilterRanges ¶
type Float ¶
type Float float64
A custom float type is used so that (Un)MarshalJSON and (Un)MarshalGQL can be overloaded and NaN/null can be used. The default behaviour of putting every nullable value behind a pointer has a bigger overhead.
func GetFloat64ToFloat ¶
func (Float) MarshalGQL ¶
MarshalGQL implements the graphql.Marshaler interface. NaN will be serialized to `null`.
func (Float) MarshalJSON ¶
NaN will be serialized to `null`.
func (*Float) UnmarshalGQL ¶
UnmarshalGQL implements the graphql.Unmarshaler interface.
func (*Float) UnmarshalJSON ¶
`null` will be deserialized to NaN.
type JWTAuthConfig ¶
type JWTAuthConfig struct { // Specifies for how long a JWT token shall be valid // as a string parsable by time.ParseDuration(). MaxAge string `json:"max-age"` // Specifies which cookie should be checked for a JWT token (if no authorization header is present) CookieName string `json:"cookieName"` // Deny login for users not in database (but defined in JWT). // Ignore user roles defined in JWTs ('roles' claim), get them from db. ValidateUser bool `json:"validateUser"` // Specifies which issuer should be accepted when validating external JWTs ('iss' claim) TrustedIssuer string `json:"trustedIssuer"` // Should a non-existent user be added to the DB based on the information in the token SyncUserOnLogin bool `json:"syncUserOnLogin"` }
type Job ¶
type Job struct { // The unique identifier of a job in the database ID int64 `json:"id" db:"id"` BaseJob StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64 FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64 MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64 LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64 NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64 NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64 FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64 FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"` // FileDataVolTotal as Float64 }
Job struct type
This type is used as the GraphQL interface and using sqlx as a table row.
Job model @Description Information of an HPC job.
type JobLinkResultList ¶
type JobMeta ¶
type JobMeta struct { // The unique identifier of a job in the database ID *int64 `json:"id,omitempty"` BaseJob StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0) Statistics map[string]JobStatistics `json:"statistics"` // Metric statistics of job }
JobMeta model @Description Metadata information of an HPC job.
type JobMetric ¶
type JobMetric struct { Unit Unit `json:"unit"` Timestep int `json:"timestep"` Series []Series `json:"series"` StatisticsSeries *StatsSeries `json:"statisticsSeries,omitempty"` }
func (*JobMetric) AddPercentiles ¶
func (*JobMetric) AddStatisticsSeries ¶
func (jm *JobMetric) AddStatisticsSeries()
type JobState ¶
type JobState string
const ( JobStateRunning JobState = "running" JobStateCompleted JobState = "completed" JobStateFailed JobState = "failed" JobStateCancelled JobState = "cancelled" JobStateStopped JobState = "stopped" JobStateTimeout JobState = "timeout" JobStatePreempted JobState = "preempted" JobStateOutOfMemory JobState = "out_of_memory" )
func (JobState) MarshalGQL ¶
func (*JobState) UnmarshalGQL ¶
type JobStatistics ¶
type JobStatistics struct { Unit Unit `json:"unit"` Avg float64 `json:"avg" example:"2500" minimum:"0"` // Job metric average Min float64 `json:"min" example:"2000" minimum:"0"` // Job metric minimum Max float64 `json:"max" example:"3000" minimum:"0"` // Job metric maximum }
JobStatistics model @Description Specification for job metric statistics.
type LdapConfig ¶
type LdapConfig struct { Url string `json:"url"` UserBase string `json:"user_base"` SearchDN string `json:"search_dn"` UserBind string `json:"user_bind"` UserFilter string `json:"user_filter"` UserAttr string `json:"username_attr"` SyncInterval string `json:"sync_interval"` // Parsed using time.ParseDuration. SyncDelOldUsers bool `json:"sync_del_old_users"` // Should a non-existent user be added to the DB if user exists in ldap directory SyncUserOnLogin bool `json:"syncUserOnLogin"` }
type MetricConfig ¶
type MetricConfig struct { Name string `json:"name"` Unit Unit `json:"unit"` Scope MetricScope `json:"scope"` Aggregation string `json:"aggregation"` Timestep int `json:"timestep"` Peak float64 `json:"peak"` Normal float64 `json:"normal"` Caution float64 `json:"caution"` Alert float64 `json:"alert"` SubClusters []*SubClusterConfig `json:"subClusters,omitempty"` }
type MetricScope ¶
type MetricScope string
const ( MetricScopeInvalid MetricScope = "invalid_scope" MetricScopeNode MetricScope = "node" MetricScopeSocket MetricScope = "socket" MetricScopeMemoryDomain MetricScope = "memoryDomain" MetricScopeCore MetricScope = "core" MetricScopeHWThread MetricScope = "hwthread" MetricScopeAccelerator MetricScope = "accelerator" )
func (*MetricScope) LT ¶
func (e *MetricScope) LT(other MetricScope) bool
func (*MetricScope) LTE ¶
func (e *MetricScope) LTE(other MetricScope) bool
func (MetricScope) MarshalGQL ¶
func (e MetricScope) MarshalGQL(w io.Writer)
func (*MetricScope) Max ¶
func (e *MetricScope) Max(other MetricScope) MetricScope
func (*MetricScope) UnmarshalGQL ¶
func (e *MetricScope) UnmarshalGQL(v interface{}) error
func (MetricScope) Valid ¶
func (e MetricScope) Valid() bool
type MetricStatistics ¶
type MetricValue ¶
type ProgramConfig ¶
type ProgramConfig struct { // Address where the http (or https) server will listen on (for example: 'localhost:80'). Addr string `json:"addr"` // Addresses from which secured API endpoints can be reached ApiAllowedIPs []string `json:"apiAllowedIPs"` // Drop root permissions once .env was read and the port was taken. User string `json:"user"` Group string `json:"group"` // Disable authentication (for everything: API, Web-UI, ...) DisableAuthentication bool `json:"disable-authentication"` // If `embed-static-files` is true (default), the frontend files are directly // embedded into the go binary and expected to be in web/frontend. Only if // it is false the files in `static-files` are served instead. EmbedStaticFiles bool `json:"embed-static-files"` StaticFiles string `json:"static-files"` // 'sqlite3' or 'mysql' (mysql will work for mariadb as well) DBDriver string `json:"db-driver"` // For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). DB string `json:"db"` // Config for job archive Archive json.RawMessage `json:"archive"` // Keep all metric data in the metric data repositories, // do not write to the job-archive. DisableArchive bool `json:"disable-archive"` // Validate json input against schema Validate bool `json:"validate"` // For LDAP Authentication and user synchronisation. LdapConfig *LdapConfig `json:"ldap"` JwtConfig *JWTAuthConfig `json:"jwts"` // If 0 or empty, the session does not expire! SessionMaxAge string `json:"session-max-age"` // If both those options are not empty, use HTTPS using those certificates. HttpsCertFile string `json:"https-cert-file"` HttpsKeyFile string `json:"https-key-file"` // If not the empty string and `addr` does not end in ":80", // redirect every request incoming at port 80 to that url. RedirectHttpTo string `json:"redirect-http-to"` // If overwritten, at least all the options in the defaults below must // be provided! 
Most options here can be overwritten by the user. UiDefaults map[string]interface{} `json:"ui-defaults"` // Where to store MachineState files MachineStateDir string `json:"machine-state-dir"` // If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"` // Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views. ShortRunningJobsDuration int `json:"short-running-jobs-duration"` // Array of Clusters Clusters []*ClusterConfig `json:"clusters"` }
Format of the configuration (file). See below for the defaults.
type Resource ¶
type Resource struct { Hostname string `json:"hostname"` // Name of the host (= node) HWThreads []int `json:"hwthreads,omitempty"` // List of OS processor ids Accelerators []string `json:"accelerators,omitempty"` // List of accelerator device ids Configuration string `json:"configuration,omitempty"` // The configuration options of the node }
Resource model @Description A resource used by a job
type Series ¶
type Series struct { Hostname string `json:"hostname"` Id *string `json:"id,omitempty"` Statistics MetricStatistics `json:"statistics"` Data []Float `json:"data"` }
func (*Series) MarshalJSON ¶
Only used via the REST API, not via GraphQL. This uses far fewer allocations per series, but the resulting performance gain turns out to be small.
type StatsSeries ¶
type SubCluster ¶
type SubCluster struct { Name string `json:"name"` Nodes string `json:"nodes"` ProcessorType string `json:"processorType"` SocketsPerNode int `json:"socketsPerNode"` CoresPerSocket int `json:"coresPerSocket"` ThreadsPerCore int `json:"threadsPerCore"` FlopRateScalar MetricValue `json:"flopRateScalar"` FlopRateSimd MetricValue `json:"flopRateSimd"` MemoryBandwidth MetricValue `json:"memoryBandwidth"` Topology Topology `json:"topology"` }
type SubClusterConfig ¶
type Tag ¶
type Tag struct { ID int64 `json:"id" db:"id"` // The unique DB identifier of a tag Type string `json:"type" db:"tag_type" example:"Debug"` // Tag Type Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name }
Tag model @Description Defines a tag using name and type.
type Topology ¶
type Topology struct { Node []int `json:"node"` Socket [][]int `json:"socket"` MemoryDomain [][]int `json:"memoryDomain"` Die [][]*int `json:"die,omitempty"` Core [][]int `json:"core"` Accelerators []*Accelerator `json:"accelerators,omitempty"` }
func (*Topology) GetAcceleratorID ¶
Temporary fix to convert back from int id to string id for accelerators
func (*Topology) GetAcceleratorIDs ¶
func (*Topology) GetCoresFromHWThreads ¶
Return a list of core IDs given a list of hwthread IDs. Even if just one hwthread is in that core, add it to the list. If no hwthreads other than those in the argument list are assigned to one of the cores in the first return value, return true as the second value. TODO: Optimize this, there must be a more efficient way/algorithm.
func (*Topology) GetMemoryDomainsFromHWThreads ¶
func (topo *Topology) GetMemoryDomainsFromHWThreads( hwthreads []int) (memDoms []int, exclusive bool)
Return a list of memory domain IDs given a list of hwthread IDs. Even if just one hwthread is in that memory domain, add it to the list. If no hwthreads other than those in the argument list are assigned to one of the memory domains in the first return value, return true as the second value. TODO: Optimize this, there must be a more efficient way/algorithm.
func (*Topology) GetSocketsFromHWThreads ¶
Return a list of socket IDs given a list of hwthread IDs. Even if just one hwthread is in that socket, add it to the list. If no hwthreads other than those in the argument list are assigned to one of the sockets in the first return value, return true as the second value. TODO: Optimize this, there must be a more efficient way/algorithm.
type User ¶ added in v1.2.0
type User struct { Username string `json:"username"` Password string `json:"-"` Name string `json:"name"` Roles []string `json:"roles"` AuthType AuthType `json:"authType"` AuthSource AuthSource `json:"authSource"` Email string `json:"email"` Projects []string `json:"projects"` }
func (*User) HasAllRoles ¶ added in v1.2.0
Role-Arrays are short: performance not impacted by nested loop
func (*User) HasAnyRole ¶ added in v1.2.0
Role-Arrays are short: performance not impacted by nested loop
func (*User) HasNotRoles ¶ added in v1.2.0
Role-Arrays are short: performance not impacted by nested loop