Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
var ( ErrIbstatOutputBrokenStateDown = errors.New("ibstat output unexpected; found State: Down (check the physical switch)") ErrIbstatOutputBrokenPhysicalDisabled = errors.New("ibstat output unexpected; found Physical state: Disabled (check the physical switch)") )
var ( ErrIbstatOutputEmpty = errors.New("ibstat returned empty output") ErrIbstatOutputNoCardFound = errors.New("parsed ibstat output does not contain any card") )
var ErrNoExpectedPortStates = errors.New("no expected port states found (not supported)")
var ErrNoIbstatCommand = errors.New("ibstat not found. cannot check ib state")
Functions ¶
func CheckInfiniband ¶
func CheckInfiniband(ctx context.Context, ibstatCommand string, threshold ExpectedPortStates) error
CheckInfiniband checks whether the InfiniBand ports are up and running and meet the expected thresholds.
func ValidateIbstatOutput ¶
Types ¶
type ExpectedPortStates ¶
type ExpectedPortStates struct { // The minimum number of ports. // If not set, it defaults to 0. AtLeastPorts int `json:"at_least_ports"` // The expected rate in Gb/sec. // If not set, it defaults to 0. AtLeastRate int `json:"at_least_rate"` }
ExpectedPortStates configures the expected state of the ports.
func SupportsInfinibandPortRate ¶
func SupportsInfinibandPortRate(gpuProductName string) (ExpectedPortStates, error)
type IBStatCard ¶
type IBStatCard struct { Name string `json:"CA name"` Type string `json:"CA type"` NumPorts string `json:"Number of ports"` FirmwareVersion string `json:"Firmware version"` HardwareVersion string `json:"Hardware version"` NodeGUID string `json:"Node GUID"` SystemImageGUID string `json:"System image GUID"` Port1 IBStatPort `json:"Port 1"` }
type IBStatCards ¶
type IBStatCards []IBStatCard
func ParseIBStat ¶
func ParseIBStat(input string) (IBStatCards, error)
ParseIBStat parses ibstat output and returns it as structured IBStatCards. It returns ErrIbstatOutputEmpty if the input is empty.
func (IBStatCards) CheckPortsAndRate ¶
func (cards IBStatCards) CheckPortsAndRate(atLeastPorts int, atLeastRate int) error
CheckPortsAndRate checks whether the number of active IB ports and their rate meet the expectations given by atLeastPorts and atLeastRate.
func (IBStatCards) Match ¶
func (cards IBStatCards) Match(expectedPhysicalState string, expectedState string, atLeastRate int) []string
Match returns the names of the IB ports whose physical state, state, and "Port 1"."Rate" match the expected values. The specified rate is a lower bound for "Port 1"."Rate", compared using the ">=" operator (e.g., a rate of 400 matches all the cards whose rate is >= 400).
If `expectedPhysicalState` is empty, it matches all physical states. If `expectedState` is empty, it matches all states.
type IBStatPort ¶
type IbstatOutput ¶
type IbstatOutput struct { Parsed IBStatCards `json:"parsed,omitempty"` Raw string `json:"raw"` Errors []string `json:"errors,omitempty"` }
func GetIbstatOutput ¶
func GetIbstatOutput(ctx context.Context, ibstatCommands []string) (*IbstatOutput, error)