Documentation ¶
Overview ¶
Package xid provides the NVIDIA XID error details.
Index ¶
Constants ¶
const (
	// RegexNVRMXidDmesg is the regular expression source used to find an NVRM Xid
	// entry in a dmesg log line. Its single capture group holds the decimal digits
	// that sit between ": " and the following "," after the "NVRM: Xid" prefix.
	//
	// e.g.,
	//
	//	[...] NVRM: Xid (0000:03:00): 14, Channel 00000001
	//	[...] NVRM: Xid (PCI:0000:05:00): 79, pid='<unknown>', name=<unknown>, GPU has fallen off the bus.
	//	NVRM: Xid (PCI:0000:01:00): 79, GPU has fallen off the bus.
	//
	// ref.
	// https://docs.nvidia.com/deploy/pdf/XID_Errors.pdf
	RegexNVRMXidDmesg = `NVRM: Xid.*?: (\d+),`
)
Variables ¶
var (
	// CompiledRegexNVRMXidDmesg is the package-level regular expression built
	// from RegexNVRMXidDmesg with regexp.MustCompile.
	CompiledRegexNVRMXidDmesg = regexp.MustCompile(RegexNVRMXidDmesg)
)
Functions ¶
func ExtractNVRMXid ¶
ExtractNVRMXid extracts the NVIDIA Xid error code from a dmesg log line. It returns 0 if no error code is found. ref. https://docs.nvidia.com/deploy/pdf/XID_Errors.pdf
Types ¶
type Detail ¶
// Detail defines the XID error type.
//
// Every field is serialized to JSON under the snake_case key given in its tag.
//
// ref. https://docs.nvidia.com/deploy/pdf/XID_Errors.pdf
// ref. https://docs.nvidia.com/deploy/xid-errors/index.html#xid-error-listing
// ref. https://docs.nvidia.com/deploy/gpu-debug-guidelines/index.html#xid-messages
type Detail struct {
	// NOTE(review): ID is presumably the numeric Xid code — confirm against
	// the table that populates these entries.
	ID          int    `json:"id"`
	Name        string `json:"name"`
	Description string `json:"description"`

	// NOTE(review): the flags below presumably mark the possible causes of the
	// Xid as categorized in NVIDIA's Xid error listing — confirm.
	HWError                bool `json:"hw_error"`
	DriverError            bool `json:"driver_error"`
	UserAppError           bool `json:"user_app_error"`
	SystemMemoryCorruption bool `json:"system_memory_corruption"`
	BusError               bool `json:"bus_error"`
	ThermalIssue           bool `json:"thermal_issue"`
	FBCorruption           bool `json:"fb_corruption"`
}
Detail defines the XID error type. ref. https://docs.nvidia.com/deploy/pdf/XID_Errors.pdf ref. https://docs.nvidia.com/deploy/xid-errors/index.html#xid-error-listing ref. https://docs.nvidia.com/deploy/gpu-debug-guidelines/index.html#xid-messages ref. https://docs.nvidia.com/deploy/xid-errors/index.html ref. https://github.com/NVIDIA/open-gpu-kernel-modules/blob/main/src/common/sdk/nvidia/inc/nverror.h ref. https://github.com/NVIDIA/k8s-device-plugin/blob/v0.16.0/internal/rm/health.go#L62-L76
type DmesgError ¶
type DmesgError struct { Detail *Detail `json:"detail,omitempty"` DetailFound bool `json:"detail_found"` LogItem query_log.Item `json:"log_item"` }
func ParseDmesgErrorJSON ¶
func ParseDmesgErrorJSON(data []byte) (*DmesgError, error)
func ParseDmesgErrorYAML ¶
func ParseDmesgErrorYAML(data []byte) (*DmesgError, error)
func ParseDmesgLogLine ¶
func ParseDmesgLogLine(line string) (DmesgError, error)
func (*DmesgError) JSON ¶
func (de *DmesgError) JSON() ([]byte, error)
func (*DmesgError) YAML ¶
func (de *DmesgError) YAML() ([]byte, error)