Documentation ¶
Overview ¶
Package ecc tracks the NVIDIA per-GPU ECC errors.
Index ¶
Constants ¶
View Source
// State name and key/value strings for the ECC errors state.
// NOTE(review): presumably consumed when encoding/decoding components.State
// (see ParseStatesToOutput) — confirm against the implementation.
const (
	// StateNameECCErrors is the name string of the ECC errors state.
	StateNameECCErrors = "ecc_errors"

	// StateKeyECCErrorsData is the key string for the state's data payload.
	StateKeyECCErrorsData = "data"
	// StateKeyECCErrorsEncoding is the key string for the payload's encoding.
	StateKeyECCErrorsEncoding = "encoding"
	// StateValueECCErrorsEncodingJSON is the encoding value string for JSON.
	StateValueECCErrorsEncodingJSON = "json"
)
View Source
// Name is the name string exported by package ecc.
// NOTE(review): presumably the identifier this component registers under — confirm against callers.
const Name = "accelerator-nvidia-ecc"
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Config ¶
// Config is the configuration type for package ecc.
type Config struct {
	// Query is the query configuration, serialized under the "query" JSON key.
	Query query_config.Config `json:"query"`
}
type Output ¶
type Output struct { ErrorCountsSMI []nvidia_query.SMIECCErrors `json:"error_counts_smi"` ErrorCountsNVML []nvidia_query_nvml.ECCErrors `json:"error_counts_nvml"` // Volatile counts are reset each time the driver loads. // As aggregate counts persist across reboots (i.e. for the lifetime of the device), // do not track separately. // ref. https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceEnumvs.html#group__nvmlDeviceEnumvs_1g08978d1c4fb52b6a4c72b39de144f1d9 // // A memory error that was not correctedFor ECC errors, these are double bit errors. // For Texture memory, these are errors where the resend fails. // ref. https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceEnumvs.html#group__nvmlDeviceEnumvs_1gc5469bd68b9fdcf78734471d86becb24 VolatileUncorrectedErrors []string `json:"volatile_uncorrected_errors"` }
func ParseOutputJSON ¶
func ParseStatesToOutput ¶
func ParseStatesToOutput(states ...components.State) (*Output, error)
func ToOutput ¶
func ToOutput(i *nvidia_query.Output) *Output
Click to show internal directories.
Click to hide internal directories.