Documentation ¶
Index ¶
- Constants
- Variables
- func ClusterRole(n ClusterPolicyController) (gpuv1.State, error)
- func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error)
- func ConfigMaps(n ClusterPolicyController) (gpuv1.State, error)
- func DaemonSet(n ClusterPolicyController) (gpuv1.State, error)
- func Deployment(n ClusterPolicyController) (gpuv1.State, error)
- func GetClusterWideProxy() (*apiconfigv1.Proxy, error)
- func OpenshiftVersion() (string, error)
- func PodSecurityPolicy(n ClusterPolicyController) (gpuv1.State, error)
- func PrometheusRule(n ClusterPolicyController) (gpuv1.State, error)
- func Role(n ClusterPolicyController) (gpuv1.State, error)
- func RoleBinding(n ClusterPolicyController) (gpuv1.State, error)
- func RuntimeClass(n ClusterPolicyController) (gpuv1.State, error)
- func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)
- func Service(n ClusterPolicyController) (gpuv1.State, error)
- func ServiceAccount(n ClusterPolicyController) (gpuv1.State, error)
- func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error)
- func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformSandboxDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformVGPUDeviceManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformVGPUManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformValidatorComponent(config *gpuv1.ClusterPolicySpec, podSpec *corev1.PodSpec, component string) error
- func TransformValidatorShared(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- type ClusterPolicyController
- type ClusterPolicyReconciler
- type OpenShiftDriverToolkit
- type OperatorMetrics
- type Resources
Constants ¶
const (
	// DefaultContainerdConfigFile indicates default config file path for containerd
	DefaultContainerdConfigFile = "/etc/containerd/config.toml"
	// DefaultContainerdSocketFile indicates default containerd socket file
	DefaultContainerdSocketFile = "/run/containerd/containerd.sock"
	// DefaultDockerConfigFile indicates default config file path for docker
	DefaultDockerConfigFile = "/etc/docker/daemon.json"
	// DefaultDockerSocketFile indicates default docker socket file
	DefaultDockerSocketFile = "/var/run/docker.sock"
	// TrustedCAConfigMapName indicates configmap with custom user CA injected
	TrustedCAConfigMapName = "gpu-operator-trusted-ca"
	// TrustedCABundleFileName indicates custom user ca certificate filename
	TrustedCABundleFileName = "ca-bundle.crt"
	// TrustedCABundleMountDir indicates target mount directory of user ca bundle
	TrustedCABundleMountDir = "/etc/pki/ca-trust/extracted/pem"
	// TrustedCACertificate indicates injected CA certificate name
	TrustedCACertificate = "tls-ca-bundle.pem"
	// VGPULicensingConfigMountPath indicates target mount path for vGPU licensing configuration file
	VGPULicensingConfigMountPath = "/drivers/gridd.conf"
	// VGPULicensingFileName is the vGPU licensing configuration filename
	VGPULicensingFileName = "gridd.conf"
	// NLSClientTokenMountPath indicates the target mount path for NLS client config token file (.tok)
	NLSClientTokenMountPath = "/drivers/ClientConfigToken/client_configuration_token.tok"
	// NLSClientTokenFileName is the NLS client config token filename
	NLSClientTokenFileName = "client_configuration_token.tok"
	// VGPUTopologyConfigMountPath indicates target mount path for vGPU topology daemon configuration file
	VGPUTopologyConfigMountPath = "/etc/nvidia/nvidia-topologyd.conf"
	// VGPUTopologyConfigFileName is the vGPU topology daemon configuration filename
	VGPUTopologyConfigFileName = "nvidia-topologyd.conf"
	// DefaultRuntimeClass represents "nvidia" RuntimeClass
	DefaultRuntimeClass = "nvidia"
	// DriverInstallPathVolName represents volume name for driver install path provided to toolkit
	DriverInstallPathVolName = "driver-install-path"
	// DefaultRuntimeSocketTargetDir represents target directory where runtime socket directory will be mounted
	DefaultRuntimeSocketTargetDir = "/runtime/sock-dir/"
	// DefaultRuntimeConfigTargetDir represents target directory where runtime config directory will be mounted
	DefaultRuntimeConfigTargetDir = "/runtime/config-dir/"
	// ValidatorImageEnvName indicates env name for validator image passed
	ValidatorImageEnvName = "VALIDATOR_IMAGE"
	// ValidatorImagePullPolicyEnvName indicates env name for validator image pull policy passed
	ValidatorImagePullPolicyEnvName = "VALIDATOR_IMAGE_PULL_POLICY"
	// ValidatorImagePullSecretsEnvName indicates env name for validator image pull secrets passed
	ValidatorImagePullSecretsEnvName = "VALIDATOR_IMAGE_PULL_SECRETS"
	// ValidatorRuntimeClassEnvName indicates env name of runtime class to be applied to validator pods
	ValidatorRuntimeClassEnvName = "VALIDATOR_RUNTIME_CLASS"
	// MigStrategyEnvName indicates env name for passing MIG strategy
	MigStrategyEnvName = "MIG_STRATEGY"
	// MigPartedDefaultConfigMapName indicates name of ConfigMap containing default mig-parted config
	MigPartedDefaultConfigMapName = "default-mig-parted-config"
	// MigDefaultGPUClientsConfigMapName indicates name of ConfigMap containing default gpu-clients
	MigDefaultGPUClientsConfigMapName = "default-gpu-clients"
	// DCGMRemoteEngineEnvName indicates env name to specify remote DCGM host engine ip:port
	DCGMRemoteEngineEnvName = "DCGM_REMOTE_HOSTENGINE_INFO"
	// DCGMDefaultHostPort indicates default host port bound to DCGM host engine
	DCGMDefaultHostPort = 5555
	// GPUDirectRDMAEnabledEnvName indicates if GPU direct RDMA is enabled through GPU operator
	GPUDirectRDMAEnabledEnvName = "GPU_DIRECT_RDMA_ENABLED"
	// UseHostMOFEDEnvName indicates if MOFED driver is pre-installed on the host
	UseHostMOFEDEnvName = "USE_HOST_MOFED"
	// MetricsConfigMountPath indicates mount path for custom dcgm metrics file
	MetricsConfigMountPath = "/etc/dcgm-exporter/" + MetricsConfigFileName
	// MetricsConfigFileName indicates custom dcgm metrics file name
	MetricsConfigFileName = "dcgm-metrics.csv"
	// NvidiaAnnotationHashKey indicates annotation name for last applied hash by gpu-operator
	NvidiaAnnotationHashKey = "nvidia.com/last-applied-hash"
	// NvidiaDisableRequireEnvName is the env name to disable default cuda constraints
	NvidiaDisableRequireEnvName = "NVIDIA_DISABLE_REQUIRE"
)
Variables ¶
var CertConfigPathMap = map[string]string{
"centos": "/etc/pki/ca-trust/extracted/pem",
"ubuntu": "/etc/ssl/certs",
"rhcos": "/etc/pki/ca-trust/extracted/pem",
"rhel": "/etc/pki/ca-trust/extracted/pem",
}
CertConfigPathMap indicates standard OS specific paths for ssl keys/certificates. Where Go looks for certs: https://golang.org/src/crypto/x509/root_linux.go Where OCP mounts proxy certs on RHCOS nodes: https://access.redhat.com/documentation/en-us/openshift_container_platform/4.3/html/authentication/ocp-certificates#proxy-certificates_ocp-certificates
var RepoConfigPathMap = map[string]string{
"centos": "/etc/yum.repos.d",
"ubuntu": "/etc/apt/sources.list.d",
"rhcos": "/etc/yum.repos.d",
"rhel": "/etc/yum.repos.d",
}
RepoConfigPathMap indicates standard OS specific paths for repository configuration files
Functions ¶
func ClusterRole ¶
func ClusterRole(n ClusterPolicyController) (gpuv1.State, error)
ClusterRole creates ClusterRole resource
func ClusterRoleBinding ¶
func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error)
ClusterRoleBinding creates ClusterRoleBinding resource
func ConfigMaps ¶ added in v1.9.0
func ConfigMaps(n ClusterPolicyController) (gpuv1.State, error)
ConfigMaps creates ConfigMap resource(s)
func DaemonSet ¶
func DaemonSet(n ClusterPolicyController) (gpuv1.State, error)
DaemonSet creates DaemonSet resource
func Deployment ¶
func Deployment(n ClusterPolicyController) (gpuv1.State, error)
Deployment creates Deployment resource
func GetClusterWideProxy ¶
func GetClusterWideProxy() (*apiconfigv1.Proxy, error)
GetClusterWideProxy returns the cluster-wide proxy object set up in OCP
func OpenshiftVersion ¶
func OpenshiftVersion() (string, error)
OpenshiftVersion fetches OCP version
func PodSecurityPolicy ¶
func PodSecurityPolicy(n ClusterPolicyController) (gpuv1.State, error)
PodSecurityPolicy creates PSP resources
func PrometheusRule ¶ added in v1.8.0
func PrometheusRule(n ClusterPolicyController) (gpuv1.State, error)
PrometheusRule creates PrometheusRule object
func RoleBinding ¶
func RoleBinding(n ClusterPolicyController) (gpuv1.State, error)
RoleBinding creates RoleBinding resource
func RuntimeClass ¶
func RuntimeClass(n ClusterPolicyController) (gpuv1.State, error)
RuntimeClass creates RuntimeClass object
func SecurityContextConstraints ¶
func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)
SecurityContextConstraints creates SCC resources
func Service ¶
func Service(n ClusterPolicyController) (gpuv1.State, error)
Service creates Service object
func ServiceAccount ¶
func ServiceAccount(n ClusterPolicyController) (gpuv1.State, error)
ServiceAccount creates ServiceAccount resource
func ServiceMonitor ¶
func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error)
ServiceMonitor creates ServiceMonitor object
func TransformDCGM ¶ added in v1.8.0
func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDCGM transforms dcgm daemonset with required config as per ClusterPolicy
func TransformDCGMExporter ¶
func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDCGMExporter transforms dcgm exporter daemonset with required config as per ClusterPolicy
func TransformDevicePlugin ¶
func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDevicePlugin transforms k8s-device-plugin daemonset with required config as per ClusterPolicy
func TransformDriver ¶
func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDriver transforms NVIDIA driver daemonset with required config as per ClusterPolicy
func TransformGPUDiscoveryPlugin ¶
func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformGPUDiscoveryPlugin transforms GPU discovery daemonset with required config as per ClusterPolicy
func TransformMIGManager ¶
func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformMIGManager transforms MIG Manager daemonset with required config as per ClusterPolicy
func TransformNodeStatusExporter ¶ added in v1.8.0
func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformNodeStatusExporter transforms the node-status-exporter daemonset with required config as per ClusterPolicy
func TransformSandboxDevicePlugin ¶ added in v1.11.0
func TransformSandboxDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformSandboxDevicePlugin transforms sandbox-device-plugin daemonset with required config as per ClusterPolicy
func TransformSandboxValidator ¶ added in v1.11.0
func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformSandboxValidator transforms nvidia-sandbox-validator daemonset with required config as per ClusterPolicy
func TransformToolkit ¶
func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformToolkit transforms NVIDIA container-toolkit daemonset with required config as per ClusterPolicy
func TransformVFIOManager ¶ added in v1.11.0
func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformVFIOManager transforms VFIO-PCI Manager daemonset with required config as per ClusterPolicy
func TransformVGPUDeviceManager ¶ added in v1.11.0
func TransformVGPUDeviceManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformVGPUDeviceManager transforms VGPU Device Manager daemonset with required config as per ClusterPolicy
func TransformVGPUManager ¶ added in v1.11.0
func TransformVGPUManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformVGPUManager transforms NVIDIA vGPU Manager daemonset with required config as per ClusterPolicy
func TransformValidator ¶
func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformValidator transforms nvidia-operator-validator daemonset with required config as per ClusterPolicy
func TransformValidatorComponent ¶
func TransformValidatorComponent(config *gpuv1.ClusterPolicySpec, podSpec *corev1.PodSpec, component string) error
TransformValidatorComponent applies changes to given validator component
func TransformValidatorShared ¶ added in v1.11.0
func TransformValidatorShared(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformValidatorShared applies general transformations to the validator daemonset with required config as per ClusterPolicy
Types ¶
type ClusterPolicyController ¶
type ClusterPolicyController struct {
// contains filtered or unexported fields
}
ClusterPolicyController represents clusterpolicy controller spec for GPU operator
type ClusterPolicyReconciler ¶
ClusterPolicyReconciler reconciles a ClusterPolicy object
func (*ClusterPolicyReconciler) Reconcile ¶
func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state. TODO(user): Modify the Reconcile function to compare the state specified by the ClusterPolicy object against the actual cluster state, and then perform operations to make the cluster state reflect the state specified by the user.
For more details, check Reconcile and its Result here: - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.0/pkg/reconcile
func (*ClusterPolicyReconciler) SetupWithManager ¶
func (r *ClusterPolicyReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the controller with the Manager.
type OpenShiftDriverToolkit ¶ added in v1.9.0
type OpenShiftDriverToolkit struct {
// contains filtered or unexported fields
}
OpenShiftDriverToolkit contains the values required to deploy OpenShift DriverToolkit DaemonSet.
type OperatorMetrics ¶ added in v1.8.0
type OperatorMetrics struct {
// contains filtered or unexported fields
}
OperatorMetrics defines the Prometheus metrics exposed for the operator status
type Resources ¶
type Resources struct {
	ServiceAccount             corev1.ServiceAccount
	Role                       rbacv1.Role
	RoleBinding                rbacv1.RoleBinding
	ClusterRole                rbacv1.ClusterRole
	ClusterRoleBinding         rbacv1.ClusterRoleBinding
	ConfigMaps                 []corev1.ConfigMap
	DaemonSet                  appsv1.DaemonSet
	Deployment                 appsv1.Deployment
	Pod                        corev1.Pod
	Service                    corev1.Service
	ServiceMonitor             promv1.ServiceMonitor
	PriorityClass              schedv1.PriorityClass
	Taint                      corev1.Taint
	SecurityContextConstraints secv1.SecurityContextConstraints
	PodSecurityPolicy          policyv1beta1.PodSecurityPolicy
	RuntimeClass               nodev1.RuntimeClass
	PrometheusRule             promv1.PrometheusRule
}
Resources indicates resources managed by GPU operator