Skip to content

Commit fcf105a

Browse files
sutaakar authored and openshift-merge-bot[bot] committed
[RHOAIENG-9004] Add GitHub actions to install NVidia GPU on KinD
1 parent 795d7ec commit fcf105a

File tree

3 files changed

+54
-0
lines changed

3 files changed

+54
-0
lines changed

github-actions/kind/resources/kind.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ apiVersion: kind.x-k8s.io/v1alpha4
1919
nodes:
2020
- role: control-plane
2121
image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1
22+
extraMounts:
23+
- hostPath: /dev/null
24+
containerPath: /var/run/nvidia-container-devices/all
2225
extraPortMappings:
2326
- containerPort: 80
2427
hostPort: 80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
# Composite action: installs the Helm CLI on the runner, deploys the NVIDIA
# gpu-operator chart with Helm, and finally prints the cluster nodes.
# Helm and kubectl act on whatever cluster the runner's current context
# points at; this action does not create the cluster itself.
name: "Install NVidia GPU operator for KinD"
description: "Step to install NVidia GPU operator for KinD, based on https://www.substratus.ai/blog/kind-with-gpus"

runs:
  using: "composite"
  steps:
    - name: Install Helm
      run: |
        # NOTE(review): the installer script is fetched from the `main` branch,
        # so its content is not pinned; consider pinning to a release tag.
        curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
        chmod 700 get_helm.sh
        ./get_helm.sh
        # 755 instead of 777: every user may execute helm, but only the owner
        # can overwrite a binary that sits on PATH.
        sudo chmod 755 /usr/local/bin/helm
      shell: bash

    - name: Install NVidia GPU operator
      run: |
        # `|| true` keeps the step going when `repo add` fails
        # (presumably because the repo is already registered on the runner).
        helm repo add nvidia https://helm.ngc.nvidia.com/nvidia || true
        helm repo update
        # driver.enabled=false: the chart is installed without its driver
        # component (presumably the host already provides the NVIDIA driver).
        helm install --wait --generate-name -n gpu-operator --create-namespace nvidia/gpu-operator --set driver.enabled=false
      shell: bash

    - name: Print KinD node
      run: |
        kubectl describe nodes
      shell: bash
+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
# Composite action: prepares the runner host for GPU workloads by installing
# Podman, making the NVIDIA container runtime the Docker default, and
# printing the GPUs visible on the host.
name: "Setup NVidia GPU environment for KinD"
description: "Step to setup NVidia GPU environment for KinD, based on https://www.substratus.ai/blog/kind-with-gpus"

runs:
  using: "composite"
  steps:
    - name: Install Podman
      run: |
        # Refresh the package index first so the install does not hit stale mirrors.
        sudo apt-get update
        sudo apt-get -y install podman
        # Enable lingering for the user running this step (the same user whose
        # home directory is populated below) instead of a hard-coded UID.
        loginctl enable-linger "$(id -u)"
        # To avoid Error: error creating build container: loading drop-in registries configuration "/etc/containers/registries.conf.d/local.conf": open /etc/containers/registries.conf.d/local.conf: permission denied
        # `mkdir -p` plus copying the directory *contents* keeps this step
        # re-runnable: it neither fails when ~/.config already exists nor nests
        # a second containers/ directory inside an existing one.
        mkdir -p ~/.config/containers
        cp -r /etc/containers/. ~/.config/containers/
      shell: bash

    - name: Setup NVIDIA Container Toolkit
      run: |
        # Register the NVIDIA runtime with Docker as the default runtime, then
        # restart Docker so the new daemon configuration is loaded.
        sudo nvidia-ctk runtime configure --runtime=docker --set-as-default
        sudo systemctl restart docker
        # Uncomment the option and flip it to true. The sed only matches the
        # commented-out `= false` form, so it is a no-op once already applied.
        sudo sed -i "s/#accept-nvidia-visible-devices-as-volume-mounts = false/accept-nvidia-visible-devices-as-volume-mounts = true/" /etc/nvidia-container-runtime/config.toml
      shell: bash

    - name: List NVIDIA GPUs
      run: |
        nvidia-smi
      shell: bash

0 commit comments

Comments
 (0)