Skip to content

Commit 9defe37

Browse files
[no-relnote] Update Github Actions E2E
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent 6df26cc commit 9defe37

File tree

9 files changed

+91
-70
lines changed

9 files changed

+91
-70
lines changed

.github/workflows/e2e.yaml

+9-2
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ jobs:
7070

7171
- name: Run e2e tests
7272
env:
73-
IMAGE_NAME: ghcr.io/nvidia/container-toolkit
74-
VERSION: ${{ inputs.version }}
73+
E2E_IMAGE_REPO: ghcr.io/nvidia/container-toolkit
74+
E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
7575
SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
7676
E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
7777
E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
@@ -84,6 +84,13 @@ jobs:
8484
8585
make -f tests/e2e/Makefile test
8686
87+
- name: Archive Ginkgo logs
88+
uses: actions/upload-artifact@v4
89+
with:
90+
name: ginkgo-logs
91+
path: ginkgo.json
92+
retention-days: 15
93+
8794
- name: Send Slack alert notification
8895
if: ${{ failure() }}
8996
uses: slackapi/[email protected]

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@
1111
/nvidia-ctk
1212
/shared-*
1313
/release-*
14+
/bin

tests/e2e/Makefile

+7-5
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,16 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
.PHONY: test-e2e ginkgo
16+
.PHONY: test $(GINKGO_BIN)
1717

1818
GINKGO_ARGS ?=
1919
LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs
2020

21-
ginkgo:
21+
GINKGO_BIN := $(CURDIR)/bin/ginkgo
22+
23+
test: $(GINKGO_BIN)
24+
$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
25+
26+
$(GINKGO_BIN):
2227
mkdir -p $(CURDIR)/bin
2328
GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest
24-
25-
test-e2e: ginkgo
26-
$(CURDIR)/bin/ginkgo $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...

tests/e2e/README.md

+11-9
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ limitations under the License.
2020
---
2121

2222
## 1 Scope & Goals
23-
This repository contains a **Ginkgo v2 / Gomega** test harness that exercises an
23+
This folder contains a **Ginkgo v2 / Gomega** test harness that exercises an
2424
NVIDIA Container Toolkit (CTK) installation on a **remote GPU‑enabled host** via
2525
SSH. The suite validates that:
2626

@@ -58,12 +58,13 @@ compatibility runs, and pre‑release validation of new CTK builds.
5858

5959
| Variable | Required | Example | Description |
6060
|----------|----------|---------|-------------|
61-
| `INSTALL_CTK` || `true` | When `true` the test installs CTK on the remote host before running the image. When `false` it assumes CTK is already present. |
62-
| `TOOLKIT_IMAGE` || `nvcr.io/nvidia/cuda:12.4.0-runtime-ubi9` | Image that will be pulled & executed. |
63-
| `SSH_KEY` || `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
64-
| `SSH_USER` || `ubuntu` | Username on the remote host. |
65-
| `REMOTE_HOST` || `gpurunner01.corp.local` | Hostname or IP address of the target node. |
66-
| `REMOTE_PORT` || `22` | SSH port of the target node. |
61+
| `E2E_INSTALL_CTK` || `true` | When `true` the test installs CTK on the remote host before running the image. When `false` it assumes CTK is already present. Defaults to `true` when unset. |
62+
| `E2E_IMAGE_REPO` || `ghcr.io/nvidia/container-toolkit` | Repository of the Container Toolkit image (without the tag). Only read when `E2E_INSTALL_CTK` is `true`. |
63+
| `E2E_IMAGE_TAG` || `latest` | Tag of the Container Toolkit image. Only read when `E2E_INSTALL_CTK` is `true`. |
64+
| `E2E_SSH_KEY` || `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
65+
| `E2E_SSH_USER` || `ubuntu` | Username on the remote host. |
66+
| `E2E_SSH_HOST` || `10.0.0.0` | Hostname or IP address of the target node. |
67+
| `E2E_SSH_PORT` || `22` | SSH port of the target node. Defaults to `22` when unset. |
6768

6869
> All variables are validated at start‑up; the suite aborts early with a clear
6970
> message if any are missing or ill‑formed.
@@ -92,12 +93,13 @@ bin/ginkgo:
9293
### 6.1 Basic invocation
9394
```bash
9495
E2E_INSTALL_CTK=true \
95-
TOOLKIT_IMAGE=nvcr.io/nvidia/cuda:12.4.0-runtime-ubi9 \
96+
E2E_IMAGE_REPO=ghcr.io/nvidia/container-toolkit \
97+
E2E_IMAGE_TAG=<image-tag> \
9698
E2E_SSH_KEY=$HOME/.ssh/id_rsa \
9799
E2E_SSH_USER=ubuntu \
98100
E2E_SSH_HOST=10.0.0.15 \
99101
E2E_SSH_PORT=22 \
100-
make test-e2e
102+
make test
101103
```
102104
This downloads the image on the remote host, installs CTK (if requested), and
103105
executes a minimal CUDA‑based workload.

tests/e2e/e2e_test.go

+43-29
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: Apache-2.0
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
43
*
54
* Licensed under the Apache License, Version 2.0 (the "License");
65
* you may not use this file except in compliance with the License.
@@ -19,6 +18,7 @@ package e2e
1918

2019
import (
2120
"context"
21+
"errors"
2222
"os"
2323
"path/filepath"
2424
"runtime"
@@ -81,15 +81,6 @@ var _ = BeforeSuite(func() {
8181
err = installer.Install()
8282
Expect(err).ToNot(HaveOccurred())
8383
}
84-
85-
_, _, err := runner.Run("docker pull ubuntu")
86-
Expect(err).ToNot(HaveOccurred())
87-
88-
_, _, err = runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
89-
Expect(err).ToNot(HaveOccurred())
90-
91-
_, _, err = runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
92-
Expect(err).ToNot(HaveOccurred())
9384
})
9485

9586
// getTestEnv gets the test environment variables
@@ -100,40 +91,63 @@ func getTestEnv() {
10091
_, thisFile, _, _ := runtime.Caller(0)
10192
packagePath = filepath.Dir(thisFile)
10293

103-
installCTK = getBoolEnvVar("INSTALL_CTK", false)
94+
installCTK = getEnvVarOrDefault("E2E_INSTALL_CTK", true)
10495

105-
ImageRepo = os.Getenv("E2E_IMAGE_REPO")
106-
Expect(ImageRepo).NotTo(BeEmpty(), "E2E_IMAGE_REPO environment variable must be set")
96+
if installCTK {
97+
ImageRepo = os.Getenv("E2E_IMAGE_REPO")
98+
Expect(ImageRepo).NotTo(BeEmpty(), "E2E_IMAGE_REPO environment variable must be set")
10799

108-
ImageTag = os.Getenv("E2E_IMAGE_TAG")
109-
Expect(ImageTag).NotTo(BeEmpty(), "E2E_IMAGE_TAG environment variable must be set")
100+
ImageTag = os.Getenv("E2E_IMAGE_TAG")
101+
Expect(ImageTag).NotTo(BeEmpty(), "E2E_IMAGE_TAG environment variable must be set")
102+
}
110103

111-
sshKey = os.Getenv("SSH_KEY")
112-
Expect(sshKey).NotTo(BeEmpty(), "SSH_KEY environment variable must be set")
104+
sshKey = os.Getenv("E2E_SSH_KEY")
105+
Expect(sshKey).NotTo(BeEmpty(), "E2E_SSH_KEY environment variable must be set")
113106

114-
sshUser = os.Getenv("SSH_USER")
115-
Expect(sshUser).NotTo(BeEmpty(), "SSH_USER environment variable must be set")
107+
sshUser = os.Getenv("E2E_SSH_USER")
108+
Expect(sshUser).NotTo(BeEmpty(), "E2E_SSH_USER environment variable must be set")
116109

117-
host = os.Getenv("REMOTE_HOST")
118-
Expect(host).NotTo(BeEmpty(), "REMOTE_HOST environment variable must be set")
110+
host = os.Getenv("E2E_SSH_HOST")
111+
Expect(host).NotTo(BeEmpty(), "E2E_SSH_HOST environment variable must be set")
119112

120-
sshPort = os.Getenv("REMOTE_PORT")
121-
Expect(sshPort).NotTo(BeEmpty(), "REMOTE_PORT environment variable must be set")
113+
sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22")
122114

123115
// Get current working directory
124116
cwd, err = os.Getwd()
125117
Expect(err).NotTo(HaveOccurred())
126118
}
127119

128-
// getBoolEnvVar returns the boolean value of the environment variable or the default value if not set.
129-
func getBoolEnvVar(key string, defaultValue bool) bool {
120+
func getEnvVarAs[T any](key string) (T, error) {
121+
var zero T
130122
value := os.Getenv(key)
131123
if value == "" {
132-
return defaultValue
124+
return zero, errors.New("env var not set")
125+
}
126+
127+
switch any(zero).(type) {
128+
case bool:
129+
v, err := strconv.ParseBool(value)
130+
if err != nil {
131+
return zero, err
132+
}
133+
return any(v).(T), nil
134+
case int:
135+
v, err := strconv.Atoi(value)
136+
if err != nil {
137+
return zero, err
138+
}
139+
return any(v).(T), nil
140+
case string:
141+
return any(value).(T), nil
142+
default:
143+
return zero, errors.New("unsupported type")
133144
}
134-
boolValue, err := strconv.ParseBool(value)
145+
}
146+
147+
func getEnvVarOrDefault[T any](key string, defaultValue T) T {
148+
val, err := getEnvVarAs[T](key)
135149
if err != nil {
136150
return defaultValue
137151
}
138-
return boolValue
152+
return val
139153
}

tests/e2e/installer.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: Apache-2.0
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
43
*
54
* Licensed under the Apache License, Version 2.0 (the "License");
65
* you may not use this file except in compliance with the License.
@@ -14,6 +13,7 @@
1413
* See the License for the specific language governing permissions and
1514
* limitations under the License.
1615
*/
16+
1717
package e2e
1818

1919
import (

tests/e2e/nvidia-container-toolkit_test.go

+14-20
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: Apache-2.0
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
43
*
54
* Licensed under the Apache License, Version 2.0 (the "License");
65
* you may not use this file except in compliance with the License.
@@ -39,38 +38,36 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
3938
BeforeAll(func(ctx context.Context) {
4039
hostOutput, _, err = runner.Run("nvidia-smi -L")
4140
Expect(err).ToNot(HaveOccurred())
41+
42+
_, _, err := runner.Run("docker pull ubuntu")
43+
Expect(err).ToNot(HaveOccurred())
4244
})
4345

4446
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
45-
By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
4647
containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
4748
Expect(err).ToNot(HaveOccurred())
4849
Expect(containerOutput).To(Equal(hostOutput))
4950
})
5051

5152
It("should support automatic CDI spec generation", func(ctx context.Context) {
52-
By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
5353
containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
5454
Expect(err).ToNot(HaveOccurred())
5555
Expect(containerOutput).To(Equal(hostOutput))
5656
})
5757

5858
It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
59-
By("Running docker run with --gpus=all --runtime=nvidia --gpus all")
6059
containerOutput, _, err := runner.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
6160
Expect(err).ToNot(HaveOccurred())
6261
Expect(containerOutput).To(Equal(hostOutput))
6362
})
6463

6564
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
66-
By("Running docker run with --runtime=nvidia --gpus all")
6765
containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
6866
Expect(err).ToNot(HaveOccurred())
6967
Expect(containerOutput).To(Equal(hostOutput))
7068
})
7169

7270
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
73-
By("Running docker run with --gpus all")
7471
containerOutput, _, err := runner.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
7572
Expect(err).ToNot(HaveOccurred())
7673
Expect(containerOutput).To(Equal(hostOutput))
@@ -82,8 +79,12 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
8279
When("Running the cuda-vectorAdd sample", Ordered, func() {
8380
var referenceOutput string
8481

82+
BeforeAll(func(ctx context.Context) {
83+
_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
84+
Expect(err).ToNot(HaveOccurred())
85+
})
86+
8587
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
86-
By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
8788
var err error
8889
referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
8990
Expect(err).ToNot(HaveOccurred())
@@ -92,21 +93,18 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
9293
})
9394

9495
It("should support automatic CDI spec generation", func(ctx context.Context) {
95-
By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
9696
out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
9797
Expect(err).ToNot(HaveOccurred())
9898
Expect(referenceOutput).To(Equal(out2))
9999
})
100100

101101
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
102-
By("Running docker run with --runtime=nvidia --gpus all")
103102
out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
104103
Expect(err).ToNot(HaveOccurred())
105104
Expect(referenceOutput).To(Equal(out3))
106105
})
107106

108107
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
109-
By("Running docker run with --gpus all")
110108
out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
111109
Expect(err).ToNot(HaveOccurred())
112110
Expect(referenceOutput).To(Equal(out4))
@@ -116,37 +114,33 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
116114
// A deviceQuery sample runs in a container with access to all GPUs
117115
// The following should all produce the same result.
118116
When("Running the cuda-deviceQuery sample", Ordered, func() {
117+
var referenceOutput string
118+
119119
BeforeAll(func(ctx context.Context) {
120120
_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
121121
Expect(err).ToNot(HaveOccurred())
122122
})
123123

124-
var referenceOutput string
125-
126124
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
127-
By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
128125
var err error
129126
referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
130127
Expect(err).ToNot(HaveOccurred())
131128
Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
132129
})
133130

134131
It("should support automatic CDI spec generation", func(ctx context.Context) {
135-
By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
136132
out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
137133
Expect(err).ToNot(HaveOccurred())
138134
Expect(referenceOutput).To(Equal(out2))
139135
})
140136

141137
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
142-
By("Running docker run with --runtime=nvidia --gpus all")
143138
out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
144139
Expect(err).ToNot(HaveOccurred())
145140
Expect(referenceOutput).To(Equal(out3))
146141
})
147142

148143
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
149-
By("Running docker run with --gpus all")
150144
out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
151145
Expect(err).ToNot(HaveOccurred())
152146
Expect(referenceOutput).To(Equal(out4))
@@ -155,6 +149,9 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
155149

156150
When("Testing CUDA Forward compatibility", Ordered, func() {
157151
BeforeAll(func(ctx context.Context) {
152+
_, _, err := runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
153+
Expect(err).ToNot(HaveOccurred())
154+
158155
compatOutput, _, err := runner.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
159156
Expect(err).ToNot(HaveOccurred())
160157
Expect(compatOutput).ToNot(BeEmpty())
@@ -178,21 +175,18 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
178175
})
179176

180177
It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
181-
By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
182178
ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
183179
Expect(err).ToNot(HaveOccurred())
184180
Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
185181
})
186182

187183
It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
188-
By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
189184
ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
190185
Expect(err).ToNot(HaveOccurred())
191186
Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
192187
})
193188

194189
It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
195-
By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --gpus all")
196190
ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
197191
Expect(err).ToNot(HaveOccurred())
198192
Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))

tests/e2e/runner.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3-
* SPDX-License-Identifier: Apache-2.0
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
43
*
54
* Licensed under the Apache License, Version 2.0 (the "License");
65
* you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)