[no-relnote] Update Github Actions E2E

ArangoGutierrez · ArangoGutierrez · commit 9defe37fa2f5 · 2025-05-06T19:39:07.000+02:00
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
@@ -70,8 +70,8 @@ jobs:
 
       - name: Run e2e tests
         env:
-          IMAGE_NAME: ghcr.io/nvidia/container-toolkit
-          VERSION: ${{ inputs.version }}
+          E2E_IMAGE_REPO: ghcr.io/nvidia/container-toolkit
+          E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
           SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
           E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
           E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
@@ -84,6 +84,13 @@ jobs:
 
           make -f tests/e2e/Makefile test
 
+      - name: Archive Ginkgo logs
+        uses: actions/upload-artifact@v4
+        with:
+          name: ginkgo-logs
+          path: ginkgo.json
+          retention-days: 15
+        
       - name: Send Slack alert notification
         if: ${{ failure() }}
         uses: slackapi/slack-github-action@v2.0.0
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,4 @@
 /nvidia-ctk
 /shared-*
 /release-*
+/bin
diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile
@@ -13,14 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.PHONY: test-e2e ginkgo
+.PHONY: test $(GINKGO_BIN)
 
 GINKGO_ARGS ?=
 LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs
 
-ginkgo:
+GINKGO_BIN := $(CURDIR)/bin/ginkgo
+
+test: $(GINKGO_BIN)
+	$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
+
+$(GINKGO_BIN):
 	mkdir -p $(CURDIR)/bin
 	GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest
-
-test-e2e: ginkgo
-	$(CURDIR)/bin/ginkgo $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
diff --git a/tests/e2e/README.md b/tests/e2e/README.md
@@ -20,7 +20,7 @@ limitations under the License.
 ---
 
 ## 1  Scope & Goals
-This repository contains a **Ginkgo v2 / Gomega** test harness that exercises an
+This folder contains a **Ginkgo v2 / Gomega** test harness that exercises an
 NVIDIA Container Toolkit (CTK) installation on a **remote GPU‑enabled host** via
 SSH.  The suite validates that:
 
@@ -58,12 +58,13 @@ compatibility runs, and pre‑release validation of new CTK builds.
 
 | Variable | Required | Example | Description |
 |----------|----------|---------|-------------|
-| `INSTALL_CTK` | ✖ | `true` | When `true` the test installs CTK on the remote host before running the image. When `false` it assumes CTK is already present. |
-| `TOOLKIT_IMAGE` | ✔ | `nvcr.io/nvidia/cuda:12.4.0-runtime-ubi9` | Image that will be pulled & executed. |
-| `SSH_KEY` | ✔ | `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
-| `SSH_USER` | ✔ | `ubuntu` | Username on the remote host. |
-| `REMOTE_HOST` | ✔ | `gpurunner01.corp.local` | Hostname or IP address of the target node. |
-| `REMOTE_PORT` | ✔ | `22` | SSH port of the target node. |
+| `E2E_INSTALL_CTK` | ✖ | `true` | When `true` the test installs CTK on the remote host before running the image. When `false` it assumes CTK is already present. Defaults to `true`. |
+| `E2E_IMAGE_REPO` | ✔ | `ghcr.io/nvidia/container-toolkit` | Container Toolkit image repository. Only required when `E2E_INSTALL_CTK` is `true`. |
+| `E2E_IMAGE_TAG` | ✔ | `latest` | Container Toolkit image tag. Only required when `E2E_INSTALL_CTK` is `true`. |
+| `E2E_SSH_KEY` | ✔ | `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
+| `E2E_SSH_USER` | ✔ | `ubuntu` | Username on the remote host. |
+| `E2E_SSH_HOST` | ✔ | `10.0.0.0` | Hostname or IP address of the target node. |
+| `E2E_SSH_PORT` | ✖ | `22` | SSH port of the target node. Defaults to `22`. |
 
 > All variables are validated at start‑up; the suite aborts early with a clear
 > message if any are missing or ill‑formed.
@@ -92,12 +93,13 @@ bin/ginkgo:
 ### 6.1  Basic invocation
 ```bash
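-INSTALL_CTK=true \
+E2E_INSTALL_CTK=true \
-TOOLKIT_IMAGE=nvcr.io/nvidia/cuda:12.4.0-runtime-ubi9 \
+E2E_IMAGE_REPO=ghcr.io/nvidia/container-toolkit \
+E2E_IMAGE_TAG=<image-tag> \
-SSH_KEY=$HOME/.ssh/id_rsa \
-SSH_USER=ubuntu \
-REMOTE_HOST=10.0.0.15 \
-REMOTE_PORT=22 \
+E2E_SSH_KEY=$HOME/.ssh/id_rsa \
+E2E_SSH_USER=ubuntu \
+E2E_SSH_HOST=10.0.0.15 \
+E2E_SSH_PORT=22 \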
-make test-e2e
+make test
 ```
 This downloads the image on the remote host, installs CTK (if requested), and
 executes a minimal CUDA‑based workload.
diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,6 +18,7 @@ package e2e
 
 import (
 	"context"
+	"errors"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -81,15 +81,6 @@ var _ = BeforeSuite(func() {
 		err = installer.Install()
 		Expect(err).ToNot(HaveOccurred())
 	}
-
-	_, _, err := runner.Run("docker pull ubuntu")
-	Expect(err).ToNot(HaveOccurred())
-
-	_, _, err = runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
-	Expect(err).ToNot(HaveOccurred())
-
-	_, _, err = runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
-	Expect(err).ToNot(HaveOccurred())
 })
 
 // getTestEnv gets the test environment variables
@@ -100,40 +91,63 @@ func getTestEnv() {
 	_, thisFile, _, _ := runtime.Caller(0)
 	packagePath = filepath.Dir(thisFile)
 
-	installCTK = getBoolEnvVar("INSTALL_CTK", false)
+	installCTK = getEnvVarOrDefault("E2E_INSTALL_CTK", true)
 
-	ImageRepo = os.Getenv("E2E_IMAGE_REPO")
-	Expect(ImageRepo).NotTo(BeEmpty(), "E2E_IMAGE_REPO environment variable must be set")
+	if installCTK {
+		ImageRepo = os.Getenv("E2E_IMAGE_REPO")
+		Expect(ImageRepo).NotTo(BeEmpty(), "E2E_IMAGE_REPO environment variable must be set")
 
-	ImageTag = os.Getenv("E2E_IMAGE_TAG")
-	Expect(ImageTag).NotTo(BeEmpty(), "E2E_IMAGE_TAG environment variable must be set")
+		ImageTag = os.Getenv("E2E_IMAGE_TAG")
+		Expect(ImageTag).NotTo(BeEmpty(), "E2E_IMAGE_TAG environment variable must be set")
+	}
 
-	sshKey = os.Getenv("SSH_KEY")
-	Expect(sshKey).NotTo(BeEmpty(), "SSH_KEY environment variable must be set")
+	sshKey = os.Getenv("E2E_SSH_KEY")
+	Expect(sshKey).NotTo(BeEmpty(), "E2E_SSH_KEY environment variable must be set")
 
-	sshUser = os.Getenv("SSH_USER")
-	Expect(sshUser).NotTo(BeEmpty(), "SSH_USER environment variable must be set")
+	sshUser = os.Getenv("E2E_SSH_USER")
+	Expect(sshUser).NotTo(BeEmpty(), "E2E_SSH_USER environment variable must be set")
 
-	host = os.Getenv("REMOTE_HOST")
-	Expect(host).NotTo(BeEmpty(), "REMOTE_HOST environment variable must be set")
+	host = os.Getenv("E2E_SSH_HOST")
+	Expect(host).NotTo(BeEmpty(), "E2E_SSH_HOST environment variable must be set")
 
-	sshPort = os.Getenv("REMOTE_PORT")
-	Expect(sshPort).NotTo(BeEmpty(), "REMOTE_PORT environment variable must be set")
+	sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22")
 
 	// Get current working directory
 	cwd, err = os.Getwd()
 	Expect(err).NotTo(HaveOccurred())
 }
 
-// getBoolEnvVar returns the boolean value of the environment variable or the default value if not set.
-func getBoolEnvVar(key string, defaultValue bool) bool {
+func getEnvVarAs[T any](key string) (T, error) {
+	var zero T
 	value := os.Getenv(key)
 	if value == "" {
-		return defaultValue
+		return zero, errors.New("env var not set")
+	}
+
+	switch any(zero).(type) {
+	case bool:
+		v, err := strconv.ParseBool(value)
+		if err != nil {
+			return zero, err
+		}
+		return any(v).(T), nil
+	case int:
+		v, err := strconv.Atoi(value)
+		if err != nil {
+			return zero, err
+		}
+		return any(v).(T), nil
+	case string:
+		return any(value).(T), nil
+	default:
+		return zero, errors.New("unsupported type")
 	}
-	boolValue, err := strconv.ParseBool(value)
+}
+
+func getEnvVarOrDefault[T any](key string, defaultValue T) T {
+	val, err := getEnvVarAs[T](key)
 	if err != nil {
 		return defaultValue
 	}
-	return boolValue
+	return val
 }
diff --git a/tests/e2e/installer.go b/tests/e2e/installer.go
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package e2e
 
 import (
diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,38 +38,36 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		BeforeAll(func(ctx context.Context) {
 			hostOutput, _, err = runner.Run("nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
+
+			_, _, err := runner.Run("docker pull ubuntu")
+			Expect(err).ToNot(HaveOccurred())
 		})
 
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
-			By("Running docker run with --gpus=all --runtime=nvidia --gpus all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia --gpus all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			By("Running docker run with --gpus all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
@@ -82,8 +79,12 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	When("Running the cuda-vectorAdd sample", Ordered, func() {
 		var referenceOutput string
 
+		BeforeAll(func(ctx context.Context) {
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			Expect(err).ToNot(HaveOccurred())
+		})
+
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			var err error
 			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
@@ -92,21 +93,18 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		})
 
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia --gpus all")
 			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			By("Running docker run with --gpus all")
 			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
@@ -116,37 +114,33 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	// A deviceQuery sample runs in a container with access to all GPUs
 	// The following should all produce the same result.
 	When("Running the cuda-deviceQuery sample", Ordered, func() {
+		var referenceOutput string
+
 		BeforeAll(func(ctx context.Context) {
 			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 		})
 
-		var referenceOutput string
-
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			var err error
 			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
 		})
 
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia --gpus all")
 			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			By("Running docker run with --gpus all")
 			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
@@ -155,6 +149,9 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 
 	When("Testing CUDA Forward compatibility", Ordered, func() {
 		BeforeAll(func(ctx context.Context) {
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
+			Expect(err).ToNot(HaveOccurred())
+
 			compatOutput, _, err := runner.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(compatOutput).ToNot(BeEmpty())
@@ -178,21 +175,18 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		})
 
 		It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
-			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})
 
 		It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
-			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true  --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})
 
 		It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
-			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --gpus all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
diff --git a/tests/e2e/runner.go b/tests/e2e/runner.go
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/tests/go.mod b/tests/go.mod

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,5 @@`
`1`	`1`	`/*`
`2`		`- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.`
`3`		`- * SPDX-License-Identifier: Apache-2.0`
	`2`	`+ * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.`
`4`	`3`	`*`
`5`	`4`	`* Licensed under the Apache License, Version 2.0 (the "License");`
`6`	`5`	`* you may not use this file except in compliance with the License.`