added support for testing cpu example in e2e tests (#485)

nirrozenbaum · web-flow · commit a1c95a532a13 · 2025-03-14T11:15:47.000-07:00
* added support for testing cpu example in e2e tests

Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>

* minor change in e2e test

Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>

* fixed linter error

Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>

* fixed a typo

Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>

---------

Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>
diff --git a/Makefile b/Makefile
@@ -32,6 +32,8 @@ IMAGE_REGISTRY ?= us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-infe
 IMAGE_NAME := epp
 IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(IMAGE_NAME)
 IMAGE_TAG ?= $(IMAGE_REPO):$(GIT_TAG)
+ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+E2E_MANIFEST_PATH ?= config/manifests/vllm/gpu-deployment.yaml
 
 SYNCER_IMAGE_NAME := lora-syncer
 SYNCER_IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(SYNCER_IMAGE_NAME)
@@ -126,8 +128,8 @@ test-integration: manifests generate fmt vet envtest ## Run tests.
 	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out
 
 .PHONY: test-e2e
-test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs.
-	go test ./test/e2e/epp -v -ginkgo.v
+test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster. When using default configuration, the tests need at least 3 available GPUs.
+	MANIFEST_PATH=$(ROOT_DIR)/$(E2E_MANIFEST_PATH) go test ./test/e2e/epp/ -v -ginkgo.v
 
 .PHONY: lint
 lint: golangci-lint ## Run golangci-lint linter
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
@@ -68,8 +68,6 @@ const (
 	inferExtName = "inference-gateway-ext-proc"
 	// clientManifest is the manifest for the client test resources.
 	clientManifest = "../../testdata/client.yaml"
-	// modelServerManifest is the manifest for the model server test resources.
-	modelServerManifest = "../../../config/manifests/vllm/gpu-deployment.yaml"
 	// modelServerSecretManifest is the manifest for the model server secret resource.
 	modelServerSecretManifest = "../../testdata/model-secret.yaml"
 	// inferPoolManifest is the manifest for the inference pool CRD.
@@ -80,6 +78,8 @@ const (
 	inferExtManifest = "../../../config/manifests/ext_proc.yaml"
 	// envoyManifest is the manifest for the envoy proxy test resources.
 	envoyManifest = "../../testdata/envoy.yaml"
+	// modelServerManifestFilepathEnvVar is the env var that holds the absolute path to the manifest for the model server test resources.
+	modelServerManifestFilepathEnvVar = "MANIFEST_PATH"
 )
 
 var (
@@ -107,6 +107,7 @@ var _ = ginkgo.BeforeSuite(func() {
 })
 
 func setupInfra() {
+	modelServerManifest := readModelServerManifestPath()
 	crds := map[string]string{
 		"inferencepools.inference.networking.x-k8s.io":  inferPoolManifest,
 		"inferencemodels.inference.networking.x-k8s.io": inferModelManifest,
@@ -145,6 +146,7 @@ func setupSuite() {
 
 	kubeCli, err = kubernetes.NewForConfig(cfg)
 	gomega.Expect(err).NotTo(gomega.HaveOccurred())
+	gomega.Expect(kubeCli).NotTo(gomega.BeNil())
 }
 
 func cleanupResources() {
@@ -181,6 +183,14 @@ func namespaceExists(k8sClient client.Client, ns string) {
 	}, existsTimeout, interval)
 }
 
+// readModelServerManifestPath returns the model server deployment manifest path read from the MANIFEST_PATH environment variable, asserting that the value is non-empty.
+func readModelServerManifestPath() string {
+	ginkgo.By(fmt.Sprintf("Ensuring %s environment variable is set", modelServerManifestFilepathEnvVar))
+	modelServerManifestFilepath := os.Getenv(modelServerManifestFilepathEnvVar)
+	gomega.Expect(modelServerManifestFilepath).NotTo(gomega.BeEmpty(), modelServerManifestFilepathEnvVar+" is not set")
+	return modelServerManifestFilepath
+}
+
 // createCRDs creates the Inference Extension CRDs used for testing.
 func createCRDs(k8sClient client.Client, crds map[string]string) {
 	for name, path := range crds {
@@ -215,6 +225,29 @@ func createClient(k8sClient client.Client, filePath string) {
 
 // createModelServer creates the model server resources used for testing from the given filePaths.
 func createModelServer(k8sClient client.Client, secretPath, deployPath string) {
+	ginkgo.By("Ensuring the model server manifest points to an existing file")
+	modelServerManifestArray := readYaml(deployPath)
+	gomega.Expect(modelServerManifestArray).NotTo(gomega.BeEmpty())
+	modelServerManifestYaml := modelServerManifestArray[0]
+	if strings.Contains(modelServerManifestYaml, "hf-token") {
+		createHfSecret(k8sClient, secretPath)
+	}
+
+	ginkgo.By("Creating model server resources from manifest: " + deployPath)
+	createObjsFromYaml(k8sClient, modelServerManifestArray)
+
+	// Block until the model server Deployment named modelServerName can be fetched in nsName.
+	deploy := &appsv1.Deployment{}
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: modelServerName}, deploy)
+	}, existsTimeout, interval)
+
+	// Then wait, up to modelReadyTimeout, for that Deployment to become available.
+	testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval)
+}
+
+// createHfSecret reads HF_TOKEN from the environment and creates a secret that contains the access token.
+func createHfSecret(k8sClient client.Client, secretPath string) {
 	ginkgo.By("Ensuring the HF_TOKEN environment variable is set")
 	token := os.Getenv("HF_TOKEN")
 	gomega.Expect(token).NotTo(gomega.BeEmpty(), "HF_TOKEN is not set")
@@ -226,25 +259,13 @@ func createModelServer(k8sClient client.Client, secretPath, deployPath string) {
 		outManifests = append(outManifests, strings.Replace(m, "$HF_TOKEN", token, 1))
 	}
 
-	ginkgo.By("Creating model server secret resource from manifest: " + deployPath)
+	ginkgo.By("Creating model server secret resource")
 	createObjsFromYaml(k8sClient, outManifests)
 
 	// Wait for the secret to exist before proceeding with test.
 	testutils.EventuallyExists(ctx, func() error {
 		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "hf-token"}, &corev1.Secret{})
 	}, existsTimeout, interval)
-
-	ginkgo.By("Creating model server resources from manifest: " + deployPath)
-	applyYAMLFile(k8sClient, deployPath)
-
-	// Wait for the deployment to exist.
-	deploy := &appsv1.Deployment{}
-	testutils.EventuallyExists(ctx, func() error {
-		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: modelServerName}, deploy)
-	}, existsTimeout, interval)
-
-	// Wait for the deployment to be available.
-	testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval)
 }
 
 // createEnvoy creates the envoy proxy resources used for testing from the given filePath.
diff --git a/test/e2e/epp/e2e_test.go b/test/e2e/epp/e2e_test.go
@@ -49,11 +49,7 @@ var _ = ginkgo.Describe("InferencePool", func() {
 
 			ginkgo.By("Ensuring the InferenceModel resource exists in the namespace")
 			gomega.Eventually(func() error {
-				err := cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
-				if err != nil {
-					return err
-				}
-				return nil
+				return cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
 			}, existsTimeout, interval).Should(gomega.Succeed())
 
 			ginkgo.By("Verifying connectivity through the inference extension")