Skip to content

added support for testing cpu example in e2e tests #485

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ IMAGE_REGISTRY ?= us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-infe
IMAGE_NAME := epp
IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(IMAGE_NAME)
IMAGE_TAG ?= $(IMAGE_REPO):$(GIT_TAG)
ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
E2E_MANIFEST_PATH ?= config/manifests/vllm/gpu-deployment.yaml

SYNCER_IMAGE_NAME := lora-syncer
SYNCER_IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(SYNCER_IMAGE_NAME)
Expand Down Expand Up @@ -126,8 +128,8 @@ test-integration: manifests generate fmt vet envtest ## Run tests.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out

.PHONY: test-e2e
test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs.
go test ./test/e2e/epp -v -ginkgo.v
test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster. When using default configuration, the tests need at least 3 available GPUs.
MANIFEST_PATH=$(ROOT_DIR)/$(E2E_MANIFEST_PATH) go test ./test/e2e/epp/ -v -ginkgo.v

.PHONY: lint
lint: golangci-lint ## Run golangci-lint linter
Expand Down
51 changes: 36 additions & 15 deletions test/e2e/epp/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ const (
inferExtName = "inference-gateway-ext-proc"
// clientManifest is the manifest for the client test resources.
clientManifest = "../../testdata/client.yaml"
// modelServerManifest is the manifest for the model server test resources.
modelServerManifest = "../../../config/manifests/vllm/gpu-deployment.yaml"
// modelServerSecretManifest is the manifest for the model server secret resource.
modelServerSecretManifest = "../../testdata/model-secret.yaml"
// inferPoolManifest is the manifest for the inference pool CRD.
Expand All @@ -80,6 +78,8 @@ const (
inferExtManifest = "../../../config/manifests/ext_proc.yaml"
// envoyManifest is the manifest for the envoy proxy test resources.
envoyManifest = "../../testdata/envoy.yaml"
// modelServerManifestFilepathEnvVar is the env var that holds the absolute path to the manifest for the model server test resources.
modelServerManifestFilepathEnvVar = "MANIFEST_PATH"
)

var (
Expand Down Expand Up @@ -107,6 +107,7 @@ var _ = ginkgo.BeforeSuite(func() {
})

func setupInfra() {
modelServerManifest := readModelServerManifestPath()
crds := map[string]string{
"inferencepools.inference.networking.x-k8s.io": inferPoolManifest,
"inferencemodels.inference.networking.x-k8s.io": inferModelManifest,
Expand Down Expand Up @@ -145,6 +146,7 @@ func setupSuite() {

kubeCli, err = kubernetes.NewForConfig(cfg)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(kubeCli).NotTo(gomega.BeNil())
}

func cleanupResources() {
Expand Down Expand Up @@ -181,6 +183,14 @@ func namespaceExists(k8sClient client.Client, ns string) {
}, existsTimeout, interval)
}

// readModelServerManifestPath reads from env var the absolute filepath to model server deployment for testing.
func readModelServerManifestPath() string {
ginkgo.By(fmt.Sprintf("Ensuring %s environment variable is set", modelServerManifestFilepathEnvVar))
modelServerManifestFilepath := os.Getenv(modelServerManifestFilepathEnvVar)
gomega.Expect(modelServerManifestFilepath).NotTo(gomega.BeEmpty(), modelServerManifestFilepathEnvVar+" is not set")
return modelServerManifestFilepath
}

// createCRDs creates the Inference Extension CRDs used for testing.
func createCRDs(k8sClient client.Client, crds map[string]string) {
for name, path := range crds {
Expand Down Expand Up @@ -215,6 +225,29 @@ func createClient(k8sClient client.Client, filePath string) {

// createModelServer creates the model server resources used for testing from
// the manifest at deployPath, then waits for the model server Deployment to
// exist and become available. When the manifest references "hf-token", the
// secret described by secretPath is created first via createHfSecret.
func createModelServer(k8sClient client.Client, secretPath, deployPath string) {
	ginkgo.By("Ensuring the model server manifest points to an existing file")
	docs := readYaml(deployPath)
	gomega.Expect(docs).NotTo(gomega.BeEmpty())

	// NOTE(review): only the first entry returned by readYaml is inspected for
	// the "hf-token" reference — presumably that entry holds the Deployment;
	// confirm for manifests that contain several documents.
	if needsHfToken := strings.Contains(docs[0], "hf-token"); needsHfToken {
		createHfSecret(k8sClient, secretPath)
	}

	ginkgo.By("Creating model server resources from manifest: " + deployPath)
	createObjsFromYaml(k8sClient, docs)

	// Poll until the Deployment object can be fetched; the same object is
	// then handed to the availability check below.
	modelServerDeploy := &appsv1.Deployment{}
	fetchDeploy := func() error {
		key := types.NamespacedName{Namespace: nsName, Name: modelServerName}
		return k8sClient.Get(ctx, key, modelServerDeploy)
	}
	testutils.EventuallyExists(ctx, fetchDeploy, existsTimeout, interval)

	// Block until the Deployment reports itself available.
	testutils.DeploymentAvailable(ctx, k8sClient, modelServerDeploy, modelReadyTimeout, interval)
}

// createHfSecret reads HF_TOKEN from the environment and creates a secret that contains the access token.
func createHfSecret(k8sClient client.Client, secretPath string) {
ginkgo.By("Ensuring the HF_TOKEN environment variable is set")
token := os.Getenv("HF_TOKEN")
gomega.Expect(token).NotTo(gomega.BeEmpty(), "HF_TOKEN is not set")
Expand All @@ -226,25 +259,13 @@ func createModelServer(k8sClient client.Client, secretPath, deployPath string) {
outManifests = append(outManifests, strings.Replace(m, "$HF_TOKEN", token, 1))
}

ginkgo.By("Creating model server secret resource from manifest: " + deployPath)
ginkgo.By("Creating model server secret resource")
createObjsFromYaml(k8sClient, outManifests)

// Wait for the secret to exist before proceeding with test.
testutils.EventuallyExists(ctx, func() error {
return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "hf-token"}, &corev1.Secret{})
}, existsTimeout, interval)

ginkgo.By("Creating model server resources from manifest: " + deployPath)
applyYAMLFile(k8sClient, deployPath)

// Wait for the deployment to exist.
deploy := &appsv1.Deployment{}
testutils.EventuallyExists(ctx, func() error {
return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: modelServerName}, deploy)
}, existsTimeout, interval)

// Wait for the deployment to be available.
testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval)
}

// createEnvoy creates the envoy proxy resources used for testing from the given filePath.
Expand Down
6 changes: 1 addition & 5 deletions test/e2e/epp/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,7 @@ var _ = ginkgo.Describe("InferencePool", func() {

ginkgo.By("Ensuring the InferenceModel resource exists in the namespace")
gomega.Eventually(func() error {
err := cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
if err != nil {
return err
}
return nil
return cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
}, existsTimeout, interval).Should(gomega.Succeed())

ginkgo.By("Verifying connectivity through the inference extension")
Expand Down