diff --git a/hack/release-quickstart.sh b/hack/release-quickstart.sh
index a21047c3..832bd872 100755
--- a/hack/release-quickstart.sh
+++ b/hack/release-quickstart.sh
@@ -51,9 +51,9 @@ sed -i.bak '/us-central1-docker.pkg.dev\/k8s-staging-images\/gateway-api-inferen
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$EXT_PROC"
 
 # -----------------------------------------------------------------------------
-# Update config/manifests/vllm/deployment.yaml
+# Update config/manifests/vllm/gpu-deployment.yaml
 # -----------------------------------------------------------------------------
-VLLM_DEPLOY="config/manifests/vllm/deployment.yaml"
+VLLM_DEPLOY="config/manifests/vllm/gpu-deployment.yaml"
 echo "Updating ${VLLM_DEPLOY} ..."
 
 # Update the vLLM image version
diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 98ae94a3..b7b31000 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -24,7 +24,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
 #### GPU-Based Model Server
 
-    For this setup, you will need 3 GPUs to run the sample model server. Adjust the number of replicas in `./config/manifests/vllm/deployment.yaml` as needed.
+    For this setup, you will need 3 GPUs to run the sample model server. Adjust the number of replicas in `./config/manifests/vllm/gpu-deployment.yaml` as needed.
     Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
     Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
     ```bash
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index 3d068c9f..24a488db 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -69,7 +69,7 @@ const (
 	// clientManifest is the manifest for the client test resources.
 	clientManifest = "../testdata/client.yaml"
 	// modelServerManifest is the manifest for the model server test resources.
-	modelServerManifest = "../../config/manifests/vllm/deployment.yaml"
+	modelServerManifest = "../../config/manifests/vllm/gpu-deployment.yaml"
 	// modelServerSecretManifest is the manifest for the model server secret resource.
 	modelServerSecretManifest = "../testdata/model-secret.yaml"
 	// inferPoolManifest is the manifest for the inference pool CRD.
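The rename touches three call sites (the release script, the quickstart guide, and the e2e suite), so a repo-wide sanity check can confirm no stale references to the old path remain. This grep sketch is illustrative and not part of the change itself:

```bash
# List any remaining references to the old manifest name; the fallback
# echo fires only when grep finds nothing, i.e. the rename is complete.
grep -rn "vllm/deployment.yaml" hack/ site-src/ test/ config/ \
  || echo "no stale references to the old manifest path"
```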
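The guide hunk asks readers to create a Hugging Face secret and adjust replicas before deploying; the actual commands sit below the truncated bash fence and are not shown in this diff. A minimal sketch, assuming a secret named `hf-token` with key `token` and an `HF_TOKEN` environment variable (all illustrative names, not taken from this patch):

```bash
# Assumed names: secret "hf-token" and key "token" are illustrative;
# match whatever the vLLM manifest actually mounts.
kubectl create secret generic hf-token --from-literal=token="$HF_TOKEN"

# Deploy the sample vLLM model server from the renamed manifest.
kubectl apply -f config/manifests/vllm/gpu-deployment.yaml

# The guide calls for 3 GPUs; edit the replica count in the manifest,
# or scale after the fact (the deployment name here is an assumption).
kubectl scale deployment vllm-llama2-7b --replicas=3
```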
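In the e2e suite, `modelServerManifest` is resolved relative to the test package, hence the `../../` prefix. Assuming the suite runs under plain `go test` against a cluster with GPU nodes (the invocation is not documented in this diff), a run might look like:

```bash
# The suite applies gpu-deployment.yaml, so the target cluster needs
# schedulable GPUs; the timeout is an illustrative value.
go test ./test/e2e/... -v -timeout 30m
```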