diff --git a/hack/release-quickstart.sh b/hack/release-quickstart.sh
index a21047c3..832bd872 100755
--- a/hack/release-quickstart.sh
+++ b/hack/release-quickstart.sh
@@ -51,9 +51,9 @@ sed -i.bak '/us-central1-docker.pkg.dev\/k8s-staging-images\/gateway-api-inferen
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$EXT_PROC"
 
 # -----------------------------------------------------------------------------
-# Update config/manifests/vllm/deployment.yaml
+# Update config/manifests/vllm/gpu-deployment.yaml
 # -----------------------------------------------------------------------------
-VLLM_DEPLOY="config/manifests/vllm/deployment.yaml"
+VLLM_DEPLOY="config/manifests/vllm/gpu-deployment.yaml"
 echo "Updating ${VLLM_DEPLOY} ..."
 
 # Update the vLLM image version
diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 98ae94a3..b7b31000 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -24,7 +24,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
 #### GPU-Based Model Server
 
-    For this setup, you will need 3 GPUs to run the sample model server. Adjust the number of replicas in `./config/manifests/vllm/deployment.yaml` as needed.
+    For this setup, you will need 3 GPUs to run the sample model server. Adjust the number of replicas in `./config/manifests/vllm/gpu-deployment.yaml` as needed.
     Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
     Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
     ```bash
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index 3d068c9f..24a488db 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -69,7 +69,7 @@ const (
 	// clientManifest is the manifest for the client test resources.
 	clientManifest = "../testdata/client.yaml"
 	// modelServerManifest is the manifest for the model server test resources.
-	modelServerManifest = "../../config/manifests/vllm/deployment.yaml"
+	modelServerManifest = "../../config/manifests/vllm/gpu-deployment.yaml"
 	// modelServerSecretManifest is the manifest for the model server secret resource.
 	modelServerSecretManifest = "../testdata/model-secret.yaml"
 	// inferPoolManifest is the manifest for the inference pool CRD.
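The rename touches three call sites (the release script, the quickstart guide, and the e2e suite), so a repo-wide sanity check can confirm no stale references to the old path remain. This grep sketch is illustrative and not part of the change itself:

```bash
# List any remaining references to the old manifest name; the fallback
# echo fires only when grep finds nothing, i.e. the rename is complete.
grep -rn "vllm/deployment.yaml" hack/ site-src/ test/ config/ \
  || echo "no stale references to the old manifest path"
```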
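The guide hunk asks readers to create a Hugging Face secret and adjust replicas before deploying; the actual commands sit below the truncated bash fence and are not shown in this diff. A minimal sketch, assuming a secret named `hf-token` with key `token` and an `HF_TOKEN` environment variable (all illustrative names, not taken from this patch):

```bash
# Assumed names: secret "hf-token" and key "token" are illustrative;
# match whatever the vLLM manifest actually mounts.
kubectl create secret generic hf-token --from-literal=token="$HF_TOKEN"

# Deploy the sample vLLM model server from the renamed manifest.
kubectl apply -f config/manifests/vllm/gpu-deployment.yaml

# The guide calls for 3 GPUs; edit the replica count in the manifest,
# or scale after the fact (the deployment name here is an assumption).
kubectl scale deployment vllm-llama2-7b --replicas=3
```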
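In the e2e suite, `modelServerManifest` is resolved relative to the test package, hence the `../../` prefix. Assuming the suite runs under plain `go test` against a cluster with GPU nodes (the invocation is not documented in this diff), a run might look like:

```bash
# The suite applies gpu-deployment.yaml, so the target cluster needs
# schedulable GPUs; the timeout is an illustrative value.
go test ./test/e2e/... -v -timeout 30m
```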