From d1669b8e5c6ece3b11fcde0325b4799c0ac5f9e6 Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Tue, 25 Mar 2025 13:33:12 -0400
Subject: [PATCH] Configure gpu-deployment.yaml to force vLLM v1 with LoRA

Until 0.8.3 is released, using the LoRA flag disables automatic v1 opt-in.
---
 config/manifests/vllm/gpu-deployment.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml
index ecff81ec..e9507601 100644
--- a/config/manifests/vllm/gpu-deployment.yaml
+++ b/config/manifests/vllm/gpu-deployment.yaml
@@ -33,6 +33,10 @@ spec:
         - '{"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
         - '{"name": "tweet-summary-1", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
         env:
+        # Enabling LoRA support temporarily disables automatic v1; force it on
+        # until vLLM 0.8.3 is released.
+        - name: VLLM_USE_V1
+          value: "1"
         - name: PORT
          value: "8000"
        - name: HUGGING_FACE_HUB_TOKEN
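
For reviewers skimming the hunk, the merged view of the start of the container's
env list after this patch applies would read roughly as below. This is only a
sketch assembled from the diff context above: indentation is normalized and any
env entries of the manifest not visible in the hunk are omitted.

        env:
        # Enabling LoRA support temporarily disables automatic v1; force it on
        # until vLLM 0.8.3 is released.
        - name: VLLM_USE_V1
          value: "1"
        - name: PORT
          value: "8000"
        - name: HUGGING_FACE_HUB_TOKEN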