From d1669b8e5c6ece3b11fcde0325b4799c0ac5f9e6 Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Tue, 25 Mar 2025 13:33:12 -0400
Subject: [PATCH] Configure gpu-deployment.yaml to force vLLM v1 with LoRA

Until 0.8.3 is released, using the LoRA flag disables automatic v1 opt-in.
---
 config/manifests/vllm/gpu-deployment.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml
index ecff81ec..e9507601 100644
--- a/config/manifests/vllm/gpu-deployment.yaml
+++ b/config/manifests/vllm/gpu-deployment.yaml
@@ -33,6 +33,10 @@ spec:
         - '{"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
         - '{"name": "tweet-summary-1", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
         env:
+        # Enabling LoRA support temporarily disables automatic v1; force it on
+        # until vLLM 0.8.3 is released.
+        - name: VLLM_USE_V1
+          value: "1"
         - name: PORT
          value: "8000"
        - name: HUGGING_FACE_HUB_TOKEN
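
For reviewers skimming the hunk, the merged view of the start of the container's
env list after this patch applies would read roughly as below. This is only a
sketch assembled from the diff context above: indentation is normalized and any
env entries of the manifest not visible in the hunk are omitted.

        env:
        # Enabling LoRA support temporarily disables automatic v1; force it on
        # until vLLM 0.8.3 is released.
        - name: VLLM_USE_V1
          value: "1"
        - name: PORT
          value: "8000"
        - name: HUGGING_FACE_HUB_TOKEN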