From 948dbdce0d257ee5c8ec864e7e1ed224caaa7c3b Mon Sep 17 00:00:00 2001
From: Cong Liu <conliu@google.com>
Date: Tue, 22 Oct 2024 09:53:28 -0700
Subject: [PATCH] Update vLLM deployment example to use 1 GPU since tensor
 parallelism is 1

---
 examples/poc/manifests/vllm/vllm-lora-deployment.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/poc/manifests/vllm/vllm-lora-deployment.yaml b/examples/poc/manifests/vllm/vllm-lora-deployment.yaml
index dcd729b0..fc9c1d6b 100644
--- a/examples/poc/manifests/vllm/vllm-lora-deployment.yaml
+++ b/examples/poc/manifests/vllm/vllm-lora-deployment.yaml
@@ -78,9 +78,9 @@ spec:
             timeoutSeconds: 1
           resources:
             limits:
-              nvidia.com/gpu: 2
+              nvidia.com/gpu: 1
             requests:
-              nvidia.com/gpu: 2
+              nvidia.com/gpu: 1
           volumeMounts:
             - mountPath: /data
               name: data