From 948dbdce0d257ee5c8ec864e7e1ed224caaa7c3b Mon Sep 17 00:00:00 2001
From: Cong Liu
Date: Tue, 22 Oct 2024 09:53:28 -0700
Subject: [PATCH] Update vllm deployment example to use 1 GPU as tensor parallelism is 1

---
 examples/poc/manifests/vllm/vllm-lora-deployment.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/poc/manifests/vllm/vllm-lora-deployment.yaml b/examples/poc/manifests/vllm/vllm-lora-deployment.yaml
index dcd729b0..fc9c1d6b 100644
--- a/examples/poc/manifests/vllm/vllm-lora-deployment.yaml
+++ b/examples/poc/manifests/vllm/vllm-lora-deployment.yaml
@@ -78,9 +78,9 @@ spec:
             timeoutSeconds: 1
           resources:
             limits:
-              nvidia.com/gpu: 2
+              nvidia.com/gpu: 1
             requests:
-              nvidia.com/gpu: 2
+              nvidia.com/gpu: 1
           volumeMounts:
             - mountPath: /data
               name: data