From 391c444e8924abffb6903e3e0e674996f9b356f2 Mon Sep 17 00:00:00 2001
From: ahg-g
Date: Wed, 2 Apr 2025 14:30:13 +0000
Subject: [PATCH 1/2] adjust the gpu deployment to increase max batch size

---
 config/manifests/vllm/gpu-deployment.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml
index e7cb193e..b0fc3fdd 100644
--- a/config/manifests/vllm/gpu-deployment.yaml
+++ b/config/manifests/vllm/gpu-deployment.yaml
@@ -24,9 +24,17 @@ spec:
         - "1"
         - "--port"
         - "8000"
+        - "--max-num-seq"
+        - "1024"
+        - "--max-model-len"
+        - "2048"
+        - "--compilation-config"
+        - "3"
         - "--enable-lora"
         - "--max-loras"
         - "2"
+        - "--max-lora-rank"
+        - "8"
         - "--max-cpu-loras"
         - "12"
         env:

From 7f77fe38930088603c6c8b7dfb675420b2de316e Mon Sep 17 00:00:00 2001
From: Abdullah Gharaibeh <40361897+ahg-g@users.noreply.github.com>
Date: Tue, 8 Apr 2025 08:02:35 -0700
Subject: [PATCH 2/2] Apply suggestions from code review

---
 config/manifests/vllm/gpu-deployment.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml
index b0fc3fdd..53b4a7bc 100644
--- a/config/manifests/vllm/gpu-deployment.yaml
+++ b/config/manifests/vllm/gpu-deployment.yaml
@@ -26,8 +26,6 @@ spec:
         - "8000"
         - "--max-num-seq"
         - "1024"
-        - "--max-model-len"
-        - "2048"
         - "--compilation-config"
         - "3"
         - "--enable-lora"
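
Note: taken together, the two patches leave the vLLM container args in
config/manifests/vllm/gpu-deployment.yaml looking roughly like the sketch
below. Only lines that appear in the hunks are shown; the flags preceding
the first context line ("1") and the exact indentation in the manifest are
assumptions, not part of these patches.

    args:
    # ... earlier flags elided (not covered by the hunks above) ...
    - "1"
    - "--port"
    - "8000"
    # added by PATCH 1/2 to increase the max batch size (per its subject);
    # PATCH 2/2 later drops the --max-model-len / "2048" pair added alongside
    - "--max-num-seq"
    - "1024"
    - "--compilation-config"
    - "3"
    - "--enable-lora"
    - "--max-loras"
    - "2"
    # added by PATCH 1/2
    - "--max-lora-rank"
    - "8"
    - "--max-cpu-loras"
    - "12"
    env:
    # ... rest of the container spec unchanged ...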