
Commit 46e678b

[Minor] Use larger batch sizes for A100/B100/B200/MI300x (#17073)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent 6b2427f commit 46e678b

File tree

1 file changed (+5, -5 lines)


vllm/engine/arg_utils.py

Lines changed: 5 additions & 5 deletions
@@ -35,7 +35,7 @@
 from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.usage.usage_lib import UsageContext
-from vllm.utils import FlexibleArgumentParser, is_in_ray_actor
+from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor

 # yapf: enable

@@ -1625,13 +1625,13 @@ def _set_default_args_v1(self, usage_context: UsageContext) -> None:
         # values for non-H100/H200 GPUs.
         try:
             from vllm.platforms import current_platform
-            device_name = current_platform.get_device_name().lower()
+            device_memory = current_platform.get_device_total_memory()
         except Exception:
             # This is only used to set default_max_num_batched_tokens
-            device_name = "no-device"
+            device_memory = 0

-        if "h100" in device_name or "h200" in device_name:
-            # For H100 and H200, we use larger default values.
+        if device_memory >= 70 * GiB_bytes:
+            # For GPUs like H100 and MI300x, use larger default values.
             default_max_num_batched_tokens = {
                 UsageContext.LLM_CLASS: 16384,
                 UsageContext.OPENAI_API_SERVER: 8192,
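
In effect, the commit swaps a device-name substring check for a memory threshold, so any GPU reporting enough memory (A100/B100/B200/MI300x and the like) picks up the larger defaults without being listed by name. Below is a minimal standalone sketch of that logic, not the vLLM implementation itself: the 70 GiB threshold, GiB_bytes, and the 16384/8192 values come from the diff above, while the helper name, the stubbed UsageContext, and the fallback argument are illustrative assumptions.

```python
# Minimal sketch (not the actual vLLM code) of the memory-based defaulting
# introduced by this commit. The 70 GiB threshold, GiB_bytes, and the
# 16384/8192 values mirror the diff above; the helper name, the enum stub,
# and the `fallback` argument are illustrative assumptions.
from enum import Enum


class UsageContext(Enum):
    LLM_CLASS = "LLM_CLASS"
    OPENAI_API_SERVER = "OPENAI_API_SERVER"


GiB_bytes = 1 << 30  # 2**30 bytes, as in vllm.utils.GiB_bytes


def pick_max_num_batched_tokens(usage_context: UsageContext,
                                device_memory: int,
                                fallback: int) -> int:
    """Choose a default max_num_batched_tokens from total device memory.

    Any GPU reporting at least 70 GiB (H100, H200, B100, B200, MI300x, ...)
    gets the larger defaults; otherwise the caller-supplied fallback is
    used. No device-name substring matching is involved.
    """
    if device_memory >= 70 * GiB_bytes:
        large_defaults = {
            UsageContext.LLM_CLASS: 16384,
            UsageContext.OPENAI_API_SERVER: 8192,
        }
        return large_defaults.get(usage_context, fallback)
    return fallback


# Example: an 80 GiB device qualifies, a 40 GiB device falls back.
assert pick_max_num_batched_tokens(UsageContext.LLM_CLASS, 80 * GiB_bytes, 2048) == 16384
assert pick_max_num_batched_tokens(UsageContext.LLM_CLASS, 40 * GiB_bytes, 2048) == 2048
```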
