
Commit 0e74d79

[V1] Increase default batch size for H100/H200 (#12369)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent: 55ef66e

1 file changed: 16 additions, 5 deletions


vllm/engine/arg_utils.py

Lines changed: 16 additions & 5 deletions
@@ -1279,11 +1279,22 @@ def _override_v1_engine_args(self, usage_context: UsageContext) -> None:
         self.enable_chunked_prefill = True
         # When no user override, set the default values based on the usage
         # context.
-        # TODO(woosuk): Tune the default values for different hardware.
-        default_max_num_batched_tokens = {
-            UsageContext.LLM_CLASS: 8192,
-            UsageContext.OPENAI_API_SERVER: 2048,
-        }
+        # Use different default values for different hardware.
+        from vllm.platforms import current_platform
+        device_name = current_platform.get_device_name().lower()
+        if "h100" in device_name or "h200" in device_name:
+            # For H100 and H200, we use larger default values.
+            default_max_num_batched_tokens = {
+                UsageContext.LLM_CLASS: 16384,
+                UsageContext.OPENAI_API_SERVER: 8192,
+            }
+        else:
+            # TODO(woosuk): Tune the default values for other hardware.
+            default_max_num_batched_tokens = {
+                UsageContext.LLM_CLASS: 8192,
+                UsageContext.OPENAI_API_SERVER: 2048,
+            }
+
         if (self.max_num_batched_tokens is None
                 and usage_context in default_max_num_batched_tokens):
             self.max_num_batched_tokens = default_max_num_batched_tokens[
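
As a quick sanity check, the branch this change takes on a given machine can be previewed by querying the same platform API the diff imports. A minimal sketch, assuming a vLLM install where vllm.platforms.current_platform.get_device_name() is available (the default token values are taken from the diff above):

# Sketch: preview which default bucket this commit would pick on the
# current machine. Uses the same device-name check as the diff above.
from vllm.platforms import current_platform

device_name = current_platform.get_device_name().lower()
if "h100" in device_name or "h200" in device_name:
    print(f"{device_name}: larger defaults (LLM_CLASS=16384, OPENAI_API_SERVER=8192)")
else:
    print(f"{device_name}: baseline defaults (LLM_CLASS=8192, OPENAI_API_SERVER=2048)")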

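Note that these values only apply when max_num_batched_tokens is unset; the guard in the diff (self.max_num_batched_tokens is None) means an explicit user setting still takes precedence. A hedged usage sketch, assuming the keyword argument is forwarded from the LLM class to EngineArgs as in current vLLM (the model name is illustrative, not from the commit):

# Sketch: an explicit max_num_batched_tokens overrides the hardware-based
# default, regardless of whether an H100/H200 is detected.
from vllm import LLM

llm = LLM(
    model="facebook/opt-125m",       # illustrative model, not from the commit
    max_num_batched_tokens=4096,     # user override; the default is skipped
    enable_chunked_prefill=True,
)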