Skip to content

Commit e61cac9

Browse files
FrankD412 and nvpohanh
authored and committed
[feat]: Allow for a settable end-of-sequence/padding token in max throughput benchmark. (NVIDIA#3776)
* Move world options to a different group for clarity.

  Signed-off-by: Frank Di Natale <[email protected]>

* Add eos_id option.

  Signed-off-by: Frank Di Natale <[email protected]>

---------

Signed-off-by: Frank Di Natale <[email protected]>
1 parent 81a5592 commit e61cac9

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

tensorrt_llm/bench/benchmark/throughput.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,14 @@
9292
required=False,
9393
help="Pass in a dataset file for parsing instead of stdin.",
9494
)
95+
@optgroup.option(
96+
"--eos_id",
97+
type=int,
98+
default=-1,
99+
required=False,
100+
help=
101+
"Set the end-of-sequence token for the benchmark. Set to -1 to disable EOS.",
102+
)
95103
@optgroup.option(
96104
"--num_requests",
97105
type=int,
@@ -106,6 +114,22 @@
106114
default=2,
107115
help="Number of requests warm up benchmark.",
108116
)
117+
@optgroup.option(
118+
"--target_input_len",
119+
default=None,
120+
type=click.IntRange(min=1),
121+
help="Target (average) input length for tuning heuristics.",
122+
)
123+
@optgroup.option(
124+
"--target_output_len",
125+
default=None,
126+
type=click.IntRange(min=1),
127+
help="Target (average) sequence length for tuning heuristics.",
128+
)
129+
@optgroup.group(
130+
"World Configuration",
131+
help="Options for configuring the backend multi-GPU world.",
132+
)
109133
@optgroup.option(
110134
"--tp",
111135
type=int,
@@ -124,18 +148,6 @@
124148
default=None,
125149
help="expert parallelism size",
126150
)
127-
@optgroup.option(
128-
"--target_input_len",
129-
default=None,
130-
type=click.IntRange(min=1),
131-
help="Target (average) input length for tuning heuristics.",
132-
)
133-
@optgroup.option(
134-
"--target_output_len",
135-
default=None,
136-
type=click.IntRange(min=1),
137-
help="Target (average) sequence length for tuning heuristics.",
138-
)
139151
@optgroup.group("Request Load Control Options",
140152
cls=MutuallyExclusiveOptionGroup,
141153
help="Limits how requests are loaded.")
@@ -198,6 +210,7 @@ def throughput_command(
198210
# Parameters from CLI
199211
# Model, experiment, and engine params
200212
dataset_path: Path = params.pop("dataset")
213+
eos_id: int = params.pop("eos_id")
201214
warmup: int = params.get("warmup")
202215
num_requests: int = params.pop("num_requests")
203216
max_seq_len: int = params.pop("max_seq_len")
@@ -299,8 +312,8 @@ def throughput_command(
299312
else:
300313
llm = LLM(**kwargs)
301314

302-
sampling_params = SamplingParams(end_id=-1,
303-
pad_id=-1,
315+
sampling_params = SamplingParams(end_id=eos_id,
316+
pad_id=eos_id,
304317
beam_width=beam_width)
305318

306319
# Perform warmup if requested.

0 commit comments

Comments (0)