Skip to content

Commit 1e317c9

Browse files
Authored by Frank Di Natale
[feat]: Allow for a settable end-of-sequence/padding token in max throughput benchmark. (#3776)
* Move world options to a different group for clarity. Signed-off-by: Frank Di Natale <[email protected]>
* Add eos_id option. Signed-off-by: Frank Di Natale <[email protected]>
---------
Signed-off-by: Frank Di Natale <[email protected]>
1 parent 9cc5922 commit 1e317c9

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

tensorrt_llm/bench/benchmark/throughput.py

+27-14
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@
9494
required=False,
9595
help="Pass in a dataset file for parsing instead of stdin.",
9696
)
97+
@optgroup.option(
98+
"--eos_id",
99+
type=int,
100+
default=-1,
101+
required=False,
102+
help=
103+
"Set the end-of-sequence token for the benchmark. Set to -1 to disable EOS.",
104+
)
97105
@optgroup.option(
98106
"--modality",
99107
type=click.Choice(["image", "video"]),
@@ -122,6 +130,22 @@
122130
default=2,
123131
help="Number of requests warm up benchmark.",
124132
)
133+
@optgroup.option(
134+
"--target_input_len",
135+
default=None,
136+
type=click.IntRange(min=1),
137+
help="Target (average) input length for tuning heuristics.",
138+
)
139+
@optgroup.option(
140+
"--target_output_len",
141+
default=None,
142+
type=click.IntRange(min=1),
143+
help="Target (average) sequence length for tuning heuristics.",
144+
)
145+
@optgroup.group(
146+
"World Configuration",
147+
help="Options for configuring the backend multi-GPU world.",
148+
)
125149
@optgroup.option(
126150
"--tp",
127151
type=int,
@@ -146,18 +170,6 @@
146170
default=None,
147171
help="expert cluster parallelism size",
148172
)
149-
@optgroup.option(
150-
"--target_input_len",
151-
default=None,
152-
type=click.IntRange(min=1),
153-
help="Target (average) input length for tuning heuristics.",
154-
)
155-
@optgroup.option(
156-
"--target_output_len",
157-
default=None,
158-
type=click.IntRange(min=1),
159-
help="Target (average) sequence length for tuning heuristics.",
160-
)
161173
@optgroup.group("Request Load Control Options",
162174
cls=MutuallyExclusiveOptionGroup,
163175
help="Limits how requests are loaded.")
@@ -218,6 +230,7 @@ def throughput_command(
218230
# Parameters from CLI
219231
# Model, experiment, and engine params
220232
dataset_path: Path = params.pop("dataset")
233+
eos_id: int = params.pop("eos_id")
221234
warmup: int = params.get("warmup")
222235
num_requests: int = params.pop("num_requests")
223236
max_seq_len: int = params.pop("max_seq_len")
@@ -329,8 +342,8 @@ def throughput_command(
329342
else:
330343
llm = LLM(**kwargs)
331344

332-
sampling_params = SamplingParams(end_id=-1,
333-
pad_id=-1,
345+
sampling_params = SamplingParams(end_id=eos_id,
346+
pad_id=eos_id,
334347
beam_width=beam_width)
335348

336349
# Perform warmup if requested.

0 commit comments

Comments (0)