@@ -94,6 +94,14 @@
     required=False,
     help="Pass in a dataset file for parsing instead of stdin.",
 )
+@optgroup.option(
+    "--eos_id",
+    type=int,
+    default=-1,
+    required=False,
+    help=
+    "Set the end-of-sequence token for the benchmark. Set to -1 to disable EOS.",
+)
 @optgroup.option(
     "--modality",
     type=click.Choice(["image", "video"]),
@@ -122,6 +130,22 @@
     default=2,
     help="Number of requests warm up benchmark.",
 )
+@optgroup.option(
+    "--target_input_len",
+    default=None,
+    type=click.IntRange(min=1),
+    help="Target (average) input length for tuning heuristics.",
+)
+@optgroup.option(
+    "--target_output_len",
+    default=None,
+    type=click.IntRange(min=1),
+    help="Target (average) sequence length for tuning heuristics.",
+)
+@optgroup.group(
+    "World Configuration",
+    help="Options for configuring the backend multi-GPU world.",
+)
 @optgroup.option(
     "--tp",
     type=int,
@@ -146,18 +170,6 @@
     default=None,
     help="expert cluster parallelism size",
 )
-@optgroup.option(
-    "--target_input_len",
-    default=None,
-    type=click.IntRange(min=1),
-    help="Target (average) input length for tuning heuristics.",
-)
-@optgroup.option(
-    "--target_output_len",
-    default=None,
-    type=click.IntRange(min=1),
-    help="Target (average) sequence length for tuning heuristics.",
-)
 @optgroup.group("Request Load Control Options",
                 cls=MutuallyExclusiveOptionGroup,
                 help="Limits how requests are loaded.")
@@ -218,6 +230,7 @@ def throughput_command(
     # Parameters from CLI
     # Model, experiment, and engine params
     dataset_path: Path = params.pop("dataset")
+    eos_id: int = params.pop("eos_id")
     warmup: int = params.get("warmup")
     num_requests: int = params.pop("num_requests")
     max_seq_len: int = params.pop("max_seq_len")
@@ -329,8 +342,8 @@ def throughput_command(
     else:
         llm = LLM(**kwargs)
 
-    sampling_params = SamplingParams(end_id=-1,
-                                     pad_id=-1,
+    sampling_params = SamplingParams(end_id=eos_id,
+                                     pad_id=eos_id,
                                      beam_width=beam_width)
 
     # Perform warmup if requested.
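
For context, the new options follow the decorator pattern already used in this file: `@optgroup.group` (from the `click-option-group` helpers) opens a named option group, and the `@optgroup.option` decorators stacked beneath it attach options to that group, which is why `--target_input_len` / `--target_output_len` had to move above the new "World Configuration" group header. The sketch below is a minimal, self-contained illustration of that pattern and is not part of this change; the `demo` command and the placement of `--eos_id` inside "World Configuration" are assumptions for illustration only (in the real command, `--eos_id` sits in an earlier group and "World Configuration" holds the parallelism options such as `--tp`).

```python
# Minimal sketch of the click / click-option-group decorator pattern used above.
# The `demo` command and the option grouping are illustrative assumptions,
# not the real benchmark CLI.
import click
from click_option_group import optgroup


@click.command()
@optgroup.group(
    "World Configuration",
    help="Options for configuring the backend multi-GPU world.",
)
@optgroup.option("--tp", type=int, default=1, help="Tensor parallelism size.")
@optgroup.option(
    "--eos_id",
    type=int,
    default=-1,
    help="End-of-sequence token id for the benchmark; -1 disables EOS.",
)
def demo(tp: int, eos_id: int) -> None:
    """Echo the parsed options."""
    click.echo(f"tp={tp} eos_id={eos_id}")


if __name__ == "__main__":
    demo()
```

Invoked as, say, `python demo.py --tp 2 --eos_id 50256` this would print `tp=2 eos_id=50256`; with no flags the defaults apply, mirroring the diff's default of `-1`, which keeps EOS disabled unless the user sets it explicitly.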