|
92 | 92 | required=False,
|
93 | 93 | help="Pass in a dataset file for parsing instead of stdin.",
|
94 | 94 | )
|
| 95 | +@optgroup.option( |
| 96 | + "--eos_id", |
| 97 | + type=int, |
| 98 | + default=-1, |
| 99 | + required=False, |
| 100 | + help= |
| 101 | + "Set the end-of-sequence token for the benchmark. Set to -1 to disable EOS.", |
| 102 | +) |
95 | 103 | @optgroup.option(
|
96 | 104 | "--num_requests",
|
97 | 105 | type=int,
|
|
106 | 114 | default=2,
|
107 | 115 | help="Number of requests warm up benchmark.",
|
108 | 116 | )
|
| 117 | +@optgroup.option( |
| 118 | + "--target_input_len", |
| 119 | + default=None, |
| 120 | + type=click.IntRange(min=1), |
| 121 | + help="Target (average) input length for tuning heuristics.", |
| 122 | +) |
| 123 | +@optgroup.option( |
| 124 | + "--target_output_len", |
| 125 | + default=None, |
| 126 | + type=click.IntRange(min=1), |
| 127 | + help="Target (average) sequence length for tuning heuristics.", |
| 128 | +) |
| 129 | +@optgroup.group( |
| 130 | + "World Configuration", |
| 131 | + help="Options for configuring the backend multi-GPU world.", |
| 132 | +) |
109 | 133 | @optgroup.option(
|
110 | 134 | "--tp",
|
111 | 135 | type=int,
|
|
124 | 148 | default=None,
|
125 | 149 | help="expert parallelism size",
|
126 | 150 | )
|
127 |
| -@optgroup.option( |
128 |
| - "--target_input_len", |
129 |
| - default=None, |
130 |
| - type=click.IntRange(min=1), |
131 |
| - help="Target (average) input length for tuning heuristics.", |
132 |
| -) |
133 |
| -@optgroup.option( |
134 |
| - "--target_output_len", |
135 |
| - default=None, |
136 |
| - type=click.IntRange(min=1), |
137 |
| - help="Target (average) sequence length for tuning heuristics.", |
138 |
| -) |
139 | 151 | @optgroup.group("Request Load Control Options",
|
140 | 152 | cls=MutuallyExclusiveOptionGroup,
|
141 | 153 | help="Limits how requests are loaded.")
|
@@ -198,6 +210,7 @@ def throughput_command(
|
198 | 210 | # Parameters from CLI
|
199 | 211 | # Model, experiment, and engine params
|
200 | 212 | dataset_path: Path = params.pop("dataset")
|
| 213 | + eos_id: int = params.pop("eos_id") |
201 | 214 | warmup: int = params.get("warmup")
|
202 | 215 | num_requests: int = params.pop("num_requests")
|
203 | 216 | max_seq_len: int = params.pop("max_seq_len")
|
@@ -299,8 +312,8 @@ def throughput_command(
|
299 | 312 | else:
|
300 | 313 | llm = LLM(**kwargs)
|
301 | 314 |
|
302 |
| - sampling_params = SamplingParams(end_id=-1, |
303 |
| - pad_id=-1, |
| 315 | + sampling_params = SamplingParams(end_id=eos_id, |
| 316 | + pad_id=eos_id, |
304 | 317 | beam_width=beam_width)
|
305 | 318 |
|
306 | 319 | # Perform warmup if requested.
|
|
0 commit comments