Skip to content

Commit a93fa1c

Browse files
ywang96 authored and tjtanaa committed
[Misc] Use VisionArena Dataset for VLM Benchmarking (vllm-project#12389)
Signed-off-by: Roger Wang <[email protected]>
1 parent 404466b commit a93fa1c

File tree

1 file changed

+12
-20
lines changed

1 file changed

+12
-20
lines changed

benchmarks/benchmark_serving.py

Lines changed: 12 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ def sample_sonnet_requests(
200200
return sampled_requests
201201

202202

203-
def sample_mmmu_pro_vision_requests(
203+
def sample_vision_arena_requests(
204204
dataset,
205205
num_requests: int,
206206
tokenizer: PreTrainedTokenizerBase,
@@ -212,13 +212,7 @@ def sample_mmmu_pro_vision_requests(
212212
if len(sampled_requests) == num_requests:
213213
break
214214

215-
# MMMU-Pro vision direct prompt
216-
# Ref: https://github.com/MMMU-Benchmark/MMMU/blob/6ce42f4d8f70c1841c67867152648974415b5cac/mmmu-pro/prompts.yaml#L5
217-
prompt = (
218-
"Answer with the option letter from the given choices directly. "
219-
"The last line of your response should be of the following "
220-
"format: 'Answer: $LETTER' (without quotes) where LETTER is one of "
221-
"options.")
215+
prompt = data["turns"][0][0]['content']
222216

223217
prompt_token_ids = tokenizer(prompt).input_ids
224218
if fixed_output_len is None:
@@ -230,10 +224,10 @@ def sample_mmmu_pro_vision_requests(
230224
output_len = fixed_output_len
231225

232226
assert isinstance(
233-
data["image"],
227+
data["images"][0],
234228
Image), ("Input image format must be `PIL.Image.Image`, "
235229
f"given {type(data['image'])}.")
236-
image: Image = data["image"]
230+
image: Image = data["images"][0]
237231
image = image.convert("RGB")
238232
image_data = io.BytesIO()
239233
image.save(image_data, format='JPEG')
@@ -252,27 +246,25 @@ def sample_mmmu_pro_vision_requests(
252246

253247
def sample_hf_requests(
254248
dataset_path: str,
255-
dataset_subset: str,
249+
dataset_subset: Optional[str],
256250
dataset_split: str,
257251
num_requests: int,
258252
tokenizer: PreTrainedTokenizerBase,
259253
random_seed: int,
260254
fixed_output_len: Optional[int] = None,
261255
) -> List[Tuple[str, str, int, Optional[Dict[str, Collection[str]]]]]:
262256

263-
# Special case for MMMU-Pro vision dataset
264-
if dataset_path == 'MMMU/MMMU_Pro' and dataset_subset == 'vision':
265-
assert dataset_split == "test"
257+
# Special case for vision_arena dataset
258+
if dataset_path == 'lmarena-ai/vision-arena-bench-v0.1' \
259+
and dataset_subset is None:
260+
assert dataset_split == "train"
266261
dataset = load_dataset(dataset_path,
267262
name=dataset_subset,
268263
split=dataset_split,
269264
streaming=True)
270-
assert "image" in dataset.features, (
271-
"MMMU/MMMU_Pro vision dataset must have 'image' column.")
272-
filter_func = lambda x: isinstance(x["image"], Image)
273-
dataset = dataset.shuffle(seed=random_seed).filter(filter_func)
274-
return sample_mmmu_pro_vision_requests(dataset, num_requests,
275-
tokenizer, fixed_output_len)
265+
dataset = dataset.shuffle(seed=random_seed)
266+
return sample_vision_arena_requests(dataset, num_requests, tokenizer,
267+
fixed_output_len)
276268

277269
dataset = load_dataset(dataset_path,
278270
name=dataset_subset,

0 commit comments

Comments (0)