Skip to content

Commit bb354e6

Browse files
[Bugfix] Fix various bugs in multi-modal processor (#12031)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent ff39141 commit bb354e6

File tree

3 files changed

+62
-51
lines changed

3 files changed

+62
-51
lines changed

tests/multimodal/test_processing.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,8 @@ def test_find_replace_tokens(
421421
"pattern_1": [32000, 32000],
422422
"pattern_2": [],
423423
"pattern_3": [1550, 918, 1550],
424+
# Test different modalities having the same tokens (32000)
425+
"pattern_4": [32000],
424426
},
425427
],
426428
)
@@ -438,6 +440,14 @@ def test_find_replace_tokens(
438440
replacement=[32000, 32000],
439441
),
440442
],
443+
"pattern_4": [
444+
PlaceholderInfo(
445+
modality="pattern_4",
446+
item_idx=0,
447+
start_idx=3,
448+
replacement=[32000],
449+
),
450+
],
441451
}
442452
443453
),
@@ -466,6 +476,7 @@ def test_find_replace_tokens(
466476
replacement=[1550, 918, 1550],
467477
),
468478
],
479+
# No match for pattern_4 as it has lower priority than pattern_1
469480
}
470481
),
471482
(
@@ -485,6 +496,14 @@ def test_find_replace_tokens(
485496
replacement=[32000, 32000],
486497
),
487498
],
499+
"pattern_4": [
500+
PlaceholderInfo(
501+
modality="pattern_4",
502+
item_idx=0,
503+
start_idx=5,
504+
replacement=[32000],
505+
),
506+
],
488507
"pattern_3": [
489508
PlaceholderInfo(
490509
modality="pattern_3",

vllm/multimodal/processing.py

Lines changed: 39 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -404,73 +404,62 @@ def replace_text_matches(
404404
return "".join(texts)
405405

406406

407-
def _iter_modality_placeholders(
407+
def _iter_placeholders(
408+
mm_prompt_repls: Mapping[str, Sequence[BoundPromptReplacement]],
408409
prompt: list[int],
409-
modality: str,
410-
modality_repls: Sequence[BoundPromptReplacement],
411-
modal_item_count: int,
410+
mm_item_counts: Mapping[str, int],
412411
) -> Iterable[PlaceholderInfo]:
413-
if modal_item_count == 0:
414-
return
412+
"""
413+
Yield each set of placeholder tokens found in :code:`prompt`.
414+
415+
Matches are exclusive even when multiple modalities share
416+
the same placeholder tokens. In that case, the modality that
417+
appears earlier in `mm_prompt_repls` takes priority.
415418
419+
Note that empty matches are ignored.
420+
"""
416421
prompt_len = len(prompt)
417-
item_idx = 0
422+
item_idx_by_modality = defaultdict[str, int](lambda: 0)
418423

419424
start_idx = 0
420425
while start_idx < prompt_len:
421426
found = False
422427

423-
for repl_info in modality_repls:
424-
replacement = repl_info.get_replacement(item_idx)
425-
repl_tokens = replacement.token_ids
426-
repl_len = len(repl_tokens)
427-
end_idx = start_idx + repl_len
428-
429-
if repl_len == 0 or end_idx > prompt_len:
428+
for modality, modality_repls in mm_prompt_repls.items():
429+
item_idx = item_idx_by_modality[modality]
430+
if item_idx >= mm_item_counts.get(modality, 0):
430431
continue
431432

432-
if prompt[start_idx:end_idx] == repl_tokens:
433-
yield PlaceholderInfo(
434-
modality=modality,
435-
item_idx=item_idx,
436-
start_idx=start_idx,
437-
replacement=repl_tokens,
438-
)
433+
for repl_info in modality_repls:
434+
replacement = repl_info.get_replacement(item_idx)
435+
repl_tokens = replacement.token_ids
436+
repl_len = len(repl_tokens)
437+
end_idx = start_idx + repl_len
438+
439+
if repl_len == 0 or end_idx > prompt_len:
440+
continue
441+
442+
if prompt[start_idx:end_idx] == repl_tokens:
443+
yield PlaceholderInfo(
444+
modality=modality,
445+
item_idx=item_idx,
446+
start_idx=start_idx,
447+
replacement=repl_tokens,
448+
)
439449

440-
item_idx += 1
441-
if item_idx >= modal_item_count:
442-
return
450+
# Exclude overlapping matches
451+
start_idx = end_idx
452+
item_idx_by_modality[modality] += 1
453+
found = True
454+
break
443455

444-
# Exclude overlapping matches
445-
start_idx = end_idx
446-
found = True
447-
break
456+
if found:
457+
break # Go back to the outer while loop
448458

449459
if not found:
450460
start_idx += 1
451461

452462

453-
def _iter_placeholders(
454-
mm_prompt_repls: Mapping[str, Sequence[BoundPromptReplacement]],
455-
prompt: list[int],
456-
mm_item_counts: Mapping[str, int],
457-
) -> Iterable[PlaceholderInfo]:
458-
"""
459-
For each modality, yield each set of placeholder tokens found in
460-
:code:`prompt`.
461-
462-
Note that empty matches are ignored.
463-
"""
464-
for modality, modal_item_count in mm_item_counts.items():
465-
if modality in mm_prompt_repls:
466-
yield from _iter_modality_placeholders(
467-
prompt,
468-
modality,
469-
mm_prompt_repls[modality],
470-
modal_item_count,
471-
)
472-
473-
474463
def find_mm_placeholders(
475464
mm_prompt_repls: Mapping[str, Sequence[BoundPromptReplacement]],
476465
prompt: list[int],
@@ -1156,7 +1145,7 @@ def apply(
11561145

11571146
# If HF processor already inserts placeholder tokens,
11581147
# there is no need for us to insert them
1159-
if all(len(repls) == 0 for repls in mm_missing_repls.items()):
1148+
if all(len(repls) == 0 for repls in mm_missing_repls.values()):
11601149
tokenizer = self.info.get_tokenizer()
11611150
prompt = decode_tokens(tokenizer, prompt_ids)
11621151
mm_placeholders = hf_mm_placeholders

vllm/multimodal/registry.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,10 @@ def get_max_tokens_per_item_by_modality(
259259
This is currently directly used only in V1.
260260
"""
261261
if self.has_processor(model_config):
262-
tokenizer = cached_get_tokenizer(model_config.tokenizer)
262+
tokenizer = cached_get_tokenizer(
263+
model_config.tokenizer,
264+
trust_remote_code=model_config.trust_remote_code,
265+
)
263266
processor = self.create_processor(model_config, tokenizer)
264267
seq_len = model_config.max_model_len
265268
return processor.info.get_mm_max_tokens_per_item(seq_len)

0 commit comments

Comments
 (0)