Skip to content

Commit 0572080

Browse files
DarkLight1337 authored and tjtanaa committed
[Bugfix] Fix mm_limits access for merged multi-modal processor (vllm-project#12252)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent e9ddeda commit 0572080

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

vllm/multimodal/profiling.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ def processing_info(self) -> BaseProcessingInfo:
106106
def dummy_inputs(self) -> BaseDummyInputsBuilder[_I]:
107107
return self.processor.dummy_inputs
108108

109-
def _get_mm_limits(self) -> Mapping[str, int]:
109+
def get_mm_limits(self) -> Mapping[str, int]:
110110
mm_config = self.processing_info.ctx.get_mm_config()
111111
mm_limit_per_prompt = mm_config.limit_per_prompt
112112

@@ -146,7 +146,7 @@ def get_dummy_data(self, seq_len: int) -> DummyData:
146146
# Avoid circular import
147147
from vllm.sequence import SequenceData
148148

149-
mm_counts = self._get_mm_limits()
149+
mm_counts = self.get_mm_limits()
150150

151151
info = self.processing_info
152152
mm_max_tokens_per_item = info.get_mm_max_tokens_per_item(seq_len)

vllm/multimodal/registry.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from .inputs import MultiModalDataDict, MultiModalKwargs, NestedTensors
1818
from .processing import (BaseMultiModalProcessor, BaseProcessingInfo,
1919
ProcessingCache)
20-
from .profiling import BaseDummyInputsBuilder
20+
from .profiling import BaseDummyInputsBuilder, MultiModalProfiler
2121
from .utils import cached_get_tokenizer
2222
from .video import VideoPlugin
2323

@@ -282,13 +282,13 @@ def get_max_tokens_per_item_by_nonzero_modality(
282282
This is currently directly used only in V1 for profiling the memory
283283
usage of a model.
284284
"""
285-
limits_per_plugin = self._limits_by_model[model_config]
285+
mm_limits = self.get_mm_limits_per_prompt(model_config)
286286

287287
return {
288288
key: max_tokens_per_mm_item
289289
for key, max_tokens_per_mm_item in
290290
self.get_max_tokens_per_item_by_modality(model_config).items()
291-
if limits_per_plugin[key] > 0
291+
if mm_limits[key] > 0
292292
}
293293

294294
def get_max_tokens_by_modality(
@@ -304,10 +304,10 @@ def get_max_tokens_by_modality(
304304
Note:
305305
This should be called after :meth:`init_mm_limits_per_prompt`.
306306
"""
307-
limits_per_plugin = self._limits_by_model[model_config]
307+
mm_limits = self.get_mm_limits_per_prompt(model_config)
308308

309309
return {
310-
key: limits_per_plugin[key] * max_tokens_per_mm_item
310+
key: mm_limits[key] * max_tokens_per_mm_item
311311
for key, max_tokens_per_mm_item in
312312
self.get_max_tokens_per_item_by_modality(model_config).items()
313313
}
@@ -371,6 +371,15 @@ def get_mm_limits_per_prompt(
371371
Note:
372372
This should be called after :meth:`init_mm_limits_per_prompt`.
373373
"""
374+
if self.has_processor(model_config):
375+
tokenizer = cached_get_tokenizer(
376+
model_config.tokenizer,
377+
trust_remote_code=model_config.trust_remote_code,
378+
)
379+
processor = self.create_processor(model_config, tokenizer)
380+
profiler = MultiModalProfiler(processor)
381+
return profiler.get_mm_limits()
382+
374383
return self._limits_by_model[model_config]
375384

376385
def register_processor(

0 commit comments

Comments
 (0)