Commit 5f940fc

Reorganize profiling/processing-related code
Signed-off-by: DarkLight1337 <[email protected]>
1 parent: c0efe92

File tree

23 files changed · +1363 −1286 lines changed

tests/models/decoder_only/vision_language/processing/test_llava_next.py

Lines changed: 14 additions & 27 deletions

```diff
@@ -4,24 +4,17 @@
 import pytest
 from PIL import Image
 from pqdm.threads import pqdm
-from transformers import AutoTokenizer
 
-from vllm.inputs import InputProcessingContext
+from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.parse import ImageSize
+from vllm.multimodal.processor import BaseMultiModalProcessor
+from vllm.multimodal.utils import cached_get_tokenizer
 
 from ....utils import build_model_context
 
 
-# Fixtures lazy import to avoid initializing CUDA during test collection
-@pytest.fixture()
-def processor_for_llava_next():
-    from vllm.model_executor.models.llava_next import (
-        LlavaNextMultiModalProcessor)
-    return LlavaNextMultiModalProcessor
-
-
 def _validate_image_prompt_replacements_one(
-    processor,
+    processor: BaseMultiModalProcessor,
     num_imgs: int,
     failed_size_excs: list[tuple[ImageSize, Exception]],
     image_size: ImageSize,
@@ -78,20 +71,17 @@ def _test_image_prompt_replacements(
 
 @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
 @pytest.mark.parametrize("num_imgs", [1, 2])
-def test_processor_prompt_replacements_regression(
-    processor_for_llava_next,
-    model_id: str,
-    num_imgs: int,
-):
+def test_processor_prompt_replacements_regression(model_id, num_imgs):
     ctx = build_model_context(
         model_name=model_id,
         tokenizer_name=model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-    ctx = InputProcessingContext(ctx.model_config, tokenizer)
-    processor = processor_for_llava_next(ctx)
+    processor = MULTIMODAL_REGISTRY.create_processor(
+        ctx.model_config,
+        tokenizer=cached_get_tokenizer(ctx.model_config.tokenizer),
+    )
 
     image_ratios = [(171, 152), (184, 161), (198, 176), (333, 296), (369, 328),
                     (488, 183), (2560, 1669)]
@@ -111,20 +101,17 @@ def test_processor_prompt_replacements_regression(
                 "Comment this out to run it manually.")
 @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
 @pytest.mark.parametrize("num_imgs", [1])
-def test_processor_prompt_replacements_all(
-    processor_for_llava_next,
-    model_id: str,
-    num_imgs: int,
-):
+def test_processor_prompt_replacements_all(model_id, num_imgs):
     ctx = build_model_context(
         model_name=model_id,
         tokenizer_name=model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-    ctx = InputProcessingContext(ctx.model_config, tokenizer)
-    processor = processor_for_llava_next(ctx)
+    processor = MULTIMODAL_REGISTRY.create_processor(
+        ctx.model_config,
+        tokenizer=cached_get_tokenizer(ctx.model_config.tokenizer),
+    )
 
     seen_aspect_ratios = set[float]()
     image_sizes = list[ImageSize]()
```
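
The migration above replaces the lazy per-model fixture with `MULTIMODAL_REGISTRY.create_processor`, which resolves the processor class from the model config, so the test no longer imports model-specific classes (the fixture existed only to avoid initializing CUDA during test collection). A minimal sketch of the new construction pattern, assuming the imports shown in this diff; `make_processor` is a hypothetical helper, not code from this commit:

```python
# Hypothetical helper illustrating the registry-based pattern above;
# assumes the imports from this diff and the test suite's
# build_model_context utility.
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.utils import cached_get_tokenizer


def make_processor(ctx):
    # ctx comes from build_model_context(...); the registry looks up
    # the processor registered for this model architecture, so no
    # per-model fixture is needed.
    return MULTIMODAL_REGISTRY.create_processor(
        ctx.model_config,
        tokenizer=cached_get_tokenizer(ctx.model_config.tokenizer),
    )
```

The LLaVA-OneVision tests below receive the same treatment.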

tests/models/decoder_only/vision_language/processing/test_llava_onevision.py

Lines changed: 14 additions & 27 deletions

```diff
@@ -4,24 +4,17 @@
 import pytest
 from PIL import Image
 from pqdm.threads import pqdm
-from transformers import AutoTokenizer
 
-from vllm.inputs import InputProcessingContext
+from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.parse import ImageSize
+from vllm.multimodal.processor import BaseMultiModalProcessor
+from vllm.multimodal.utils import cached_get_tokenizer
 
 from ....utils import build_model_context
 
 
-# Fixtures lazy import to avoid initializing CUDA during test collection
-@pytest.fixture()
-def processor_for_llava_onevision():
-    from vllm.model_executor.models.llava_onevision import (
-        LlavaOnevisionMultiModalProcessor)
-    return LlavaOnevisionMultiModalProcessor
-
-
 def _validate_image_prompt_replacements_one(
-    processor,
+    processor: BaseMultiModalProcessor,
     num_imgs: int,
     failed_size_excs: list[tuple[ImageSize, Exception]],
     image_size: ImageSize,
@@ -77,20 +70,17 @@ def _test_image_prompt_replacements(
 @pytest.mark.parametrize("model_id",
                          ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
 @pytest.mark.parametrize("num_imgs", [1, 2])
-def test_processor_prompt_replacements_regression(
-    processor_for_llava_onevision,
-    model_id: str,
-    num_imgs: int,
-):
+def test_processor_prompt_replacements_regression(model_id, num_imgs):
     ctx = build_model_context(
         model_name=model_id,
         tokenizer_name=model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-    ctx = InputProcessingContext(ctx.model_config, tokenizer)
-    processor = processor_for_llava_onevision(ctx)
+    processor = MULTIMODAL_REGISTRY.create_processor(
+        ctx.model_config,
+        tokenizer=cached_get_tokenizer(ctx.model_config.tokenizer),
+    )
 
     image_ratios = [(171, 152), (184, 161), (198, 176), (333, 296), (369, 328),
                     (488, 183), (2560, 1669)]
@@ -111,20 +101,17 @@ def test_processor_prompt_replacements_regression(
 @pytest.mark.parametrize("model_id",
                          ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
 @pytest.mark.parametrize("num_imgs", [1])
-def test_processor_prompt_replacements_all(
-    processor_for_llava_onevision,
-    model_id: str,
-    num_imgs: int,
-):
+def test_processor_prompt_replacements_all(model_id, num_imgs):
     ctx = build_model_context(
         model_name=model_id,
         tokenizer_name=model_id,
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-    ctx = InputProcessingContext(ctx.model_config, tokenizer)
-    processor = processor_for_llava_onevision(ctx)
+    processor = MULTIMODAL_REGISTRY.create_processor(
+        ctx.model_config,
+        tokenizer=cached_get_tokenizer(ctx.model_config.tokenizer),
+    )
 
     seen_aspect_ratios = set[float]()
     image_sizes = list[ImageSize]()
```

tests/multimodal/test_processing.py

Lines changed: 25 additions & 27 deletions

```diff
@@ -10,12 +10,17 @@
 from vllm.config import ModelConfig
 from vllm.inputs import InputProcessingContext
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.multimodal.processing import (ProcessingCache, PromptReplacement,
-                                        _PlaceholderInfo, find_mm_placeholders,
+# yapf conflicts with isort for this block
+# yapf: disable
+from vllm.multimodal.processing import (PlaceholderInfo, ProcessingCache,
+                                        PromptReplacement,
+                                        find_mm_placeholders,
                                         find_text_matches, find_token_matches,
                                         iter_token_matches,
                                         replace_text_matches,
                                         replace_token_matches)
+# yapf: enable
+from vllm.multimodal.profiler import MultiModalProfiler
 from vllm.multimodal.utils import cached_get_tokenizer
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import full_groupby
@@ -431,7 +436,7 @@ def test_find_replace_tokens(
         [1, 9833, 28747, 32000, 9833, 28747, 32000, 32000, 918],
         {
             "pattern_1": [
-                _PlaceholderInfo(
+                PlaceholderInfo(
                     modality="pattern_1",
                     item_idx=0,
                     start_idx=6,
@@ -445,21 +450,21 @@
         [1, 32000, 32000, 9833, 28747, 32000, 32000, 1550, 918, 1550],
         {
             "pattern_1": [
-                _PlaceholderInfo(
+                PlaceholderInfo(
                     modality="pattern_1",
                     item_idx=0,
                     start_idx=1,
                     replacement=[32000, 32000],
                 ),
-                _PlaceholderInfo(
+                PlaceholderInfo(
                     modality="pattern_1",
                     item_idx=1,
                     start_idx=5,
                     replacement=[32000, 32000],
                 ),
             ],
             "pattern_3": [
-                _PlaceholderInfo(
+                PlaceholderInfo(
                     modality="pattern_3",
                     item_idx=0,
                     start_idx=7,
@@ -472,21 +477,21 @@
         [1, 32000, 32000, 32000, 32000, 32000, 1550, 918, 1550],
         {
             "pattern_1": [
-                _PlaceholderInfo(
+                PlaceholderInfo(
                     modality="pattern_1",
                     item_idx=0,
                     start_idx=1,
                     replacement=[32000, 32000],
                 ),
-                _PlaceholderInfo(
+                PlaceholderInfo(
                     modality="pattern_1",
                     item_idx=1,
                     start_idx=3,
                     replacement=[32000, 32000],
                 ),
             ],
             "pattern_3": [
-                _PlaceholderInfo(
+                PlaceholderInfo(
                     modality="pattern_3",
                     item_idx=0,
                     start_idx=6,
@@ -577,27 +582,23 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
         revision=None,
         limit_mm_per_prompt=limit_mm_per_prompt,
     )
-    model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
 
-    processor_factory = MULTIMODAL_REGISTRY._processor_factories[model_cls]
-    ctx = InputProcessingContext(
+    processor = MULTIMODAL_REGISTRY.create_processor(
         model_config,
         tokenizer=cached_get_tokenizer(model_config.tokenizer),
     )
-
-    processor = processor_factory(ctx, cache=None)
-    profiler = processor.profiling_info
+    profiler = MultiModalProfiler(processor)
 
     mock_supported_mm_limits = MagicMock(return_value={"image": num_supported})
-    profiler.get_supported_mm_limits = mock_supported_mm_limits
+    processor.info.get_supported_mm_limits = mock_supported_mm_limits
 
     if is_valid:
         exc_ctx = nullcontext()
     else:
         exc_ctx = pytest.raises(ValueError, match="this model only supports")
 
     with exc_ctx:
-        profiler.get_mm_limits()
+        profiler.get_dummy_data(model_config.max_model_len)
 
 
 @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
@@ -620,16 +621,12 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
         revision=None,
         limit_mm_per_prompt=limit_mm_per_prompt,
     )
-    model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
 
-    processor_factory = MULTIMODAL_REGISTRY._processor_factories[model_cls]
-    ctx = InputProcessingContext(
+    processor = MULTIMODAL_REGISTRY.create_processor(
         model_config,
         tokenizer=cached_get_tokenizer(model_config.tokenizer),
     )
 
-    processor = processor_factory(ctx, cache=None)
-
     rng = np.random.RandomState(0)
     image = _rand_img(rng, min_wh=128, max_wh=256)
     if num_images == 0:
@@ -681,18 +678,19 @@ def _test_processing_cache_correctness(
         hf_overrides=hf_overrides,
         limit_mm_per_prompt=limit_mm_per_prompt,
     )
-    model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
 
-    processor_factory = MULTIMODAL_REGISTRY._processor_factories[model_cls]
+    model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
+    factories = MULTIMODAL_REGISTRY._processor_factories[model_cls]
     ctx = InputProcessingContext(
         model_config,
         tokenizer=cached_get_tokenizer(model_config.tokenizer),
     )
     # Ensure that it can fit all of the data
     cache = ProcessingCache(capacity=1 << 30)
 
-    baseline_processor = processor_factory(ctx, cache=None)
-    cached_processor = processor_factory(ctx, cache=cache)
+    baseline_processor = factories.build_processor(ctx, cache=None)
+    cached_processor = factories.build_processor(ctx, cache=cache)
+    dummy_data_builder = baseline_processor.dummy_data_builder
 
     rng = np.random.RandomState(0)
 
@@ -724,7 +722,7 @@ def _test_processing_cache_correctness(
     }
 
     mm_counts = {k: len(vs) for k, vs in mm_data.items()}
-    prompt = baseline_processor.profiling_info.get_dummy_processor_inputs(
+    prompt = dummy_data_builder.get_dummy_processor_inputs(
         model_config.max_model_len,
         mm_counts,
     ).prompt_text
```
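
Two API shifts show up in this file: limit validation moves from the processor's `profiling_info` attribute to a standalone `MultiModalProfiler`, and the supported-limits hook now lives on `processor.info`. A hedged sketch of the new mocking pattern, adapted from the hunks above (`model_config` stands in for the `ModelConfig` the test builds):

```python
# Sketch of the profiler-based limit check above; model_config is
# assumed to be a ModelConfig built as in the test.
from unittest.mock import MagicMock

from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.profiler import MultiModalProfiler
from vllm.multimodal.utils import cached_get_tokenizer

processor = MULTIMODAL_REGISTRY.create_processor(
    model_config,
    tokenizer=cached_get_tokenizer(model_config.tokenizer),
)
profiler = MultiModalProfiler(processor)

# Supported limits are now a property of the processor's info object,
# so that is what the test patches:
processor.info.get_supported_mm_limits = MagicMock(
    return_value={"image": 1})

# Requesting dummy data now triggers the limit validation that
# profiler.get_mm_limits() used to run directly.
dummy_data = profiler.get_dummy_data(model_config.max_model_len)
```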

tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py

Lines changed: 7 additions & 3 deletions

```diff
@@ -2,13 +2,17 @@
 
 import torch
 
-from vllm.model_executor.models.llava import (LlavaForConditionalGeneration,
-                                              LlavaMultiModalProcessor)
+from vllm.model_executor.models.llava import (LlavaDummyDataBuilder,
+                                              LlavaForConditionalGeneration,
+                                              LlavaMultiModalProcessor,
+                                              LlavaProcessingInfo)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
 
 
-@MULTIMODAL_REGISTRY.register_processor(LlavaMultiModalProcessor)
+@MULTIMODAL_REGISTRY.register_processor(LlavaMultiModalProcessor,
+                                        info=LlavaProcessingInfo,
+                                        dummy_data=LlavaDummyDataBuilder)
 class MyLlava(LlavaForConditionalGeneration):
 
     def compute_logits(
```
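
`register_processor` now takes the processor class together with its processing-info class and dummy-data builder. A sketch of registering an out-of-tree model under the new signature, using exactly the LLaVA classes the plugin imports above (the comments are interpretive, not from the commit):

```python
# Sketch of the new three-part registration; the names are those
# imported in the diff above, the subclass body is illustrative only.
from vllm.model_executor.models.llava import (LlavaDummyDataBuilder,
                                              LlavaForConditionalGeneration,
                                              LlavaMultiModalProcessor,
                                              LlavaProcessingInfo)
from vllm.multimodal import MULTIMODAL_REGISTRY


@MULTIMODAL_REGISTRY.register_processor(
    LlavaMultiModalProcessor,  # maps prompts + images to model inputs
    info=LlavaProcessingInfo,  # static processing metadata for the model
    dummy_data=LlavaDummyDataBuilder,  # synthetic inputs for profiling
)
class MyLlava(LlavaForConditionalGeneration):
    ...
```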

vllm/inputs/preprocess.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -7,7 +7,7 @@
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
-from vllm.multimodal.processing import MultiModalDataDict, MultiModalInputsV2
+from vllm.multimodal.inputs import MultiModalDataDict, MultiModalInputsV2
 from vllm.prompt_adapter.request import PromptAdapterRequest
 from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup
 from vllm.utils import print_info_once, print_warning_once
```

vllm/inputs/registry.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -323,6 +323,7 @@ def dummy_data_for_profiling(
         # Avoid circular import
         from vllm.model_executor.model_loader import get_model_architecture
         from vllm.multimodal import MultiModalKwargs
+        from vllm.multimodal.profiler import MultiModalProfiler
         from vllm.multimodal.utils import cached_get_tokenizer
 
         if mm_registry.has_processor(model_config):
@@ -331,7 +332,8 @@ def dummy_data_for_profiling(
                 trust_remote_code=model_config.trust_remote_code,
             )
             processor = mm_registry.create_processor(model_config, tokenizer)
-            dummy_data = processor.get_dummy_data(seq_len)
+            profiler = MultiModalProfiler(processor)
+            dummy_data = profiler.get_dummy_data(seq_len)
         else:
             model_cls, _ = get_model_architecture(model_config)
             if is_encoder_data:
```
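
After the reorganization, dummy-data generation for memory profiling is no longer a method on the processor; this call site wraps the processor in a `MultiModalProfiler` instead. A minimal sketch of the new entry point (variable names follow the surrounding `dummy_data_for_profiling` function; not a verbatim excerpt):

```python
# Sketch of the new profiling entry point; model_config, mm_registry,
# and seq_len are assumed from the surrounding function.
from vllm.multimodal.profiler import MultiModalProfiler
from vllm.multimodal.utils import cached_get_tokenizer

tokenizer = cached_get_tokenizer(
    model_config.tokenizer,
    trust_remote_code=model_config.trust_remote_code,
)
processor = mm_registry.create_processor(model_config, tokenizer)

# Previously: dummy_data = processor.get_dummy_data(seq_len)
profiler = MultiModalProfiler(processor)
dummy_data = profiler.get_dummy_data(seq_len)
```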
