
Commit 18016a5

[Bugfix] Fix CI failures for InternVL and Mantis models (#12728)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent: 649550f

4 files changed: +79 −412 lines

tests/models/decoder_only/vision_language/test_models.py

Lines changed: 9 additions & 8 deletions

@@ -9,6 +9,7 @@
 from typing import Type

 import pytest
+from packaging.version import Version
 from transformers import AutoModelForVision2Seq
 from transformers import __version__ as TRANSFORMERS_VERSION

@@ -154,13 +155,7 @@
         stop_str=["<|im_end|>"],
         image_size_factors=[(0.10, 0.15)],
         max_tokens=64,
-        marks=[
-            pytest.mark.skipif(
-                TRANSFORMERS_VERSION < "4.48.0",
-                reason="HF model requires transformers>=4.48.0",
-            ),
-            large_gpu_mark(min_gb=64),
-        ],
+        marks=[large_gpu_mark(min_gb=64)],
     ),
     "blip2": VLMTestInfo(
         models=["Salesforce/blip2-opt-2.7b"],
@@ -206,7 +201,7 @@
         image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
         marks=[
             pytest.mark.skipif(
-                TRANSFORMERS_VERSION >= "4.48.0",
+                Version(TRANSFORMERS_VERSION) >= Version("4.48"),
                 reason="HF model is not compatible with transformers>=4.48.0",
             )
         ],
@@ -339,6 +334,12 @@
         auto_cls=AutoModelForVision2Seq,
         vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
         patch_hf_runner=model_utils.mantis_patch_hf_runner,
+        marks=[
+            pytest.mark.skipif(
+                Version(TRANSFORMERS_VERSION) >= Version("4.48"),
+                reason="HF model is not compatible with transformers>=4.48.0",
+            )
+        ],
     ),
     "minicpmv_25": VLMTestInfo(
         models=["openbmb/MiniCPM-Llama3-V-2_5"],

tests/models/registry.py

Lines changed: 1 addition & 2 deletions

@@ -224,8 +224,7 @@ def check_available_online(

 _MULTIMODAL_EXAMPLE_MODELS = {
     # [Decoder-only]
-    "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria",
-                                                    min_transformers_version="4.48"),
+    "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"),
     "Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b"),  # noqa: E501
     "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"),  # noqa: E501
     "ChatGLMModel": _HfExamplesInfo("THUDM/glm-4v-9b",

tests/multimodal/test_processing.py

Lines changed: 69 additions & 0 deletions

@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0

 from contextlib import nullcontext
+from types import MethodType
 from typing import cast
 from unittest.mock import MagicMock

 import numpy as np
 import pytest
+from transformers import ProcessorMixin

 from vllm.config import ModelConfig
 from vllm.multimodal import MULTIMODAL_REGISTRY
@@ -636,3 +638,70 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
         mm_data=mm_data,
         hf_processor_mm_kwargs={},
     )
+
+
+class _ProcessorProxy:
+
+    def __init__(self, processor: ProcessorMixin) -> None:
+        super().__init__()
+
+        self.__processor = processor
+
+    def __getattr__(self, key: str):
+        return getattr(self.__processor, key)
+
+    def __call__(
+        self,
+        text=None,
+        images=None,
+        videos=None,
+        exists=None,
+        return_tensors=None,
+    ):
+        return dict(exists=exists)
+
+
+@pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-7B-Instruct"])  # Dummy
+# yapf: disable
+@pytest.mark.parametrize(
+    ("call_kwargs", "expected_kwargs"),
+    [
+        # Should ignore invalid kwargs
+        ({"does_not_exist": 100}, {"exists": None}),
+        ({"exists": 1}, {"exists": 1}),
+        ({"does_not_exist": 100, "exists": 1}, {"exists": 1}),
+    ],
+)
+# yapf: enable
+def test_hf_processor_kwargs(model_id, call_kwargs, expected_kwargs):
+    model_config = ModelConfig(
+        model=model_id,
+        task="auto",
+        tokenizer=model_id,
+        tokenizer_mode="auto",
+        trust_remote_code=False,
+        seed=0,
+        dtype="half",
+        revision=None,
+    )
+
+    processor = MULTIMODAL_REGISTRY.create_processor(
+        model_config,
+        tokenizer=cached_get_tokenizer(model_config.tokenizer),
+    )
+    orig_get_hf_processor = processor.info.get_hf_processor
+
+    def get_hf_processor(self, **kwargs):
+        assert kwargs == call_kwargs
+        return _ProcessorProxy(orig_get_hf_processor())
+
+    processor.info.get_hf_processor = MethodType(get_hf_processor,
+                                                 processor.info)
+
+    out_kwargs = processor._call_hf_processor(
+        prompt="",
+        mm_data={},
+        mm_kwargs=call_kwargs,
+    )
+
+    assert out_kwargs == expected_kwargs
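
Note on the new test above (not part of the diff): it combines two standard Python patterns, a delegating proxy (__getattr__ forwards every attribute the proxy does not define, while its own __call__ intercepts keyword arguments) and types.MethodType (which binds a plain function to one instance so only that object's method is replaced). A minimal standalone sketch under those assumptions; the Real, Proxy, and Info names are hypothetical:

from types import MethodType


class Real:
    name = "real-processor"


class Proxy:

    def __init__(self, inner):
        self._inner = inner

    def __getattr__(self, key):
        # Invoked only for attributes not found on the proxy itself,
        # so normal attribute access falls through to the wrapped object.
        return getattr(self._inner, key)

    def __call__(self, exists=None, **_ignored):
        # Like _ProcessorProxy: keep the known kwarg, drop the rest.
        return {"exists": exists}


proxy = Proxy(Real())
assert proxy.name == "real-processor"               # forwarded
assert proxy(exists=1) == {"exists": 1}             # intercepted
assert proxy(does_not_exist=2) == {"exists": None}  # unknown kwarg dropped


class Info:

    def get_processor(self):
        return Real()


info = Info()


def patched_get_processor(self):
    # Bound to `info` only; other Info instances keep the original method.
    return Proxy(Real())


info.get_processor = MethodType(patched_get_processor, info)
assert isinstance(info.get_processor(), Proxy)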
