Commit af51d80

Revert "[V1] Scatter and gather placeholders in the model runner" (#16075)
1 parent f5722a5 commit af51d80

42 files changed (+943, -497 lines)

docs/source/contributing/model/multimodal.md (+8, -8)

@@ -860,8 +860,8 @@ prompt_tokens, prompts_length = _tokenize_prompts_with_image_and_batch(
 )
 ```
 
-To assign the vision embeddings to only the image tokens, instead of a string
-you can return an instance of {class}`~vllm.multimodal.processing.PromptUpdateDetails`:
+To accommodate this, instead of a string you can return an instance of {class}`~vllm.multimodal.processing.PromptUpdateDetails`
+with different `full` and `feature` attributes:
 
 ```python
 hf_config = self.info.get_hf_config()
@@ -879,9 +879,9 @@ def get_replacement_fuyu(item_idx: int):
     image_tokens = ([_IMAGE_TOKEN_ID] * ncols +
                     [_NEWLINE_TOKEN_ID]) * nrows
 
-    return PromptUpdateDetails.select_token_id(
-        image_tokens + [bos_token_id],
-        embed_token_id=_IMAGE_TOKEN_ID,
+    return PromptUpdateDetails(
+        full=image_tokens + [bos_token_id],
+        features=image_tokens,
     )
 ```
 
@@ -914,9 +914,9 @@ def _get_prompt_updates(
            image_tokens = ([_IMAGE_TOKEN_ID] * ncols +
                            [_NEWLINE_TOKEN_ID]) * nrows
 
-            return PromptUpdateDetails.select_token_id(
-                image_tokens + [bos_token_id],
-                embed_token_id=_IMAGE_TOKEN_ID,
+            return PromptUpdateDetails(
+                full=image_tokens + [bos_token_id],
+                features=image_tokens,
             )
 
         return [
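For reference, the reverted docs API above can be exercised in isolation. The following is a minimal sketch assembled from the snippet shown in the diff, not part of the commit; the token IDs and grid size are illustrative values only.

```python
from vllm.multimodal.processing import PromptUpdateDetails

# Illustrative placeholder IDs and feature-grid size, not taken from a real config.
_IMAGE_TOKEN_ID = 71011
_NEWLINE_TOKEN_ID = 71019
bos_token_id = 1
ncols, nrows = 4, 2

# One row of image tokens per feature row, each terminated by a newline token.
image_tokens = ([_IMAGE_TOKEN_ID] * ncols + [_NEWLINE_TOKEN_ID]) * nrows

details = PromptUpdateDetails(
    full=image_tokens + [bos_token_id],  # tokens spliced into the prompt
    features=image_tokens,               # subset covered by the vision embeddings
)
```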

docs/source/models/supported_models.md (+3)

@@ -989,6 +989,9 @@ See [this page](#generative-models) for more information on how to use generativ
 <sup>+</sup> Multiple items can be inputted per text prompt for this modality.
 
 :::{important}
+To use Gemma3 series models, you have to install Hugging Face Transformers library from source via
+`pip install git+https://github.com/huggingface/transformers`.
+
 Pan-and-scan image pre-processing is currently supported on V0 (but not V1).
 You can enable it by passing `--mm-processor-kwargs '{"do_pan_and_scan": True}'`.
 :::
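A small offline-inference sketch of the setting quoted above may help; it is not from the commit. The checkpoint name `google/gemma-3-4b-it` is an assumption, and `mm_processor_kwargs` mirrors the `--mm-processor-kwargs` CLI flag in the doc (pan-and-scan, V0 only).

```python
# Hypothetical usage after `pip install git+https://github.com/huggingface/transformers`.
from vllm import LLM

llm = LLM(
    model="google/gemma-3-4b-it",                    # assumed Gemma3 checkpoint
    mm_processor_kwargs={"do_pan_and_scan": True},   # same setting as the CLI flag above
)
```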

examples/offline_inference/audio_language.py (+1, -1)

@@ -47,7 +47,7 @@ def run_minicpmo(question: str, audio_count: int) -> ModelRequestData:
         model=model_name,
         trust_remote_code=True,
         max_model_len=4096,
-        max_num_seqs=2,
+        max_num_seqs=5,
         limit_mm_per_prompt={"audio": audio_count},
     )

tests/models/decoder_only/audio_language/test_ultravox.py (+1, -4)

@@ -55,10 +55,7 @@ def server(request, audio_assets):
         for key, value in request.param.items()
     ]
 
-    with RemoteOpenAIServer(MODEL_NAME,
-                            args,
-                            env_dict={"VLLM_AUDIO_FETCH_TIMEOUT":
-                                      "30"}) as remote_server:
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
         yield remote_server

tests/models/decoder_only/vision_language/test_models.py (+1, -1)

@@ -167,7 +167,7 @@
         "cherry_blossom": "<image>What is the season?", # noqa: E501
     }),
     multi_image_prompt="<image><image>Describe the two images in detail.", # noqa: E501
-    max_model_len=4096,
+    max_model_len=8192,
     max_num_seqs=2,
     auto_cls=AutoModelForImageTextToText,
     vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}}

tests/models/decoder_only/vision_language/test_pixtral.py (+16, -10)

@@ -176,8 +176,6 @@ def test_chat(
         model,
         dtype=dtype,
         tokenizer_mode="mistral",
-        load_format="mistral",
-        config_format="mistral",
         max_model_len=max_model_len,
         limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
     ) as vllm_model:
@@ -200,14 +198,22 @@ def test_chat(
 
 
 @large_gpu_test(min_gb=48)
-@pytest.mark.parametrize("prompt,expected_ranges",
-                         [(_create_engine_inputs_hf(IMG_URLS[:1]),
-                           [PlaceholderRange(offset=11, length=494)]),
-                          (_create_engine_inputs_hf(IMG_URLS[1:4]), [
-                              PlaceholderRange(offset=11, length=266),
-                              PlaceholderRange(offset=277, length=1056),
-                              PlaceholderRange(offset=1333, length=418)
-                          ])])
+@pytest.mark.parametrize(
+    "prompt,expected_ranges",
+    [(_create_engine_inputs_hf(IMG_URLS[:1]), [{
+        "offset": 11,
+        "length": 494
+    }]),
+     (_create_engine_inputs_hf(IMG_URLS[1:4]), [{
+         "offset": 11,
+         "length": 266
+     }, {
+         "offset": 277,
+         "length": 1056
+     }, {
+         "offset": 1333,
+         "length": 418
+     }])])
 def test_multi_modal_placeholders(vllm_runner, prompt,
                                   expected_ranges: list[PlaceholderRange],
                                   monkeypatch) -> None:

tests/models/multimodal/processing/test_llava_next.py (+2, -2)

@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(
             first_placeholder = image_placeholders[0]
 
             # NOTE: There is a BOS token
-            assert first_placeholder.offset == 1
-            assert first_placeholder.length == (
+            assert first_placeholder["offset"] == 1
+            assert first_placeholder["length"] == (
                 len(processed_inputs["prompt_token_ids"]) - 1) // num_imgs
 
         except Exception as exc:

tests/models/multimodal/processing/test_llava_onevision.py (+2, -2)

@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(
 
             first_placeholder = image_placeholders[0]
 
-            assert first_placeholder.offset == 0
-            assert first_placeholder.length == len(
+            assert first_placeholder["offset"] == 0
+            assert first_placeholder["length"] == len(
                 processed_inputs["prompt_token_ids"]) // num_imgs
         except Exception as exc:
             failed_size_excs.append((image_size, exc))

tests/models/registry.py (+1, -3)

@@ -277,9 +277,7 @@ def check_available_online(
                                       trust_remote_code=True,
                                       hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
-                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}, # noqa: E501
-                                      max_transformers_version="4.48", # noqa: E501
-                                      transformers_version_reason="HF model is not compatible."), # noqa: E501
+                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}), # noqa: E501
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
                                          extras={"2B": "OpenGVLab/InternVL2-2B"}, # noqa: E501
                                          trust_remote_code=True),

tests/multimodal/test_processing.py (-9)

@@ -785,7 +785,6 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=6,
                 tokens=[32000, 32000],
-                is_embed=None,
             ),
         ],
         "pattern_4": [
@@ -794,7 +793,6 @@
                 item_idx=0,
                 start_idx=3,
                 tokens=[32000],
-                is_embed=None,
             ),
         ],
     }
@@ -809,14 +807,12 @@
                 item_idx=0,
                 start_idx=1,
                 tokens=[32000, 32000],
-                is_embed=None,
             ),
             PlaceholderFeaturesInfo(
                 modality="pattern_1",
                 item_idx=1,
                 start_idx=5,
                 tokens=[32000, 32000],
-                is_embed=None,
             ),
         ],
         "pattern_3": [
@@ -825,7 +821,6 @@
                 item_idx=0,
                 start_idx=7,
                 tokens=[1550, 918, 1550],
-                is_embed=None,
             ),
         ],
         # No match for pattern_4 as it has lower priority than pattern_1
@@ -840,14 +835,12 @@
                 item_idx=0,
                 start_idx=1,
                 tokens=[32000, 32000],
-                is_embed=None,
             ),
             PlaceholderFeaturesInfo(
                 modality="pattern_1",
                 item_idx=1,
                 start_idx=3,
                 tokens=[32000, 32000],
-                is_embed=None,
             ),
         ],
         "pattern_4": [
@@ -856,7 +849,6 @@
                 item_idx=0,
                 start_idx=5,
                 tokens=[32000],
-                is_embed=None,
             ),
         ],
         "pattern_3": [
@@ -865,7 +857,6 @@
                 item_idx=0,
                 start_idx=6,
                 tokens=[1550, 918, 1550],
-                is_embed=None,
             ),
         ],
     }

tests/v1/core/test_kv_cache_utils.py (+29, -17)

@@ -3,7 +3,7 @@
 import pytest
 import torch
 
-from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
+from vllm.multimodal.inputs import MultiModalKwargs
 from vllm.sampling_params import SamplingParams
 from vllm.utils import sha256
 # disable yapf here as it formats differently than isort such that both fail
@@ -158,10 +158,13 @@ def test_generate_block_hash_extra_keys():
     request = make_request(
         request_id=0,
         prompt_token_ids=[_ for _ in range(20)],
-        mm_positions=[
-            PlaceholderRange(offset=0, length=5),
-            PlaceholderRange(offset=10, length=5),
-        ],
+        mm_positions=[{
+            "offset": 0,
+            "length": 5
+        }, {
+            "offset": 10,
+            "length": 5
+        }],
         mm_hashes=["hash1", "hash2"],
     )
 
@@ -219,10 +222,13 @@ def test_hash_request_tokens(hash_fn):
     request = make_request(
         request_id=0,
         prompt_token_ids=[_ for _ in range(6)],
-        mm_positions=[
-            PlaceholderRange(offset=0, length=3),
-            PlaceholderRange(offset=3, length=3),
-        ],
+        mm_positions=[{
+            "offset": 0,
+            "length": 3
+        }, {
+            "offset": 3,
+            "length": 3
+        }],
         mm_hashes=["hash1", "hash2"],
     )
 
@@ -247,19 +253,25 @@ def test_hash_tokens_different_mm_input(hash_fn):
     request1 = make_request(
         request_id=0,
         prompt_token_ids=[_ for _ in range(6)],
-        mm_positions=[
-            PlaceholderRange(offset=0, length=3),
-            PlaceholderRange(offset=3, length=3),
-        ],
+        mm_positions=[{
+            "offset": 0,
+            "length": 3
+        }, {
+            "offset": 3,
+            "length": 3
+        }],
         mm_hashes=["hash1", "hash2"],
     )
     request2 = make_request(
         request_id=1,
         prompt_token_ids=[_ for _ in range(6)],
-        mm_positions=[
-            PlaceholderRange(offset=0, length=3),
-            PlaceholderRange(offset=3, length=3),
-        ],
+        mm_positions=[{
+            "offset": 0,
+            "length": 3
+        }, {
+            "offset": 3,
+            "length": 3
+        }],
         mm_hashes=["hash3", "hash2"],
     )
     block_size = 3
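Across the test changes above (pixtral, llava, kv_cache_utils), placeholders revert from `PlaceholderRange` objects with attribute access to plain mappings keyed by `"offset"` and `"length"`. A minimal runnable sketch of that dict-based form, mirroring the constructions and assertions in the diffs (values are illustrative):

```python
# Each multimodal placeholder is a plain dict after this revert.
mm_positions = [
    {"offset": 0, "length": 3},
    {"offset": 3, "length": 3},
]

# Consumption uses key lookups instead of attribute access.
first_placeholder = mm_positions[0]
assert first_placeholder["offset"] == 0
assert first_placeholder["length"] == 3
```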
