Skip to content

Commit 1a9c651

Browse files
mgoin authored and yangw-dev committed
[Bugfix] Do not skip "empty" parts of chats that are parsable (vllm-project#16219)
Signed-off-by: mgoin <[email protected]>
Signed-off-by: Yang Wang <[email protected]>
1 parent 2151858 commit 1a9c651

File tree

2 files changed

+98
-13
lines changed

2 files changed

+98
-13
lines changed

tests/entrypoints/test_chat_utils.py

Lines changed: 87 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
MLLAMA_MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct"
3131
LLAMA_GUARD_MODEL_ID = "meta-llama/Llama-Guard-3-1B"
3232
HERMES_MODEL_ID = "NousResearch/Hermes-3-Llama-3.1-8B"
33+
MISTRAL_MODEL_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
3334

3435

3536
@pytest.fixture(scope="function")
@@ -80,6 +81,30 @@ def mllama_tokenizer():
8081
)
8182

8283

84+
@pytest.fixture(scope="function")
85+
def mistral_model_config():
86+
return ModelConfig(MISTRAL_MODEL_ID,
87+
task="generate",
88+
tokenizer=MISTRAL_MODEL_ID,
89+
tokenizer_mode="auto",
90+
trust_remote_code=True,
91+
dtype="auto",
92+
seed=0,
93+
limit_mm_per_prompt={
94+
"image": 2,
95+
})
96+
97+
98+
@pytest.fixture(scope="module")
99+
def mistral_tokenizer():
100+
return TokenizerGroup(
101+
tokenizer_id=MISTRAL_MODEL_ID,
102+
enable_lora=False,
103+
max_num_seqs=5,
104+
max_input_length=None,
105+
)
106+
107+
83108
@pytest.fixture(scope="module")
84109
def image_url():
85110
image = ImageAsset('cherry_blossom')
@@ -131,6 +156,66 @@ def test_parse_chat_messages_single_image(
131156
_assert_mm_data_is_image_input(mm_data, 1)
132157

133158

159+
def test_parse_chat_messages_empty_system(
160+
mistral_model_config,
161+
mistral_tokenizer,
162+
):
163+
# Test string format
164+
conversation, _ = parse_chat_messages(
165+
[{
166+
"role": "system",
167+
"content": ""
168+
}, {
169+
"role": "user",
170+
"content": [{
171+
"type": "text",
172+
"text": "Who are you?"
173+
}]
174+
}],
175+
mistral_model_config,
176+
mistral_tokenizer,
177+
content_format="string",
178+
)
179+
assert conversation == [{
180+
"role": "system",
181+
"content": ""
182+
}, {
183+
"role": "user",
184+
"content": "Who are you?"
185+
}]
186+
187+
# Test openai format
188+
conversation, _ = parse_chat_messages(
189+
[{
190+
"role": "system",
191+
"content": ""
192+
}, {
193+
"role": "user",
194+
"content": [{
195+
"type": "text",
196+
"text": "Who are you?"
197+
}]
198+
}],
199+
mistral_model_config,
200+
mistral_tokenizer,
201+
content_format="openai",
202+
)
203+
assert conversation == [{
204+
"role": "system",
205+
"content": [{
206+
"type": "text",
207+
"text": ""
208+
}]
209+
}, {
210+
"role":
211+
"user",
212+
"content": [{
213+
"type": "text",
214+
"text": "Who are you?"
215+
}]
216+
}]
217+
218+
134219
@pytest.mark.asyncio
135220
async def test_parse_chat_messages_single_image_async(
136221
phi3v_model_config,
@@ -671,7 +756,7 @@ def get_conversation(is_hf: bool):
671756
# Build a config for the model
672757
model_config = ModelConfig(model,
673758
task="generate",
674-
tokenizer=MLLAMA_MODEL_ID,
759+
tokenizer=model,
675760
tokenizer_mode="auto",
676761
trust_remote_code=True,
677762
dtype="auto",
@@ -682,7 +767,7 @@ def get_conversation(is_hf: bool):
682767

683768
# Build the tokenizer group and grab the underlying tokenizer
684769
tokenizer_group = TokenizerGroup(
685-
MLLAMA_MODEL_ID,
770+
model,
686771
enable_lora=False,
687772
max_num_seqs=5,
688773
max_input_length=None,

vllm/entrypoints/chat_utils.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -872,19 +872,19 @@ def _get_full_multimodal_text_prompt(placeholder_counts: dict[str, int],
872872
Callable[[ChatCompletionContentPartParam], _ContentPart],
873873
] = {
874874
"text":
875-
lambda part: _TextParser(part).get("text", ""),
875+
lambda part: _TextParser(part).get("text", None),
876876
"image_url":
877-
lambda part: _ImageParser(part).get("image_url", {}).get("url", ""),
877+
lambda part: _ImageParser(part).get("image_url", {}).get("url", None),
878878
"image_embeds":
879-
lambda part: _ImageEmbedsParser(part).get("image_embeds", {}),
879+
lambda part: _ImageEmbedsParser(part).get("image_embeds", None),
880880
"audio_url":
881-
lambda part: _AudioParser(part).get("audio_url", {}).get("url", ""),
881+
lambda part: _AudioParser(part).get("audio_url", {}).get("url", None),
882882
"input_audio":
883-
lambda part: _InputAudioParser(part).get("input_audio", {}),
883+
lambda part: _InputAudioParser(part).get("input_audio", None),
884884
"refusal":
885-
lambda part: _RefusalParser(part).get("refusal", ""),
885+
lambda part: _RefusalParser(part).get("refusal", None),
886886
"video_url":
887-
lambda part: _VideoParser(part).get("video_url", {}).get("url", ""),
887+
lambda part: _VideoParser(part).get("video_url", {}).get("url", None),
888888
}
889889

890890

@@ -1003,11 +1003,11 @@ def _parse_chat_message_content_part(
10031003
part_type, content = _parse_chat_message_content_mm_part(part)
10041004

10051005
# if part_type is text/refusal/image_url/audio_url/video_url/input_audio but
1006-
# content is empty, log a warning and skip
1007-
if part_type in VALID_MESSAGE_CONTENT_MM_PART_TYPES and not content:
1006+
# content is None, log a warning and skip
1007+
if part_type in VALID_MESSAGE_CONTENT_MM_PART_TYPES and content is None:
10081008
logger.warning(
1009-
"Skipping multimodal part (type: '%s') "
1010-
"with empty / unparsable content.", part_type)
1009+
"Skipping multimodal part '%s' (type: '%s') "
1010+
"with empty / unparsable content.", part, part_type)
10111011
return None
10121012

10131013
if part_type in ("text", "refusal"):

0 commit comments

Comments
 (0)