
Commit 03860c4

gshtras authored and Mu Huai committed
Print the warning only once (vllm-project#16193)
Signed-off-by: Gregory Shtrasberg <[email protected]>
Signed-off-by: Mu Huai <[email protected]>
1 parent 02137c8 commit 03860c4

File tree

1 file changed (+12 −10 lines)

vllm/multimodal/profiling.py

Lines changed: 12 additions & 10 deletions
@@ -216,17 +216,18 @@ def get_encoder_dummy_data(
         # Encoder-decoder multimodal models only support v0
         if total_len > seq_len:
             # `max_num_batched_tokens` is defined by `SchedulerConfig`
-            logger.warning(
+            logger.warning_once(
                 "The encoder sequence length used for profiling ("
-                "max_num_batched_tokens / max_num_seqs = %d) is too short "
+                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
+                " is too short "
                 "to hold the multi-modal embeddings in the worst case "
-                "(%d tokens in total, out of which %s are reserved for "
+                f"({total_len} tokens in total, out of which "
+                f"{total_placeholders_by_modality} are reserved for "
                 "multi-modal embeddings). This may cause certain "
                 "multi-modal inputs to fail during inference, even when "
                 "the input text is short. To avoid this, you should "
                 "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.", seq_len, total_len,
-                total_placeholders_by_modality)
+                "and/or reduce `mm_counts`.")

         processor = cast(EncDecMultiModalProcessor, self.processor)
         if processor.pad_dummy_encoder_prompt:
@@ -251,17 +252,18 @@ def get_decoder_dummy_data(
         # V0 does not support chunked prefill.
         if total_len > seq_len and not envs.VLLM_USE_V1:
             # `max_num_batched_tokens` is defined by `SchedulerConfig`
-            logger.warning(
+            logger.warning_once(
                 "The sequence length used for profiling ("
-                "max_num_batched_tokens / max_num_seqs = %d) is too short "
+                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
+                "is too short "
                 "to hold the multi-modal embeddings in the worst case "
-                "(%d tokens in total, out of which %s are reserved for "
+                f"({total_len} tokens in total, out of which "
+                f"{total_placeholders_by_modality} are reserved for "
                 "multi-modal embeddings). This may cause certain "
                 "multi-modal inputs to fail during inference, even when "
                 "the input text is short. To avoid this, you should "
                 "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.", seq_len, total_len,
-                total_placeholders_by_modality)
+                "and/or reduce `mm_counts`.")

         if total_len < seq_len:
             prompt_token_ids.extend([0] * (seq_len - total_len))
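
Context for the change: these profiling warnings were previously emitted on every profiling call; switching to `logger.warning_once` deduplicates them. Below is a minimal, self-contained sketch of how a once-only warning helper can be built with `functools.lru_cache`. It is illustrative only and is not vLLM's actual logger implementation; the `warning_once` name and the assumption that deduplication is keyed on the fully formatted message string are assumptions made for this example. Keying on the final string would also explain why the call sites above fold the %-style arguments into f-strings: the message has to be fully formatted before the once-check runs.

    import functools
    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger("demo")  # hypothetical stand-in, not vllm's logger

    @functools.lru_cache(maxsize=None)
    def warning_once(msg: str) -> None:
        # lru_cache remembers every message string it has already seen, so a
        # given message is only logged the first time it is passed in.
        logger.warning(msg)

    seq_len, total_len = 512, 1024
    # The message is fully formatted *before* the call, so the cache key is
    # the final string; repeating the same call below is a no-op.
    warning_once(f"profiling sequence length {seq_len} is too short for {total_len} tokens")
    warning_once(f"profiling sequence length {seq_len} is too short for {total_len} tokens")  # suppressed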
