
Commit f603f84

aarnphm authored and Mu Huai committed
[Fix] Support passing args to logger (vllm-project#17425)
Signed-off-by: Aaron Pham <[email protected]>
Signed-off-by: Mu Huai <[email protected]>
1 parent d1db451 commit f603f84


13 files changed: +75 -79 lines changed


vllm/config.py

Lines changed: 6 additions & 6 deletions
@@ -278,7 +278,7 @@ class ModelConfig:
    max_model_len: int = None # type: ignore
    """Model context length (prompt and output). If unspecified, will be
    automatically derived from the model config.
-
+
    When passing via `--max-model-len`, supports k/m/g/K/M/G in human-readable
    format. Examples:\n
    - 1k -> 1000\n
@@ -518,11 +518,11 @@ def __post_init__(self) -> None:
                self.hf_text_config.sliding_window)

            logger.warning_once(
-                f"{self.hf_text_config.model_type} has interleaved "
-                "attention, which is currently not supported by the "
-                f"{backend} backend. Disabling sliding window and capping "
-                "the max length to the sliding window size "
-                f"({sliding_window_len_min}).")
+                "%s has interleaved attention, which is currently not supported by the %s backend. Disabling sliding window and capping the max length to the sliding window size (%d).",  # noqa: E501
+                self.hf_text_config.model_type,
+                backend,
+                sliding_window_len_min,
+            )
            self.disable_sliding_window = True
        else:
            # for a model with interleaved attention,
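
The pattern applied here, and repeated across the files below, is to replace an eagerly built f-string with a %-style format string plus positional arguments, so interpolation is deferred to the logging framework. A minimal illustrative sketch using only the standard library (the logger name and values are made up, not taken from the diff):

    import logging

    logger = logging.getLogger("example")
    backend = "FLASH_ATTN"          # hypothetical value for illustration
    sliding_window_len_min = 4096   # hypothetical value for illustration

    # Before: the f-string is formatted eagerly, even if the record is later filtered out.
    logger.warning(f"Capping the max length to the sliding window size "
                   f"({sliding_window_len_min}) for the {backend} backend.")

    # After: %-placeholders are filled in by the logging framework only when
    # the record is actually emitted.
    logger.warning(
        "Capping the max length to the sliding window size (%d) for the %s backend.",
        sliding_window_len_min,
        backend,
    )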

vllm/logger.py

Lines changed: 9 additions & 8 deletions
@@ -5,6 +5,7 @@
import logging
import os
import sys
+from collections.abc import Hashable
from functools import lru_cache, partial
from logging import Logger
from logging.config import dictConfig
@@ -52,15 +53,15 @@


@lru_cache
-def _print_info_once(logger: Logger, msg: str) -> None:
+def _print_info_once(logger: Logger, msg: str, *args: Hashable) -> None:
    # Set the stacklevel to 2 to print the original caller's line info
-    logger.info(msg, stacklevel=2)
+    logger.info(msg, *args, stacklevel=2)


@lru_cache
-def _print_warning_once(logger: Logger, msg: str) -> None:
+def _print_warning_once(logger: Logger, msg: str, *args: Hashable) -> None:
    # Set the stacklevel to 2 to print the original caller's line info
-    logger.warning(msg, stacklevel=2)
+    logger.warning(msg, *args, stacklevel=2)


class _VllmLogger(Logger):
@@ -72,19 +73,19 @@ class _VllmLogger(Logger):
    `intel_extension_for_pytorch.utils._logger`.
    """

-    def info_once(self, msg: str) -> None:
+    def info_once(self, msg: str, *args: Hashable) -> None:
        """
        As :meth:`info`, but subsequent calls with the same message
        are silently dropped.
        """
-        _print_info_once(self, msg)
+        _print_info_once(self, msg, *args)

-    def warning_once(self, msg: str) -> None:
+    def warning_once(self, msg: str, *args: Hashable) -> None:
        """
        As :meth:`warning`, but subsequent calls with the same message
        are silently dropped.
        """
-        _print_warning_once(self, msg)
+        _print_warning_once(self, msg, *args)


def _configure_vllm_root_logger() -> None:
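
These are the core API changes: `info_once` and `warning_once` now forward positional arguments to the underlying `logging` call, and deduplication still works because `functools.lru_cache` keys on the full `(logger, msg, *args)` tuple, which is also why the arguments are annotated as `Hashable`. A rough usage sketch, assuming the usual `init_logger` helper from `vllm.logger` and made-up messages and values:

    from vllm.logger import init_logger

    logger = init_logger(__name__)

    # First call with this (msg, args) pair: logged, with %s filled in lazily.
    logger.warning_once("Layer '%s' is not supported by %s.", "layers.0.qkv_proj", "AWQMarlin")

    # Identical msg and args hit the lru_cache entry: silently dropped.
    logger.warning_once("Layer '%s' is not supported by %s.", "layers.0.qkv_proj", "AWQMarlin")

    # Different args form a new cache key, so this is logged again.
    logger.warning_once("Layer '%s' is not supported by %s.", "layers.1.qkv_proj", "AWQMarlin")

Note that unhashable arguments (e.g. a list or dict) would break the cache lookup, which is presumably why call sites such as the multimodal profiler below convert such values with `str(...)` before passing them in.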

vllm/lora/punica_wrapper/punica_selector.py

Lines changed: 1 addition & 2 deletions
@@ -15,6 +15,5 @@ def get_punica_wrapper(*args, **kwargs) -> PunicaWrapperBase:
    punica_wrapper = punica_wrapper_cls(*args, **kwargs)
    assert punica_wrapper is not None, \
        "the punica_wrapper_qualname(" + punica_wrapper_qualname + ") is wrong."
-    logger.info_once("Using " + punica_wrapper_qualname.rsplit(".", 1)[1] +
-                     ".")
+    logger.info_once("Using %s.", punica_wrapper_qualname.rsplit(".", 1)[1])
    return punica_wrapper

vllm/model_executor/custom_op.py

Lines changed: 3 additions & 3 deletions
@@ -107,9 +107,9 @@ def enabled(cls) -> bool:
        custom_ops = compilation_config.custom_ops
        if not hasattr(cls, "name"):
            logger.warning_once(
-                f"Custom op {cls.__name__} was not registered, "
-                f"which means it won't appear in the op registry. "
-                f"It will be enabled/disabled based on the global settings.")
+                "Custom op %s was not registered, which means it won't appear in the op registry. It will be enabled/disabled based on the global settings.",  # noqa: E501
+                cls.__name__,
+            )
            return CustomOp.default_on()

        enabled = f"+{cls.name}" in custom_ops

vllm/model_executor/guided_decoding/xgrammar_decoding.py

Lines changed: 3 additions & 3 deletions
@@ -191,9 +191,9 @@ def from_guided_params(cls,

        if model_with_warn is not None and any_whitespace:
            logger.info_once(
-                f"{model_with_warn} model detected, consider setting "
-                "`disable_any_whitespace` to prevent runaway generation "
-                "of whitespaces.")
+                "%s model detected, consider setting `disable_any_whitespace` to prevent runaway generation of whitespaces.",  # noqa: E501
+                model_with_warn,
+            )
        # Validate the schema and raise ValueError here if it is invalid.
        # This is to avoid exceptions in model execution, which will crash
        # the engine worker process.

vllm/model_executor/layers/quantization/awq_marlin.py

Lines changed: 3 additions & 2 deletions
@@ -130,8 +130,9 @@ def get_quant_method(self, layer: torch.nn.Module,
        # Check if the layer is supported by AWQMarlin.
        if not check_marlin_supports_layer(layer, self.group_size):
            logger.warning_once(
-                f"Layer '{prefix}' is not supported by AWQMarlin. "
-                "Falling back to unoptimized AWQ kernels.")
+                "Layer '%s' is not supported by AWQMarlin. Falling back to unoptimized AWQ kernels.",  # noqa: E501
+                prefix,
+            )
            return AWQConfig.from_config(
                self.full_config).get_quant_method(layer, prefix)
        return AWQMarlinLinearMethod(self)

vllm/model_executor/model_loader/weight_utils.py

Lines changed: 11 additions & 9 deletions
@@ -464,7 +464,7 @@ def fastsafetensors_weights_iterator(
    hf_weights_files: List[str],
    use_tqdm_on_load: bool,
) -> Generator[Tuple[str, torch.Tensor], None, None]:
-    """Iterate over the weights in the model safetensor files 
+    """Iterate over the weights in the model safetensor files
    using fastsafetensor library."""
    if torch.distributed.is_initialized():
        pg = torch.distributed.group.WORLD
@@ -716,10 +716,10 @@ def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> Optional[str]:
        remapped_name = name.replace(".kv_scale", ".attn.k_scale")
        if remapped_name not in params_dict:
            logger.warning_once(
-                f"Found kv_scale in the checkpoint (e.g. {name}), "
-                "but not found the expected name in the model "
-                f"(e.g. {remapped_name}). kv_scale is "
-                "not loaded.")
+                "Found kv_scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv_scale is not loaded.",  # noqa: E501
+                name,
+                remapped_name,
+            )
            return None
        return remapped_name

@@ -738,10 +738,12 @@ def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> Optional[str]:
        remapped_name = name.replace(scale_name, f".attn{scale_name}")
        if remapped_name not in params_dict:
            logger.warning_once(
-                f"Found {scale_name} in the checkpoint (e.g. {name}), "
-                "but not found the expected name in the model "
-                f"(e.g. {remapped_name}). {scale_name} is "
-                "not loaded.")
+                "Found %s in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). %s is not loaded.",  # noqa: E501
+                scale_name,
+                name,
+                remapped_name,
+                scale_name,
+            )
            return None
        return remapped_name

vllm/model_executor/models/chameleon.py

Lines changed: 4 additions & 4 deletions
@@ -1111,10 +1111,10 @@ def load_weights(self, weights: Iterable[Tuple[str,
                    ".kv_scale", ".attn.kv_scale")
                if remapped_kv_scale_name not in params_dict:
                    logger.warning_once(
-                        "Found kv scale in the checkpoint (e.g. "
-                        f"{name}), but not found the expected name in "
-                        f"the model (e.g. {remapped_kv_scale_name}). "
-                        "kv-scale is not loaded.")
+                        "Found kv scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv-scale is not loaded.",  # noqa: E501
+                        name,
+                        remapped_kv_scale_name,
+                    )
                    continue
                else:
                    name = remapped_kv_scale_name

vllm/model_executor/models/olmoe.py

Lines changed: 4 additions & 5 deletions
@@ -385,11 +385,10 @@ def load_weights(self, weights: Iterable[Tuple[str,
                    ".kv_scale", ".attn.kv_scale")
                if remapped_kv_scale_name not in params_dict:
                    logger.warning_once(
-                        "Found kv scale in the checkpoint "
-                        f"(e.g. {name}), but not found the expected "
-                        f"name in the model "
-                        f"(e.g. {remapped_kv_scale_name}). "
-                        "kv-scale is not loaded.")
+                        "Found kv scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv-scale is not loaded.",  # noqa: E501
+                        name,
+                        remapped_kv_scale_name,
+                    )
                    continue
                else:
                    name = remapped_kv_scale_name

vllm/model_executor/models/qwen2_moe.py

Lines changed: 4 additions & 5 deletions
@@ -462,11 +462,10 @@ def load_weights(self, weights: Iterable[Tuple[str,
                    ".kv_scale", ".attn.kv_scale")
                if remapped_kv_scale_name not in params_dict:
                    logger.warning_once(
-                        "Found kv scale in the checkpoint "
-                        f"(e.g. {name}), but not found the expected "
-                        f"name in the model "
-                        f"(e.g. {remapped_kv_scale_name}). "
-                        "kv-scale is not loaded.")
+                        "Found kv_scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv_scale is not loaded.",  # noqa: E501
+                        name,
+                        remapped_kv_scale_name,
+                    )
                    continue
                else:
                    name = remapped_kv_scale_name

vllm/model_executor/models/qwen3_moe.py

Lines changed: 4 additions & 5 deletions
@@ -459,11 +459,10 @@ def load_weights(self, weights: Iterable[Tuple[str,
                    ".kv_scale", ".attn.kv_scale")
                if remapped_kv_scale_name not in params_dict:
                    logger.warning_once(
-                        "Found kv scale in the checkpoint "
-                        f"(e.g. {name}), but not found the expected "
-                        f"name in the model "
-                        f"(e.g. {remapped_kv_scale_name}). "
-                        "kv-scale is not loaded.")
+                        "Found kv scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv-scale is not loaded.",  # noqa: E501
+                        name,
+                        remapped_kv_scale_name,
+                    )
                    continue
                else:
                    name = remapped_kv_scale_name

vllm/multimodal/profiling.py

Lines changed: 16 additions & 22 deletions
@@ -215,17 +215,14 @@ def get_encoder_dummy_data(
        elif total_len > seq_len and not envs.VLLM_USE_V1:
            # `max_num_batched_tokens` is defined by `SchedulerConfig`
            logger.warning_once(
-                "The encoder sequence length used for profiling ("
-                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
-                " is too short "
-                "to hold the multi-modal embeddings in the worst case "
-                f"({total_len} tokens in total, out of which "
-                f"{self._get_mm_num_tokens(mm_inputs)} are reserved for "
-                "multi-modal embeddings). This may cause certain "
-                "multi-modal inputs to fail during inference, even when "
-                "the input text is short. To avoid this, you should "
-                "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.")
+                "The encoder sequence length used for profiling (max_num_batched_tokens / max_num_seqs = %d) "  # noqa: E501
+                "is too short to hold the multi-modal embeddings in the worst case (%d tokens in total, out of which %s are reserved for multi-modal embeddings). "  # noqa: E501
+                "This may cause certain multi-modal inputs to fail during inference, even when the input text is short. "  # noqa: E501
+                "To avoid this, you should increase `max_model_len`, reduce `max_num_seqs`, and/or reduce `mm_counts`.",  # noqa: E501
+                seq_len,
+                total_len,
+                str(self._get_mm_num_tokens(mm_inputs)),
+            )

        return DummyEncoderData(encoder_prompt_token_ids)

@@ -243,17 +240,14 @@ def get_decoder_dummy_data(
        if total_len > seq_len and not envs.VLLM_USE_V1:
            # `max_num_batched_tokens` is defined by `SchedulerConfig`
            logger.warning_once(
-                "The sequence length used for profiling ("
-                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
-                "is too short "
-                "to hold the multi-modal embeddings in the worst case "
-                f"({total_len} tokens in total, out of which "
-                f"{self._get_mm_num_tokens(mm_inputs)} are reserved for "
-                "multi-modal embeddings). This may cause certain "
-                "multi-modal inputs to fail during inference, even when "
-                "the input text is short. To avoid this, you should "
-                "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.")
+                "The sequence length used for profiling (max_num_batched_tokens / max_num_seqs = %d) "  # noqa: E501
+                "is too short to hold the multi-modal embeddings in the worst case (%d tokens in total, out of which %s are reserved for multi-modal embeddings). "  # noqa: E501
+                "This may cause certain multi-modal inputs to fail during inference, even when the input text is short. "  # noqa: E501
+                "To avoid this, you should increase `max_model_len`, reduce `max_num_seqs`, and/or reduce `mm_counts`.",  # noqa: E501
+                seq_len,
+                total_len,
+                str(self._get_mm_num_tokens(mm_inputs)),
+            )

        if total_len < seq_len:
            prompt_token_ids.extend([0] * (seq_len - total_len))
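
The long replacement messages above are built from several adjacent string literals; inside the call parentheses Python concatenates them into a single %-format string, and each physical line carries its own `# noqa: E501` marker. A small sketch of the same mechanism with hypothetical values, using the standard library rather than the vLLM logger:

    import logging

    logger = logging.getLogger("example")
    seq_len, total_len, mm_tokens = 2048, 3072, {"image": 1024}  # hypothetical values

    logger.warning(
        "The sequence length used for profiling (%d) "   # adjacent literals are joined
        "is too short for the worst case (%d tokens, "   # into one format string before
        "%s reserved for multi-modal embeddings).",      # the %-args are applied
        seq_len,
        total_len,
        str(mm_tokens),  # converted to str as in the diff, since dicts are not hashable
    )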

vllm/multimodal/registry.py

Lines changed: 7 additions & 5 deletions
@@ -100,7 +100,7 @@ def get_max_tokens_per_item_by_modality(
        model_config: "ModelConfig",
    ) -> Mapping[str, int]:
        """
-        Get the maximum number of tokens per data item from each modality based 
+        Get the maximum number of tokens per data item from each modality based
        on underlying model configuration.
        """
        if not model_config.is_multimodal_model:
@@ -126,11 +126,11 @@ def get_max_tokens_per_item_by_nonzero_modality(
    ) -> Mapping[str, int]:
        """
        Get the maximum number of tokens per data item from each modality based
-        on underlying model configuration, excluding modalities that user 
+        on underlying model configuration, excluding modalities that user
        explicitly disabled via `limit_mm_per_prompt`.

        Note:
-            This is currently directly used only in V1 for profiling the memory 
+            This is currently directly used only in V1 for profiling the memory
            usage of a model.
        """
        mm_limits = self.get_mm_limits_per_prompt(model_config)
@@ -316,7 +316,9 @@ def get_encoder_dummy_data(
        token_ids = dummy_data.prompt_token_ids
        if len(token_ids) < seq_len:
            logger.warning_once(
-                f"Expected at least {seq_len} dummy encoder tokens for "
-                f"profiling, but found {len(token_ids)} tokens instead.")
+                "Expected at least %d dummy encoder tokens for profiling, but found %d tokens instead.",  # noqa: E501
+                seq_len,
+                len(token_ids),
+            )

        return dummy_data
