Skip to content

Commit 8f7bace

Browse files
[Doc] Improve documentation for multimodal CLI args (#16960)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent e4d6144 commit 8f7bace

File tree

2 files changed

+17
-10
lines changed

2 files changed

+17
-10
lines changed

vllm/config.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,15 @@
5454
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
5555
BaseTokenizerGroup)
5656

57-
Config = TypeVar("Config", bound=DataclassInstance)
57+
ConfigType = type[DataclassInstance]
5858
else:
5959
QuantizationConfig = None
60-
Config = TypeVar("Config")
60+
ConfigType = type
6161

6262
logger = init_logger(__name__)
6363

64+
ConfigT = TypeVar("ConfigT", bound=ConfigType)
65+
6466
# This value is chosen to have a balance between ITL and TTFT. Note it is
6567
# not optimized for throughput.
6668
_DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
@@ -162,7 +164,7 @@ def pairwise(iterable):
162164
return out
163165

164166

165-
def config(cls: type[Config]) -> type[Config]:
167+
def config(cls: ConfigT) -> ConfigT:
166168
"""
167169
A decorator that ensures all fields in a dataclass have default values
168170
and that each field has a docstring.
@@ -181,7 +183,7 @@ def config(cls: type[Config]) -> type[Config]:
181183
return cls
182184

183185

184-
def get_field(cls: type[Config], name: str) -> Field:
186+
def get_field(cls: ConfigType, name: str) -> Field:
185187
"""Get the default factory field of a dataclass by name. Used for getting
186188
default factory fields in `EngineArgs`."""
187189
if not is_dataclass(cls):
@@ -2749,6 +2751,9 @@ class MultiModalConfig:
27492751
The maximum number of input items allowed per prompt for each modality.
27502752
This should be a JSON string that will be parsed into a dictionary.
27512753
Defaults to 1 (V0) or 999 (V1) for each modality.
2754+
2755+
For example, to allow up to 16 images and 2 videos per prompt:
2756+
``{"images": 16, "videos": 2}``
27522757
"""
27532758

27542759
def compute_hash(self) -> str:

vllm/engine/arg_utils.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import vllm.envs as envs
1818
from vllm import version
1919
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
20-
Config, ConfigFormat, DecodingConfig, Device,
20+
ConfigFormat, ConfigType, DecodingConfig, Device,
2121
DeviceConfig, DistributedExecutorBackend, HfOverrides,
2222
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
2323
ModelConfig, ModelImpl, MultiModalConfig,
@@ -304,7 +304,7 @@ def is_custom_type(cls: TypeHint) -> bool:
304304
"""Check if the class is a custom type."""
305305
return cls.__module__ != "builtins"
306306

307-
def get_kwargs(cls: type[Config]) -> dict[str, Any]:
307+
def get_kwargs(cls: ConfigType) -> dict[str, Any]:
308308
cls_docs = get_attr_docs(cls)
309309
kwargs = {}
310310
for field in fields(cls):
@@ -678,13 +678,15 @@ def get_kwargs(cls: type[Config]) -> dict[str, Any]:
678678
'--mm-processor-kwargs',
679679
default=None,
680680
type=json.loads,
681-
help=('Overrides for the multimodal input mapping/processing, '
682-
'e.g., image processor. For example: ``{"num_crops": 4}``.'))
681+
help=('Overrides for the multi-modal processor obtained from '
682+
'``AutoProcessor.from_pretrained``. The available overrides '
683+
'depend on the model that is being run.'
684+
'For example, for Phi-3-Vision: ``{"num_crops": 4}``.'))
683685
parser.add_argument(
684686
'--disable-mm-preprocessor-cache',
685687
action='store_true',
686-
help='If true, then disables caching of the multi-modal '
687-
'preprocessor/mapper. (not recommended)')
688+
help='If True, disable caching of the processed multi-modal '
689+
'inputs.')
688690

689691
# LoRA related configs
690692
parser.add_argument('--enable-lora',

0 commit comments

Comments (0)