Skip to content

Commit e25c988

Browse files
DarkLight1337 and mzusman
authored and committed
[Misc] Rename MultiModalInputsV2 -> MultiModalInputs (vllm-project#12244)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 5be9b80 commit e25c988

File tree

12 files changed

+31
-31
lines changed

12 files changed

+31
-31
lines changed

docs/source/api/multimodal/inputs.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
```
4444

4545
```{eval-rst}
46-
.. autoclass:: vllm.multimodal.inputs.MultiModalInputsV2
46+
.. autoclass:: vllm.multimodal.inputs.MultiModalInputs
4747
:members:
4848
:show-inheritance:
4949
```

vllm/inputs/data.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
if TYPE_CHECKING:
1010
from vllm.multimodal import (MultiModalDataDict, MultiModalKwargs,
1111
MultiModalPlaceholderDict)
12-
from vllm.multimodal.inputs import MultiModalInputsV2
12+
from vllm.multimodal.inputs import MultiModalInputs
1313

1414

1515
class TextPrompt(TypedDict):
@@ -207,7 +207,7 @@ def token_inputs(
207207
return inputs
208208

209209

210-
DecoderOnlyInputs = Union[TokenInputs, "MultiModalInputsV2"]
210+
DecoderOnlyInputs = Union[TokenInputs, "MultiModalInputs"]
211211
"""
212212
The inputs in :class:`~vllm.LLMEngine` before they are
213213
passed to the model executor.
@@ -222,14 +222,14 @@ class EncoderDecoderInputs(TypedDict):
222222
223223
This specifies the required data for encoder-decoder models.
224224
"""
225-
encoder: Union[TokenInputs, "MultiModalInputsV2"]
225+
encoder: Union[TokenInputs, "MultiModalInputs"]
226226
"""The inputs for the encoder portion."""
227227

228-
decoder: Union[TokenInputs, "MultiModalInputsV2"]
228+
decoder: Union[TokenInputs, "MultiModalInputs"]
229229
"""The inputs for the decoder portion."""
230230

231231

232-
SingletonInputs = Union[TokenInputs, "MultiModalInputsV2"]
232+
SingletonInputs = Union[TokenInputs, "MultiModalInputs"]
233233
"""
234234
A processed :class:`SingletonPrompt` which can be passed to
235235
:class:`vllm.sequence.Sequence`.
@@ -311,7 +311,7 @@ def multi_modal_hashes(self) -> List[str]:
311311
return inputs.get("multi_modal_hashes", [])
312312

313313
if inputs["type"] == "multimodal":
314-
# only the case when we use MultiModalInputsV2
314+
# only the case when we use MultiModalInputs
315315
return inputs.get("mm_hashes", []) # type: ignore[return-value]
316316

317317
assert_never(inputs) # type: ignore[arg-type]

vllm/inputs/preprocess.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from vllm.logger import init_logger
88
from vllm.lora.request import LoRARequest
99
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
10-
from vllm.multimodal.inputs import MultiModalDataDict, MultiModalInputsV2
10+
from vllm.multimodal.inputs import MultiModalDataDict, MultiModalInputs
1111
from vllm.prompt_adapter.request import PromptAdapterRequest
1212
from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup
1313

@@ -247,7 +247,7 @@ def _process_multimodal(
247247
mm_data: MultiModalDataDict,
248248
mm_processor_kwargs: Optional[Mapping[str, object]],
249249
lora_request: Optional[LoRARequest],
250-
) -> MultiModalInputsV2:
250+
) -> MultiModalInputs:
251251
"""
252252
Apply the model's multi-modal processor to a multi-modal prompt,
253253
returning the corresponding token IDs and metadata.
@@ -271,7 +271,7 @@ async def _process_multimodal_async(
271271
mm_data: MultiModalDataDict,
272272
mm_processor_kwargs: Optional[Mapping[str, object]],
273273
lora_request: Optional[LoRARequest],
274-
) -> MultiModalInputsV2:
274+
) -> MultiModalInputs:
275275
"""Async version of :meth:`_process_multimodal`."""
276276
tokenizer_group = self.get_tokenizer_group()
277277
tokenizer = await tokenizer_group.get_lora_tokenizer_async(lora_request

vllm/model_executor/models/blip2.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from vllm.model_executor.sampling_metadata import SamplingMetadata
1616
from vllm.multimodal import MULTIMODAL_REGISTRY
1717
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
18-
MultiModalInputsV2, MultiModalKwargs,
18+
MultiModalInputs, MultiModalKwargs,
1919
NestedTensors, PlaceholderRange)
2020
from vllm.multimodal.parse import MultiModalDataItems
2121
from vllm.multimodal.processing import (BaseMultiModalProcessor,
@@ -490,7 +490,7 @@ def apply(
490490
prompt: Union[str, list[int]],
491491
mm_data: MultiModalDataDict,
492492
hf_processor_mm_kwargs: Mapping[str, object],
493-
) -> MultiModalInputsV2:
493+
) -> MultiModalInputs:
494494
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
495495

496496
# Only <image> tokens should be considered as placeholders,

vllm/model_executor/models/chameleon.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from vllm.model_executor.utils import set_weight_attrs
3030
from vllm.multimodal import MULTIMODAL_REGISTRY
3131
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
32-
MultiModalInputsV2, MultiModalKwargs,
32+
MultiModalInputs, MultiModalKwargs,
3333
NestedTensors, PlaceholderRange)
3434
from vllm.multimodal.parse import MultiModalDataItems
3535
from vllm.multimodal.processing import (BaseMultiModalProcessor,
@@ -159,7 +159,7 @@ def apply(
159159
prompt: Union[str, list[int]],
160160
mm_data: MultiModalDataDict,
161161
hf_processor_mm_kwargs: Mapping[str, object],
162-
) -> MultiModalInputsV2:
162+
) -> MultiModalInputs:
163163
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
164164

165165
# Only <image> tokens should be considered as placeholders,

vllm/model_executor/models/fuyu.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from vllm.model_executor.sampling_metadata import SamplingMetadata
3232
from vllm.multimodal import MULTIMODAL_REGISTRY
3333
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
34-
MultiModalInputsV2, MultiModalKwargs,
34+
MultiModalInputs, MultiModalKwargs,
3535
NestedTensors, PlaceholderRange)
3636
from vllm.multimodal.parse import (ImageProcessorItems, ImageSize,
3737
MultiModalDataItems)
@@ -232,7 +232,7 @@ def apply(
232232
prompt: Union[str, list[int]],
233233
mm_data: MultiModalDataDict,
234234
hf_processor_mm_kwargs: Mapping[str, object],
235-
) -> MultiModalInputsV2:
235+
) -> MultiModalInputs:
236236
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
237237

238238
# Only |SPEAKER| (image) tokens should be considered as placeholders,

vllm/model_executor/models/llava.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from vllm.model_executor.sampling_metadata import SamplingMetadata
2525
from vllm.multimodal import MULTIMODAL_REGISTRY
2626
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
27-
MultiModalInputsV2, MultiModalKwargs,
27+
MultiModalInputs, MultiModalKwargs,
2828
NestedTensors)
2929
from vllm.multimodal.parse import (ImageEmbeddingItems, ImageProcessorItems,
3030
ImageSize, MultiModalDataItems)
@@ -746,7 +746,7 @@ def apply(
746746
prompt: Union[str, list[int]],
747747
mm_data: MultiModalDataDict,
748748
hf_processor_mm_kwargs: Mapping[str, object],
749-
) -> MultiModalInputsV2:
749+
) -> MultiModalInputs:
750750
hf_config = self.info.get_hf_config()
751751
image_token_id = hf_config.image_token_index
752752

@@ -805,7 +805,7 @@ def get_replacement_mantis(item_idx: int):
805805
for modality, placeholders in mm_placeholders.items()
806806
}
807807

808-
return MultiModalInputsV2(
808+
return MultiModalInputs(
809809
type="multimodal",
810810
prompt=prompt,
811811
prompt_token_ids=prompt_ids,

vllm/model_executor/models/phi3v.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from vllm.model_executor.sampling_metadata import SamplingMetadata
3232
from vllm.multimodal import MULTIMODAL_REGISTRY
3333
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
34-
MultiModalInputsV2, MultiModalKwargs,
34+
MultiModalInputs, MultiModalKwargs,
3535
NestedTensors, PlaceholderRange)
3636
from vllm.multimodal.parse import (ImageEmbeddingItems, ImageProcessorItems,
3737
ImageSize, MultiModalDataItems)
@@ -484,7 +484,7 @@ def apply(
484484
prompt: Union[str, list[int]],
485485
mm_data: MultiModalDataDict,
486486
hf_processor_mm_kwargs: Mapping[str, object],
487-
) -> MultiModalInputsV2:
487+
) -> MultiModalInputs:
488488
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
489489

490490
# Only <|image|> tokens should be considered as placeholders,

vllm/model_executor/models/qwen2_audio.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from vllm.model_executor.sampling_metadata import SamplingMetadata
3838
from vllm.multimodal import MULTIMODAL_REGISTRY
3939
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
40-
MultiModalInputsV2, MultiModalKwargs,
40+
MultiModalInputs, MultiModalKwargs,
4141
NestedTensors, PlaceholderRange)
4242
from vllm.multimodal.parse import (AudioProcessorItems, MultiModalDataItems,
4343
MultiModalDataParser)
@@ -245,7 +245,7 @@ def apply(
245245
prompt: Union[str, list[int]],
246246
mm_data: MultiModalDataDict,
247247
hf_processor_mm_kwargs: Mapping[str, object],
248-
) -> MultiModalInputsV2:
248+
) -> MultiModalInputs:
249249
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
250250

251251
# Only <|AUDIO|> tokens should be considered as placeholders,

vllm/multimodal/inputs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ def get_items(self, modality: str) -> Sequence[MultiModalKwargsItem]:
491491
"""
492492

493493

494-
class MultiModalInputsV2(TypedDict):
494+
class MultiModalInputs(TypedDict):
495495
"""
496496
Represents the outputs of
497497
:class:`vllm.multimodal.processing.BaseMultiModalProcessor`,

vllm/multimodal/processing.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818

1919
from .hasher import MultiModalHasher
2020
from .inputs import (MultiModalDataDict, MultiModalFieldConfig,
21-
MultiModalInputsV2, MultiModalKwargs,
22-
MultiModalKwargsItem, PlaceholderRange)
21+
MultiModalInputs, MultiModalKwargs, MultiModalKwargsItem,
22+
PlaceholderRange)
2323
from .parse import MultiModalDataItems, MultiModalDataParser
2424

2525
if TYPE_CHECKING:
@@ -609,7 +609,7 @@ def __call__(
609609
prompt: str,
610610
mm_data: MultiModalDataDict,
611611
hf_processor_mm_kwargs: Mapping[str, object],
612-
) -> MultiModalInputsV2:
612+
) -> MultiModalInputs:
613613
return self.apply(prompt, mm_data, hf_processor_mm_kwargs)
614614

615615
def _get_data_parser(self) -> MultiModalDataParser:
@@ -1067,7 +1067,7 @@ def apply(
10671067
prompt: Union[str, list[int]],
10681068
mm_data: MultiModalDataDict,
10691069
hf_processor_mm_kwargs: Mapping[str, object],
1070-
) -> MultiModalInputsV2:
1070+
) -> MultiModalInputs:
10711071
"""
10721072
Process multi-modal inputs to be used in vLLM.
10731073
@@ -1169,7 +1169,7 @@ def apply(
11691169
for modality, placeholders in mm_placeholders.items()
11701170
}
11711171

1172-
return MultiModalInputsV2(
1172+
return MultiModalInputs(
11731173
type="multimodal",
11741174
prompt=prompt,
11751175
prompt_token_ids=prompt_ids,

vllm/multimodal/profiling.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from vllm.inputs import DummyData
1212
from vllm.logger import init_logger
1313

14-
from .inputs import MultiModalDataDict, MultiModalInputsV2
14+
from .inputs import MultiModalDataDict, MultiModalInputs
1515
from .processing import BaseMultiModalProcessor, BaseProcessingInfo
1616

1717
logger = init_logger(__name__)
@@ -131,7 +131,7 @@ def _get_dummy_mm_inputs(
131131
self,
132132
seq_len: int,
133133
mm_counts: Mapping[str, int],
134-
) -> MultiModalInputsV2:
134+
) -> MultiModalInputs:
135135
factory = self.dummy_inputs
136136
processor_inputs = factory.get_dummy_processor_inputs(
137137
seq_len, mm_counts)

0 commit comments

Comments (0)