vllm-project · vllm-bot · May 21, 2025 · May 21, 2025
@@ -66,7 +66,6 @@ XPU platform supports **tensor parallel** inference/serving and also supports **
 python -m vllm.entrypoints.openai.api_server \
      --model=facebook/opt-13b \
      --dtype=bfloat16 \
-     --device=xpu \
      --max_model_len=1024 \
      --distributed-executor-backend=ray \
      --pipeline-parallel-size=2 \

diff --git a/vllm/config.py b/vllm/config.py
@@ -2201,7 +2201,11 @@ class DeviceConfig:
     """Configuration for the device to use for vLLM execution."""
 
     device: Union[Device, torch.device] = "auto"
-    """Device type for vLLM execution."""
+    """Device type for vLLM execution.
+    This parameter is deprecated and will be
+    removed in a future release.
+    It will now be set automatically based
+    on the current platform."""
     device_type: str = field(init=False)
     """Device type from the current platform. This is set in
     `__post_init__`."""

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -737,7 +737,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             title="DeviceConfig",
             description=DeviceConfig.__doc__,
         )
-        device_group.add_argument("--device", **device_kwargs["device"])
+        device_group.add_argument("--device",
+                                  **device_kwargs["device"],
+                                  deprecated=True)
 
         # Speculative arguments
         speculative_group = parser.add_argument_group(
@@ -977,7 +979,7 @@ def create_engine_config(
         from vllm.platforms import current_platform
         current_platform.pre_register_and_update()
 
-        device_config = DeviceConfig(device=self.device)
+        device_config = DeviceConfig(device=current_platform.device_type)
         model_config = self.create_model_config()
 
         # * If VLLM_USE_V1 is unset, we enable V1 for "supported features"