
Commit 6907571

Commit message: fix
Signed-off-by: Cody Yu <[email protected]>
Parent: 9690e43

File tree: 3 files changed, +55 -3 lines

examples/offline_inference/rlhf.py

Lines changed: 3 additions & 2 deletions
@@ -44,8 +44,9 @@ def __init__(self, *args, **kwargs):
 train_model.to("cuda:0")
 """
 Start the inference process, here we use vLLM to hold a model on GPU 1 and
-GPU 2. For the details on how to use ray, please refer to the ray
-documentation https://docs.ray.io/en/latest/ .
+GPU 2 by creating a Ray placement group. The placement group will be passed
+to the worker processes spawned by vLLM. For the details on how to use Ray,
+please refer to the Ray documentation https://docs.ray.io/en/latest/ .
 """
 os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
 ray.init()
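
As context for the docstring change above, here is a rough sketch, not taken from this commit, of how a Ray placement group spanning GPUs 1 and 2 might be created so that vLLM's worker processes can later be scheduled into it. The bundle shapes and variable names are assumptions for illustration only.

import os

import ray
from ray.util.placement_group import placement_group

# Keep GPUs 1 and 2 visible for the inference side, as in the example.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
ray.init()

# Hypothetical bundle layout: one bundle per inference GPU. vLLM's Ray
# workers would then be scheduled into these bundles.
pg = placement_group([{"GPU": 1, "CPU": 1}] * 2)
ray.get(pg.ready())  # block until the resources are actually reserved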

vllm/envs.py

Lines changed: 7 additions & 0 deletions
@@ -86,6 +86,7 @@
     VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON: bool = False
     VLLM_RAY_PER_WORKER_GPUS: float = 1.0
     VLLM_RAY_BUNDLE_INDICES: str = ""
+    VLLM_RAY_PLACEMENT_GROUP: Optional[str] = None
     VLLM_CUDART_SO_PATH: Optional[str] = None
     VLLM_USE_HPU_CONTIGUOUS_CACHE_FETCH: bool = True
     VLLM_DP_RANK: int = 0
@@ -577,6 +578,12 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
     "VLLM_RAY_BUNDLE_INDICES":
     lambda: os.getenv("VLLM_RAY_BUNDLE_INDICES", ""),
 
+    # Ray placement group (serialized string), if it is set and
+    # ray.util.get_current_placement_group() is None, it will be used as the
+    # placement group in vLLM Ray executor.
+    "VLLM_RAY_PLACEMENT_GROUP":
+    lambda: os.getenv("VLLM_RAY_PLACEMENT_GROUP", None),
+
     # In some system, find_loaded_library() may not work. So we allow users to
     # specify the path through environment variable VLLM_CUDART_SO_PATH.
     "VLLM_CUDART_SO_PATH":

vllm/executor/ray_utils.py

Lines changed: 45 additions & 1 deletion
@@ -1,12 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import json
 import os
 import time
 from collections import defaultdict
 from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 
 import msgspec
 
+import vllm.envs as envs
 import vllm.platforms
 from vllm.config import ParallelConfig
 from vllm.executor.msgspec_utils import decode_hook, encode_hook
@@ -162,6 +164,41 @@ def assert_ray_available():
            "`pip install ray`.") from ray_import_err
 
 
+def serialize_placement_group_to_str(placement_group: "PlacementGroup") -> str:
+    """Serialize a placement group to a string.
+    FIXME: This should be implemented in Ray.
+
+    Args:
+        placement_group: The placement group to serialize.
+
+    Returns:
+        A string representation of the placement group.
+    """
+    placement_group_data = {
+        "id": placement_group.id.hex(),
+        "bundle_cache": placement_group.bundle_cache,
+    }
+    return json.dumps(placement_group_data)
+
+
+def deserialize_placement_group_from_str(
+        placement_group_str: str) -> "PlacementGroup":
+    """Deserialize a placement group from a string.
+    FIXME: This should be implemented in Ray.
+
+    Args:
+        placement_group_str: The string representation of the placement group.
+
+    Returns:
+        A placement group.
+    """
+    placement_group_data = json.loads(placement_group_str)
+    return PlacementGroup(
+        id=ray._raylet.PlacementGroupID.from_hex(placement_group_data["id"]),
+        bundle_cache=placement_group_data["bundle_cache"],
+    )
+
+
 def _verify_bundles(placement_group: "PlacementGroup",
                     parallel_config: ParallelConfig, device_str: str):
     """Verify a given placement group has bundles located in the right place.
@@ -308,12 +345,19 @@ def initialize_ray_cluster(
 
     # Create or get the placement group for worker processes
     if parallel_config.placement_group:
+        logger.info(
+            "Using the existing Ray placement group from parallel config")
         current_placement_group = parallel_config.placement_group
+    elif envs.VLLM_RAY_PLACEMENT_GROUP:
+        logger.info("Using the existing Ray placement group from "
+                    "VLLM_RAY_PLACEMENT_GROUP")
+        current_placement_group = deserialize_placement_group_from_str(
+            envs.VLLM_RAY_PLACEMENT_GROUP)
     else:
+        logger.info("Trying to get the existing Ray placement group")
         current_placement_group = ray.util.get_current_placement_group()
 
     if current_placement_group:
-        logger.info("Using the existing placement group")
 
         # We are in a placement group
         bundles = current_placement_group.bundle_specs
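
With this change, initialize_ray_cluster resolves the placement group in priority order: parallel_config.placement_group first, then a group deserialized from VLLM_RAY_PLACEMENT_GROUP, and finally whatever ray.util.get_current_placement_group() returns. The sketch below is a small, illustrative round-trip check of the two new helpers; the bundle shape and variable names are assumptions, not part of the commit.

import ray
from ray.util.placement_group import placement_group

from vllm.executor.ray_utils import (deserialize_placement_group_from_str,
                                     serialize_placement_group_to_str)

ray.init()
pg = placement_group([{"GPU": 1, "CPU": 1}] * 2)  # illustrative bundles
ray.get(pg.ready())

# The serialized string carries only the group id and the bundle cache,
# which is exactly what the deserializer reconstructs.
restored = deserialize_placement_group_from_str(
    serialize_placement_group_to_str(pg))
assert restored.id.hex() == pg.id.hex()  # same underlying placement group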
