From 08769efb638bc0580349cc2b9fad2bb70f0428f7 Mon Sep 17 00:00:00 2001 From: "jiang.li" Date: Thu, 22 May 2025 07:40:56 +0000 Subject: [PATCH] fix shm Signed-off-by: jiang.li --- vllm/distributed/device_communicators/cpu_communicator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/distributed/device_communicators/cpu_communicator.py b/vllm/distributed/device_communicators/cpu_communicator.py index d4b34900b95..c04218cb9f3 100644 --- a/vllm/distributed/device_communicators/cpu_communicator.py +++ b/vllm/distributed/device_communicators/cpu_communicator.py @@ -22,8 +22,10 @@ def __init__(self, super().__init__(cpu_group, device, device_group, unique_name) self.dist_module = torch.distributed - if (current_platform.get_cpu_architecture() == CpuArchEnum.X86) \ - and hasattr(torch.ops._C, "init_shm_manager"): + if (current_platform.get_cpu_architecture() + == CpuArchEnum.X86) and hasattr( + torch.ops._C, + "init_shm_manager") and unique_name.startswith("tp"): self.dist_module = _CPUSHMDistributed(self) def all_reduce(self, input_): @@ -96,6 +98,8 @@ class _CPUSHMDistributed: def __init__(self, communicator: CpuCommunicator): instance_identifier = os.environ["VLLM_DIST_IDENT"] + unique_name = communicator.unique_name + instance_identifier = f"{instance_identifier}-{unique_name}" self.communicator = communicator group_ranks = [str(rank) for rank in self.communicator.ranks]