Commit 7e433fe

kzawora-intel authored and rasmith committed

[Bugfix] Fix HPU multiprocessing executor (vllm-project#12167)

Signed-off-by: Konrad Zawora <[email protected]>

1 parent f37ff17 commit 7e433fe

File tree

4 files changed, +22 -4 lines changed

  vllm/config.py
  vllm/engine/arg_utils.py
  vllm/platforms/hpu.py
  vllm/worker/hpu_worker.py

vllm/config.py

Lines changed: 1 addition & 1 deletion

@@ -1293,7 +1293,7 @@ def __post_init__(self) -> None:
                 raise ValueError(f"worker-use-ray can't be used with "
                                  f"distributed executor backend "
                                  f"'{self.distributed_executor_backend}'.")
-        ray_only_devices = ["tpu", "hpu"]
+        ray_only_devices = ["tpu"]
         from vllm.platforms import current_platform
         if (current_platform.device_type in ray_only_devices
                 and self.world_size > 1):
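
For context, dropping "hpu" from ray_only_devices means an HPU deployment with world_size > 1 is no longer forced onto the Ray backend and may use the multiprocessing ("mp") executor. Below is a minimal stand-alone sketch of that check's behavior; validate_backend and its arguments are illustrative stand-ins, not vLLM APIs, and the real logic lives in the __post_init__ hunk above.

# Illustrative stand-in for the validation touched above (assumption: not vLLM code).
def validate_backend(device_type: str, world_size: int, backend: str) -> None:
    ray_only_devices = ["tpu"]  # "hpu" removed by this commit
    if (device_type in ray_only_devices and world_size > 1
            and backend != "ray"):
        raise ValueError(f"{device_type} requires the 'ray' backend for "
                         f"world_size={world_size}, got '{backend}'.")


validate_backend("hpu", world_size=2, backend="mp")  # now accepted
try:
    validate_backend("tpu", world_size=2, backend="mp")
except ValueError as exc:
    print(exc)  # tpu still requires Ray for multi-worker inference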

vllm/engine/arg_utils.py

Lines changed: 1 addition & 1 deletion

@@ -397,7 +397,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             'or equal to the number of GPUs available, "mp" will be used to '
             'keep processing on a single host. Otherwise, this will default '
             'to "ray" if Ray is installed and fail otherwise. Note that tpu '
-            'and hpu only support Ray for distributed inference.')
+            'only supports Ray for distributed inference.')

         parser.add_argument(
             '--worker-use-ray',

vllm/platforms/hpu.py

Lines changed: 18 additions & 0 deletions

@@ -1,7 +1,9 @@
+import os
 from typing import TYPE_CHECKING, Optional

 import torch

+from vllm import envs
 from vllm.logger import init_logger

 from .interface import Platform, PlatformEnum, _Backend
@@ -58,6 +60,22 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:
             cache_config.block_size = 128
+        if (parallel_config.distributed_executor_backend == 'mp'
+                and envs.VLLM_WORKER_MULTIPROC_METHOD == 'fork'):
+            if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD",
+                              None) is not None:
+                logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork "
+                               "might cause application hangs on exit. Using "
+                               "VLLM_WORKER_MULTIPROC_METHOD=fork anyway, "
+                               "as it was explicitly requested.")
+            else:
+                logger.warning(
+                    "On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork "
+                    "might cause application hangs on exit. Setting "
+                    "VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. "
+                    "To override that behavior, please set "
+                    "VLLM_WORKER_MULTIPROC_METHOD=fork explicitly.")
+                os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

     @classmethod
     def is_pin_memory_available(cls):
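
As an aside on the start-method distinction this hunk works around: "fork" duplicates the parent process, including any runtime state already initialized, which is presumably what can lead to the exit hangs the warning mentions, while "spawn" launches a fresh interpreter per worker. Below is a minimal standard-library sketch (not vLLM code); VLLM_WORKER_MULTIPROC_METHOD is the same environment variable the patch reads, everything else is illustrative.

# Standard-library sketch of fork vs. spawn start methods (assumption: not vLLM code).
import multiprocessing as mp
import os


def worker(rank: int) -> None:
    print(f"worker {rank} running in pid {os.getpid()}")


if __name__ == "__main__":
    # Mirrors the patched behavior: prefer "spawn" unless the user explicitly
    # exported VLLM_WORKER_MULTIPROC_METHOD=fork themselves.
    method = os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
    ctx = mp.get_context(method)
    procs = [ctx.Process(target=worker, args=(rank,)) for rank in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()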

vllm/worker/hpu_worker.py

Lines changed: 2 additions & 2 deletions

@@ -130,7 +130,6 @@ def execute_model(
         self,
         execute_model_req: Optional[ExecuteModelRequest] = None,
     ) -> Optional[List[SamplerOutput]]:
-        assert execute_model_req is not None
         # VLLM_HPU_LOG_STEP_GRAPH_COMPILATION - will log graph compilations per engine step, only when there was any - highly recommended to use alongside PT_HPU_METRICS_GC_DETAILS! # noqa:E501
         # VLLM_HPU_LOG_STEP_GRAPH_COMPILATION_ALL - will log graph compilations per engine step, always, even if there were none # noqa:E501
         # VLLM_HPU_LOG_STEP_CPU_FALLBACKS - will log cpu fallbacks per engine step, only when there was any # noqa:E501
@@ -144,7 +143,8 @@ def execute_model(
             'VLLM_HPU_LOG_STEP_CPU_FALLBACKS_ALL', '0') != '0'
         log_cpu_fallbacks = os.environ.get('VLLM_HPU_LOG_STEP_CPU_FALLBACKS',
                                            '0') != '0' or log_cpu_fallbacks_all
-        if log_graph_compilation or log_cpu_fallbacks:
+        if (log_graph_compilation or log_cpu_fallbacks) and \
+                execute_model_req is not None:
             from habana_frameworks.torch.hpu.metrics import metric_localcontext
             seq_group_metadata_list = execute_model_req.seq_group_metadata_list
             is_prompt = any([
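
The removed assert would trip under the multiprocessing executor, where execute_model() can presumably be invoked with execute_model_req=None; the fix instead checks for None at the one place the request is actually dereferenced, the metrics-logging path. A hedged sketch of that guard pattern follows (illustrative names, not the actual worker code).

# Illustrative guard pattern (assumption: simplified stand-in, not the HPU worker).
from typing import List, Optional


def execute_model(execute_model_req: Optional[List[str]] = None,
                  log_metrics: bool = False) -> None:
    # Instead of `assert execute_model_req is not None`, only touch the
    # request when metrics logging is on and a request was actually passed.
    if log_metrics and execute_model_req is not None:
        print(f"logging metrics for {len(execute_model_req)} sequence groups")
    # ...normal execution continues whether or not a request object arrived...


execute_model(None, log_metrics=True)            # no longer trips an assert
execute_model(["seq_group_0"], log_metrics=True)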
