
Commit 2bb0e1a

vllmellm, tjtanaa, and DarkLight1337 authored
[Bugfix][ROCm] running new process using spawn method for rocm in tests. (#14810)
Signed-off-by: vllmellm <[email protected]>
Signed-off-by: tjtanaa <[email protected]>
Co-authored-by: TJian <[email protected]>
Co-authored-by: Cyrus Leung <[email protected]>
1 parent 6eaf1e5 commit 2bb0e1a

21 files changed: 174 additions, 99 deletions
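The commit replaces the bare `@fork_new_process_for_each_test` decorator with the decorator factory `@create_new_process_for_each_test()` throughout the test suite, so that the per-test subprocess can be started with the `spawn` method on ROCm (where forking a process that has already initialized the GPU runtime is generally problematic) while keeping `fork` elsewhere. The factory itself lives in `tests/utils.py`, whose hunk is not shown below; what follows is only a minimal sketch of how such a factory could behave. The helper names `_looks_like_rocm` and `_child_entry` are hypothetical, and the real code presumably consults `vllm.platforms.current_platform.is_rocm()` rather than an environment variable.

# Hedged sketch only -- not the tests/utils.py implementation from this commit.
import functools
import importlib
import multiprocessing
import os
from typing import Any, Callable, Optional


def _looks_like_rocm() -> bool:
    # Hypothetical stand-in for current_platform.is_rocm().
    return os.environ.get("ROCM_PATH") is not None


def _child_entry(module_name: str, func_name: str,
                 args: tuple, kwargs: dict) -> None:
    # Re-resolve the test in the child so only strings and the test arguments
    # need to be pickled, which keeps the "spawn" start method happy.
    # Assumes the test arguments themselves are picklable.
    func = getattr(importlib.import_module(module_name), func_name)
    # The module attribute is now the wrapper; call the original test body.
    getattr(func, "__wrapped__", func)(*args, **kwargs)


def create_new_process_for_each_test(
        method: Optional[str] = None) -> Callable[[Callable], Callable]:
    """Decorator factory: run each invocation of a test in a new process."""
    if method is None:
        method = "spawn" if _looks_like_rocm() else "fork"
    assert method in ("spawn", "fork")

    def decorator(test_fn: Callable) -> Callable:

        @functools.wraps(test_fn)
        def wrapper(*args: Any, **kwargs: Any) -> None:
            ctx = multiprocessing.get_context(method)
            proc = ctx.Process(target=_child_entry,
                               args=(test_fn.__module__, test_fn.__name__,
                                     args, kwargs))
            proc.start()
            proc.join()
            assert proc.exitcode == 0, (
                f"{test_fn.__name__} failed in a {method} subprocess "
                f"(exit code {proc.exitcode})")

        return wrapper

    return decorator

At each call site the only visible change is syntactic: the decorator is now called, `@create_new_process_for_each_test()`. Under this sketch a test could also pin the start method explicitly, e.g. `@create_new_process_for_each_test("spawn")`; whether the real helper exposes such a parameter is not shown in these hunks.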

tests/basic_correctness/test_cumem.py

Lines changed: 5 additions & 5 deletions

@@ -7,10 +7,10 @@
 from vllm.device_allocator.cumem import CuMemAllocator
 from vllm.utils import GiB_bytes
 
-from ..utils import fork_new_process_for_each_test
+from ..utils import create_new_process_for_each_test
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_python_error():
     """
     Test if Python error occurs when there's low-level
@@ -36,7 +36,7 @@ def test_python_error():
     allocator.wake_up()
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_basic_cumem():
     # some tensors from default memory pool
     shape = (1024, 1024)
@@ -69,7 +69,7 @@ def test_basic_cumem():
     assert torch.allclose(output, torch.ones_like(output) * 3)
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_cumem_with_cudagraph():
     allocator = CuMemAllocator.get_instance()
     with allocator.use_memory_pool():
@@ -114,7 +114,7 @@ def model(x):
     assert torch.allclose(y, x + 1)
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 @pytest.mark.parametrize(
     "model, use_v1",
     [

tests/compile/test_full_graph.py

Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@
 from vllm.config import CompilationLevel
 from vllm.platforms import current_platform
 
-from ..utils import fork_new_process_for_each_test
+from ..utils import create_new_process_for_each_test
 
 
 @pytest.fixture(params=None, name="model_info")
@@ -78,7 +78,7 @@ def models_list_fixture(request):
     [CompilationLevel.DYNAMO_ONCE, CompilationLevel.PIECEWISE],
 )
 @pytest.mark.parametrize("model_info", "", indirect=True)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_full_graph(
     monkeypatch: pytest.MonkeyPatch,
     model_info: tuple[str, dict[str, Any]],

tests/distributed/test_expert_parallel.py

Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@
 from vllm.config import TaskOption
 from vllm.logger import init_logger
 
-from ..utils import compare_two_settings, fork_new_process_for_each_test
+from ..utils import compare_two_settings, create_new_process_for_each_test
 
 logger = init_logger("test_expert_parallel")
 
@@ -209,7 +209,7 @@ def _compare_tp(
         for params in settings.iter_params(model_name)
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ep(
     model_name: str,
     parallel_setup: ParallelSetup,

tests/distributed/test_pipeline_parallel.py

Lines changed: 4 additions & 4 deletions

@@ -17,7 +17,7 @@
 from vllm.logger import init_logger
 
 from ..models.registry import HF_EXAMPLE_MODELS
-from ..utils import compare_two_settings, fork_new_process_for_each_test
+from ..utils import compare_two_settings, create_new_process_for_each_test
 
 logger = init_logger("test_pipeline_parallel")
 
@@ -402,7 +402,7 @@ def _compare_tp(
         for params in settings.iter_params(model_id) if model_id in TEST_MODELS
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_tp_language_generation(
     model_id: str,
     parallel_setup: ParallelSetup,
@@ -431,7 +431,7 @@ def test_tp_language_generation(
         for params in settings.iter_params(model_id) if model_id in TEST_MODELS
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_tp_language_embedding(
     model_id: str,
     parallel_setup: ParallelSetup,
@@ -460,7 +460,7 @@ def test_tp_language_embedding(
         for params in settings.iter_params(model_id) if model_id in TEST_MODELS
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_tp_multimodal_generation(
     model_id: str,
     parallel_setup: ParallelSetup,

tests/distributed/test_pp_cudagraph.py

Lines changed: 2 additions & 2 deletions

@@ -5,7 +5,7 @@
 
 import pytest
 
-from ..utils import compare_two_settings, fork_new_process_for_each_test
+from ..utils import compare_two_settings, create_new_process_for_each_test
 
 if TYPE_CHECKING:
     from typing_extensions import LiteralString
@@ -18,7 +18,7 @@
     "FLASH_ATTN",
     "FLASHINFER",
 ])
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_pp_cudagraph(
     monkeypatch: pytest.MonkeyPatch,
     PP_SIZE: int,

tests/entrypoints/llm/test_collective_rpc.py

Lines changed: 2 additions & 2 deletions

@@ -4,12 +4,12 @@
 
 from vllm import LLM
 
-from ...utils import fork_new_process_for_each_test
+from ...utils import create_new_process_for_each_test
 
 
 @pytest.mark.parametrize("tp_size", [1, 2])
 @pytest.mark.parametrize("backend", ["mp", "ray"])
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_collective_rpc(tp_size, backend):
     if tp_size == 1 and backend == "ray":
         pytest.skip("Skip duplicate test case")

tests/lora/test_chatglm3_tp.py

Lines changed: 4 additions & 5 deletions

@@ -3,10 +3,9 @@
 import pytest
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.lora.request import LoRARequest
 
-from ..utils import multi_gpu_test
+from ..utils import create_new_process_for_each_test, multi_gpu_test
 
 MODEL_PATH = "THUDM/chatglm3-6b"
 
@@ -55,7 +54,7 @@ def v1(run_with_both_engines_lora):
     pass
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_chatglm3_lora(chatglm3_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -75,7 +74,7 @@ def test_chatglm3_lora(chatglm3_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4(chatglm3_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -96,7 +95,7 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,

tests/lora/test_llama_tp.py

Lines changed: 6 additions & 7 deletions

@@ -4,10 +4,9 @@
 import ray
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.lora.request import LoRARequest
 
-from ..utils import multi_gpu_test
+from ..utils import create_new_process_for_each_test, multi_gpu_test
 
 MODEL_PATH = "meta-llama/Llama-2-7b-hf"
 
@@ -82,7 +81,7 @@ def v1(run_with_both_engines_lora):
 
 # V1 Test: Failing due to numerics on V1.
 @pytest.mark.skip_v1
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora(sql_lora_files):
 
     llm = vllm.LLM(MODEL_PATH,
@@ -97,7 +96,7 @@ def test_llama_lora(sql_lora_files):
 # Skipping for v1 as v1 doesn't have a good way to expose the num_gpu_blocks
 # used by the engine yet.
 @pytest.mark.skip_v1
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_warmup(sql_lora_files):
     """Test that the LLM initialization works with a warmup LORA path and
     is more conservative"""
@@ -128,7 +127,7 @@ def get_num_gpu_blocks_no_lora():
 # V1 Test: Failing due to numerics on V1.
 @pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_tp4(sql_lora_files):
 
     llm = vllm.LLM(
@@ -143,7 +142,7 @@ def test_llama_lora_tp4(sql_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
 
     llm = vllm.LLM(
@@ -159,7 +158,7 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_tp4_fully_sharded_enable_bias(sql_lora_files):
 
     llm = vllm.LLM(

tests/lora/test_minicpmv_tp.py

Lines changed: 5 additions & 4 deletions

@@ -3,11 +3,12 @@
 import pytest
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.assets.image import ImageAsset
 from vllm.lora.request import LoRARequest
 from vllm.platforms import current_platform
 
+from ..utils import create_new_process_for_each_test
+
 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
 
 PROMPT_TEMPLATE = (
@@ -57,7 +58,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_minicpmv_lora(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -80,7 +81,7 @@ def test_minicpmv_lora(minicpmv_lora_files):
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -101,7 +102,7 @@ def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,

tests/lora/test_transfomers_model.py

Lines changed: 4 additions & 5 deletions

@@ -3,10 +3,9 @@
 import pytest
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.lora.request import LoRARequest
 
-from ..utils import multi_gpu_test
+from ..utils import create_new_process_for_each_test, multi_gpu_test
 
 MODEL_PATH = "ArthurZ/ilama-3.2-1B"
 
@@ -56,7 +55,7 @@ def v1(run_with_both_engines_lora):
 
 
 @pytest.mark.skip_v1
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ilama_lora(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -77,7 +76,7 @@ def test_ilama_lora(ilama_lora_files):
 
 @pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ilama_lora_tp4(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -99,7 +98,7 @@ def test_ilama_lora_tp4(ilama_lora_files):
 
 @pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
