
Commit 2bb0e1a

vllmellm, tjtanaa, and DarkLight1337 authored
[Bugfix][ROCm] running new process using spawn method for rocm in tests. (#14810)
Signed-off-by: vllmellm <[email protected]>
Signed-off-by: tjtanaa <[email protected]>
Co-authored-by: TJian <[email protected]>
Co-authored-by: Cyrus Leung <[email protected]>
1 parent 6eaf1e5 commit 2bb0e1a

21 files changed: 174 additions, 99 deletions
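The commit replaces the bare `@fork_new_process_for_each_test` decorator with the decorator factory `@create_new_process_for_each_test()` throughout the test suite, so that the per-test subprocess can be started with the `spawn` method on ROCm (where forking a process that has already initialized the GPU runtime is generally problematic) while keeping `fork` elsewhere. The factory itself lives in `tests/utils.py`, whose hunk is not shown below; what follows is only a minimal sketch of how such a factory could behave. The helper names `_looks_like_rocm` and `_child_entry` are hypothetical, and the real code presumably consults `vllm.platforms.current_platform.is_rocm()` rather than an environment variable.

# Hedged sketch only -- not the tests/utils.py implementation from this commit.
import functools
import importlib
import multiprocessing
import os
from typing import Any, Callable, Optional


def _looks_like_rocm() -> bool:
    # Hypothetical stand-in for current_platform.is_rocm().
    return os.environ.get("ROCM_PATH") is not None


def _child_entry(module_name: str, func_name: str,
                 args: tuple, kwargs: dict) -> None:
    # Re-resolve the test in the child so only strings and the test arguments
    # need to be pickled, which keeps the "spawn" start method happy.
    # Assumes the test arguments themselves are picklable.
    func = getattr(importlib.import_module(module_name), func_name)
    # The module attribute is now the wrapper; call the original test body.
    getattr(func, "__wrapped__", func)(*args, **kwargs)


def create_new_process_for_each_test(
        method: Optional[str] = None) -> Callable[[Callable], Callable]:
    """Decorator factory: run each invocation of a test in a new process."""
    if method is None:
        method = "spawn" if _looks_like_rocm() else "fork"
    assert method in ("spawn", "fork")

    def decorator(test_fn: Callable) -> Callable:

        @functools.wraps(test_fn)
        def wrapper(*args: Any, **kwargs: Any) -> None:
            ctx = multiprocessing.get_context(method)
            proc = ctx.Process(target=_child_entry,
                               args=(test_fn.__module__, test_fn.__name__,
                                     args, kwargs))
            proc.start()
            proc.join()
            assert proc.exitcode == 0, (
                f"{test_fn.__name__} failed in a {method} subprocess "
                f"(exit code {proc.exitcode})")

        return wrapper

    return decorator

At each call site the only visible change is syntactic: the decorator is now called, `@create_new_process_for_each_test()`. Under this sketch a test could also pin the start method explicitly, e.g. `@create_new_process_for_each_test("spawn")`; whether the real helper exposes such a parameter is not shown in these hunks.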

tests/basic_correctness/test_cumem.py

Lines changed: 5 additions & 5 deletions

@@ -7,10 +7,10 @@
 from vllm.device_allocator.cumem import CuMemAllocator
 from vllm.utils import GiB_bytes
 
-from ..utils import fork_new_process_for_each_test
+from ..utils import create_new_process_for_each_test
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_python_error():
     """
     Test if Python error occurs when there's low-level
@@ -36,7 +36,7 @@ def test_python_error():
     allocator.wake_up()
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_basic_cumem():
     # some tensors from default memory pool
     shape = (1024, 1024)
@@ -69,7 +69,7 @@ def test_basic_cumem():
     assert torch.allclose(output, torch.ones_like(output) * 3)
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_cumem_with_cudagraph():
     allocator = CuMemAllocator.get_instance()
     with allocator.use_memory_pool():
@@ -114,7 +114,7 @@ def model(x):
     assert torch.allclose(y, x + 1)
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 @pytest.mark.parametrize(
     "model, use_v1",
     [

tests/compile/test_full_graph.py

Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@
 from vllm.config import CompilationLevel
 from vllm.platforms import current_platform
 
-from ..utils import fork_new_process_for_each_test
+from ..utils import create_new_process_for_each_test
 
 
 @pytest.fixture(params=None, name="model_info")
@@ -78,7 +78,7 @@ def models_list_fixture(request):
     [CompilationLevel.DYNAMO_ONCE, CompilationLevel.PIECEWISE],
 )
 @pytest.mark.parametrize("model_info", "", indirect=True)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_full_graph(
     monkeypatch: pytest.MonkeyPatch,
     model_info: tuple[str, dict[str, Any]],

tests/distributed/test_expert_parallel.py

Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@
 from vllm.config import TaskOption
 from vllm.logger import init_logger
 
-from ..utils import compare_two_settings, fork_new_process_for_each_test
+from ..utils import compare_two_settings, create_new_process_for_each_test
 
 logger = init_logger("test_expert_parallel")
 
@@ -209,7 +209,7 @@ def _compare_tp(
         for params in settings.iter_params(model_name)
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ep(
     model_name: str,
     parallel_setup: ParallelSetup,

tests/distributed/test_pipeline_parallel.py

Lines changed: 4 additions & 4 deletions

@@ -17,7 +17,7 @@
 from vllm.logger import init_logger
 
 from ..models.registry import HF_EXAMPLE_MODELS
-from ..utils import compare_two_settings, fork_new_process_for_each_test
+from ..utils import compare_two_settings, create_new_process_for_each_test
 
 logger = init_logger("test_pipeline_parallel")
 
@@ -402,7 +402,7 @@ def _compare_tp(
         for params in settings.iter_params(model_id) if model_id in TEST_MODELS
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_tp_language_generation(
     model_id: str,
     parallel_setup: ParallelSetup,
@@ -431,7 +431,7 @@ def test_tp_language_generation(
         for params in settings.iter_params(model_id) if model_id in TEST_MODELS
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_tp_language_embedding(
     model_id: str,
     parallel_setup: ParallelSetup,
@@ -460,7 +460,7 @@ def test_tp_language_embedding(
         for params in settings.iter_params(model_id) if model_id in TEST_MODELS
     ],
 )
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_tp_multimodal_generation(
     model_id: str,
     parallel_setup: ParallelSetup,

tests/distributed/test_pp_cudagraph.py

Lines changed: 2 additions & 2 deletions

@@ -5,7 +5,7 @@
 
 import pytest
 
-from ..utils import compare_two_settings, fork_new_process_for_each_test
+from ..utils import compare_two_settings, create_new_process_for_each_test
 
 if TYPE_CHECKING:
     from typing_extensions import LiteralString
@@ -18,7 +18,7 @@
     "FLASH_ATTN",
     "FLASHINFER",
 ])
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_pp_cudagraph(
     monkeypatch: pytest.MonkeyPatch,
     PP_SIZE: int,

tests/entrypoints/llm/test_collective_rpc.py

Lines changed: 2 additions & 2 deletions

@@ -4,12 +4,12 @@
 
 from vllm import LLM
 
-from ...utils import fork_new_process_for_each_test
+from ...utils import create_new_process_for_each_test
 
 
 @pytest.mark.parametrize("tp_size", [1, 2])
 @pytest.mark.parametrize("backend", ["mp", "ray"])
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_collective_rpc(tp_size, backend):
     if tp_size == 1 and backend == "ray":
         pytest.skip("Skip duplicate test case")

tests/lora/test_chatglm3_tp.py

Lines changed: 4 additions & 5 deletions

@@ -3,10 +3,9 @@
 import pytest
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.lora.request import LoRARequest
 
-from ..utils import multi_gpu_test
+from ..utils import create_new_process_for_each_test, multi_gpu_test
 
 MODEL_PATH = "THUDM/chatglm3-6b"
 
@@ -55,7 +54,7 @@ def v1(run_with_both_engines_lora):
     pass
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_chatglm3_lora(chatglm3_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -75,7 +74,7 @@ def test_chatglm3_lora(chatglm3_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4(chatglm3_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -96,7 +95,7 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,

tests/lora/test_llama_tp.py

Lines changed: 6 additions & 7 deletions

@@ -4,10 +4,9 @@
 import ray
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.lora.request import LoRARequest
 
-from ..utils import multi_gpu_test
+from ..utils import create_new_process_for_each_test, multi_gpu_test
 
 MODEL_PATH = "meta-llama/Llama-2-7b-hf"
 
@@ -82,7 +81,7 @@ def v1(run_with_both_engines_lora):
 
 # V1 Test: Failing due to numerics on V1.
 @pytest.mark.skip_v1
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora(sql_lora_files):
 
     llm = vllm.LLM(MODEL_PATH,
@@ -97,7 +96,7 @@ def test_llama_lora(sql_lora_files):
 # Skipping for v1 as v1 doesn't have a good way to expose the num_gpu_blocks
 # used by the engine yet.
 @pytest.mark.skip_v1
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_warmup(sql_lora_files):
     """Test that the LLM initialization works with a warmup LORA path and
     is more conservative"""
@@ -128,7 +127,7 @@ def get_num_gpu_blocks_no_lora():
 # V1 Test: Failing due to numerics on V1.
 @pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_tp4(sql_lora_files):
 
     llm = vllm.LLM(
@@ -143,7 +142,7 @@ def test_llama_lora_tp4(sql_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
 
     llm = vllm.LLM(
@@ -159,7 +158,7 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_llama_lora_tp4_fully_sharded_enable_bias(sql_lora_files):
 
     llm = vllm.LLM(

tests/lora/test_minicpmv_tp.py

Lines changed: 5 additions & 4 deletions

@@ -3,11 +3,12 @@
 import pytest
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.assets.image import ImageAsset
 from vllm.lora.request import LoRARequest
 from vllm.platforms import current_platform
 
+from ..utils import create_new_process_for_each_test
+
 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
 
 PROMPT_TEMPLATE = (
@@ -57,7 +58,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_minicpmv_lora(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -80,7 +81,7 @@ def test_minicpmv_lora(minicpmv_lora_files):
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -101,7 +102,7 @@ def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,

tests/lora/test_transfomers_model.py

Lines changed: 4 additions & 5 deletions

@@ -3,10 +3,9 @@
 import pytest
 
 import vllm
-from tests.utils import fork_new_process_for_each_test
 from vllm.lora.request import LoRARequest
 
-from ..utils import multi_gpu_test
+from ..utils import create_new_process_for_each_test, multi_gpu_test
 
 MODEL_PATH = "ArthurZ/ilama-3.2-1B"
 
@@ -56,7 +55,7 @@ def v1(run_with_both_engines_lora):
 
 
 @pytest.mark.skip_v1
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ilama_lora(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -77,7 +76,7 @@ def test_ilama_lora(ilama_lora_files):
 
 @pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ilama_lora_tp4(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -99,7 +98,7 @@ def test_ilama_lora_tp4(ilama_lora_files):
 
 @pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=4)
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
