Skip to content

Commit 5ea5c51

Browse files
authored
[BugFix] Increase timeout for startup failure test (#17642)
Signed-off-by: Nick Hill <[email protected]>
1 parent d3efde8 commit 5ea5c51

File tree

1 file changed

+21
-14
lines changed

1 file changed

+21
-14
lines changed

tests/v1/engine/test_engine_core_client.py

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import asyncio
4+
import os
5+
import signal
46
import time
57
import uuid
68
from threading import Thread
79
from typing import Optional
810

9-
import psutil
1011
import pytest
1112
from transformers import AutoTokenizer
1213

@@ -17,8 +18,8 @@
1718
from vllm.usage.usage_lib import UsageContext
1819
from vllm.v1.engine import EngineCoreRequest
1920
from vllm.v1.engine.core import EngineCore
20-
from vllm.v1.engine.core_client import (AsyncMPClient, EngineCoreClient,
21-
SyncMPClient)
21+
from vllm.v1.engine.core_client import (AsyncMPClient, CoreEngine,
22+
EngineCoreClient, SyncMPClient)
2223
from vllm.v1.executor.abstract import Executor
2324

2425
from ...distributed.conftest import MockSubscriber
@@ -337,34 +338,40 @@ def test_kv_cache_events(
337338
"Token ids should be the same as the custom tokens")
338339
finally:
339340
client.shutdown()
340-
return
341341

342342

343-
@pytest.mark.timeout(10)
343+
@pytest.mark.timeout(20)
344344
def test_startup_failure(monkeypatch: pytest.MonkeyPatch):
345345

346346
with monkeypatch.context() as m, pytest.raises(Exception) as e_info:
347347
m.setenv("VLLM_USE_V1", "1")
348348

349+
# Monkey-patch to extract core process pid while it's starting.
350+
core_proc_pid = [None]
351+
ce_ctor = CoreEngine.__init__
352+
353+
def patched_ce_ctor(self, *args, **kwargs):
354+
ce_ctor(self, *args, **kwargs)
355+
core_proc_pid[0] = self.proc_handle.proc.pid
356+
357+
m.setattr(CoreEngine, "__init__", patched_ce_ctor)
358+
359+
t = time.time()
349360
engine_args = EngineArgs(model=MODEL_NAME)
350361
vllm_config = engine_args.create_engine_config(
351362
usage_context=UsageContext.UNKNOWN_CONTEXT)
352363
executor_class = Executor.get_class(vllm_config)
364+
print(f"VllmConfig creation took {time.time() - t:.2f} seconds.")
353365

354366
# Start another thread to wait for engine core process to start
355367
# and kill it - simulate fatal uncaught process exit.
356-
this_proc = psutil.Process()
357-
children_before = set(this_proc.children())
358368

359369
def kill_first_child():
360-
while True:
370+
while (child_pid := core_proc_pid[0]) is None:
361371
time.sleep(0.5)
362-
children = set(this_proc.children()) - children_before
363-
if children:
364-
child = children.pop()
365-
print("Killing child core process", child.pid)
366-
child.kill()
367-
break
372+
print(f"Killing child core process {child_pid}")
373+
assert isinstance(child_pid, int)
374+
os.kill(child_pid, signal.SIGKILL)
368375

369376
Thread(target=kill_first_child, daemon=True).start()
370377

0 commit comments

Comments
 (0)