Skip to content

Commit 9705b90

Browse files
authored
[Bugfix] Fix race condition that leads to tokens being returned in the wrong order (#10802)
Signed-off-by: Jannis Schönleber <[email protected]>
1 parent 3aec49e commit 9705b90

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

vllm/engine/multiprocessing/client.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,14 @@ async def setup(self):
262262
"""Setup the client before it starts sending server requests."""
263263

264264
# Start output_loop
265-
self.output_loop = asyncio.create_task(self.run_output_handler_loop())
265+
if self.output_loop is None:
266+
# only generate once to avoid multiple concurrent output_loops
267+
# as these would lead to race conditions and a wrong order of tokens
268+
# returned by the engine
269+
# setup will be called multiple times during the startup of
270+
# the engine
271+
self.output_loop = asyncio.create_task(
272+
self.run_output_handler_loop())
266273

267274
with self.get_data_socket() as socket:
268275
# Wait until server is ready.
@@ -271,8 +278,9 @@ async def setup(self):
271278
self.tracing_flag = response.tracing_enabled
272279

273280
# Start health_loop.
274-
self.health_loop = asyncio.create_task(
275-
self.run_heartbeat_loop(timeout=VLLM_RPC_TIMEOUT))
281+
if self.health_loop is None:
282+
self.health_loop = asyncio.create_task(
283+
self.run_heartbeat_loop(timeout=VLLM_RPC_TIMEOUT))
276284

277285
def close(self):
278286
"""Destroy the ZeroMQ Context."""

0 commit comments

Comments
 (0)