Skip to content

Commit 24e0379

Browse files
authored
gh-90155: Fix bug in asyncio.Semaphore and strengthen FIFO guarantee (#93222)
The main problem was that an unluckily timed task cancellation could leave the semaphore stuck. There were also doubts about whether tasks were allowed to pass in strict FIFO order. The Semaphore implementation was rewritten to be more similar to Lock. Many tests for edge cases (including cancellation) were added.
1 parent 8fd2c3b commit 24e0379

File tree

3 files changed

+143
-22
lines changed

3 files changed

+143
-22
lines changed

Lib/asyncio/locks.py

+42-22
Original file line number | Diff line number | Diff line change
@@ -345,9 +345,8 @@ class Semaphore(_ContextManagerMixin, mixins._LoopBoundMixin):
345345
def __init__(self, value=1):
346346
if value < 0:
347347
raise ValueError("Semaphore initial value must be >= 0")
348+
self._waiters = None
348349
self._value = value
349-
self._waiters = collections.deque()
350-
self._wakeup_scheduled = False
351350

352351
def __repr__(self):
353352
res = super().__repr__()
@@ -356,16 +355,8 @@ def __repr__(self):
356355
extra = f'{extra}, waiters:{len(self._waiters)}'
357356
return f'<{res[1:-1]} [{extra}]>'
358357

359-
def _wake_up_next(self):
360-
while self._waiters:
361-
waiter = self._waiters.popleft()
362-
if not waiter.done():
363-
waiter.set_result(None)
364-
self._wakeup_scheduled = True
365-
return
366-
367358
def locked(self):
368-
"""Returns True if semaphore can not be acquired immediately."""
359+
"""Returns True if semaphore counter is zero."""
369360
return self._value == 0
370361

371362
async def acquire(self):
@@ -377,28 +368,57 @@ async def acquire(self):
377368
called release() to make it larger than 0, and then return
378369
True.
379370
"""
380-
# _wakeup_scheduled is set if *another* task is scheduled to wakeup
381-
# but its acquire() is not resumed yet
382-
while self._wakeup_scheduled or self._value <= 0:
383-
fut = self._get_loop().create_future()
384-
self._waiters.append(fut)
371+
if (not self.locked() and (self._waiters is None or
372+
all(w.cancelled() for w in self._waiters))):
373+
self._value -= 1
374+
return True
375+
376+
if self._waiters is None:
377+
self._waiters = collections.deque()
378+
fut = self._get_loop().create_future()
379+
self._waiters.append(fut)
380+
381+
# Finally block should be called before the CancelledError
382+
# handling as we don't want CancelledError to call
383+
# _wake_up_first() and attempt to wake up itself.
384+
try:
385385
try:
386386
await fut
387-
# reset _wakeup_scheduled *after* waiting for a future
388-
self._wakeup_scheduled = False
389-
except exceptions.CancelledError:
390-
self._wake_up_next()
391-
raise
387+
finally:
388+
self._waiters.remove(fut)
389+
except exceptions.CancelledError:
390+
if not self.locked():
391+
self._wake_up_first()
392+
raise
393+
392394
self._value -= 1
395+
if not self.locked():
396+
self._wake_up_first()
393397
return True
394398

395399
def release(self):
396400
"""Release a semaphore, incrementing the internal counter by one.
401+
397402
When it was zero on entry and another coroutine is waiting for it to
398403
become larger than zero again, wake up that coroutine.
399404
"""
400405
self._value += 1
401-
self._wake_up_next()
406+
self._wake_up_first()
407+
408+
def _wake_up_first(self):
409+
"""Wake up the first waiter if it isn't done."""
410+
if not self._waiters:
411+
return
412+
try:
413+
fut = next(iter(self._waiters))
414+
except StopIteration:
415+
return
416+
417+
# .done() necessarily means that a waiter will wake up later on and
418+
# either take the semaphore, or, if it was cancelled and the semaphore
419+
# wasn't taken already, will hit this again and wake up a new waiter.
420+
if not fut.done():
421+
fut.set_result(True)
402422

403423

404424
class BoundedSemaphore(Semaphore):

Lib/test/test_asyncio/test_locks.py

+100
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import re
66

77
import asyncio
8+
import collections
89

910
STR_RGX_REPR = (
1011
r'^<(?P<class>.*?) object at (?P<address>.*?)'
@@ -774,6 +775,9 @@ async def test_repr(self):
774775
self.assertTrue('waiters' not in repr(sem))
775776
self.assertTrue(RGX_REPR.match(repr(sem)))
776777

778+
if sem._waiters is None:
779+
sem._waiters = collections.deque()
780+
777781
sem._waiters.append(mock.Mock())
778782
self.assertTrue('waiters:1' in repr(sem))
779783
self.assertTrue(RGX_REPR.match(repr(sem)))
@@ -842,6 +846,7 @@ async def c4(result):
842846
sem.release()
843847
self.assertEqual(2, sem._value)
844848

849+
await asyncio.sleep(0)
845850
await asyncio.sleep(0)
846851
self.assertEqual(0, sem._value)
847852
self.assertEqual(3, len(result))
@@ -884,6 +889,7 @@ async def test_acquire_cancel_before_awoken(self):
884889
t2.cancel()
885890
sem.release()
886891

892+
await asyncio.sleep(0)
887893
await asyncio.sleep(0)
888894
num_done = sum(t.done() for t in [t3, t4])
889895
self.assertEqual(num_done, 1)
@@ -904,9 +910,32 @@ async def test_acquire_hang(self):
904910
t1.cancel()
905911
sem.release()
906912
await asyncio.sleep(0)
913+
await asyncio.sleep(0)
907914
self.assertTrue(sem.locked())
908915
self.assertTrue(t2.done())
909916

917+
async def test_acquire_no_hang(self):
918+
919+
sem = asyncio.Semaphore(1)
920+
921+
async def c1():
922+
async with sem:
923+
await asyncio.sleep(0)
924+
t2.cancel()
925+
926+
async def c2():
927+
async with sem:
928+
self.assertFalse(True)
929+
930+
t1 = asyncio.create_task(c1())
931+
t2 = asyncio.create_task(c2())
932+
933+
r1, r2 = await asyncio.gather(t1, t2, return_exceptions=True)
934+
self.assertTrue(r1 is None)
935+
self.assertTrue(isinstance(r2, asyncio.CancelledError))
936+
937+
await asyncio.wait_for(sem.acquire(), timeout=1.0)
938+
910939
def test_release_not_acquired(self):
911940
sem = asyncio.BoundedSemaphore()
912941

@@ -945,6 +974,77 @@ async def coro(tag):
945974
result
946975
)
947976

977+
async def test_acquire_fifo_order_2(self):
978+
sem = asyncio.Semaphore(1)
979+
result = []
980+
981+
async def c1(result):
982+
await sem.acquire()
983+
result.append(1)
984+
return True
985+
986+
async def c2(result):
987+
await sem.acquire()
988+
result.append(2)
989+
sem.release()
990+
await sem.acquire()
991+
result.append(4)
992+
return True
993+
994+
async def c3(result):
995+
await sem.acquire()
996+
result.append(3)
997+
return True
998+
999+
t1 = asyncio.create_task(c1(result))
1000+
t2 = asyncio.create_task(c2(result))
1001+
t3 = asyncio.create_task(c3(result))
1002+
1003+
await asyncio.sleep(0)
1004+
1005+
sem.release()
1006+
sem.release()
1007+
1008+
tasks = [t1, t2, t3]
1009+
await asyncio.gather(*tasks)
1010+
self.assertEqual([1, 2, 3, 4], result)
1011+
1012+
async def test_acquire_fifo_order_3(self):
1013+
sem = asyncio.Semaphore(0)
1014+
result = []
1015+
1016+
async def c1(result):
1017+
await sem.acquire()
1018+
result.append(1)
1019+
return True
1020+
1021+
async def c2(result):
1022+
await sem.acquire()
1023+
result.append(2)
1024+
return True
1025+
1026+
async def c3(result):
1027+
await sem.acquire()
1028+
result.append(3)
1029+
return True
1030+
1031+
t1 = asyncio.create_task(c1(result))
1032+
t2 = asyncio.create_task(c2(result))
1033+
t3 = asyncio.create_task(c3(result))
1034+
1035+
await asyncio.sleep(0)
1036+
1037+
t1.cancel()
1038+
1039+
await asyncio.sleep(0)
1040+
1041+
sem.release()
1042+
sem.release()
1043+
1044+
tasks = [t1, t2, t3]
1045+
await asyncio.gather(*tasks, return_exceptions=True)
1046+
self.assertEqual([2, 3], result)
1047+
9481048

9491049
class BarrierTests(unittest.IsolatedAsyncioTestCase):
9501050

Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fix broken :class:`asyncio.Semaphore` when acquire is cancelled.

0 commit comments

Comments
 (0)