From 50db857dd3869a879c5b4f45702990b7c66fec84 Mon Sep 17 00:00:00 2001
From: Peter Hawkins
Date: Fri, 9 May 2025 19:40:18 +0000
Subject: [PATCH] gh-132641: Fix race in lru_cache due to inconsistent critical section use.

The bounded_lru_cache code was using a critical section on the lru cache
object to protect dictionary accesses in some code paths, but using the
critical section on the dictionary itself to protect accesses in other
code paths. This led to races since not all threads agreed on which lock
they needed to be holding.

Instead, always use a critical section on the underlying dictionary,
rather than the lru cache object itself.

Fixes https://github.com/python/cpython/issues/132641
---
 Include/internal/pycore_dict.h                |  2 +
 .../test_free_threading/test_functools.py     | 75 +++++++++++++++++++
 ...-05-09-20-59-24.gh-issue-132641.3qTw44.rst |  1 +
 Modules/_functoolsmodule.c                    | 16 ++--
 Objects/dictobject.c                          |  5 ++
 5 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 Lib/test/test_free_threading/test_functools.py
 create mode 100644 Misc/NEWS.d/next/Library/2025-05-09-20-59-24.gh-issue-132641.3qTw44.rst

diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 754eb88a85c33c..25bb224921aa8b 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -150,6 +150,8 @@ extern int _PyDict_Pop_KnownHash(
     Py_hash_t hash,
     PyObject **result);
 
+extern void _PyDict_Clear_LockHeld(PyObject *op);
+
 #ifdef Py_GIL_DISABLED
 PyAPI_FUNC(void) _PyDict_EnsureSharedOnRead(PyDictObject *mp);
 #endif
diff --git a/Lib/test/test_free_threading/test_functools.py b/Lib/test/test_free_threading/test_functools.py
new file mode 100644
index 00000000000000..a442fe056cef15
--- /dev/null
+++ b/Lib/test/test_free_threading/test_functools.py
@@ -0,0 +1,75 @@
+import random
+import unittest
+
+from functools import lru_cache
+from threading import Barrier, Thread
+
+from test.support import threading_helper
+
+@threading_helper.requires_working_threading()
+class TestLRUCache(unittest.TestCase):
+
+    def _test_concurrent_operations(self, maxsize):
+        num_threads = 10
+        b = Barrier(num_threads)
+        @lru_cache(maxsize=maxsize)
+        def func(arg=0):
+            return object()
+
+
+        def thread_func():
+            b.wait()
+            for i in range(1000):
+                r = random.randint(0, 1000)
+                if i < 800:
+                    func(i)
+                elif i < 900:
+                    func.cache_info()
+                else:
+                    func.cache_clear()
+
+        threads = []
+        for i in range(num_threads):
+            t = Thread(target=thread_func)
+            threads.append(t)
+
+        with threading_helper.start_threads(threads):
+            pass
+
+    def test_concurrent_operations_unbounded(self):
+        self._test_concurrent_operations(maxsize=None)
+
+    def test_concurrent_operations_bounded(self):
+        self._test_concurrent_operations(maxsize=128)
+
+    def _test_reentrant_cache_clear(self, maxsize):
+        num_threads = 10
+        b = Barrier(num_threads)
+        @lru_cache(maxsize=maxsize)
+        def func(arg=0):
+            func.cache_clear()
+            return object()
+
+
+        def thread_func():
+            b.wait()
+            for i in range(1000):
+                func(random.randint(0, 10000))
+
+        threads = []
+        for i in range(num_threads):
+            t = Thread(target=thread_func)
+            threads.append(t)
+
+        with threading_helper.start_threads(threads):
+            pass
+
+    def test_reentrant_cache_clear_unbounded(self):
+        self._test_reentrant_cache_clear(maxsize=None)
+
+    def test_reentrant_cache_clear_bounded(self):
+        self._test_reentrant_cache_clear(maxsize=128)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-05-09-20-59-24.gh-issue-132641.3qTw44.rst b/Misc/NEWS.d/next/Library/2025-05-09-20-59-24.gh-issue-132641.3qTw44.rst
new file mode 100644
index 00000000000000..419ff19d9c009a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-05-09-20-59-24.gh-issue-132641.3qTw44.rst
@@ -0,0 +1 @@
+Fixed a race in :func:`functools.lru_cache` under free-threading.
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
index 899eef50ecc600..354dbad84b5099 100644
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -1383,8 +1383,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self,
            this same key, then this setitem call will update the cache dict
            with this new link, leaving the old link as an orphan (i.e. not
            having a cache dict entry that refers to it). */
-        if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
-                                      hash) < 0) {
+        if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key,
+                                               (PyObject *)link, hash) < 0) {
             Py_DECREF(link);
             return NULL;
         }
@@ -1453,8 +1453,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self,
           for successful insertion in the cache dict before adding the
           link to the linked list.  Otherwise, the potentially reentrant
           __eq__ call could cause the then orphan link to be visited. */
-        if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
-                                      hash) < 0) {
+        if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key,
+                                               (PyObject *)link, hash) < 0) {
             /* Somehow the cache dict update failed.  We no longer can
                restore the old link.  Let the error propagate upward and
                leave the cache short one link. */
@@ -1689,7 +1689,13 @@ _functools__lru_cache_wrapper_cache_clear_impl(PyObject *self)
     lru_list_elem *list = lru_cache_unlink_list(_self);
     FT_ATOMIC_STORE_SSIZE_RELAXED(_self->hits, 0);
     FT_ATOMIC_STORE_SSIZE_RELAXED(_self->misses, 0);
-    PyDict_Clear(_self->cache);
+    if (_self->wrapper == bounded_lru_cache_wrapper) {
+        /* The critical section on the lru cache itself protects the dictionary
+           for bounded_lru_cache instances. */
+        _PyDict_Clear_LockHeld(_self->cache);
+    } else {
+        PyDict_Clear(_self->cache);
+    }
     lru_cache_clear_list(list);
     Py_RETURN_NONE;
 }
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index ce27e47dabf3b0..fd8ccf56324207 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -2915,6 +2915,11 @@ clear_lock_held(PyObject *op)
     ASSERT_CONSISTENT(mp);
 }
 
+void
+_PyDict_Clear_LockHeld(PyObject *op) {
+    clear_lock_held(op);
+}
+
 void
 PyDict_Clear(PyObject *op)
 {
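
Note (not part of the patch): the commit message describes the race as two code paths
disagreeing about which lock guards the cache dict, the lru cache object or the
dictionary itself. As a rough, standalone illustration of the kind of workload the new
test_free_threading test exercises, the sketch below hammers a bounded lru_cache from
several threads while periodically calling cache_clear(). It assumes only the public
functools and threading APIs; the helper names (cached, hammer, main) and all thread and
iteration counts are illustrative, and the race is only expected to be observable on a
free-threaded (GIL-disabled) build without this fix.

    # Standalone sketch (not part of the patch): stress a bounded lru_cache
    # from many threads while mixing lookups, inserts, and cache_clear().
    # Thread/iteration counts are arbitrary illustrative values.
    import random
    import threading
    from functools import lru_cache

    @lru_cache(maxsize=128)      # bounded cache: takes the code paths changed above
    def cached(n):
        return object()

    def hammer(barrier, iterations=10_000):
        barrier.wait()           # start all threads at roughly the same time
        for i in range(iterations):
            if i % 100 == 0:
                cached.cache_clear()           # clears the cache dict...
            else:
                cached(random.randint(0, 50))  # ...while lookups/inserts proceed

    def main(num_threads=8):
        barrier = threading.Barrier(num_threads)
        threads = [threading.Thread(target=hammer, args=(barrier,))
                   for _ in range(num_threads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

    if __name__ == "__main__":
        main()

With the patch applied, a workload like this should run to completion without crashes or
debug-build assertion failures, which is what the added tests check in a reduced form.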