Skip to content

Commit cc0bb3a

Browse files
DinoVebonnal
authored andcommitted
pythongh-124470: Fix crash when reading from object instance dictionary while replacing it (python#122489)
Delay free a dictionary when replacing it
1 parent 698312c commit cc0bb3a

File tree

7 files changed

+289
-85
lines changed

7 files changed

+289
-85
lines changed

Include/internal/pycore_gc.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
5050
# define _PyGC_BITS_UNREACHABLE (4)
5151
# define _PyGC_BITS_FROZEN (8)
5252
# define _PyGC_BITS_SHARED (16)
53-
# define _PyGC_BITS_SHARED_INLINE (32)
5453
# define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting
5554
#endif
5655

@@ -119,23 +118,6 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) {
119118
}
120119
#define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op))
121120

122-
/* True if the memory of the object is shared between multiple
123-
* threads and needs special purpose when freeing due to
124-
* the possibility of in-flight lock-free reads occurring.
125-
* Objects with this bit that are GC objects will automatically
126-
* delay-freed by PyObject_GC_Del. */
127-
static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) {
128-
return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
129-
}
130-
#define _PyObject_GC_IS_SHARED_INLINE(op) \
131-
_PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op))
132-
133-
static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
134-
_PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
135-
}
136-
#define _PyObject_GC_SET_SHARED_INLINE(op) \
137-
_PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op))
138-
139121
#endif
140122

141123
/* Bit flags for _gc_prev */

Include/internal/pycore_pymem.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,25 @@ extern int _PyMem_DebugEnabled(void);
120120
extern void _PyMem_FreeDelayed(void *ptr);
121121

122122
// Enqueue an object to be freed possibly after some delay
123-
extern void _PyObject_FreeDelayed(void *ptr);
123+
#ifdef Py_GIL_DISABLED
124+
extern void _PyObject_XDecRefDelayed(PyObject *obj);
125+
#else
126+
static inline void _PyObject_XDecRefDelayed(PyObject *obj)
127+
{
128+
Py_XDECREF(obj);
129+
}
130+
#endif
124131

125132
// Periodically process delayed free requests.
126133
extern void _PyMem_ProcessDelayed(PyThreadState *tstate);
127134

135+
136+
// Periodically process delayed free requests when the world is stopped.
137+
// Notify of any objects whic should be freeed.
138+
typedef void (*delayed_dealloc_cb)(PyObject *, void *);
139+
extern void _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate,
140+
delayed_dealloc_cb cb, void *state);
141+
128142
// Abandon all thread-local delayed free requests and push them to the
129143
// interpreter's queue.
130144
extern void _PyMem_AbandonDelayed(PyThreadState *tstate);

Lib/test/test_free_threading/test_dict.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,70 @@ def writer_func(l):
142142
for ref in thread_list:
143143
self.assertIsNone(ref())
144144

145+
def test_racing_set_object_dict(self):
146+
"""Races assigning to __dict__ should be thread safe"""
147+
class C: pass
148+
class MyDict(dict): pass
149+
for cyclic in (False, True):
150+
f = C()
151+
f.__dict__ = {"foo": 42}
152+
THREAD_COUNT = 10
153+
154+
def writer_func(l):
155+
for i in range(1000):
156+
if cyclic:
157+
other_d = {}
158+
d = MyDict({"foo": 100})
159+
if cyclic:
160+
d["x"] = other_d
161+
other_d["bar"] = d
162+
l.append(weakref.ref(d))
163+
f.__dict__ = d
164+
165+
def reader_func():
166+
for i in range(1000):
167+
f.foo
168+
169+
lists = []
170+
readers = []
171+
writers = []
172+
for x in range(THREAD_COUNT):
173+
thread_list = []
174+
lists.append(thread_list)
175+
writer = Thread(target=partial(writer_func, thread_list))
176+
writers.append(writer)
177+
178+
for x in range(THREAD_COUNT):
179+
reader = Thread(target=partial(reader_func))
180+
readers.append(reader)
181+
182+
for writer in writers:
183+
writer.start()
184+
for reader in readers:
185+
reader.start()
186+
187+
for writer in writers:
188+
writer.join()
189+
190+
for reader in readers:
191+
reader.join()
192+
193+
f.__dict__ = {}
194+
gc.collect()
195+
gc.collect()
196+
197+
count = 0
198+
ids = set()
199+
for thread_list in lists:
200+
for i, ref in enumerate(thread_list):
201+
if ref() is None:
202+
continue
203+
count += 1
204+
ids.add(id(ref()))
205+
count += 1
206+
207+
self.assertEqual(count, 0)
208+
145209

146210
if __name__ == "__main__":
147211
unittest.main()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix crash in free-threaded builds when replacing object dictionary while reading attribute on another thread

Objects/dictobject.c

Lines changed: 132 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7087,51 +7087,146 @@ set_dict_inline_values(PyObject *obj, PyDictObject *new_dict)
70877087
}
70887088
}
70897089

7090-
int
7091-
_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
7090+
#ifdef Py_GIL_DISABLED
7091+
7092+
// Trys and sets the dictionary for an object in the easy case when our current
7093+
// dictionary is either completely not materialized or is a dictionary which
7094+
// does not point at the inline values.
7095+
static bool
7096+
try_set_dict_inline_only_or_other_dict(PyObject *obj, PyObject *new_dict, PyDictObject **cur_dict)
7097+
{
7098+
bool replaced = false;
7099+
Py_BEGIN_CRITICAL_SECTION(obj);
7100+
7101+
PyDictObject *dict = *cur_dict = _PyObject_GetManagedDict(obj);
7102+
if (dict == NULL) {
7103+
// We only have inline values, we can just completely replace them.
7104+
set_dict_inline_values(obj, (PyDictObject *)new_dict);
7105+
replaced = true;
7106+
goto exit_lock;
7107+
}
7108+
7109+
if (FT_ATOMIC_LOAD_PTR_RELAXED(dict->ma_values) != _PyObject_InlineValues(obj)) {
7110+
// We have a materialized dict which doesn't point at the inline values,
7111+
// We get to simply swap dictionaries and free the old dictionary.
7112+
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
7113+
(PyDictObject *)Py_XNewRef(new_dict));
7114+
replaced = true;
7115+
goto exit_lock;
7116+
}
7117+
else {
7118+
// We have inline values, we need to lock the dict and the object
7119+
// at the same time to safely dematerialize them. To do that while releasing
7120+
// the object lock we need a strong reference to the current dictionary.
7121+
Py_INCREF(dict);
7122+
}
7123+
exit_lock:
7124+
Py_END_CRITICAL_SECTION();
7125+
return replaced;
7126+
}
7127+
7128+
// Replaces a dictionary that is probably the dictionary which has been
7129+
// materialized and points at the inline values. We could have raced
7130+
// and replaced it with another dictionary though.
7131+
static int
7132+
replace_dict_probably_inline_materialized(PyObject *obj, PyDictObject *inline_dict,
7133+
PyDictObject *cur_dict, PyObject *new_dict)
7134+
{
7135+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj);
7136+
7137+
if (cur_dict == inline_dict) {
7138+
assert(FT_ATOMIC_LOAD_PTR_RELAXED(inline_dict->ma_values) == _PyObject_InlineValues(obj));
7139+
7140+
int err = _PyDict_DetachFromObject(inline_dict, obj);
7141+
if (err != 0) {
7142+
assert(new_dict == NULL);
7143+
return err;
7144+
}
7145+
}
7146+
7147+
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
7148+
(PyDictObject *)Py_XNewRef(new_dict));
7149+
return 0;
7150+
}
7151+
7152+
#endif
7153+
7154+
static void
7155+
decref_maybe_delay(PyObject *obj, bool delay)
7156+
{
7157+
if (delay) {
7158+
_PyObject_XDecRefDelayed(obj);
7159+
}
7160+
else {
7161+
Py_XDECREF(obj);
7162+
}
7163+
}
7164+
7165+
static int
7166+
set_or_clear_managed_dict(PyObject *obj, PyObject *new_dict, bool clear)
70927167
{
70937168
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
7169+
#ifndef NDEBUG
7170+
Py_BEGIN_CRITICAL_SECTION(obj);
70947171
assert(_PyObject_InlineValuesConsistencyCheck(obj));
7172+
Py_END_CRITICAL_SECTION();
7173+
#endif
70957174
int err = 0;
70967175
PyTypeObject *tp = Py_TYPE(obj);
70977176
if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
7098-
PyDictObject *dict = _PyObject_GetManagedDict(obj);
7099-
if (dict == NULL) {
71007177
#ifdef Py_GIL_DISABLED
7101-
Py_BEGIN_CRITICAL_SECTION(obj);
7178+
PyDictObject *prev_dict;
7179+
if (!try_set_dict_inline_only_or_other_dict(obj, new_dict, &prev_dict)) {
7180+
// We had a materialized dictionary which pointed at the inline
7181+
// values. We need to lock both the object and the dict at the
7182+
// same time to safely replace it. We can't merely lock the dictionary
7183+
// while the object is locked because it could suspend the object lock.
7184+
PyDictObject *cur_dict;
71027185

7103-
dict = _PyObject_ManagedDictPointer(obj)->dict;
7104-
if (dict == NULL) {
7105-
set_dict_inline_values(obj, (PyDictObject *)new_dict);
7106-
}
7186+
assert(prev_dict != NULL);
7187+
Py_BEGIN_CRITICAL_SECTION2(obj, prev_dict);
71077188

7108-
Py_END_CRITICAL_SECTION();
7189+
// We could have had another thread race in between the call to
7190+
// try_set_dict_inline_only_or_other_dict where we locked the object
7191+
// and when we unlocked and re-locked the dictionary.
7192+
cur_dict = _PyObject_GetManagedDict(obj);
71097193

7110-
if (dict == NULL) {
7111-
return 0;
7194+
err = replace_dict_probably_inline_materialized(obj, prev_dict,
7195+
cur_dict, new_dict);
7196+
7197+
Py_END_CRITICAL_SECTION2();
7198+
7199+
// Decref for the dictionary we incref'd in try_set_dict_inline_only_or_other_dict
7200+
// while the object was locked
7201+
decref_maybe_delay((PyObject *)prev_dict,
7202+
!clear && prev_dict != cur_dict);
7203+
if (err != 0) {
7204+
return err;
71127205
}
7113-
#else
7114-
set_dict_inline_values(obj, (PyDictObject *)new_dict);
7115-
return 0;
7116-
#endif
7117-
}
71187206

7119-
Py_BEGIN_CRITICAL_SECTION2(dict, obj);
7207+
prev_dict = cur_dict;
7208+
}
71207209

7121-
// We've locked dict, but the actual dict could have changed
7122-
// since we locked it.
7123-
dict = _PyObject_ManagedDictPointer(obj)->dict;
7124-
err = _PyDict_DetachFromObject(dict, obj);
7125-
assert(err == 0 || new_dict == NULL);
7126-
if (err == 0) {
7127-
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
7128-
(PyDictObject *)Py_XNewRef(new_dict));
7210+
if (prev_dict != NULL) {
7211+
// decref for the dictionary that we replaced
7212+
decref_maybe_delay((PyObject *)prev_dict, !clear);
71297213
}
7130-
Py_END_CRITICAL_SECTION2();
71317214

7132-
if (err == 0) {
7133-
Py_XDECREF(dict);
7215+
return 0;
7216+
#else
7217+
PyDictObject *dict = _PyObject_GetManagedDict(obj);
7218+
if (dict == NULL) {
7219+
set_dict_inline_values(obj, (PyDictObject *)new_dict);
7220+
return 0;
7221+
}
7222+
if (_PyDict_DetachFromObject(dict, obj) == 0) {
7223+
_PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)Py_XNewRef(new_dict);
7224+
Py_DECREF(dict);
7225+
return 0;
71347226
}
7227+
assert(new_dict == NULL);
7228+
return -1;
7229+
#endif
71357230
}
71367231
else {
71377232
PyDictObject *dict;
@@ -7144,17 +7239,22 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
71447239
(PyDictObject *)Py_XNewRef(new_dict));
71457240

71467241
Py_END_CRITICAL_SECTION();
7147-
7148-
Py_XDECREF(dict);
7242+
decref_maybe_delay((PyObject *)dict, !clear);
71497243
}
71507244
assert(_PyObject_InlineValuesConsistencyCheck(obj));
71517245
return err;
71527246
}
71537247

7248+
int
7249+
_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
7250+
{
7251+
return set_or_clear_managed_dict(obj, new_dict, false);
7252+
}
7253+
71547254
void
71557255
PyObject_ClearManagedDict(PyObject *obj)
71567256
{
7157-
if (_PyObject_SetManagedDict(obj, NULL) < 0) {
7257+
if (set_or_clear_managed_dict(obj, NULL, true) < 0) {
71587258
/* Must be out of memory */
71597259
assert(PyErr_Occurred() == PyExc_MemoryError);
71607260
PyErr_WriteUnraisable(NULL);

0 commit comments

Comments
 (0)