Skip to content

Commit 3c168f7

Browse files
gh-128013: fix data race in PyUnicode_AsUTF8AndSize on free-threading (#128021)
1 parent 46dc1ba commit 3c168f7

File tree

2 files changed

+51
-18
lines changed

2 files changed

+51
-18
lines changed

Lib/test/test_capi/test_unicode.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22
import sys
33
from test import support
4-
from test.support import import_helper
4+
from test.support import threading_helper
55

66
try:
77
import _testcapi
@@ -1005,6 +1005,24 @@ def test_asutf8(self):
10051005
self.assertRaises(TypeError, unicode_asutf8, [], 0)
10061006
# CRASHES unicode_asutf8(NULL, 0)
10071007

1008+
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
1009+
@threading_helper.requires_working_threading()
1010+
def test_asutf8_race(self):
1011+
"""Test that there's no race condition in PyUnicode_AsUTF8()"""
1012+
unicode_asutf8 = _testcapi.unicode_asutf8
1013+
from threading import Thread
1014+
1015+
data = "😊"
1016+
1017+
def worker():
1018+
for _ in range(1000):
1019+
self.assertEqual(unicode_asutf8(data, 5), b'\xf0\x9f\x98\x8a\0')
1020+
1021+
threads = [Thread(target=worker) for _ in range(10)]
1022+
with threading_helper.start_threads(threads):
1023+
pass
1024+
1025+
10081026
@support.cpython_only
10091027
@unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
10101028
def test_asutf8andsize(self):

Objects/unicodeobject.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently
114114

115115
static inline char* _PyUnicode_UTF8(PyObject *op)
116116
{
117-
return (_PyCompactUnicodeObject_CAST(op)->utf8);
117+
return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8);
118118
}
119119

120120
static inline char* PyUnicode_UTF8(PyObject *op)
@@ -130,7 +130,7 @@ static inline char* PyUnicode_UTF8(PyObject *op)
130130

131131
static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8)
132132
{
133-
_PyCompactUnicodeObject_CAST(op)->utf8 = utf8;
133+
FT_ATOMIC_STORE_PTR_RELEASE(_PyCompactUnicodeObject_CAST(op)->utf8, utf8);
134134
}
135135

136136
static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op)
@@ -700,16 +700,17 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
700700
CHECK(ascii->state.compact == 0);
701701
CHECK(data != NULL);
702702
if (ascii->state.ascii) {
703-
CHECK(compact->utf8 == data);
703+
CHECK(_PyUnicode_UTF8(op) == data);
704704
CHECK(compact->utf8_length == ascii->length);
705705
}
706706
else {
707-
CHECK(compact->utf8 != data);
707+
CHECK(_PyUnicode_UTF8(op) != data);
708708
}
709709
}
710-
711-
if (compact->utf8 == NULL)
710+
#ifndef Py_GIL_DISABLED
711+
if (_PyUnicode_UTF8(op) == NULL)
712712
CHECK(compact->utf8_length == 0);
713+
#endif
713714
}
714715

715716
/* check that the best kind is used: O(n) operation */
@@ -1156,8 +1157,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
11561157

11571158
if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
11581159
PyMem_Free(_PyUnicode_UTF8(unicode));
1159-
PyUnicode_SET_UTF8(unicode, NULL);
11601160
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
1161+
PyUnicode_SET_UTF8(unicode, NULL);
11611162
}
11621163
#ifdef Py_TRACE_REFS
11631164
_Py_ForgetReference(unicode);
@@ -1210,8 +1211,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12101211
if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
12111212
{
12121213
PyMem_Free(_PyUnicode_UTF8(unicode));
1213-
PyUnicode_SET_UTF8(unicode, NULL);
12141214
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
1215+
PyUnicode_SET_UTF8(unicode, NULL);
12151216
}
12161217

12171218
data = (PyObject *)PyObject_Realloc(data, new_size);
@@ -1221,8 +1222,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12211222
}
12221223
_PyUnicode_DATA_ANY(unicode) = data;
12231224
if (share_utf8) {
1224-
PyUnicode_SET_UTF8(unicode, data);
12251225
PyUnicode_SET_UTF8_LENGTH(unicode, length);
1226+
PyUnicode_SET_UTF8(unicode, data);
12261227
}
12271228
_PyUnicode_LENGTH(unicode) = length;
12281229
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
@@ -4216,6 +4217,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
42164217

42174218
static int unicode_fill_utf8(PyObject *unicode);
42184219

4220+
4221+
static int
4222+
unicode_ensure_utf8(PyObject *unicode)
4223+
{
4224+
int err = 0;
4225+
if (PyUnicode_UTF8(unicode) == NULL) {
4226+
Py_BEGIN_CRITICAL_SECTION(unicode);
4227+
if (PyUnicode_UTF8(unicode) == NULL) {
4228+
err = unicode_fill_utf8(unicode);
4229+
}
4230+
Py_END_CRITICAL_SECTION();
4231+
}
4232+
return err;
4233+
}
4234+
42194235
const char *
42204236
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
42214237
{
@@ -4227,13 +4243,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
42274243
return NULL;
42284244
}
42294245

4230-
if (PyUnicode_UTF8(unicode) == NULL) {
4231-
if (unicode_fill_utf8(unicode) == -1) {
4232-
if (psize) {
4233-
*psize = -1;
4234-
}
4235-
return NULL;
4246+
if (unicode_ensure_utf8(unicode) == -1) {
4247+
if (psize) {
4248+
*psize = -1;
42364249
}
4250+
return NULL;
42374251
}
42384252

42394253
if (psize) {
@@ -5854,6 +5868,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
58545868
static int
58555869
unicode_fill_utf8(PyObject *unicode)
58565870
{
5871+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(unicode);
58575872
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
58585873
assert(!PyUnicode_IS_ASCII(unicode));
58595874

@@ -5895,10 +5910,10 @@ unicode_fill_utf8(PyObject *unicode)
58955910
PyErr_NoMemory();
58965911
return -1;
58975912
}
5898-
PyUnicode_SET_UTF8(unicode, cache);
5899-
PyUnicode_SET_UTF8_LENGTH(unicode, len);
59005913
memcpy(cache, start, len);
59015914
cache[len] = '\0';
5915+
PyUnicode_SET_UTF8_LENGTH(unicode, len);
5916+
PyUnicode_SET_UTF8(unicode, cache);
59025917
_PyBytesWriter_Dealloc(&writer);
59035918
return 0;
59045919
}

0 commit comments

Comments
 (0)