Skip to content

Commit 88872a2

Browse files
authored
bpo-47084: Clear Unicode cached representations on finalization (GH-32032)
1 parent 7d810b6 commit 88872a2

File tree

5 files changed

+78
-18
lines changed

5 files changed

+78
-18
lines changed

Include/internal/pycore_unicodeobject.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
1818
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
1919
extern void _PyUnicode_Fini(PyInterpreterState *);
2020
extern void _PyUnicode_FiniTypes(PyInterpreterState *);
21+
extern void _PyStaticUnicode_Dealloc(PyObject *);
2122

2223

2324
/* other API */

Lib/__hello__.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,14 @@
11
initialized = True
22

3+
class TestFrozenUtf8_1:
4+
"""\u00b6"""
5+
6+
class TestFrozenUtf8_2:
7+
"""\u03c0"""
8+
9+
class TestFrozenUtf8_4:
10+
"""\U0001f600"""
11+
312
def main():
413
print("Hello world!")
514

Lib/test/test_embed.py

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1645,24 +1645,29 @@ def test_frozenmain(self):
16451645
'-X showrefcount requires a Python debug build')
16461646
def test_no_memleak(self):
16471647
# bpo-1635741: Python must release all memory at exit
1648-
cmd = [sys.executable, "-I", "-X", "showrefcount", "-c", "pass"]
1649-
proc = subprocess.run(cmd,
1650-
stdout=subprocess.PIPE,
1651-
stderr=subprocess.STDOUT,
1652-
text=True)
1653-
self.assertEqual(proc.returncode, 0)
1654-
out = proc.stdout.rstrip()
1655-
match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
1656-
if not match:
1657-
self.fail(f"unexpected output: {out!a}")
1658-
refs = int(match.group(1))
1659-
blocks = int(match.group(2))
1660-
self.assertEqual(refs, 0, out)
1661-
if not MS_WINDOWS:
1662-
self.assertEqual(blocks, 0, out)
1663-
else:
1664-
# bpo-46857: on Windows, Python still leaks 1 memory block at exit
1665-
self.assertIn(blocks, (0, 1), out)
1648+
tests = (
1649+
('off', 'pass'),
1650+
('on', 'pass'),
1651+
('off', 'import __hello__'),
1652+
('on', 'import __hello__'),
1653+
)
1654+
for flag, stmt in tests:
1655+
xopt = f"frozen_modules={flag}"
1656+
cmd = [sys.executable, "-I", "-X", "showrefcount", "-X", xopt, "-c", stmt]
1657+
proc = subprocess.run(cmd,
1658+
stdout=subprocess.PIPE,
1659+
stderr=subprocess.STDOUT,
1660+
text=True)
1661+
self.assertEqual(proc.returncode, 0)
1662+
out = proc.stdout.rstrip()
1663+
match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
1664+
if not match:
1665+
self.fail(f"unexpected output: {out!a}")
1666+
refs = int(match.group(1))
1667+
blocks = int(match.group(2))
1668+
with self.subTest(frozen_modules=flag, stmt=stmt):
1669+
self.assertEqual(refs, 0, out)
1670+
self.assertEqual(blocks, 0, out)
16661671

16671672

16681673
class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase):

Objects/unicodeobject.c

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16057,6 +16057,35 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
1605716057
}
1605816058

1605916059

16060+
/* Release the cached representations attached to a statically allocated
 * compact Unicode object.
 *
 * Only the cache buffers (wstr, and utf8 for non-ASCII strings) are freed
 * and their fields reset; `op` itself is never freed here.
 *
 * op: the string object; it must be compact (checked with assert()).
 */
static void
unicode_static_dealloc(PyObject *op)
{
    PyASCIIObject *str = (PyASCIIObject *)op;

    assert(str->state.compact);

    if (str->state.ascii) {
        /* ASCII strings: only the wstr cache is released in this case. */
        if (str->wstr != NULL) {
            PyObject_Free(str->wstr);
            str->wstr = NULL;
        }
        return;
    }

    PyCompactUnicodeObject *full = (PyCompactUnicodeObject *)op;
    /* Address immediately following the PyCompactUnicodeObject header. */
    void *payload = (void *)(full + 1);

    /* A wstr equal to `payload` is left untouched: it is not a separate
     * allocation to free (presumably it aliases the inline character
     * data -- NOTE(review): confirm against the object layout). */
    if (str->wstr != NULL && str->wstr != payload) {
        PyObject_Free(str->wstr);
        str->wstr = NULL;
        full->wstr_length = 0;
    }

    if (full->utf8 != NULL) {
        PyObject_Free(full->utf8);
        full->utf8 = NULL;
        full->utf8_length = 0;
    }
}
16087+
16088+
1606016089
void
1606116090
_PyUnicode_Fini(PyInterpreterState *interp)
1606216091
{
@@ -16070,6 +16099,21 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1607016099
_PyUnicode_FiniEncodings(&state->fs_codec);
1607116100

1607216101
unicode_clear_identifiers(state);
16102+
16103+
// Clear the single character singletons
16104+
for (int i = 0; i < 128; i++) {
16105+
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
16106+
}
16107+
for (int i = 0; i < 128; i++) {
16108+
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
16109+
}
16110+
}
16111+
16112+
16113+
void
16114+
_PyStaticUnicode_Dealloc(PyObject *op)
16115+
{
16116+
unicode_static_dealloc(op);
1607316117
}
1607416118

1607516119

Tools/scripts/deepfreeze.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,6 +185,7 @@ def generate_unicode(self, name: str, s: str) -> str:
185185
else:
186186
self.write("PyCompactUnicodeObject _compact;")
187187
self.write(f"{datatype} _data[{len(s)+1}];")
188+
self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
188189
with self.block(f"{name} =", ";"):
189190
if ascii:
190191
with self.block("._ascii =", ","):

0 commit comments

Comments
 (0)