From 4b166f03d1c9e81829f7f06215d0301c39e2c271 Mon Sep 17 00:00:00 2001 From: Jeremy Kloth Date: Mon, 21 Mar 2022 12:41:09 -0600 Subject: [PATCH 1/4] Clear cached representations on finalization --- Include/internal/pycore_unicodeobject.h | 1 + Lib/__hello__.py | 9 +++++ Lib/test/test_embed.py | 40 +++++++++++---------- Objects/unicodeobject.c | 46 +++++++++++++++++++++++++ Tools/scripts/deepfreeze.py | 1 + 5 files changed, 79 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 4394ce939b5673..c7f06051a622fc 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -18,6 +18,7 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *); extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *); extern void _PyUnicode_Fini(PyInterpreterState *); extern void _PyUnicode_FiniTypes(PyInterpreterState *); +extern void _PyStaticUnicode_Dealloc(PyObject *); /* other API */ diff --git a/Lib/__hello__.py b/Lib/__hello__.py index d37bd2766ac1c6..c09d6a4f52332e 100644 --- a/Lib/__hello__.py +++ b/Lib/__hello__.py @@ -1,5 +1,14 @@ initialized = True +class TestFrozenUtf8_1: + """\u00b6""" + +class TestFrozenUtf8_2: + """\u03c0""" + +class TestFrozenUtf8_4: + """\U0001f600""" + def main(): print("Hello world!") diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 80b9674c1c2582..22b1e3bc992896 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1645,24 +1645,28 @@ def test_frozenmain(self): '-X showrefcount requires a Python debug build') def test_no_memleak(self): # bpo-1635741: Python must release all memory at exit - cmd = [sys.executable, "-I", "-X", "showrefcount", "-c", "pass"] - proc = subprocess.run(cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True) - self.assertEqual(proc.returncode, 0) - out = proc.stdout.rstrip() - match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out) - if not match: - self.fail(f"unexpected output: {out!a}") - refs = int(match.group(1)) - blocks = int(match.group(2)) - self.assertEqual(refs, 0, out) - if not MS_WINDOWS: - self.assertEqual(blocks, 0, out) - else: - # bpo-46857: on Windows, Python still leaks 1 memory block at exit - self.assertIn(blocks, (0, 1), out) + tests = ( + ('off', 'pass'), + ('on', 'pass'), + ('off', 'import __hello__'), + ('on', 'import __hello__'), + ) + for flag, stmt in tests: + xopt = f"frozen_modules={flag}" + cmd = [sys.executable, "-I", "-X", "showrefcount", "-X", xopt, "-c", stmt] + proc = subprocess.run(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True) + self.assertEqual(proc.returncode, 0) + out = proc.stdout.rstrip() + match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out) + if not match: + self.fail(f"unexpected output: {out!a}") + refs = int(match.group(1)) + blocks = int(match.group(2)) + self.assertEqual(refs, 0, f"{xopt}, stmt='{stmt}' -> {out}") + self.assertEqual(blocks, 0, f"{xopt}, stmt='{stmt}' -> {out}") class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase): diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5dfe6e1e93f9f7..088101cb6ac77e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16070,6 +16070,52 @@ _PyUnicode_Fini(PyInterpreterState *interp) _PyUnicode_FiniEncodings(&state->fs_codec); unicode_clear_identifiers(state); + + // Clear the single character singletons + for (int i = 0; i < 128; i++) { + PyASCIIObject *ascii = (PyASCIIObject *)& _Py_SINGLETON(strings).ascii[i]; + if (ascii->wstr != NULL) { + PyObject_FREE(ascii->wstr); + ascii->wstr = NULL; + } + } + for (int i = 0; i < 128; i++) { + PyASCIIObject *ascii = (PyASCIIObject *)&_Py_SINGLETON(strings).latin1[i]; + PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)ascii; + if (ascii->wstr != NULL) { + PyObject_FREE(ascii->wstr); + ascii->wstr = NULL; + } + if (compact->utf8 != NULL) { + PyObject_Free(compact->utf8); + compact->utf8 = NULL; + } + } +} + + +void +_PyStaticUnicode_Dealloc(PyObject *op) +{ + PyASCIIObject *ascii = (PyASCIIObject*)op; + void* data; + return; + assert(ascii->state.compact); + + if (ascii->state.ascii) { + data = (void*)(ascii + 1); + } else { + PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op; + data = (void*)(compact + 1); + if (compact->utf8 && compact->utf8 != data) { + PyObject_Free(compact->utf8); + compact->utf8 = NULL; + } + } + if (ascii->wstr && ascii->wstr != data) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + } } diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 954fca81b51e99..e847ac62866528 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -185,6 +185,7 @@ def generate_unicode(self, name: str, s: str) -> str: else: self.write("PyCompactUnicodeObject _compact;") self.write(f"{datatype} _data[{len(s)+1}];") + self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});") with self.block(f"{name} =", ";"): if ascii: with self.block("._ascii =", ","): From 7a4b78837dea3774bfd7d22019bb8ea721bfdd34 Mon Sep 17 00:00:00 2001 From: Jeremy Kloth Date: Mon, 21 Mar 2022 14:34:42 -0600 Subject: [PATCH 2/4] Also reset representation lengths on finalization --- Objects/unicodeobject.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 088101cb6ac77e..b553a73a96485a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16085,10 +16085,12 @@ _PyUnicode_Fini(PyInterpreterState *interp) if (ascii->wstr != NULL) { PyObject_FREE(ascii->wstr); ascii->wstr = NULL; + compact->wstr_length = 0; } if (compact->utf8 != NULL) { PyObject_Free(compact->utf8); compact->utf8 = NULL; + compact->utf8_length = 0; } } } @@ -16098,24 +16100,28 @@ void _PyStaticUnicode_Dealloc(PyObject *op) { PyASCIIObject *ascii = (PyASCIIObject*)op; - void* data; - return; + assert(ascii->state.compact); if (ascii->state.ascii) { - data = (void*)(ascii + 1); + if (ascii->wstr) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + } } else { - PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op; - data = (void*)(compact + 1); - if (compact->utf8 && compact->utf8 != data) { + PyCompactUnicodeObject *compact = (PyCompactUnicodeObject*)op; + void *data = (void*)(compact + 1); + if (ascii->wstr && ascii->wstr != data) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + compact->wstr_length = 0; + } + if (compact->utf8) { PyObject_Free(compact->utf8); compact->utf8 = NULL; + compact->utf8_length = 0; } } - if (ascii->wstr && ascii->wstr != data) { - PyObject_Free(ascii->wstr); - ascii->wstr = NULL; - } } From 55163a2794066fe83f05baaa12c1f642d1d08c98 Mon Sep 17 00:00:00 2001 From: Jeremy Kloth Date: Mon, 21 Mar 2022 15:47:19 -0600 Subject: [PATCH 3/4] Use TestCase.subTest() for per-iteration testing --- Lib/test/test_embed.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 22b1e3bc992896..f0c88de68e89eb 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1665,8 +1665,9 @@ def test_no_memleak(self): self.fail(f"unexpected output: {out!a}") refs = int(match.group(1)) blocks = int(match.group(2)) - self.assertEqual(refs, 0, f"{xopt}, stmt='{stmt}' -> {out}") - self.assertEqual(blocks, 0, f"{xopt}, stmt='{stmt}' -> {out}") + with self.subTest(frozen_modules=flag, stmt=stmt): + self.assertEqual(refs, 0, out) + self.assertEqual(blocks, 0, out) class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase): From 72f50c337faf1eee410e6b73d2b5ab188e55b68a Mon Sep 17 00:00:00 2001 From: Jeremy Kloth Date: Mon, 21 Mar 2022 16:27:26 -0600 Subject: [PATCH 4/4] Refactor dealloc into single function. What started out as different behavior for each type ended up being identical after edge cases were discovered. --- Objects/unicodeobject.c | 72 ++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b553a73a96485a..ce3ebce1ff72d0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16057,6 +16057,35 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp) } +static void unicode_static_dealloc(PyObject *op) +{ + PyASCIIObject* ascii = (PyASCIIObject*)op; + + assert(ascii->state.compact); + + if (ascii->state.ascii) { + if (ascii->wstr) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + } + } + else { + PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op; + void* data = (void*)(compact + 1); + if (ascii->wstr && ascii->wstr != data) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + compact->wstr_length = 0; + } + if (compact->utf8) { + PyObject_Free(compact->utf8); + compact->utf8 = NULL; + compact->utf8_length = 0; + } + } +} + + void _PyUnicode_Fini(PyInterpreterState *interp) { @@ -16073,25 +16102,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) // Clear the single character singletons for (int i = 0; i < 128; i++) { - PyASCIIObject *ascii = (PyASCIIObject *)& _Py_SINGLETON(strings).ascii[i]; - if (ascii->wstr != NULL) { - PyObject_FREE(ascii->wstr); - ascii->wstr = NULL; - } + unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]); } for (int i = 0; i < 128; i++) { - PyASCIIObject *ascii = (PyASCIIObject *)&_Py_SINGLETON(strings).latin1[i]; - PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)ascii; - if (ascii->wstr != NULL) { - PyObject_FREE(ascii->wstr); - ascii->wstr = NULL; - compact->wstr_length = 0; - } - if (compact->utf8 != NULL) { - PyObject_Free(compact->utf8); - compact->utf8 = NULL; - compact->utf8_length = 0; - } + unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]); } } @@ -16099,29 +16113,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) void _PyStaticUnicode_Dealloc(PyObject *op) { - PyASCIIObject *ascii = (PyASCIIObject*)op; - - assert(ascii->state.compact); - - if (ascii->state.ascii) { - if (ascii->wstr) { - PyObject_Free(ascii->wstr); - ascii->wstr = NULL; - } - } else { - PyCompactUnicodeObject *compact = (PyCompactUnicodeObject*)op; - void *data = (void*)(compact + 1); - if (ascii->wstr && ascii->wstr != data) { - PyObject_Free(ascii->wstr); - ascii->wstr = NULL; - compact->wstr_length = 0; - } - if (compact->utf8) { - PyObject_Free(compact->utf8); - compact->utf8 = NULL; - compact->utf8_length = 0; - } - } + unicode_static_dealloc(op); }