Skip to content

Commit 226a012

Browse files
authored
bpo-42536: GC track recycled tuples (GH-23623)
Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector: - collections.OrderedDict.items - dict.items - enumerate - functools.reduce - itertools.combinations - itertools.combinations_with_replacement - itertools.permutations - itertools.product - itertools.zip_longest - zip Previously, they could have become untracked by a prior garbage collection.
1 parent 2de5097 commit 226a012

12 files changed

+192
-0
lines changed

Lib/test/test_builtin.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import collections
77
import decimal
88
import fractions
9+
import gc
910
import io
1011
import locale
1112
import os
@@ -1756,6 +1757,18 @@ def __next__(self):
17561757
l8 = self.iter_error(zip(Iter(3), "AB", strict=True), ValueError)
17571758
self.assertEqual(l8, [(2, "A"), (1, "B")])
17581759

1760+
@support.cpython_only
1761+
def test_zip_result_gc(self):
1762+
# bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
1763+
# about what can be untracked. Make sure we re-track result tuples
1764+
# whenever we reuse them.
1765+
it = zip([[]])
1766+
gc.collect()
1767+
# That GC collection probably untracked the recycled internal result
1768+
# tuple, which is initialized to (None,). Make sure it's re-tracked when
1769+
# it's mutated and returned from __next__:
1770+
self.assertTrue(gc.is_tracked(next(it)))
1771+
17591772
def test_format(self):
17601773
# Test the basic machinery of the format() builtin. Don't test
17611774
# the specifics of the various formatters

Lib/test/test_dict.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,6 +1452,25 @@ def items(self):
14521452
d = CustomReversedDict(pairs)
14531453
self.assertEqual(pairs[::-1], list(dict(d).items()))
14541454

1455+
@support.cpython_only
1456+
def test_dict_items_result_gc(self):
1457+
# bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
1458+
# assumptions about what can be untracked. Make sure we re-track result
1459+
# tuples whenever we reuse them.
1460+
it = iter({None: []}.items())
1461+
gc.collect()
1462+
# That GC collection probably untracked the recycled internal result
1463+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
1464+
# when it's mutated and returned from __next__:
1465+
self.assertTrue(gc.is_tracked(next(it)))
1466+
1467+
@support.cpython_only
1468+
def test_dict_items_result_gc(self):
1469+
# Same as test_dict_items_result_gc above, but reversed.
1470+
it = reversed({None: []}.items())
1471+
gc.collect()
1472+
self.assertTrue(gc.is_tracked(next(it)))
1473+
14551474

14561475
class CAPITest(unittest.TestCase):
14571476

Lib/test/test_enumerate.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import operator
33
import sys
44
import pickle
5+
import gc
56

67
from test import support
78

@@ -134,6 +135,18 @@ def test_tuple_reuse(self):
134135
self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
135136
self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))
136137

138+
@support.cpython_only
139+
def test_enumerate_result_gc(self):
140+
# bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
141+
# assumptions about what can be untracked. Make sure we re-track result
142+
# tuples whenever we reuse them.
143+
it = self.enum([[]])
144+
gc.collect()
145+
# That GC collection probably untracked the recycled internal result
146+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
147+
# when it's mutated and returned from __next__:
148+
self.assertTrue(gc.is_tracked(next(it)))
149+
137150
class MyEnum(enumerate):
138151
pass
139152

Lib/test/test_itertools.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import sys
1313
import struct
1414
import threading
15+
import gc
16+
1517
maxsize = support.MAX_Py_ssize_t
1618
minsize = -maxsize-1
1719

@@ -1573,6 +1575,51 @@ def test_StopIteration(self):
15731575
self.assertRaises(StopIteration, next, f(lambda x:x, []))
15741576
self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
15751577

1578+
@support.cpython_only
1579+
def test_combinations_result_gc(self):
1580+
# bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
1581+
# assumptions about what can be untracked. Make sure we re-track result
1582+
# tuples whenever we reuse them.
1583+
it = combinations([None, []], 1)
1584+
next(it)
1585+
gc.collect()
1586+
# That GC collection probably untracked the recycled internal result
1587+
# tuple, which has the value (None,). Make sure it's re-tracked when
1588+
# it's mutated and returned from __next__:
1589+
self.assertTrue(gc.is_tracked(next(it)))
1590+
1591+
@support.cpython_only
1592+
def test_combinations_with_replacement_result_gc(self):
1593+
# Ditto for combinations_with_replacement.
1594+
it = combinations_with_replacement([None, []], 1)
1595+
next(it)
1596+
gc.collect()
1597+
self.assertTrue(gc.is_tracked(next(it)))
1598+
1599+
@support.cpython_only
1600+
def test_permutations_result_gc(self):
1601+
# Ditto for permutations.
1602+
it = permutations([None, []], 1)
1603+
next(it)
1604+
gc.collect()
1605+
self.assertTrue(gc.is_tracked(next(it)))
1606+
1607+
@support.cpython_only
1608+
def test_product_result_gc(self):
1609+
# Ditto for product.
1610+
it = product([None, []])
1611+
next(it)
1612+
gc.collect()
1613+
self.assertTrue(gc.is_tracked(next(it)))
1614+
1615+
@support.cpython_only
1616+
def test_zip_longest_result_gc(self):
1617+
# Ditto for zip_longest.
1618+
it = zip_longest([[]])
1619+
gc.collect()
1620+
self.assertTrue(gc.is_tracked(next(it)))
1621+
1622+
15761623
class TestExamples(unittest.TestCase):
15771624

15781625
def test_accumulate(self):

Lib/test/test_ordered_dict.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,17 @@ def test_merge_operator(self):
700700
with self.assertRaises(ValueError):
701701
a |= "BAD"
702702

703+
@support.cpython_only
704+
def test_ordered_dict_items_result_gc(self):
705+
# bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
706+
# assumptions about what can be untracked. Make sure we re-track result
707+
# tuples whenever we reuse them.
708+
it = iter(self.OrderedDict({None: []}).items())
709+
gc.collect()
710+
# That GC collection probably untracked the recycled internal result
711+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
712+
# when it's mutated and returned from __next__:
713+
self.assertTrue(gc.is_tracked(next(it)))
703714

704715
class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
705716

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Several built-in and standard library types now ensure that their internal
2+
result tuples are always tracked by the :term:`garbage collector
3+
<garbage collection>`:
4+
5+
- :meth:`collections.OrderedDict.items() <collections.OrderedDict>`
6+
7+
- :meth:`dict.items`
8+
9+
- :func:`enumerate`
10+
11+
- :func:`functools.reduce`
12+
13+
- :func:`itertools.combinations`
14+
15+
- :func:`itertools.combinations_with_replacement`
16+
17+
- :func:`itertools.permutations`
18+
19+
- :func:`itertools.product`
20+
21+
- :func:`itertools.zip_longest`
22+
23+
- :func:`zip`
24+
25+
Previously, they could have become untracked by a prior garbage collection.
26+
Patch by Brandt Bucher.

Modules/_functoolsmodule.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "Python.h"
22
#include "pycore_long.h" // _PyLong_GetZero()
3+
#include "pycore_object.h" // _PyObject_GC_TRACK
34
#include "pycore_pystate.h" // _PyThreadState_GET()
45
#include "pycore_tuple.h" // _PyTuple_ITEMS()
56
#include "structmember.h" // PyMemberDef
@@ -673,6 +674,11 @@ functools_reduce(PyObject *self, PyObject *args)
673674
if ((result = PyObject_Call(func, args, NULL)) == NULL) {
674675
goto Fail;
675676
}
677+
// bpo-42536: The GC may have untracked this args tuple. Since we're
678+
// recycling it, make sure it's tracked again:
679+
if (!_PyObject_GC_IS_TRACKED(args)) {
680+
_PyObject_GC_TRACK(args);
681+
}
676682
}
677683
}
678684

Modules/itertoolsmodule.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#define PY_SSIZE_T_CLEAN
44
#include "Python.h"
55
#include "pycore_long.h" // _PyLong_GetZero()
6+
#include "pycore_object.h" // _PyObject_GC_TRACK()
67
#include "pycore_tuple.h" // _PyTuple_ITEMS()
78
#include <stddef.h> // offsetof()
89

@@ -2378,6 +2379,11 @@ product_next(productobject *lz)
23782379
lz->result = result;
23792380
Py_DECREF(old_result);
23802381
}
2382+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2383+
// recycling it, make sure it's tracked again:
2384+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2385+
_PyObject_GC_TRACK(result);
2386+
}
23812387
/* Now, we've got the only copy so we can update it in-place */
23822388
assert (npools==0 || Py_REFCNT(result) == 1);
23832389

@@ -2701,6 +2707,11 @@ combinations_next(combinationsobject *co)
27012707
co->result = result;
27022708
Py_DECREF(old_result);
27032709
}
2710+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2711+
// recycling it, make sure it's tracked again:
2712+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2713+
_PyObject_GC_TRACK(result);
2714+
}
27042715
/* Now, we've got the only copy so we can update it in-place
27052716
* CPython's empty tuple is a singleton and cached in
27062717
* PyTuple's freelist.
@@ -3035,6 +3046,11 @@ cwr_next(cwrobject *co)
30353046
co->result = result;
30363047
Py_DECREF(old_result);
30373048
}
3049+
// bpo-42536: The GC may have untracked this result tuple. Since we're
3050+
// recycling it, make sure it's tracked again:
3051+
else if (!_PyObject_GC_IS_TRACKED(result)) {
3052+
_PyObject_GC_TRACK(result);
3053+
}
30383054
/* Now, we've got the only copy so we can update it in-place CPython's
30393055
empty tuple is a singleton and cached in PyTuple's freelist. */
30403056
assert(r == 0 || Py_REFCNT(result) == 1);
@@ -3379,6 +3395,11 @@ permutations_next(permutationsobject *po)
33793395
po->result = result;
33803396
Py_DECREF(old_result);
33813397
}
3398+
// bpo-42536: The GC may have untracked this result tuple. Since we're
3399+
// recycling it, make sure it's tracked again:
3400+
else if (!_PyObject_GC_IS_TRACKED(result)) {
3401+
_PyObject_GC_TRACK(result);
3402+
}
33823403
/* Now, we've got the only copy so we can update it in-place */
33833404
assert(r == 0 || Py_REFCNT(result) == 1);
33843405

@@ -4649,6 +4670,11 @@ zip_longest_next(ziplongestobject *lz)
46494670
PyTuple_SET_ITEM(result, i, item);
46504671
Py_DECREF(olditem);
46514672
}
4673+
// bpo-42536: The GC may have untracked this result tuple. Since we're
4674+
// recycling it, make sure it's tracked again:
4675+
if (!_PyObject_GC_IS_TRACKED(result)) {
4676+
_PyObject_GC_TRACK(result);
4677+
}
46524678
} else {
46534679
result = PyTuple_New(tuplesize);
46544680
if (result == NULL)

Objects/dictobject.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3989,6 +3989,11 @@ dictiter_iternextitem(dictiterobject *di)
39893989
Py_INCREF(result);
39903990
Py_DECREF(oldkey);
39913991
Py_DECREF(oldvalue);
3992+
// bpo-42536: The GC may have untracked this result tuple. Since we're
3993+
// recycling it, make sure it's tracked again:
3994+
if (!_PyObject_GC_IS_TRACKED(result)) {
3995+
_PyObject_GC_TRACK(result);
3996+
}
39923997
}
39933998
else {
39943999
result = PyTuple_New(2);
@@ -4104,6 +4109,11 @@ dictreviter_iternext(dictiterobject *di)
41044109
Py_INCREF(result);
41054110
Py_DECREF(oldkey);
41064111
Py_DECREF(oldvalue);
4112+
// bpo-42536: The GC may have untracked this result tuple. Since
4113+
// we're recycling it, make sure it's tracked again:
4114+
if (!_PyObject_GC_IS_TRACKED(result)) {
4115+
_PyObject_GC_TRACK(result);
4116+
}
41074117
}
41084118
else {
41094119
result = PyTuple_New(2);

Objects/enumobject.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "Python.h"
44
#include "pycore_long.h" // _PyLong_GetOne()
5+
#include "pycore_object.h" // _PyObject_GC_TRACK()
56

67
#include "clinic/enumobject.c.h"
78

@@ -131,6 +132,11 @@ enum_next_long(enumobject *en, PyObject* next_item)
131132
PyTuple_SET_ITEM(result, 1, next_item);
132133
Py_DECREF(old_index);
133134
Py_DECREF(old_item);
135+
// bpo-42536: The GC may have untracked this result tuple. Since we're
136+
// recycling it, make sure it's tracked again:
137+
if (!_PyObject_GC_IS_TRACKED(result)) {
138+
_PyObject_GC_TRACK(result);
139+
}
134140
return result;
135141
}
136142
result = PyTuple_New(2);
@@ -176,6 +182,11 @@ enum_next(enumobject *en)
176182
PyTuple_SET_ITEM(result, 1, next_item);
177183
Py_DECREF(old_index);
178184
Py_DECREF(old_item);
185+
// bpo-42536: The GC may have untracked this result tuple. Since we're
186+
// recycling it, make sure it's tracked again:
187+
if (!_PyObject_GC_IS_TRACKED(result)) {
188+
_PyObject_GC_TRACK(result);
189+
}
179190
return result;
180191
}
181192
result = PyTuple_New(2);

Objects/odictobject.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,11 @@ odictiter_iternext(odictiterobject *di)
18141814
Py_INCREF(result);
18151815
Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */
18161816
Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */
1817+
// bpo-42536: The GC may have untracked this result tuple. Since we're
1818+
// recycling it, make sure it's tracked again:
1819+
if (!_PyObject_GC_IS_TRACKED(result)) {
1820+
_PyObject_GC_TRACK(result);
1821+
}
18171822
}
18181823
else {
18191824
result = PyTuple_New(2);

Python/bltinmodule.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2636,6 +2636,11 @@ zip_next(zipobject *lz)
26362636
PyTuple_SET_ITEM(result, i, item);
26372637
Py_DECREF(olditem);
26382638
}
2639+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2640+
// recycling it, make sure it's tracked again:
2641+
if (!_PyObject_GC_IS_TRACKED(result)) {
2642+
_PyObject_GC_TRACK(result);
2643+
}
26392644
} else {
26402645
result = PyTuple_New(tuplesize);
26412646
if (result == NULL)

0 commit comments

Comments
 (0)