Skip to content

Commit 69c6352

Browse files
author
Kristján Valur Jónsson
committed
Issue #10576: Add a progress callback to gcmodule
1 parent c014df7 commit 69c6352

File tree

4 files changed

+249
-9
lines changed

4 files changed

+249
-9
lines changed

Doc/library/gc.rst

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ The :mod:`gc` module provides the following functions:
153153
.. versionadded:: 3.1
154154

155155

156-
The following variable is provided for read-only access (you can mutate its
157-
value but should not rebind it):
156+
The following variables are provided for read-only access (you can mutate the
157+
values but should not rebind them):
158158

159159
.. data:: garbage
160160

@@ -183,6 +183,41 @@ value but should not rebind it):
183183
:const:`DEBUG_UNCOLLECTABLE` is set, in addition all uncollectable objects
184184
are printed.
185185

186+
.. data:: callbacks
187+
188+
A list of callbacks that will be invoked by the garbage collector before and
189+
after collection. The callbacks will be called with two arguments,
190+
:arg:`phase` and :arg:`info`.
191+
192+
:arg:`phase` can be one of two values:
193+
194+
"start": The garbage collection is about to start.
195+
196+
"stop": The garbage collection has finished.
197+
198+
:arg:`info` provides more information for the callback. The following
199+
keys are currently defined:
200+
201+
"generation": The oldest generation being collected.
202+
203+
"collected": When :arg:`phase` is "stop", the number of objects
204+
successfully collected.
205+
206+
"uncollectable": When :arg:`phase` is "stop", the number of objects
207+
that could not be collected and were put in :data:`garbage`.
208+
209+
Applications can add their own callbacks to this list. The primary
210+
use cases are:
211+
212+
Gathering statistics about garbage collection, such as how often
213+
various generations are collected, and how long the collection
214+
takes.
215+
216+
Allowing applications to identify and clear their own uncollectable
217+
types when they appear in :data:`garbage`.
218+
219+
.. versionadded:: 3.3
220+
186221

187222
The following constants are provided for use with :func:`set_debug`:
188223

Lib/test/test_gc.py

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,20 @@ def it_happened(ignored):
3232
# gc collects it.
3333
self.wr = weakref.ref(C1055820(666), it_happened)
3434

35+
class Uncollectable(object):
    """One half of a two-object reference cycle whose members define __del__.

    Instantiating without an argument also builds the partner object, so
    the two instances refer to each other through ``partner``.  Objects in
    a reference cycle never reach zero references and are therefore left
    to the garbage collector; if objects in the cycle have __del__
    methods, the gc refuses to guess an order and leaves the cycle
    uncollected.
    """

    def __init__(self, partner=None):
        # With no partner supplied, create one that points back at us.
        self.partner = (Uncollectable(partner=self)
                        if partner is None else partner)

    def __del__(self):
        # Intentionally empty: only the presence of __del__ matters here.
        pass
3549

3650
### Tests
3751
###############################################################################
@@ -528,6 +542,126 @@ def run_command(code):
528542
self.assertNotIn(b"uncollectable objects at shutdown", stderr)
529543

530544

545+
class GCCallbackTests(unittest.TestCase):
546+
def setUp(self):
547+
# Save gc state and disable it.
548+
self.enabled = gc.isenabled()
549+
gc.disable()
550+
self.debug = gc.get_debug()
551+
gc.set_debug(0)
552+
gc.callbacks.append(self.cb1)
553+
gc.callbacks.append(self.cb2)
554+
555+
def tearDown(self):
556+
# Restore gc state
557+
del self.visit
558+
gc.callbacks.remove(self.cb1)
559+
gc.callbacks.remove(self.cb2)
560+
gc.set_debug(self.debug)
561+
if self.enabled:
562+
gc.enable()
563+
# destroy any uncollectables
564+
gc.collect()
565+
for obj in gc.garbage:
566+
if isinstance(obj, Uncollectable):
567+
obj.partner = None
568+
del gc.garbage[:]
569+
gc.collect()
570+
571+
othergarbage = []
572+
def preclean(self):
573+
# Remove all fluff from the system. Invoke this function
574+
# manually rather than through self.setUp() for maximum
575+
# safety.
576+
self.visit = []
577+
gc.collect()
578+
garbage, gc.garbage[:] = gc.garbage[:], []
579+
self.othergarbage.append(garbage)
580+
self.visit = []
581+
582+
def cb1(self, phase, info):
583+
self.visit.append((1, phase, dict(info)))
584+
585+
def cb2(self, phase, info):
586+
self.visit.append((2, phase, dict(info)))
587+
if phase == "stop" and hasattr(self, "cleanup"):
588+
# Clean Uncollectable from garbage
589+
uc = [e for e in gc.garbage if isinstance(e, Uncollectable)]
590+
gc.garbage[:] = [e for e in gc.garbage
591+
if not isinstance(e, Uncollectable)]
592+
for e in uc:
593+
e.partner = None
594+
595+
def testCollect(self):
596+
self.preclean()
597+
gc.collect()
598+
# Algorithmically verify the contents of self.visit
599+
# because it is long and tortuous.
600+
601+
# Count the number of visits to each callback
602+
n = [v[0] for v in self.visit]
603+
n1 = [i for i in n if i == 1]
604+
n2 = [i for i in n if i == 2]
605+
self.assertEqual(n1, [1]*2)
606+
self.assertEqual(n2, [2]*2)
607+
608+
# Count that we got the right number of start and stop callbacks.
609+
n = [v[1] for v in self.visit]
610+
n1 = [i for i in n if i == "start"]
611+
n2 = [i for i in n if i == "stop"]
612+
self.assertEqual(n1, ["start"]*2)
613+
self.assertEqual(n2, ["stop"]*2)
614+
615+
# Check that we got the right info dict for all callbacks
616+
for v in self.visit:
617+
info = v[2]
618+
self.assertTrue("generation" in info)
619+
self.assertTrue("collected" in info)
620+
self.assertTrue("uncollectable" in info)
621+
622+
def testCollectGen(self):
623+
self.preclean()
624+
gc.collect(2)
625+
for v in self.visit:
626+
info = v[2]
627+
self.assertEqual(info["generation"], 2)
628+
629+
def testCollectGarbage(self):
630+
self.preclean()
631+
# Each of these cause four objects to be garbage: Two
632+
# Uncolectables and their instance dicts.
633+
Uncollectable()
634+
Uncollectable()
635+
C1055820(666)
636+
gc.collect()
637+
for v in self.visit:
638+
if v[1] != "stop":
639+
continue
640+
info = v[2]
641+
self.assertEqual(info["collected"], 2)
642+
self.assertEqual(info["uncollectable"], 8)
643+
644+
# We should now have the Uncollectables in gc.garbage
645+
self.assertEqual(len(gc.garbage), 4)
646+
for e in gc.garbage:
647+
self.assertIsInstance(e, Uncollectable)
648+
649+
# Now, let our callback handle the Uncollectable instances
650+
self.cleanup=True
651+
self.visit = []
652+
gc.garbage[:] = []
653+
gc.collect()
654+
for v in self.visit:
655+
if v[1] != "stop":
656+
continue
657+
info = v[2]
658+
self.assertEqual(info["collected"], 0)
659+
self.assertEqual(info["uncollectable"], 4)
660+
661+
# Uncollectables should be gone
662+
self.assertEqual(len(gc.garbage), 0)
663+
664+
531665
class GCTogglingTests(unittest.TestCase):
532666
def setUp(self):
533667
gc.enable()
@@ -681,7 +815,7 @@ def test_main():
681815

682816
try:
683817
gc.collect() # Delete 2nd generation garbage
684-
run_unittest(GCTests, GCTogglingTests)
818+
run_unittest(GCTests, GCTogglingTests, GCCallbackTests)
685819
finally:
686820
gc.set_debug(debug)
687821
# test gc.enable() even if GC is disabled by default

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,9 @@ Library
275275
- Issue #14310: Sockets can now be shared with other processes on Windows using
276276
the api socket.socket.share() and socket.fromshare().
277277

278+
- Issue #10576: The gc module now has a 'callbacks' member, a list of
279+
callables that are invoked before and after each garbage collection.
280+
278281
Build
279282
-----
280283

Modules/gcmodule.c

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,17 @@ static PyObject *garbage = NULL;
6565
/* Python string to use if unhandled exception occurs */
6666
static PyObject *gc_str = NULL;
6767

68-
/* This is the number of objects who survived the last full collection. It
68+
/* a list of callbacks to be invoked when collection is performed */
69+
static PyObject *callbacks = NULL;
70+
71+
/* This is the number of objects that survived the last full collection. It
6972
approximates the number of long lived objects tracked by the GC.
7073
7174
(by "full collection", we mean a collection of the oldest generation).
7275
*/
7376
static Py_ssize_t long_lived_total = 0;
7477

75-
/* This is the number of objects who survived all "non-full" collections,
78+
/* This is the number of objects that survived all "non-full" collections,
7679
and are waiting to undergo a full collection for the first time.
7780
7881
*/
@@ -787,7 +790,7 @@ get_time(void)
787790
/* This is the main function. Read this to understand how the
788791
* collection process works. */
789792
static Py_ssize_t
790-
collect(int generation)
793+
collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable)
791794
{
792795
int i;
793796
Py_ssize_t m = 0; /* # objects collected */
@@ -935,9 +938,64 @@ collect(int generation)
935938
PyErr_WriteUnraisable(gc_str);
936939
Py_FatalError("unexpected exception during garbage collection");
937940
}
941+
942+
if (n_collected)
943+
*n_collected = m;
944+
if (n_uncollectable)
945+
*n_uncollectable = n;
938946
return n+m;
939947
}
940948

949+
/* Invoke progress callbacks to notify clients that garbage collection
950+
* is starting or stopping
951+
*/
952+
static void
953+
invoke_gc_callback(const char *phase, int generation,
954+
Py_ssize_t collected, Py_ssize_t uncollectable)
955+
{
956+
Py_ssize_t i;
957+
PyObject *info = NULL;
958+
959+
/* we may get called very early */
960+
if (callbacks == NULL)
961+
return;
962+
/* The local variable cannot be rebound, check it for sanity */
963+
assert(callbacks != NULL && PyList_CheckExact(callbacks));
964+
if (PyList_GET_SIZE(callbacks) != 0) {
965+
info = Py_BuildValue("{sisnsn}",
966+
"generation", generation,
967+
"collected", collected,
968+
"uncollectable", uncollectable);
969+
if (info == NULL) {
970+
PyErr_WriteUnraisable(NULL);
971+
return;
972+
}
973+
}
974+
for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
975+
PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
976+
Py_INCREF(cb); /* make sure cb doesn't go away */
977+
r = PyObject_CallFunction(cb, "sO", phase, info);
978+
Py_XDECREF(r);
979+
if (r == NULL)
980+
PyErr_WriteUnraisable(cb);
981+
Py_DECREF(cb);
982+
}
983+
Py_XDECREF(info);
984+
}
985+
986+
/* Perform garbage collection of a generation and invoke
987+
* progress callbacks.
988+
*/
989+
static Py_ssize_t
990+
collect_with_callback(int generation)
991+
{
992+
Py_ssize_t result, collected, uncollectable;
993+
invoke_gc_callback("start", generation, 0, 0);
994+
result = collect(generation, &collected, &uncollectable);
995+
invoke_gc_callback("stop", generation, collected, uncollectable);
996+
return result;
997+
}
998+
941999
static Py_ssize_t
9421000
collect_generations(void)
9431001
{
@@ -956,7 +1014,7 @@ collect_generations(void)
9561014
if (i == NUM_GENERATIONS - 1
9571015
&& long_lived_pending < long_lived_total / 4)
9581016
continue;
959-
n = collect(i);
1017+
n = collect_with_callback(i);
9601018
break;
9611019
}
9621020
}
@@ -1027,7 +1085,7 @@ gc_collect(PyObject *self, PyObject *args, PyObject *kws)
10271085
n = 0; /* already collecting, don't do anything */
10281086
else {
10291087
collecting = 1;
1030-
n = collect(genarg);
1088+
n = collect_with_callback(genarg);
10311089
collecting = 0;
10321090
}
10331091

@@ -1320,6 +1378,15 @@ PyInit_gc(void)
13201378
if (PyModule_AddObject(m, "garbage", garbage) < 0)
13211379
return NULL;
13221380

1381+
if (callbacks == NULL) {
1382+
callbacks = PyList_New(0);
1383+
if (callbacks == NULL)
1384+
return NULL;
1385+
}
1386+
Py_INCREF(callbacks);
1387+
if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
1388+
return NULL;
1389+
13231390
/* Importing can't be done in collect() because collect()
13241391
* can be called via PyGC_Collect() in Py_Finalize().
13251392
* This wouldn't be a problem, except that <initialized> is
@@ -1352,7 +1419,7 @@ PyGC_Collect(void)
13521419
n = 0; /* already collecting, don't do anything */
13531420
else {
13541421
collecting = 1;
1355-
n = collect(NUM_GENERATIONS - 1);
1422+
n = collect_with_callback(NUM_GENERATIONS - 1);
13561423
collecting = 0;
13571424
}
13581425

@@ -1389,6 +1456,7 @@ _PyGC_Fini(void)
13891456
Py_XDECREF(bytes);
13901457
}
13911458
}
1459+
Py_CLEAR(callbacks);
13921460
}
13931461

13941462
/* for debugging */

0 commit comments

Comments
 (0)