Skip to content

Commit ac38a9f

Browse files
bpo-43693: Eliminate unused "fast locals". (gh-26587)
Currently, if an arg value escapes (into the closure for an inner function), we end up allocating two indices in the fast locals even though only one gets used. Additionally, using the lower index would be better in some cases, such as with no-arg `super()`. To address this, we update the compiler to fix the offsets so that each variable gets only one "fast local". As a consequence, some cell offsets are now interspersed with the locals (only when an arg escapes to an inner function). See https://bugs.python.org/issue43693
1 parent 1d10bf0 commit ac38a9f

File tree

12 files changed

+4187
-4212
lines changed

12 files changed

+4187
-4212
lines changed

Include/cpython/code.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ struct PyCodeObject {
8383

8484
/* These fields are set with computed values on new code objects. */
8585

86-
int *co_cell2arg; /* Maps cell vars which are arguments. */
8786
// redundant values (derived from co_localsplusnames and co_localspluskinds)
8887
int co_nlocalsplus; /* number of local + cell + free variables */
8988
int co_nlocals; /* number of local variables */
90-
int co_ncellvars; /* number of cell variables */
89+
int co_nplaincellvars; /* number of non-arg cell variables */
90+
int co_ncellvars; /* total number of cell variables */
9191
int co_nfreevars; /* number of free variables */
9292
// lazily-computed values
9393
PyObject *co_varnames; /* tuple of strings (local variable names) */
@@ -142,10 +142,6 @@ struct PyCodeObject {
142142
#define CO_FUTURE_GENERATOR_STOP 0x800000
143143
#define CO_FUTURE_ANNOTATIONS 0x1000000
144144

145-
/* This value is found in the co_cell2arg array when the associated cell
146-
variable does not correspond to an argument. */
147-
#define CO_CELL_NOT_AN_ARG (-1)
148-
149145
/* This should be defined if a future statement modifies the syntax.
150146
For example, when a keyword is added.
151147
*/

Include/internal/pycore_frame.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@ _PyFrame_GetBuiltins(PyFrameObject *f)
3232

3333
int _PyFrame_TakeLocals(PyFrameObject *f);
3434

35-
PyAPI_FUNC(int) _PyFrame_OpAlreadyRan(PyFrameObject *f, int opcode, int oparg);
36-
3735
#ifdef __cplusplus
3836
}
3937
#endif

Lib/importlib/_bootstrap_external.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ def _write_atomic(path, data, mode=0o666):
358358
# Python 3.11a1 3453 (add co_fastlocalnames and co_fastlocalkinds)
359359
# Python 3.11a1 3454 (compute cell offsets relative to locals bpo-43693)
360360
# Python 3.11a1 3455 (add MAKE_CELL bpo-43693)
361+
# Python 3.11a1 3456 (interleave cell args bpo-43693)
361362

362363
#
363364
# MAGIC must change whenever the bytecode emitted by the compiler may no
@@ -367,7 +368,7 @@ def _write_atomic(path, data, mode=0o666):
367368
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
368369
# in PC/launcher.c must also be updated.
369370

370-
MAGIC_NUMBER = (3455).to_bytes(2, 'little') + b'\r\n'
371+
MAGIC_NUMBER = (3456).to_bytes(2, 'little') + b'\r\n'
371372
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
372373

373374
_PYCACHE = '__pycache__'

Lib/test/test_dis.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -427,9 +427,9 @@ def foo(x):
427427
return foo
428428

429429
dis_nested_0 = """\
430-
0 MAKE_CELL 2 (y)
430+
0 MAKE_CELL 0 (y)
431431
432-
%3d 2 LOAD_CLOSURE 2 (y)
432+
%3d 2 LOAD_CLOSURE 0 (y)
433433
4 BUILD_TUPLE 1
434434
6 LOAD_CONST 1 (<code object foo at 0x..., file "%s", line %d>)
435435
8 LOAD_CONST 2 ('_h.<locals>.foo')
@@ -446,14 +446,14 @@ def foo(x):
446446

447447
dis_nested_1 = """%s
448448
Disassembly of <code object foo at 0x..., file "%s", line %d>:
449-
0 MAKE_CELL 1 (x)
449+
0 MAKE_CELL 0 (x)
450450
451-
%3d 2 LOAD_CLOSURE 1 (x)
451+
%3d 2 LOAD_CLOSURE 0 (x)
452452
4 BUILD_TUPLE 1
453453
6 LOAD_CONST 1 (<code object <listcomp> at 0x..., file "%s", line %d>)
454454
8 LOAD_CONST 2 ('_h.<locals>.foo.<locals>.<listcomp>')
455455
10 MAKE_FUNCTION 8 (closure)
456-
12 LOAD_DEREF 2 (y)
456+
12 LOAD_DEREF 1 (y)
457457
14 GET_ITER
458458
16 CALL_FUNCTION 1
459459
18 RETURN_VALUE
@@ -966,19 +966,19 @@ def jumpy():
966966

967967
Instruction = dis.Instruction
968968
expected_opinfo_outer = [
969-
Instruction(opname='MAKE_CELL', opcode=135, arg=3, argval='a', argrepr='a', offset=0, starts_line=None, is_jump_target=False),
970-
Instruction(opname='MAKE_CELL', opcode=135, arg=4, argval='b', argrepr='b', offset=2, starts_line=None, is_jump_target=False),
969+
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='a', argrepr='a', offset=0, starts_line=None, is_jump_target=False),
970+
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='b', argrepr='b', offset=2, starts_line=None, is_jump_target=False),
971971
Instruction(opname='LOAD_CONST', opcode=100, arg=8, argval=(3, 4), argrepr='(3, 4)', offset=4, starts_line=2, is_jump_target=False),
972-
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='a', argrepr='a', offset=6, starts_line=None, is_jump_target=False),
973-
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='b', argrepr='b', offset=8, starts_line=None, is_jump_target=False),
972+
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='a', argrepr='a', offset=6, starts_line=None, is_jump_target=False),
973+
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='b', argrepr='b', offset=8, starts_line=None, is_jump_target=False),
974974
Instruction(opname='BUILD_TUPLE', opcode=102, arg=2, argval=2, argrepr='', offset=10, starts_line=None, is_jump_target=False),
975975
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_f, argrepr=repr(code_object_f), offset=12, starts_line=None, is_jump_target=False),
976976
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='outer.<locals>.f', argrepr="'outer.<locals>.f'", offset=14, starts_line=None, is_jump_target=False),
977977
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=16, starts_line=None, is_jump_target=False),
978978
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='f', argrepr='f', offset=18, starts_line=None, is_jump_target=False),
979979
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=0, argval='print', argrepr='print', offset=20, starts_line=7, is_jump_target=False),
980-
Instruction(opname='LOAD_DEREF', opcode=137, arg=3, argval='a', argrepr='a', offset=22, starts_line=None, is_jump_target=False),
981-
Instruction(opname='LOAD_DEREF', opcode=137, arg=4, argval='b', argrepr='b', offset=24, starts_line=None, is_jump_target=False),
980+
Instruction(opname='LOAD_DEREF', opcode=137, arg=0, argval='a', argrepr='a', offset=22, starts_line=None, is_jump_target=False),
981+
Instruction(opname='LOAD_DEREF', opcode=137, arg=1, argval='b', argrepr='b', offset=24, starts_line=None, is_jump_target=False),
982982
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval='', argrepr="''", offset=26, starts_line=None, is_jump_target=False),
983983
Instruction(opname='LOAD_CONST', opcode=100, arg=6, argval=1, argrepr='1', offset=28, starts_line=None, is_jump_target=False),
984984
Instruction(opname='BUILD_LIST', opcode=103, arg=0, argval=0, argrepr='', offset=30, starts_line=None, is_jump_target=False),
@@ -991,23 +991,23 @@ def jumpy():
991991
]
992992

993993
expected_opinfo_f = [
994-
Instruction(opname='MAKE_CELL', opcode=135, arg=3, argval='c', argrepr='c', offset=0, starts_line=None, is_jump_target=False),
995-
Instruction(opname='MAKE_CELL', opcode=135, arg=4, argval='d', argrepr='d', offset=2, starts_line=None, is_jump_target=False),
994+
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='c', argrepr='c', offset=0, starts_line=None, is_jump_target=False),
995+
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='d', argrepr='d', offset=2, starts_line=None, is_jump_target=False),
996996
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=(5, 6), argrepr='(5, 6)', offset=4, starts_line=3, is_jump_target=False),
997-
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=5, argval='a', argrepr='a', offset=6, starts_line=None, is_jump_target=False),
998-
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=6, argval='b', argrepr='b', offset=8, starts_line=None, is_jump_target=False),
999-
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='c', argrepr='c', offset=10, starts_line=None, is_jump_target=False),
1000-
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='d', argrepr='d', offset=12, starts_line=None, is_jump_target=False),
997+
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='a', argrepr='a', offset=6, starts_line=None, is_jump_target=False),
998+
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='b', argrepr='b', offset=8, starts_line=None, is_jump_target=False),
999+
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='c', argrepr='c', offset=10, starts_line=None, is_jump_target=False),
1000+
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='d', argrepr='d', offset=12, starts_line=None, is_jump_target=False),
10011001
Instruction(opname='BUILD_TUPLE', opcode=102, arg=4, argval=4, argrepr='', offset=14, starts_line=None, is_jump_target=False),
10021002
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_inner, argrepr=repr(code_object_inner), offset=16, starts_line=None, is_jump_target=False),
10031003
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='outer.<locals>.f.<locals>.inner', argrepr="'outer.<locals>.f.<locals>.inner'", offset=18, starts_line=None, is_jump_target=False),
10041004
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=20, starts_line=None, is_jump_target=False),
10051005
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='inner', argrepr='inner', offset=22, starts_line=None, is_jump_target=False),
10061006
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=0, argval='print', argrepr='print', offset=24, starts_line=5, is_jump_target=False),
1007-
Instruction(opname='LOAD_DEREF', opcode=137, arg=5, argval='a', argrepr='a', offset=26, starts_line=None, is_jump_target=False),
1008-
Instruction(opname='LOAD_DEREF', opcode=137, arg=6, argval='b', argrepr='b', offset=28, starts_line=None, is_jump_target=False),
1009-
Instruction(opname='LOAD_DEREF', opcode=137, arg=3, argval='c', argrepr='c', offset=30, starts_line=None, is_jump_target=False),
1010-
Instruction(opname='LOAD_DEREF', opcode=137, arg=4, argval='d', argrepr='d', offset=32, starts_line=None, is_jump_target=False),
1007+
Instruction(opname='LOAD_DEREF', opcode=137, arg=3, argval='a', argrepr='a', offset=26, starts_line=None, is_jump_target=False),
1008+
Instruction(opname='LOAD_DEREF', opcode=137, arg=4, argval='b', argrepr='b', offset=28, starts_line=None, is_jump_target=False),
1009+
Instruction(opname='LOAD_DEREF', opcode=137, arg=0, argval='c', argrepr='c', offset=30, starts_line=None, is_jump_target=False),
1010+
Instruction(opname='LOAD_DEREF', opcode=137, arg=1, argval='d', argrepr='d', offset=32, starts_line=None, is_jump_target=False),
10111011
Instruction(opname='CALL_FUNCTION', opcode=131, arg=4, argval=4, argrepr='', offset=34, starts_line=None, is_jump_target=False),
10121012
Instruction(opname='POP_TOP', opcode=1, arg=None, argval=None, argrepr='', offset=36, starts_line=None, is_jump_target=False),
10131013
Instruction(opname='LOAD_FAST', opcode=124, arg=2, argval='inner', argrepr='inner', offset=38, starts_line=6, is_jump_target=False),
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Computation of the offsets of cell variables is done in the compiler instead
2+
of at runtime. This reduces the overhead of handling cell and free
3+
variables, especially in the case where a variable is both an argument and
4+
a cell variable.

Objects/codeobject.c

Lines changed: 36 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -162,43 +162,28 @@ _Py_set_localsplus_info(int offset, PyObject *name, _PyLocalsPlusKind kind,
162162
Py_INCREF(name);
163163
PyTuple_SET_ITEM(names, offset, name);
164164
kinds[offset] = kind;
165-
166-
if (kind == CO_FAST_CELL) {
167-
// Cells can overlap with args, so mark those cases.
168-
int nlocalsplus = (int)PyTuple_GET_SIZE(names);
169-
for (int i = 0; i < nlocalsplus; i++) {
170-
_PyLocalsPlusKind kind = kinds[i];
171-
if (kind && !(kind & CO_FAST_LOCAL)) {
172-
// We've moved past the locals.
173-
break;
174-
}
175-
PyObject *varname = PyTuple_GET_ITEM(names, i);
176-
int cmp = PyUnicode_Compare(name, varname);
177-
if (cmp == 0) {
178-
kinds[i] |= CO_FAST_CELL;
179-
break;
180-
}
181-
assert(cmp > 0 || !PyErr_Occurred());
182-
}
183-
}
184165
}
185166

186167
static void
187168
get_localsplus_counts(PyObject *names, _PyLocalsPlusKinds kinds,
188-
int *pnlocals, int *pncellvars,
169+
int *pnlocals, int *pnplaincellvars, int *pncellvars,
189170
int *pnfreevars)
190171
{
191172
int nlocals = 0;
173+
int nplaincellvars = 0;
192174
int ncellvars = 0;
193175
int nfreevars = 0;
194-
int nlocalsplus = Py_SAFE_DOWNCAST(PyTuple_GET_SIZE(names),
195-
Py_ssize_t, int);
176+
Py_ssize_t nlocalsplus = PyTuple_GET_SIZE(names);
196177
for (int i = 0; i < nlocalsplus; i++) {
197178
if (kinds[i] & CO_FAST_LOCAL) {
198179
nlocals += 1;
180+
if (kinds[i] & CO_FAST_CELL) {
181+
ncellvars += 1;
182+
}
199183
}
200184
else if (kinds[i] & CO_FAST_CELL) {
201185
ncellvars += 1;
186+
nplaincellvars += 1;
202187
}
203188
else if (kinds[i] & CO_FAST_FREE) {
204189
nfreevars += 1;
@@ -207,6 +192,9 @@ get_localsplus_counts(PyObject *names, _PyLocalsPlusKinds kinds,
207192
if (pnlocals != NULL) {
208193
*pnlocals = nlocals;
209194
}
195+
if (pnplaincellvars != NULL) {
196+
*pnplaincellvars = nplaincellvars;
197+
}
210198
if (pncellvars != NULL) {
211199
*pncellvars = ncellvars;
212200
}
@@ -227,10 +215,6 @@ get_localsplus_names(PyCodeObject *co, _PyLocalsPlusKind kind, int num)
227215
if ((co->co_localspluskinds[offset] & kind) == 0) {
228216
continue;
229217
}
230-
// For now there may be duplicates, which we ignore.
231-
if (kind == CO_FAST_CELL && co->co_localspluskinds[offset] != kind) {
232-
continue;
233-
}
234218
assert(index < num);
235219
PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, offset);
236220
Py_INCREF(name);
@@ -283,7 +267,7 @@ _PyCode_Validate(struct _PyCodeConstructor *con)
283267
* here to avoid the possibility of overflow (however remote). */
284268
int nlocals;
285269
get_localsplus_counts(con->localsplusnames, con->localspluskinds,
286-
&nlocals, NULL, NULL);
270+
&nlocals, NULL, NULL, NULL);
287271
int nplainlocals = nlocals -
288272
con->argcount -
289273
con->kwonlyargcount -
@@ -301,9 +285,9 @@ static void
301285
init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
302286
{
303287
int nlocalsplus = (int)PyTuple_GET_SIZE(con->localsplusnames);
304-
int nlocals, ncellvars, nfreevars;
288+
int nlocals, nplaincellvars, ncellvars, nfreevars;
305289
get_localsplus_counts(con->localsplusnames, con->localspluskinds,
306-
&nlocals, &ncellvars, &nfreevars);
290+
&nlocals, &nplaincellvars, &ncellvars, &nfreevars);
307291

308292
Py_INCREF(con->filename);
309293
co->co_filename = con->filename;
@@ -338,9 +322,9 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
338322
co->co_exceptiontable = con->exceptiontable;
339323

340324
/* derived values */
341-
co->co_cell2arg = NULL; // This will be set soon.
342325
co->co_nlocalsplus = nlocalsplus;
343326
co->co_nlocals = nlocals;
327+
co->co_nplaincellvars = nplaincellvars;
344328
co->co_ncellvars = ncellvars;
345329
co->co_nfreevars = nfreevars;
346330
co->co_varnames = NULL;
@@ -392,44 +376,6 @@ _PyCode_New(struct _PyCodeConstructor *con)
392376
co->co_flags &= ~CO_NOFREE;
393377
}
394378

395-
/* Create mapping between cells and arguments if needed. */
396-
if (co->co_ncellvars) {
397-
int totalargs = co->co_argcount +
398-
co->co_kwonlyargcount +
399-
((co->co_flags & CO_VARARGS) != 0) +
400-
((co->co_flags & CO_VARKEYWORDS) != 0);
401-
assert(totalargs <= co->co_nlocals);
402-
/* Find cells which are also arguments. */
403-
for (int i = 0; i < co->co_ncellvars; i++) {
404-
PyObject *cellname = PyTuple_GET_ITEM(co->co_localsplusnames,
405-
i + co->co_nlocals);
406-
for (int j = 0; j < totalargs; j++) {
407-
PyObject *argname = PyTuple_GET_ITEM(co->co_localsplusnames, j);
408-
int cmp = PyUnicode_Compare(cellname, argname);
409-
if (cmp == -1 && PyErr_Occurred()) {
410-
Py_DECREF(co);
411-
return NULL;
412-
}
413-
if (cmp == 0) {
414-
if (co->co_cell2arg == NULL) {
415-
co->co_cell2arg = PyMem_NEW(int, co->co_ncellvars);
416-
if (co->co_cell2arg == NULL) {
417-
Py_DECREF(co);
418-
PyErr_NoMemory();
419-
return NULL;
420-
}
421-
for (int k = 0; k < co->co_ncellvars; k++) {
422-
co->co_cell2arg[k] = CO_CELL_NOT_AN_ARG;
423-
}
424-
}
425-
co->co_cell2arg[i] = j;
426-
// Go to the next cell name.
427-
break;
428-
}
429-
}
430-
}
431-
}
432-
433379
return co;
434380
}
435381

@@ -478,6 +424,23 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
478424
}
479425
for (int i = 0; i < ncellvars; i++, offset++) {
480426
PyObject *name = PyTuple_GET_ITEM(cellvars, i);
427+
int argoffset = -1;
428+
for (int j = 0; j < nvarnames; j++) {
429+
int cmp = PyUnicode_Compare(PyTuple_GET_ITEM(varnames, j),
430+
name);
431+
assert(!PyErr_Occurred());
432+
if (cmp == 0) {
433+
argoffset = j;
434+
break;
435+
}
436+
}
437+
if (argoffset >= 0) {
438+
// Merge the localsplus indices.
439+
nlocalsplus -= 1;
440+
offset -= 1;
441+
localspluskinds[argoffset] |= CO_FAST_CELL;
442+
continue;
443+
}
481444
_Py_set_localsplus_info(offset, name, CO_FAST_CELL,
482445
localsplusnames, localspluskinds);
483446
}
@@ -486,6 +449,11 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
486449
_Py_set_localsplus_info(offset, name, CO_FAST_FREE,
487450
localsplusnames, localspluskinds);
488451
}
452+
// If any cells were args then nlocalsplus will have shrunk.
453+
// We don't bother resizing localspluskinds.
454+
if (_PyTuple_Resize(&localsplusnames, nlocalsplus) < 0) {
455+
goto error;
456+
}
489457

490458
struct _PyCodeConstructor con = {
491459
.filename = filename,
@@ -1182,8 +1150,6 @@ code_dealloc(PyCodeObject *co)
11821150
Py_XDECREF(co->co_name);
11831151
Py_XDECREF(co->co_linetable);
11841152
Py_XDECREF(co->co_exceptiontable);
1185-
if (co->co_cell2arg != NULL)
1186-
PyMem_Free(co->co_cell2arg);
11871153
if (co->co_weakreflist != NULL)
11881154
PyObject_ClearWeakRefs((PyObject*)co);
11891155
if (co->co_quickened) {
@@ -1377,10 +1343,6 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
13771343
(co_extra->ce_size-1) * sizeof(co_extra->ce_extras[0]);
13781344
}
13791345

1380-
if (co->co_cell2arg != NULL && co->co_cellvars != NULL) {
1381-
res += co->co_ncellvars * sizeof(Py_ssize_t);
1382-
}
1383-
13841346
if (co->co_quickened != NULL) {
13851347
Py_ssize_t count = co->co_quickened[0].entry.zero.cache_count;
13861348
count += (PyBytes_GET_SIZE(co->co_code)+sizeof(SpecializedCacheEntry)-1)/

0 commit comments

Comments
 (0)