Skip to content

Commit 02026ac

Browse files
markshannonGlyphack
authored andcommitted
pythonGH-111848: Set the IP when de-optimizing (pythonGH-112065)
* Replace jumps with deopts in tier 2 * Fewer special cases of uop names * Add target field to uop IR * Remove more redundant SET_IP and _CHECK_VALIDITY micro-ops * Extend whitelist of non-escaping API functions.
1 parent 3928dc9 commit 02026ac

File tree

6 files changed

+118
-98
lines changed

6 files changed

+118
-98
lines changed

Include/internal/pycore_opcode_metadata.h

Lines changed: 52 additions & 52 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uops.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ extern "C" {
1313
#define _Py_UOP_MAX_TRACE_LENGTH 128
1414

1515
typedef struct {
16-
uint32_t opcode;
17-
uint32_t oparg;
16+
uint16_t opcode;
17+
uint16_t oparg;
18+
uint32_t target;
1819
uint64_t operand; // A cache entry
1920
} _PyUOpInstruction;
2021

Python/ceval.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10671067
UOP_STAT_INC(opcode, miss);
10681068
frame->return_offset = 0; // Dispatch to frame->instr_ptr
10691069
_PyFrame_SetStackPointer(frame, stack_pointer);
1070+
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable);
10701071
Py_DECREF(current_executor);
10711072
// Fall through
10721073
// Jump here from ENTER_EXECUTOR
@@ -1077,6 +1078,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10771078
// Jump here from _EXIT_TRACE
10781079
exit_trace:
10791080
_PyFrame_SetStackPointer(frame, stack_pointer);
1081+
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable);
10801082
Py_DECREF(current_executor);
10811083
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
10821084
goto enter_tier_one;

Python/optimizer.c

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,8 @@ translate_bytecode_to_trace(
446446
#define DPRINTF(level, ...)
447447
#endif
448448

449-
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND) \
449+
450+
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
450451
DPRINTF(2, \
451452
" ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \
452453
uop_name(OPCODE), \
@@ -458,23 +459,12 @@ translate_bytecode_to_trace(
458459
trace[trace_length].opcode = (OPCODE); \
459460
trace[trace_length].oparg = (OPARG); \
460461
trace[trace_length].operand = (OPERAND); \
462+
trace[trace_length].target = (TARGET); \
461463
trace_length++;
462464

463465
#define INSTR_IP(INSTR, CODE) \
464466
((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
465467

466-
#define ADD_TO_STUB(INDEX, OPCODE, OPARG, OPERAND) \
467-
DPRINTF(2, " ADD_TO_STUB(%d, %s, %d, %" PRIu64 ")\n", \
468-
(INDEX), \
469-
uop_name(OPCODE), \
470-
(OPARG), \
471-
(uint64_t)(OPERAND)); \
472-
assert(reserved > 0); \
473-
reserved--; \
474-
trace[(INDEX)].opcode = (OPCODE); \
475-
trace[(INDEX)].oparg = (OPARG); \
476-
trace[(INDEX)].operand = (OPERAND);
477-
478468
// Reserve space for n uops
479469
#define RESERVE_RAW(n, opname) \
480470
if (trace_length + (n) > max_length) { \
@@ -483,7 +473,7 @@ translate_bytecode_to_trace(
483473
OPT_STAT_INC(trace_too_long); \
484474
goto done; \
485475
} \
486-
reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest
476+
reserved = (n); // Keep ADD_TO_TRACE honest
487477

488478
// Reserve space for main+stub uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
489479
#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 3, uop_name(opcode))
@@ -493,7 +483,7 @@ translate_bytecode_to_trace(
493483
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
494484
DPRINTF(2, "Trace stack overflow\n"); \
495485
OPT_STAT_INC(trace_stack_overflow); \
496-
ADD_TO_TRACE(_SET_IP, 0, 0); \
486+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
497487
goto done; \
498488
} \
499489
trace_stack[trace_stack_depth].code = code; \
@@ -513,22 +503,28 @@ translate_bytecode_to_trace(
513503
PyUnicode_AsUTF8(code->co_filename),
514504
code->co_firstlineno,
515505
2 * INSTR_IP(initial_instr, code));
516-
506+
uint32_t target = 0;
517507
top: // Jump here after _PUSH_FRAME or likely branches
518508
for (;;) {
509+
target = INSTR_IP(instr, code);
519510
RESERVE_RAW(3, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
520-
ADD_TO_TRACE(_SET_IP, INSTR_IP(instr, code), 0);
521-
ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0);
511+
ADD_TO_TRACE(_SET_IP, target, 0, target);
512+
ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
522513

523514
uint32_t opcode = instr->op.code;
524515
uint32_t oparg = instr->op.arg;
525516
uint32_t extras = 0;
526517

527-
while (opcode == EXTENDED_ARG) {
518+
519+
if (opcode == EXTENDED_ARG) {
528520
instr++;
529521
extras += 1;
530522
opcode = instr->op.code;
531523
oparg = (oparg << 8) | instr->op.arg;
524+
if (opcode == EXTENDED_ARG) {
525+
instr--;
526+
goto done;
527+
}
532528
}
533529

534530
if (opcode == ENTER_EXECUTOR) {
@@ -554,7 +550,7 @@ translate_bytecode_to_trace(
554550
DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n",
555551
uop_name(opcode), oparg,
556552
counter, bitcount, jump_likely, uop_name(uopcode));
557-
ADD_TO_TRACE(uopcode, max_length, 0);
553+
ADD_TO_TRACE(uopcode, max_length, 0, target);
558554
if (jump_likely) {
559555
_Py_CODEUNIT *target_instr = next_instr + oparg;
560556
DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n",
@@ -569,7 +565,7 @@ translate_bytecode_to_trace(
569565
{
570566
if (instr + 2 - oparg == initial_instr && code == initial_code) {
571567
RESERVE(1, 0);
572-
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0);
568+
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
573569
}
574570
else {
575571
OPT_STAT_INC(inner_loop);
@@ -653,7 +649,7 @@ translate_bytecode_to_trace(
653649
expansion->uops[i].offset);
654650
Py_FatalError("garbled expansion");
655651
}
656-
ADD_TO_TRACE(uop, oparg, operand);
652+
ADD_TO_TRACE(uop, oparg, operand, target);
657653
if (uop == _POP_FRAME) {
658654
TRACE_STACK_POP();
659655
DPRINTF(2,
@@ -682,15 +678,15 @@ translate_bytecode_to_trace(
682678
PyUnicode_AsUTF8(new_code->co_filename),
683679
new_code->co_firstlineno);
684680
OPT_STAT_INC(recursive_call);
685-
ADD_TO_TRACE(_SET_IP, 0, 0);
681+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
686682
goto done;
687683
}
688684
if (new_code->co_version != func_version) {
689685
// func.__code__ was updated.
690686
// Perhaps it may happen again, so don't bother tracing.
691687
// TODO: Reason about this -- is it better to bail or not?
692688
DPRINTF(2, "Bailing because co_version != func_version\n");
693-
ADD_TO_TRACE(_SET_IP, 0, 0);
689+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
694690
goto done;
695691
}
696692
// Increment IP to the return address
@@ -707,7 +703,7 @@ translate_bytecode_to_trace(
707703
2 * INSTR_IP(instr, code));
708704
goto top;
709705
}
710-
ADD_TO_TRACE(_SET_IP, 0, 0);
706+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
711707
goto done;
712708
}
713709
}
@@ -732,7 +728,7 @@ translate_bytecode_to_trace(
732728
assert(code == initial_code);
733729
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
734730
if (trace_length > 4) {
735-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0);
731+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
736732
DPRINTF(1,
737733
"Created a trace for %s (%s:%d) at byte offset %d -- length %d+%d\n",
738734
PyUnicode_AsUTF8(code->co_qualname),

Python/optimizer_analysis.c

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,15 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
1717
{
1818
// Note that we don't enter stubs, those SET_IPs are needed.
1919
int last_set_ip = -1;
20-
bool need_ip = true;
2120
bool maybe_invalid = false;
2221
for (int pc = 0; pc < buffer_size; pc++) {
2322
int opcode = buffer[pc].opcode;
2423
if (opcode == _SET_IP) {
25-
if (!need_ip && last_set_ip >= 0) {
26-
buffer[last_set_ip].opcode = NOP;
27-
}
28-
need_ip = false;
24+
buffer[pc].opcode = NOP;
2925
last_set_ip = pc;
3026
}
3127
else if (opcode == _CHECK_VALIDITY) {
3228
if (maybe_invalid) {
33-
/* Exiting the trace requires that IP is correct */
34-
need_ip = true;
3529
maybe_invalid = false;
3630
}
3731
else {
@@ -42,12 +36,16 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
4236
break;
4337
}
4438
else {
45-
// If opcode has ERROR or DEOPT, set need_ip to true
46-
if (_PyOpcode_opcode_metadata[opcode].flags & (HAS_ERROR_FLAG | HAS_DEOPT_FLAG) || opcode == _PUSH_FRAME) {
47-
need_ip = true;
48-
}
49-
if (_PyOpcode_opcode_metadata[opcode].flags & HAS_ESCAPES_FLAG) {
39+
if (OPCODE_HAS_ESCAPES(opcode)) {
5040
maybe_invalid = true;
41+
if (last_set_ip >= 0) {
42+
buffer[last_set_ip].opcode = _SET_IP;
43+
}
44+
}
45+
if (OPCODE_HAS_ERROR(opcode) || opcode == _PUSH_FRAME) {
46+
if (last_set_ip >= 0) {
47+
buffer[last_set_ip].opcode = _SET_IP;
48+
}
5149
}
5250
}
5351
}

Tools/cases_generator/flags.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import parsing
66
from typing import AbstractSet
77

8-
WHITELIST = (
8+
NON_ESCAPING_FUNCTIONS = (
99
"Py_INCREF",
1010
"_PyDictOrValues_IsValues",
1111
"_PyObject_DictOrValuesPointer",
@@ -31,9 +31,29 @@
3131
"_PyLong_IsNonNegativeCompact",
3232
"_PyLong_CompactValue",
3333
"_Py_NewRef",
34+
"_Py_IsImmortal",
35+
"_Py_STR",
36+
"_PyLong_Add",
37+
"_PyLong_Multiply",
38+
"_PyLong_Subtract",
39+
"Py_NewRef",
40+
"_PyList_ITEMS",
41+
"_PyTuple_ITEMS",
42+
"_PyList_AppendTakeRef",
43+
"_Py_atomic_load_uintptr_relaxed",
44+
"_PyFrame_GetCode",
45+
"_PyThreadState_HasStackSpace",
3446
)
3547

36-
def makes_escaping_api_call(instr: parsing.Node) -> bool:
48+
ESCAPING_FUNCTIONS = (
49+
"import_name",
50+
"import_from",
51+
)
52+
53+
54+
def makes_escaping_api_call(instr: parsing.InstDef) -> bool:
55+
if "CALL_INTRINSIC" in instr.name:
56+
return True;
3757
tkns = iter(instr.tokens)
3858
for tkn in tkns:
3959
if tkn.kind != lx.IDENTIFIER:
@@ -44,13 +64,17 @@ def makes_escaping_api_call(instr: parsing.Node) -> bool:
4464
return False
4565
if next_tkn.kind != lx.LPAREN:
4666
continue
67+
if tkn.text in ESCAPING_FUNCTIONS:
68+
return True
4769
if not tkn.text.startswith("Py") and not tkn.text.startswith("_Py"):
4870
continue
4971
if tkn.text.endswith("Check"):
5072
continue
73+
if tkn.text.startswith("Py_Is"):
74+
continue
5175
if tkn.text.endswith("CheckExact"):
5276
continue
53-
if tkn.text in WHITELIST:
77+
if tkn.text in NON_ESCAPING_FUNCTIONS:
5478
continue
5579
return True
5680
return False
@@ -74,7 +98,7 @@ def __post_init__(self) -> None:
7498
self.bitmask = {name: (1 << i) for i, name in enumerate(self.names())}
7599

76100
@staticmethod
77-
def fromInstruction(instr: parsing.Node) -> "InstructionFlags":
101+
def fromInstruction(instr: parsing.InstDef) -> "InstructionFlags":
78102
has_free = (
79103
variable_used(instr, "PyCell_New")
80104
or variable_used(instr, "PyCell_GET")
@@ -101,8 +125,7 @@ def fromInstruction(instr: parsing.Node) -> "InstructionFlags":
101125
or variable_used(instr, "resume_with_error")
102126
),
103127
HAS_ESCAPES_FLAG=(
104-
variable_used(instr, "tstate")
105-
or makes_escaping_api_call(instr)
128+
makes_escaping_api_call(instr)
106129
),
107130
)
108131

0 commit comments

Comments
 (0)