diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 942d8b107a7daf..454c8dde031ff4 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -95,8 +95,10 @@ backoff_counter_triggers(_Py_BackoffCounter counter) return counter.value_and_backoff < UNREACHABLE_BACKOFF; } -/* Initial JUMP_BACKWARD counter. - * This determines when we create a trace for a loop. */ +// Initial JUMP_BACKWARD counter. +// Must be larger than ADAPTIVE_COOLDOWN_VALUE, otherwise when JIT code is +// invalidated we may construct a new trace before the bytecode has properly +// re-specialized: #define JUMP_BACKWARD_INITIAL_VALUE 4095 #define JUMP_BACKWARD_INITIAL_BACKOFF 12 static inline _Py_BackoffCounter diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 37a747aa4e3e46..439989c60f6f24 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -451,6 +451,9 @@ write_location_entry_start(uint8_t *ptr, int code, int length) #define ADAPTIVE_COOLDOWN_BACKOFF 0 // Can't assert this in pycore_backoff.h because of header order dependencies +#if JUMP_BACKWARD_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE +# error "JIT threshold value should be larger than adaptive cooldown value" +#endif #if SIDE_EXIT_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE # error "Cold exit value should be larger than adaptive cooldown value" #endif diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 4c270211d4c323..d6c2ba59db1eda 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -209,129 +209,131 @@ extern "C" { #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_INLINE 446 #define _LOAD_CONST_INLINE_BORROW 447 +#define _LOAD_CONST_UNDER_INLINE 448 +#define _LOAD_CONST_UNDER_INLINE_BORROW 449 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 448 -#define _LOAD_FAST_0 449 -#define _LOAD_FAST_1 450 -#define _LOAD_FAST_2 451 -#define _LOAD_FAST_3 452 -#define _LOAD_FAST_4 453 -#define _LOAD_FAST_5 454 -#define _LOAD_FAST_6 455 -#define _LOAD_FAST_7 456 +#define _LOAD_FAST 450 +#define _LOAD_FAST_0 451 +#define _LOAD_FAST_1 452 +#define _LOAD_FAST_2 453 +#define _LOAD_FAST_3 454 +#define _LOAD_FAST_4 455 +#define _LOAD_FAST_5 456 +#define _LOAD_FAST_6 457 +#define _LOAD_FAST_7 458 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 457 -#define _LOAD_FAST_BORROW_0 458 -#define _LOAD_FAST_BORROW_1 459 -#define _LOAD_FAST_BORROW_2 460 -#define _LOAD_FAST_BORROW_3 461 -#define _LOAD_FAST_BORROW_4 462 -#define _LOAD_FAST_BORROW_5 463 -#define _LOAD_FAST_BORROW_6 464 -#define _LOAD_FAST_BORROW_7 465 +#define _LOAD_FAST_BORROW 459 +#define _LOAD_FAST_BORROW_0 460 +#define _LOAD_FAST_BORROW_1 461 +#define _LOAD_FAST_BORROW_2 462 +#define _LOAD_FAST_BORROW_3 463 +#define _LOAD_FAST_BORROW_4 464 +#define _LOAD_FAST_BORROW_5 465 +#define _LOAD_FAST_BORROW_6 466 +#define _LOAD_FAST_BORROW_7 467 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 466 -#define _LOAD_GLOBAL_BUILTINS 467 -#define _LOAD_GLOBAL_MODULE 468 +#define _LOAD_GLOBAL 468 +#define _LOAD_GLOBAL_BUILTINS 469 +#define _LOAD_GLOBAL_MODULE 470 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 469 -#define _LOAD_SMALL_INT_0 470 -#define _LOAD_SMALL_INT_1 471 -#define _LOAD_SMALL_INT_2 472 -#define _LOAD_SMALL_INT_3 473 -#define _LOAD_SPECIAL 474 +#define _LOAD_SMALL_INT 471 +#define _LOAD_SMALL_INT_0 472 +#define _LOAD_SMALL_INT_1 473 +#define _LOAD_SMALL_INT_2 474 +#define _LOAD_SMALL_INT_3 475 +#define _LOAD_SPECIAL 476 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 475 +#define _MAKE_CALLARGS_A_TUPLE 477 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 476 +#define _MAKE_WARM 478 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 477 -#define _MAYBE_EXPAND_METHOD_KW 478 -#define _MONITOR_CALL 479 -#define _MONITOR_CALL_KW 480 -#define _MONITOR_JUMP_BACKWARD 481 -#define _MONITOR_RESUME 482 +#define _MAYBE_EXPAND_METHOD 479 +#define _MAYBE_EXPAND_METHOD_KW 480 +#define _MONITOR_CALL 481 +#define _MONITOR_CALL_KW 482 +#define _MONITOR_JUMP_BACKWARD 483 +#define _MONITOR_RESUME 484 #define _NOP NOP -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 483 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 485 #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 484 -#define _POP_JUMP_IF_TRUE 485 +#define _POP_JUMP_IF_FALSE 486 +#define _POP_JUMP_IF_TRUE 487 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 486 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 487 -#define _POP_TWO 488 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 489 +#define _POP_TOP_LOAD_CONST_INLINE 488 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 489 +#define _POP_TWO 490 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 491 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 490 +#define _PUSH_FRAME 492 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 491 -#define _PY_FRAME_GENERAL 492 -#define _PY_FRAME_KW 493 -#define _QUICKEN_RESUME 494 -#define _REPLACE_WITH_TRUE 495 +#define _PUSH_NULL_CONDITIONAL 493 +#define _PY_FRAME_GENERAL 494 +#define _PY_FRAME_KW 495 +#define _QUICKEN_RESUME 496 +#define _REPLACE_WITH_TRUE 497 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 496 -#define _SEND 497 -#define _SEND_GEN_FRAME 498 +#define _SAVE_RETURN_OFFSET 498 +#define _SEND 499 +#define _SEND_GEN_FRAME 500 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 499 -#define _STORE_ATTR 500 -#define _STORE_ATTR_INSTANCE_VALUE 501 -#define _STORE_ATTR_SLOT 502 -#define _STORE_ATTR_WITH_HINT 503 +#define _START_EXECUTOR 501 +#define _STORE_ATTR 502 +#define _STORE_ATTR_INSTANCE_VALUE 503 +#define _STORE_ATTR_SLOT 504 +#define _STORE_ATTR_WITH_HINT 505 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 504 -#define _STORE_FAST_0 505 -#define _STORE_FAST_1 506 -#define _STORE_FAST_2 507 -#define _STORE_FAST_3 508 -#define _STORE_FAST_4 509 -#define _STORE_FAST_5 510 -#define _STORE_FAST_6 511 -#define _STORE_FAST_7 512 +#define _STORE_FAST 506 +#define _STORE_FAST_0 507 +#define _STORE_FAST_1 508 +#define _STORE_FAST_2 509 +#define _STORE_FAST_3 510 +#define _STORE_FAST_4 511 +#define _STORE_FAST_5 512 +#define _STORE_FAST_6 513 +#define _STORE_FAST_7 514 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 513 -#define _STORE_SUBSCR 514 -#define _STORE_SUBSCR_DICT 515 -#define _STORE_SUBSCR_LIST_INT 516 +#define _STORE_SLICE 515 +#define _STORE_SUBSCR 516 +#define _STORE_SUBSCR_DICT 517 +#define _STORE_SUBSCR_LIST_INT 518 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 517 -#define _TO_BOOL 518 +#define _TIER2_RESUME_CHECK 519 +#define _TO_BOOL 520 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 519 +#define _TO_BOOL_LIST 521 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 520 +#define _TO_BOOL_STR 522 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 521 -#define _UNPACK_SEQUENCE_LIST 522 -#define _UNPACK_SEQUENCE_TUPLE 523 -#define _UNPACK_SEQUENCE_TWO_TUPLE 524 +#define _UNPACK_SEQUENCE 523 +#define _UNPACK_SEQUENCE_LIST 524 +#define _UNPACK_SEQUENCE_TUPLE 525 +#define _UNPACK_SEQUENCE_TWO_TUPLE 526 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 524 +#define MAX_UOP_ID 526 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fc2c4c2924d9ad..725238228a3dbc 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -306,6 +306,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_CONST_UNDER_INLINE] = 0, + [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_START_EXECUTOR] = 0, [_MAKE_WARM] = 0, @@ -504,6 +506,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_CONST] = "_LOAD_CONST", [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", + [_LOAD_CONST_UNDER_INLINE] = "_LOAD_CONST_UNDER_INLINE", + [_LOAD_CONST_UNDER_INLINE_BORROW] = "_LOAD_CONST_UNDER_INLINE_BORROW", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", [_LOAD_FAST_0] = "_LOAD_FAST_0", @@ -1196,6 +1200,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: return 4; + case _LOAD_CONST_UNDER_INLINE: + return 1; + case _LOAD_CONST_UNDER_INLINE_BORROW: + return 1; case _CHECK_FUNCTION: return 0; case _START_EXECUTOR: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2b777acb1ec4fd..0bc0e1b212b6b8 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1280,8 +1280,8 @@ class Bar: self.assertIsNotNone(ex) self.assertEqual(res, TIER2_THRESHOLD * 6 + 1) call = opnames.index("_CALL_BUILTIN_FAST") - load_attr_top = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", 0, call) - load_attr_bottom = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", call) + load_attr_top = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", 0, call) + load_attr_bottom = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", call) self.assertEqual(opnames[:load_attr_top].count("_GUARD_TYPE_VERSION"), 1) self.assertEqual(opnames[call:load_attr_bottom].count("_CHECK_VALIDITY"), 2) @@ -1303,8 +1303,8 @@ class Foo: self.assertIsNotNone(ex) self.assertEqual(res, TIER2_THRESHOLD * 2) call = opnames.index("_CALL_BUILTIN_FAST_WITH_KEYWORDS") - load_attr_top = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", 0, call) - load_attr_bottom = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", call) + load_attr_top = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", 0, call) + load_attr_bottom = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", call) self.assertEqual(opnames[:load_attr_top].count("_GUARD_TYPE_VERSION"), 1) self.assertEqual(opnames[call:load_attr_bottom].count("_CHECK_VALIDITY"), 2) @@ -2169,6 +2169,45 @@ def testfunc(n): self.assertNotIn("_LOAD_SMALL_INT", uops) self.assertIn("_LOAD_CONST_INLINE_BORROW", uops) + def test_cached_attributes(self): + class C: + A = 1 + def m(self): + return 1 + class D: + __slots__ = () + A = 1 + def m(self): + return 1 + class E(Exception): + def m(self): + return 1 + def f(n): + x = 0 + c = C() + d = D() + e = E() + for _ in range(n): + x += C.A # _LOAD_ATTR_CLASS + x += c.A # _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES + x += d.A # _LOAD_ATTR_NONDESCRIPTOR_NO_DICT + x += c.m() # _LOAD_ATTR_METHOD_WITH_VALUES + x += d.m() # _LOAD_ATTR_METHOD_NO_DICT + x += e.m() # _LOAD_ATTR_METHOD_LAZY_DICT + return x + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, 6 * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_LOAD_ATTR_CLASS", uops) + self.assertNotIn("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", uops) + self.assertNotIn("_LOAD_ATTR_NONDESCRIPTOR_NO_DICT", uops) + self.assertNotIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + self.assertNotIn("_LOAD_ATTR_METHOD_NO_DICT", uops) + self.assertNotIn("_LOAD_ATTR_METHOD_LAZY_DICT", uops) + + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst new file mode 100644 index 00000000000000..8eb8782037abfa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst @@ -0,0 +1,2 @@ +Improve the JIT's ability to optimize away cached class attribute and method +loads. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a2367026cde8b8..652bda9c182e49 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5307,6 +5307,18 @@ dummy_func( value = PyStackRef_FromPyObjectBorrow(ptr); } + tier2 op(_LOAD_CONST_UNDER_INLINE, (ptr/4, old -- value, new)) { + new = old; + DEAD(old); + value = PyStackRef_FromPyObjectNew(ptr); + } + + tier2 op(_LOAD_CONST_UNDER_INLINE_BORROW, (ptr/4, old -- value, new)) { + new = old; + DEAD(old); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { assert(PyStackRef_FunctionCheck(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1c8239f38eec91..fcde31a30126a4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7105,6 +7105,36 @@ break; } + case _LOAD_CONST_UNDER_INLINE: { + _PyStackRef old; + _PyStackRef value; + _PyStackRef new; + old = stack_pointer[-1]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + new = old; + value = PyStackRef_FromPyObjectNew(ptr); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE_BORROW: { + _PyStackRef old; + _PyStackRef value; + _PyStackRef new; + old = stack_pointer[-1]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + new = old; + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { uint32_t func_version = (uint32_t)CURRENT_OPERAND0(); assert(PyStackRef_FunctionCheck(frame->f_funcobj)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 53ab289b75cc9a..5c50228a13b2d1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -375,6 +375,23 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) } } +static JitOptSymbol * +lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr, + PyTypeObject *type, PyObject *name, uint16_t immortal, + uint16_t mortal) +{ + // The cached value may be dead, so we need to do the lookup again... :( + if (type && PyType_Check(type)) { + PyObject *lookup = _PyType_Lookup(type, name); + if (lookup) { + int opcode = _Py_IsImmortal(lookup) ? immortal : mortal; + REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup); + return sym_new_const(ctx, lookup); + } + } + return sym_new_not_null(ctx); +} + /* _PUSH_FRAME/_RETURN_VALUE's operand can be 0, a PyFunctionObject *, or a * PyCodeObject *. Retrieve the code object if possible. */ @@ -527,6 +544,8 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_COPY] = _NOP, [_LOAD_CONST_INLINE] = _NOP, [_LOAD_CONST_INLINE_BORROW] = _NOP, + [_LOAD_CONST_UNDER_INLINE] = _POP_TOP_LOAD_CONST_INLINE, + [_LOAD_CONST_UNDER_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, [_LOAD_FAST] = _NOP, [_LOAD_FAST_BORROW] = _NOP, [_LOAD_SMALL_INT] = _NOP, @@ -535,10 +554,16 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, }; +const bool op_skip[MAX_UOP_ID + 1] = { + [_NOP] = true, + [_CHECK_VALIDITY] = true, +}; + const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_TOP] = _NOP, [_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, + [_POP_TWO] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; @@ -578,7 +603,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _NOP + _POP_TOP + _NOP while (op_without_pop[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; - while (last->opcode == _NOP) { + while (op_skip[last->opcode]) { last--; } if (!op_without_push[last->opcode]) { @@ -586,6 +611,10 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } last->opcode = op_without_push[last->opcode]; opcode = buffer[pc].opcode = op_without_pop[opcode]; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; + } } /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index b9ebd8678e0f1e..0b6bbd133d6ac9 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -522,7 +522,7 @@ dummy_func(void) { } op(_LOAD_CONST, (-- value)) { - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = sym_new_const(ctx, val); } @@ -608,7 +608,7 @@ dummy_func(void) { op(_LOAD_ATTR, (owner -- attr, self_or_null[oparg&1])) { (void)owner; attr = sym_new_not_null(ctx); - if (oparg &1) { + if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } } @@ -624,25 +624,59 @@ dummy_func(void) { } op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr)) { - attr = sym_new_not_null(ctx); (void)descr; + PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); + } + + op(_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, (descr/4, owner -- attr)) { + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); + } + + op(_LOAD_ATTR_NONDESCRIPTOR_NO_DICT, (descr/4, owner -- attr)) { + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); } op(_LOAD_ATTR_METHOD_WITH_VALUES, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } op(_LOAD_ATTR_METHOD_NO_DICT, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } op(_LOAD_ATTR_METHOD_LAZY_DICT, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0ba45e1f58fe08..5a9fcf3b1b6924 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -68,7 +68,7 @@ case _LOAD_CONST: { JitOptSymbol *value; - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = sym_new_const(ctx, val); stack_pointer[0] = value; @@ -1174,7 +1174,7 @@ self_or_null = &stack_pointer[0]; (void)owner; attr = sym_new_not_null(ctx); - if (oparg &1) { + if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } stack_pointer[-1] = attr; @@ -1284,10 +1284,16 @@ } case _LOAD_ATTR_CLASS: { + JitOptSymbol *owner; JitOptSymbol *attr; + owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; - attr = sym_new_not_null(ctx); (void)descr; + PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } @@ -1701,7 +1707,11 @@ owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -1717,7 +1727,11 @@ owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -1727,15 +1741,31 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + JitOptSymbol *owner; JitOptSymbol *attr; - attr = sym_new_not_null(ctx); + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand0; + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + JitOptSymbol *owner; JitOptSymbol *attr; - attr = sym_new_not_null(ctx); + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand0; + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } @@ -1751,7 +1781,11 @@ owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -2594,6 +2628,30 @@ break; } + case _LOAD_CONST_UNDER_INLINE: { + JitOptSymbol *value; + JitOptSymbol *new; + value = sym_new_not_null(ctx); + new = sym_new_not_null(ctx); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE_BORROW: { + JitOptSymbol *value; + JitOptSymbol *new; + value = sym_new_not_null(ctx); + new = sym_new_not_null(ctx); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 2e619fa6f9977f..25de5d83166f64 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -13,22 +13,46 @@ #include #include -/* Symbols - ======= - - See the diagram at - https://github.com/faster-cpython/ideas/blob/main/3.13/redundancy_eliminator.md - - We represent the nodes in the diagram as follows - (the flag bits are only defined in optimizer_symbols.c): - - Top: no flag bits, typ and const_val are NULL. - - NULL: IS_NULL flag set, type and const_val NULL. - - Not NULL: NOT_NULL flag set, type and const_val NULL. - - None/not None: not used. (None could be represented as any other constant.) - - Known type: NOT_NULL flag set and typ set; const_val is NULL. - - Known constant: NOT_NULL flag set, type set, const_val set. - - Bottom: IS_NULL and NOT_NULL flags set, type and const_val NULL. - */ +/* + +Symbols +======= + +https://github.com/faster-cpython/ideas/blob/main/3.13/redundancy_eliminator.md + +Logically, all symbols begin as UNKNOWN, and can transition downwards along the +edges of the lattice, but *never* upwards (see the diagram below). The UNKNOWN +state represents no information, and the BOTTOM state represents contradictory +information. Though symbols logically progress through all intermediate nodes, +we often skip in-between states for convenience: + + UNKNOWN + | | +NULL | +| | <- Anything below this level is an object. +| NON_NULL +| | | <- Anything below this level has a known type version. +| TYPE_VERSION | +| | | <- Anything below this level has a known type. +| KNOWN_CLASS | +| | | | <- Anything below this level has a known truthiness. +| | | TRUTHINESS +| | | | +| TUPLE | | +| | | | <- Anything below this level is a known constant. +| KNOWN_VALUE +| | <- Anything below this level is unreachable. +BOTTOM + +For example, after guarding that the type of an UNKNOWN local is int, we can +narrow the symbol to KNOWN_CLASS (logically progressing though NON_NULL and +TYPE_VERSION to get there). Later, we may learn that it is falsey based on the +result of a truth test, which would allow us to narrow the symbol to KNOWN_VALUE +(with a value of integer zero). If at any point we encounter a float guard on +the same symbol, that would be a contradiction, and the symbol would be set to +BOTTOM (indicating that the code is unreachable). + +*/ #ifdef Py_DEBUG static inline int get_lltrace(void) { @@ -420,7 +444,6 @@ _Py_uop_sym_get_type(JitOptSymbol *sym) JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: - case JIT_SYM_TYPE_VERSION_TAG: case JIT_SYM_BOTTOM_TAG: case JIT_SYM_NON_NULL_TAG: case JIT_SYM_UNKNOWN_TAG: @@ -429,6 +452,8 @@ _Py_uop_sym_get_type(JitOptSymbol *sym) return sym->cls.type; case JIT_SYM_KNOWN_VALUE_TAG: return Py_TYPE(sym->value.value); + case JIT_SYM_TYPE_VERSION_TAG: + return _PyType_LookupByVersion(sym->version.version); case JIT_SYM_TUPLE_TAG: return &PyTuple_Type; case JIT_SYM_TRUTHINESS_TAG: @@ -464,21 +489,7 @@ _Py_uop_sym_get_type_version(JitOptSymbol *sym) bool _Py_uop_sym_has_type(JitOptSymbol *sym) { - JitSymType tag = sym->tag; - switch(tag) { - case JIT_SYM_NULL_TAG: - case JIT_SYM_TYPE_VERSION_TAG: - case JIT_SYM_BOTTOM_TAG: - case JIT_SYM_NON_NULL_TAG: - case JIT_SYM_UNKNOWN_TAG: - return false; - case JIT_SYM_KNOWN_CLASS_TAG: - case JIT_SYM_KNOWN_VALUE_TAG: - case JIT_SYM_TUPLE_TAG: - case JIT_SYM_TRUTHINESS_TAG: - return true; - } - Py_UNREACHABLE(); + return _Py_uop_sym_get_type(sym) != NULL; } bool @@ -576,7 +587,7 @@ _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item) else if (sym->tag == JIT_SYM_TUPLE_TAG && item < sym->tuple.length) { return allocation_base(ctx) + sym->tuple.items[item]; } - return _Py_uop_sym_new_unknown(ctx); + return _Py_uop_sym_new_not_null(ctx); } int @@ -863,6 +874,11 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43, "tuple item does not match value used to create tuple" ); + sym = _Py_uop_sym_new_type(ctx, &PyTuple_Type); + TEST_PREDICATE( + _Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, sym, 42)), + "Unknown tuple item is not narrowed to non-NULL" + ); JitOptSymbol *value = _Py_uop_sym_new_type(ctx, &PyBool_Type); sym = _Py_uop_sym_new_truthiness(ctx, value, false); TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean");