-
-
Notifications
You must be signed in to change notification settings - Fork 32k
gh-106581: Split CALL_PY_EXACT_ARGS
into uops
#107760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
56133bb
907ff95
2c6be6d
6d78ff2
0d8e66c
b75f30e
61c2822
f73ea90
12910fc
2fafa2c
2717b07
e487908
1d549af
f40fb1f
4f6f8f8
6facc8d
94630d4
cf8e2c0
1e62876
05af848
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -957,13 +957,13 @@ dummy_func( | |
{ | ||
PyGenObject *gen = (PyGenObject *)receiver; | ||
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; | ||
frame->return_offset = oparg; | ||
STACK_SHRINK(1); | ||
_PyFrame_StackPush(gen_frame, v); | ||
gen->gi_frame_state = FRAME_EXECUTING; | ||
gen->gi_exc_state.previous_item = tstate->exc_info; | ||
tstate->exc_info = &gen->gi_exc_state; | ||
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND); | ||
frame->return_offset = oparg; | ||
DISPATCH_INLINED(gen_frame); | ||
} | ||
if (Py_IsNone(v) && PyIter_Check(receiver)) { | ||
|
@@ -996,13 +996,13 @@ dummy_func( | |
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND); | ||
STAT_INC(SEND, hit); | ||
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; | ||
frame->return_offset = oparg; | ||
STACK_SHRINK(1); | ||
_PyFrame_StackPush(gen_frame, v); | ||
gen->gi_frame_state = FRAME_EXECUTING; | ||
gen->gi_exc_state.previous_item = tstate->exc_info; | ||
tstate->exc_info = &gen->gi_exc_state; | ||
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND); | ||
frame->return_offset = oparg; | ||
DISPATCH_INLINED(gen_frame); | ||
} | ||
|
||
|
@@ -2588,14 +2588,14 @@ dummy_func( | |
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); | ||
STAT_INC(FOR_ITER, hit); | ||
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; | ||
frame->return_offset = oparg; | ||
_PyFrame_StackPush(gen_frame, Py_None); | ||
gen->gi_frame_state = FRAME_EXECUTING; | ||
gen->gi_exc_state.previous_item = tstate->exc_info; | ||
tstate->exc_info = &gen->gi_exc_state; | ||
SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); | ||
assert(next_instr[oparg].op.code == END_FOR || | ||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR); | ||
frame->return_offset = oparg; | ||
DISPATCH_INLINED(gen_frame); | ||
} | ||
|
||
|
@@ -2950,32 +2950,71 @@ dummy_func( | |
GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); | ||
} | ||
|
||
inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { | ||
ASSERT_KWNAMES_IS_NULL(); | ||
op(_CHECK_PEP_523, (--)) { | ||
DEOPT_IF(tstate->interp->eval_frame, CALL); | ||
int argcount = oparg; | ||
if (self_or_null != NULL) { | ||
args--; | ||
argcount++; | ||
} | ||
} | ||
|
||
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { | ||
ASSERT_KWNAMES_IS_NULL(); | ||
DEOPT_IF(!PyFunction_Check(callable), CALL); | ||
PyFunctionObject *func = (PyFunctionObject *)callable; | ||
DEOPT_IF(func->func_version != func_version, CALL); | ||
PyCodeObject *code = (PyCodeObject *)func->func_code; | ||
DEOPT_IF(code->co_argcount != argcount, CALL); | ||
DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); | ||
} | ||
|
||
op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { | ||
PyFunctionObject *func = (PyFunctionObject *)callable; | ||
PyCodeObject *code = (PyCodeObject *)func->func_code; | ||
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); | ||
} | ||
|
||
op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { | ||
int argcount = oparg; | ||
if (self_or_null != NULL) { | ||
args--; | ||
argcount++; | ||
} | ||
STAT_INC(CALL, hit); | ||
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
|
||
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); | ||
PyFunctionObject *func = (PyFunctionObject *)callable; | ||
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); | ||
for (int i = 0; i < argcount; i++) { | ||
new_frame->localsplus[i] = args[i]; | ||
} | ||
// Manipulate stack directly since we leave using DISPATCH_INLINED(). | ||
STACK_SHRINK(oparg + 2); | ||
SKIP_OVER(INLINE_CACHE_ENTRIES_CALL); | ||
} | ||
|
||
// The 'unused' output effect represents the return value | ||
// (which will be pushed when the frame returns). | ||
// It is needed so CALL_PY_EXACT_ARGS matches its family. | ||
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) {
SAVE_FRAME_STATE(); // Equivalent to frame->prev_instr = next_instr - 1; _PyFrame_SetStackPointer(frame, stack_pointer);
frame->return_offset = 0;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
if (_Py_EnterRecursivePy(tstate)) {
goto exit_unwind;
}
START_FRAME(); // Equivalent to next_instr = frame->prev_instr + 1; stack_pointer =
stack_pointer = _PyFrame_GetStackPointer(frame);
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For example: 2b3e6f2 In which case the code generators needs to know to push the temporary stack values to the real stack before There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have to study this more. A problem is that the Tier 1 and Tier 2 versions of
I'm not sure yet what you mean with your last remark about pushing temp stack values. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comparing carefully the two versions of In Tier 1:
In Tier 2:
Diff: @@ -1,8 +1,9 @@
frame->return_offset = 0;
assert(tstate->interp->eval_frame == NULL);
_PyFrame_SetStackPointer(frame, stack_pointer);
- frame->prev_instr = next_instr - 1;
+ frame->prev_instr -= 1;
(NEW_FRAME)->previous = frame;
- frame = cframe.current_frame = (NEW_FRAME);
+ frame = tstate->cframe->current_frame = (NEW_FRAME);
CALL_STAT_INC(inlined_py_calls);
- goto start_frame;
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; Note that the Tier 2 version must be preceded by a
which would reduce the special-casing in the code generator a bit (it would still need to do something special for The second diff chunk relates to how we set The third and final diff chunk relates to really start using the new frame. In Tier 1, this must actually do the following:
This is done by the code at In Tier 2 there is no (More later.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's another thing though, and I think that is what Mark meant. In Tier 1 the code generation for macros is special-cased for But this is really ugly and unprincipled, and the logic is much hairier than the other special cases for
The last 4 lines here, starting with I'll mull this over some more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I have addressed this. @markshannon Please have another look. Assuming the tests look okay I'll un-draft this. |
||
// Write it out explicitly because it's subtly different. | ||
// Eventually this should be the only occurrence of this code. | ||
frame->return_offset = 0; | ||
DISPATCH_INLINED(new_frame); | ||
assert(tstate->interp->eval_frame == NULL); | ||
SAVE_FRAME_STATE(); // Signals to the code generator | ||
new_frame->previous = frame; | ||
CALL_STAT_INC(inlined_py_calls); | ||
#if TIER_ONE | ||
frame = cframe.current_frame = new_frame; | ||
goto start_frame; | ||
#endif | ||
#if TIER_TWO | ||
frame = tstate->cframe->current_frame = new_frame; | ||
ERROR_IF(_Py_EnterRecursivePy(tstate), exit_unwind); | ||
stack_pointer = _PyFrame_GetStackPointer(frame); | ||
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; | ||
#endif | ||
} | ||
|
||
macro(CALL_PY_EXACT_ARGS) = | ||
unused/1 + // Skip over the counter | ||
_CHECK_PEP_523 + | ||
_CHECK_FUNCTION_EXACT_ARGS + | ||
_CHECK_STACK_SPACE + | ||
_INIT_CALL_PY_EXACT_ARGS + | ||
SAVE_IP + // Tier 2 only; special-cased oparg | ||
_PUSH_FRAME; | ||
|
||
inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { | ||
ASSERT_KWNAMES_IS_NULL(); | ||
DEOPT_IF(tstate->interp->eval_frame, CALL); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,11 +103,16 @@ | |
DISPATCH_GOTO(); \ | ||
} | ||
|
||
#define SAVE_FRAME_STATE() \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See my comment below about splitting this into |
||
do { \ | ||
frame->prev_instr = next_instr - 1; \ | ||
_PyFrame_SetStackPointer(frame, stack_pointer); \ | ||
} while (0) | ||
|
||
#define DISPATCH_INLINED(NEW_FRAME) \ | ||
do { \ | ||
assert(tstate->interp->eval_frame == NULL); \ | ||
_PyFrame_SetStackPointer(frame, stack_pointer); \ | ||
frame->prev_instr = next_instr - 1; \ | ||
SAVE_FRAME_STATE(); \ | ||
(NEW_FRAME)->previous = frame; \ | ||
frame = cframe.current_frame = (NEW_FRAME); \ | ||
CALL_STAT_INC(inlined_py_calls); \ | ||
|
@@ -364,3 +369,8 @@ static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = { | |
#else | ||
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) | ||
#endif | ||
|
||
static inline int _Py_EnterRecursivePy(PyThreadState *tstate) { | ||
return (tstate->py_recursion_remaining-- <= 0) && | ||
_Py_CheckRecursiveCallPy(tstate); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,14 @@ | |
#undef ENABLE_SPECIALIZATION | ||
#define ENABLE_SPECIALIZATION 0 | ||
|
||
#undef SAVE_FRAME_STATE | ||
#define SAVE_FRAME_STATE() \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than a macro, I think the code generator needs to understand this. Given that In general, we want to avoid macros in the generated C code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, there are already many macros (and static inline functions) in the generated code. The generator recognizes the presence of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are proposing that the macro expansion for
where
Or we could special-case its expansion in the generator, potayto-potato. But it has to differ between tiers because in Tier 1 it must store next_instr whereas in Tier 2 it must rely on the preceding SAVE_IP to set The If I can get this to work I'll apply it and merge the PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did get this working (see 05af848), and will test and benchmark it before merging it. Note that there are still some |
||
do { \ | ||
/* Assume preceding SAVE_IP has set frame->prev_instr */ \ | ||
frame->prev_instr--; \ | ||
_PyFrame_SetStackPointer(frame, stack_pointer); \ | ||
} while (0) | ||
|
||
|
||
_PyInterpreterFrame * | ||
_PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer) | ||
|
@@ -81,6 +89,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject | |
OBJECT_STAT_INC(optimization_uops_executed); | ||
switch (opcode) { | ||
|
||
#define TIER_TWO 2 | ||
#include "executor_cases.c.h" | ||
|
||
default: | ||
|
@@ -106,6 +115,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject | |
pop_2_error: | ||
STACK_SHRINK(1); | ||
pop_1_error: | ||
pop_1_exit_unwind: | ||
STACK_SHRINK(1); | ||
error: | ||
// On ERROR_IF we return NULL as the frame. | ||
|
Uh oh!
There was an error while loading. Please reload this page.