Skip to content

GH-118095: Handle RETURN_GENERATOR in tier 2 #118180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ static inline void _Py_LeaveRecursiveCall(void) {

extern struct _PyInterpreterFrame* _PyEval_GetFrame(void);

extern PyObject* _Py_MakeCoro(PyFunctionObject *func);
PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func);

/* Handle signals, pending calls, GIL drop request
and asynchronous exception */
Expand Down
14 changes: 12 additions & 2 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,17 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code)
return code->co_framesize - FRAME_SPECIALS_SIZE;
}

void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest);
/* Copy the interpreter frame `src` into `dest`.
 *
 * The frame structure itself is copied with a struct assignment, then the
 * localsplus slots with indices 1 .. src->stacktop - 1 are copied one by one.
 * The loop writes dest->localsplus[i] for every i < src->stacktop, so `dest`
 * must have room for that many slots.
 *
 * After the copy, dest->previous is reset to NULL.
 *
 * NOTE(review): the loop starts at index 1, presumably because the struct
 * assignment already copies localsplus[0] -- confirm against the
 * _PyInterpreterFrame definition.
 */
static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest)
{
// The stack top must be at or above the code object's co_nlocalsplus.
assert(src->stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus);
// Copy the frame structure by value.
*dest = *src;
// Copy the remaining localsplus slots below the stack top.
for (int i = 1; i < src->stacktop; i++) {
dest->localsplus[i] = src->localsplus[i];
}
// Don't leave a dangling pointer to the old frame when creating generators
// and coroutines:
dest->previous = NULL;
}

/* Consumes reference to func and locals.
Does not initialize frame->previous, which happens
Expand Down Expand Up @@ -256,7 +266,7 @@ _PyThreadState_HasStackSpace(PyThreadState *tstate, int size)
extern _PyInterpreterFrame *
_PyThreadState_PushFrame(PyThreadState *tstate, size_t size);

void _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame);
PyAPI_FUNC(void) _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame);

/* Pushes a frame without checking for space.
* Must be guarded by _PyThreadState_HasStackSpace()
Expand Down
3 changes: 2 additions & 1 deletion Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Include/internal/pycore_uop_ids.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1286,5 +1286,17 @@ def testfunc(n):
self.assertEqual(res, 32 * 32)
self.assertIsNone(ex)

def test_return_generator(self):
def gen():
yield None
def testfunc(n):
for i in range(n):
gen()
return i
res, ex = self._run_with_optimizer(testfunc, 20)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd assert the value of res as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

self.assertEqual(res, 19)
self.assertIsNotNone(ex)
self.assertIn("_RETURN_GENERATOR", get_opnames(ex))

if __name__ == "__main__":
unittest.main()
5 changes: 0 additions & 5 deletions Objects/frameobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,11 +304,6 @@ mark_stacks(PyCodeObject *code_obj, int len)
stacks[i] = UNINITIALIZED;
}
stacks[0] = EMPTY_STACK;
if (code_obj->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
{
// Generators get sent None while starting:
stacks[0] = push_value(stacks[0], Object);
}
int todo = 1;
while (todo) {
todo = 0;
Expand Down
24 changes: 7 additions & 17 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -837,12 +837,7 @@ dummy_func(
_PyFrame_StackPush(frame, retval);
LOAD_SP();
LOAD_IP(frame->return_offset);
#if LLTRACE && TIER_ONE
lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
if (lltrace < 0) {
goto exit_unwind;
}
#endif
LLTRACE_RESUME_FRAME();
}

macro(RETURN_VALUE) =
Expand Down Expand Up @@ -3186,12 +3181,7 @@ dummy_func(
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
#if LLTRACE && TIER_ONE
lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
if (lltrace < 0) {
goto exit_unwind;
}
#endif
LLTRACE_RESUME_FRAME();
}

macro(CALL_BOUND_METHOD_EXACT_ARGS) =
Expand Down Expand Up @@ -3877,7 +3867,7 @@ dummy_func(
}
}

tier1 inst(RETURN_GENERATOR, (--)) {
inst(RETURN_GENERATOR, (-- res)) {
assert(PyFunction_Check(frame->f_funcobj));
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
Expand All @@ -3887,19 +3877,19 @@ dummy_func(
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
frame->instr_ptr = next_instr;
frame->instr_ptr++;
_PyFrame_Copy(frame, gen_frame);
assert(frame->frame_obj == NULL);
gen->gi_frame_state = FRAME_CREATED;
gen_frame->owner = FRAME_OWNED_BY_GENERATOR;
_Py_LeaveRecursiveCallPy(tstate);
assert(frame != &entry_frame);
res = (PyObject *)gen;
_PyInterpreterFrame *prev = frame->previous;
_PyThreadState_PopFrame(tstate, frame);
frame = tstate->current_frame = prev;
_PyFrame_StackPush(frame, (PyObject *)gen);
LOAD_IP(frame->return_offset);
goto resume_frame;
LOAD_SP();
LLTRACE_RESUME_FRAME();
}

inst(BUILD_SLICE, (start, stop, step if (oparg == 3) -- slice)) {
Expand Down
12 changes: 12 additions & 0 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,18 @@
#define PRE_DISPATCH_GOTO() ((void)0)
#endif

/* LLTRACE_RESUME_FRAME()
 *
 * When LLTRACE is non-zero: call maybe_lltrace_resume_frame(), passing
 * `frame`, `&entry_frame` and GLOBALS(), and store the result in `lltrace`.
 * A negative result jumps to the `exit_unwind` label.
 *
 * Otherwise the macro expands to a no-op expression.
 *
 * The expansion refers to `frame`, `entry_frame`, `lltrace`, GLOBALS() and
 * the `exit_unwind` label, so all of them must be available wherever the
 * macro is used. The do { ... } while (0) wrapper makes the expansion behave
 * as a single statement.
 */
#if LLTRACE
#define LLTRACE_RESUME_FRAME() \
do { \
lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); \
if (lltrace < 0) { \
goto exit_unwind; \
} \
} while (0)
#else
#define LLTRACE_RESUME_FRAME() ((void)0)
#endif

#ifdef Py_GIL_DISABLED
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
#else
Expand Down
43 changes: 31 additions & 12 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 0 additions & 12 deletions Python/frame.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,6 @@ _PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame)
return f;
}

/* Copy the interpreter frame `src` into `dest` with a single memcpy().
 *
 * The number of bytes copied runs from the start of `src` up to the address
 * of src->localsplus[src->stacktop]. After the copy, dest->previous is reset
 * to NULL.
 */
void
_PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest)
{
// The stack top must be at or above the code object's co_nlocalsplus.
assert(src->stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus);
// Byte distance from the start of the frame to the slot at index stacktop.
Py_ssize_t size = ((char*)&src->localsplus[src->stacktop]) - (char *)src;
memcpy(dest, src, size);
// Don't leave a dangling pointer to the old frame when creating generators
// and coroutines:
dest->previous = NULL;
}


static void
take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
{
Expand Down
40 changes: 12 additions & 28 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,8 @@ translate_bytecode_to_trace(
// Reserve space for nuops (+ _SET_IP + _EXIT_TRACE)
int nuops = expansion->nuops;
RESERVE(nuops + 1); /* One extra for exit */
if (expansion->uops[nuops-1].uop == _POP_FRAME) {
int16_t last_op = expansion->uops[nuops-1].uop;
if (last_op == _POP_FRAME || last_op == _RETURN_GENERATOR) {
// Check for trace stack underflow now:
// We can't bail e.g. in the middle of
// LOAD_CONST + _POP_FRAME.
Expand Down Expand Up @@ -756,7 +757,7 @@ translate_bytecode_to_trace(
Py_FatalError("garbled expansion");
}

if (uop == _POP_FRAME) {
if (uop == _POP_FRAME || uop == _RETURN_GENERATOR) {
TRACE_STACK_POP();
/* Set the operand to the function or code object returned to,
* to assist optimization passes. (See _PUSH_FRAME below.)
Expand Down
2 changes: 1 addition & 1 deletion Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
static PyCodeObject *
get_code(_PyUOpInstruction *op)
{
assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME || op->opcode == _RETURN_GENERATOR);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit inelegant that all places that treat _PUSH_FRAME and _POP_FRAME specially now also have to check for _RETURN_GENERATOR. Not sure what to do about it. :-(

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once #118095 is done we can add a flag for all uops that push or pop frames.

PyCodeObject *co = NULL;
uint64_t operand = op->operand;
if (operand == 0) {
Expand Down
Loading
Loading