diff --git a/.gitattributes b/.gitattributes
index 13289182400109..35d81d575bf1e1 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -83,6 +83,7 @@ Parser/token.c generated
 Programs/test_frozenmain.h generated
 Python/Python-ast.c generated
 Python/generated_cases.c.h generated
+Python/tier2_typepropagator.c.h generated
 Python/opcode_targets.h generated
 Python/stdlib_module_names.h generated
 Tools/peg_generator/pegen/grammar_parser.py generated
diff --git a/CS4215.md b/CS4215.md
new file mode 100644
index 00000000000000..a1e03357240abf
--- /dev/null
+++ b/CS4215.md
@@ -0,0 +1,40 @@
+# A Lazy Basic Block Versioning Interpreter for CPython
+
+# Build instructions
+
+You should follow the official CPython build instructions for your platform:
+https://devguide.python.org/getting-started/setup-building/
+
+There is one major difference: you must have a pre-existing Python installation,
+preferably Python 3.9 or higher. On macOS/Unix systems, that Python installation
+*must* be available as `python3`.
+
+The main reason for this requirement is that Python is used to bootstrap the compilation
+of Python. Since our interpreter is unable to run a large part of the Python
+language, it cannot be used as the bootstrap Python.
+
+During the build, errors may be printed and the build may fail partway. However,
+the final Python executable should still be generated.
+
+# Where are files located?
+
+The majority of the changes and functionality are in `Python/tier2.c`, where Doxygen documentation
+is written alongside the code, and in `Tools/cases_generator/`, which contains the DSL implementation.
+
+# Running tests
+
+We've written simple tests of the main functionality.
+Unfortunately, we did not have time to write comprehensive tests, and that did not seem worthwhile given the experimental nature of this project.
+
+After building, run `python tier2_test.py` in the repository's root folder.
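To inspect what the tier 2 interpreter generated, this patch also adds a `tier2` flag to the `dis` module and a `_co_code_tier2` view on code objects (see the `Lib/dis.py` and `Objects/codeobject.c` changes below). The following is a minimal sketch of how they might be used together; the warmup behaviour (the counter starts at -64 and is bumped on backward jumps) and the amount of work needed before basic blocks are generated are assumptions, not guarantees.

```python
import dis

def f(n):
    total = 0
    for i in range(n):
        total += i
    return total

# Warm the function up: JUMP_BACKWARD bumps the per-code-object tier 2
# warmup counter (initialised to -64), and basic blocks are generated
# lazily once the code object is considered hot. The iteration counts
# used here are assumptions.
for _ in range(200):
    f(100)

code = f.__code__
if code._co_code_tier2:
    # Disassemble the generated tier 2 basic-block space instead of the
    # tier 1 adaptive bytecode.
    dis.dis(f, tier2=True)
else:
    print("no tier 2 code generated yet")
```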
+ +# Debugging output + +In `tier2.c`, two flags can be set to print debug messages: +```c +// Prints codegen debug messages +#define BB_DEBUG 0 + +// Prints typeprop debug messages +#define TYPEPROP_DEBUG 0 +``` \ No newline at end of file diff --git a/Include/cpython/code.h b/Include/cpython/code.h index abcf1250603dfe..02898cec582f91 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -56,6 +56,95 @@ typedef struct { PyObject *_co_freevars; } _PyCoCached; +// TYPENODE is a tagged pointer that uses the last 2 LSB as the tag +#define _Py_TYPENODE_t uintptr_t + +// TYPENODE Tags +typedef enum _Py_TypeNodeTags { + // Node is unused + TYPE_NULL = 0, + // TYPE_ROOT can point to a PyTypeObject or be a NULL + TYPE_ROOT = 1, + // TYPE_REF points to a TYPE_ROOT or a TYPE_REF + TYPE_REF = 2 +} _Py_TypeNodeTags; + +#define _Py_TYPENODE_GET_TAG(typenode) ((typenode) & (0b11)) +#define _Py_TYPENODE_CLEAR_TAG(typenode) ((typenode) & (~(uintptr_t)(0b11))) + +#define _Py_TYPENODE_MAKE_ROOT(ptr) (_Py_TYPENODE_CLEAR_TAG(ptr) | TYPE_ROOT) +#define _Py_TYPENODE_MAKE_REF(ptr) (_Py_TYPENODE_CLEAR_TAG(ptr) | TYPE_REF) + +#define _Py_TYPENODE_NULL 0 +#define _Py_TYPENODE_NULLROOT _Py_TYPENODE_MAKE_ROOT(_Py_TYPENODE_NULL) + +// Tier 2 types meta interpreter +typedef struct _PyTier2TypeContext { + // points into type_stack, points to one element after the stack + _Py_TYPENODE_t *type_stack_ptr; + int type_locals_len; + int type_stack_len; + _Py_TYPENODE_t *type_stack; + _Py_TYPENODE_t *type_locals; +} _PyTier2TypeContext; + +// Tier 2 interpreter information +typedef struct _PyTier2BBMetadata { + // Index into _PyTier2Info->bb_data + int id; + _PyTier2TypeContext *type_context; + _Py_CODEUNIT *tier2_start; + // Note, this is the first tier 1 instruction to execute AFTER the BB ends. + _Py_CODEUNIT *tier1_end; +} _PyTier2BBMetadata; + +// Bump allocator for basic blocks (overallocated) +typedef struct _PyTier2BBSpace { + // (in bytes) + Py_ssize_t max_capacity; + // How much space has been consumed in bbs. (in bytes) + Py_ssize_t water_level; + // There's extra memory at the end of this. + _Py_CODEUNIT u_code[1]; +} _PyTier2BBSpace; + +typedef struct _PyTier2BBStartTypeContextTriplet { + int id; + _Py_CODEUNIT *tier1_start; + // This is a strong reference. So during cleanup we need to free this. + _PyTier2TypeContext *start_type_context; +} _PyTier2BBStartTypeContextTriplet; + +// Tier 2 info stored in the code object. Lazily allocated. +typedef struct _PyTier2Info { + /* the tier 2 basic block to execute (if any) */ + _PyTier2BBMetadata *_entry_bb; + _PyTier2BBSpace *_bb_space; + // Keeps track of offset of jump targets (in number of codeunits) + // from co_code_adaptive. + int backward_jump_count; + int *backward_jump_offsets; + // Each backward jump offset will have a corresponding array of _PyTier2BBMetadata * + // This allows us to find a suitable BB on a backward jump. + // So backward jump offset [1, 2, 3 ,4] + // will have [[BB_ID1, BB_ID2], [BB_ID3,], [], []] + // etc. + _PyTier2BBStartTypeContextTriplet **backward_jump_target_bb_pairs; + // Max len of bb_data + int bb_data_len; + // Current index to write into in bb_data. Incremented after each write. + // This also assigns the BB ID. 
+ int bb_data_curr; + _PyTier2BBMetadata **bb_data; + + // @TODO: + // Potentially optimise _PyTier2TypeContext by allocating the stacksize + // to the size needed for the snapshot, and the type propagation is performed + // on type_metainterpreter_stack_scratch which is allocated only once per + // code object. + // PyTypeObject** type_metainterpreter_stack_scratch; +} _PyTier2Info; + // To avoid repeating ourselves in deepfreeze.py, all PyCodeObject members are // defined in this macro: #define _PyCode_DEF(SIZE) { \ @@ -116,6 +205,8 @@ typedef struct { _PyCoCached *_co_cached; /* cached co_* attributes */ \ int _co_firsttraceable; /* index of first traceable instruction */ \ char *_co_linearray; /* array of line offsets */ \ + int _tier2_warmup; /* warmup counter for tier 2 */ \ + _PyTier2Info *_tier2_info; /* info required for tier 2, lazily alloc */ \ /* Scratch space for extra data relating to the code object. \ Type is a void* to keep the format private in codeobject.c to force \ people to go through the proper APIs. */ \ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 3359dfd8a499e0..add3dfc28a436c 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -6,6 +6,16 @@ extern "C" { #define CODE_MAX_WATCHERS 8 +typedef struct { + // Unique ID (for this code object) for this basic block. This indexes into + // the PyTier2Info bb_data field. + // The LSB indicates whether the bb branch is a type guard or not. + // To get the actual BB ID, do a right bit shift by one. + uint16_t bb_id_tagged; +} _PyBBBranchCache; + +#define INLINE_CACHE_ENTRIES_BB_BRANCH CACHE_ENTRIES(_PyBBBranchCache) + /* PEP 659 * Specialization and quickening structs and helper functions */ @@ -238,7 +248,7 @@ extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, int oparg); extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg); -extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg); +extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg, char is_bb); extern void _Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr); /* Finalizer function for static codeobjects used in deepfreeze.py */ @@ -246,6 +256,36 @@ extern void _PyStaticCode_Fini(PyCodeObject *co); /* Function to intern strings of codeobjects and quicken the bytecode */ extern int _PyStaticCode_Init(PyCodeObject *co); +/* Tier 2 interpreter */ + +// gen_bb_is_successor: +// true = successor +// false = alternate +// gen_bb_requires_pop (maximum 7): +// For tier2 type propagation, handling of jump instructions with +// runtime-dependent stack effect. +// This flag is used to determine if the type context of a new bb +// requires a stack element to be popped. 
+#define BB_TEST(gen_bb_is_successor, gen_bb_requires_pop) \ + (((gen_bb_is_successor) << 4) | (gen_bb_requires_pop)) +#define BB_TEST_IS_SUCCESSOR(bb_test) ((bb_test) >> 4) +#define BB_TEST_GET_N_REQUIRES_POP(bb_test) ((bb_test) & 0b1111) + +extern _Py_CODEUNIT *_PyCode_Tier2Warmup(struct _PyInterpreterFrame *, + _Py_CODEUNIT *); +extern _Py_CODEUNIT *_PyTier2_GenerateNextBB( + struct _PyInterpreterFrame *frame, + uint16_t bb_id_tagged, + _Py_CODEUNIT *curr_executing_instr, + int jumpby, + _Py_CODEUNIT **tier1_fallback, + char bb_flag); +extern _Py_CODEUNIT *_PyTier2_LocateJumpBackwardsBB( + struct _PyInterpreterFrame *frame, uint16_t bb_id, int jumpby, + _Py_CODEUNIT **tier1_fallback, _Py_CODEUNIT *curr, int stacksize); +extern void _PyTier2_RewriteForwardJump(_Py_CODEUNIT *bb_branch, _Py_CODEUNIT *target); +extern void _PyTier2_RewriteBackwardJump(_Py_CODEUNIT *jump_backward_lazy, _Py_CODEUNIT *target); +void _PyTier2TypeContext_Free(_PyTier2TypeContext *type_context); #ifdef Py_STATS @@ -503,7 +543,6 @@ extern uint32_t _Py_next_func_version; #define COMPARISON_NOT_EQUALS (COMPARISON_UNORDERED | COMPARISON_LESS_THAN | COMPARISON_GREATER_THAN) - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 5806cf05f174a9..b60bd5e25aa3d7 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -62,12 +62,19 @@ typedef struct _PyInterpreterFrame { int stacktop; /* Offset of TOS from localsplus */ uint16_t yield_offset; char owner; - /* Locals and stack */ + /* Locals and stack and unboxed bit mask */ PyObject *localsplus[1]; } _PyInterpreterFrame; -#define _PyInterpreterFrame_LASTI(IF) \ - ((int)((IF)->prev_instr - _PyCode_CODE((IF)->f_code))) +static inline int +_PyInterpreterFrame_LASTI(_PyInterpreterFrame *f) { + if (f->f_code->_tier2_info != NULL) { + return ((int)((f)->prev_instr - f->f_code->_tier2_info->_bb_space->u_code)); + } + return ((int)((f)->prev_instr - _PyCode_CODE((f)->f_code))); +} +//#define _PyInterpreterFrame_LASTI(IF) \ +// ((int)((IF)->prev_instr - _PyCode_CODE((IF)->f_code))) static inline PyObject **_PyFrame_Stackbase(_PyInterpreterFrame *f) { return f->localsplus + f->f_code->co_nlocalsplus; @@ -98,7 +105,10 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) /* This function needs to remain in sync with the calculation of * co_framesize in Tools/build/deepfreeze.py */ assert(code->co_framesize >= FRAME_SPECIALS_SIZE); - return code->co_framesize - FRAME_SPECIALS_SIZE; + int res = code->co_framesize - FRAME_SPECIALS_SIZE - + (code->co_nlocalsplus * sizeof(char) / sizeof(PyObject *) + 1); + assert(res > 0); + return res; } void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest); @@ -119,7 +129,12 @@ _PyFrame_Initialize( frame->f_locals = locals; frame->stacktop = code->co_nlocalsplus; frame->frame_obj = NULL; - frame->prev_instr = _PyCode_CODE(code) - 1; + if (code->_tier2_info != NULL) { + frame->prev_instr = code->_tier2_info->_entry_bb->tier2_start - 1; + } + else { + frame->prev_instr = _PyCode_CODE(code) - 1; + } frame->yield_offset = 0; frame->owner = FRAME_OWNED_BY_THREAD; @@ -128,6 +143,15 @@ _PyFrame_Initialize( } } +// The unboxed bitmask. true indicates an unboxed value. false indicates a normal PyObject. 
+static inline char* +_PyFrame_GetUnboxedBitMask(_PyInterpreterFrame *frame) +{ + PyCodeObject *co = frame->f_code; + return (char *)(frame + co->co_framesize - + (co->co_nlocalsplus * sizeof(char) / sizeof(PyObject *) + 1)); +} + /* Gets the pointer to the locals array * that precedes this frame. */ diff --git a/Include/internal/pycore_opcode.h b/Include/internal/pycore_opcode.h index 4a0b27a13ae96c..bef8db3d0ebbb1 100644 --- a/Include/internal/pycore_opcode.h +++ b/Include/internal/pycore_opcode.h @@ -55,6 +55,9 @@ const uint8_t _PyOpcode_Caches[256] = { }; const uint8_t _PyOpcode_Deopt[256] = { + [BB_TEST_ITER_LIST] = BB_TEST_ITER, + [BB_TEST_ITER_RANGE] = BB_TEST_ITER, + [BB_TEST_ITER_TUPLE] = BB_TEST_ITER, [BEFORE_ASYNC_WITH] = BEFORE_ASYNC_WITH, [BEFORE_WITH] = BEFORE_WITH, [BINARY_OP] = BINARY_OP, @@ -140,6 +143,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [IS_OP] = IS_OP, [JUMP_BACKWARD] = JUMP_BACKWARD, [JUMP_BACKWARD_NO_INTERRUPT] = JUMP_BACKWARD_NO_INTERRUPT, + [JUMP_BACKWARD_QUICK] = JUMP_BACKWARD, [JUMP_FORWARD] = JUMP_FORWARD, [KW_NAMES] = KW_NAMES, [LIST_APPEND] = LIST_APPEND, @@ -189,6 +193,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [RAISE_VARARGS] = RAISE_VARARGS, [RERAISE] = RERAISE, [RESUME] = RESUME, + [RESUME_QUICK] = RESUME, [RETURN_CONST] = RETURN_CONST, [RETURN_GENERATOR] = RETURN_GENERATOR, [RETURN_VALUE] = RETURN_VALUE, @@ -232,17 +237,19 @@ static const char *const _PyOpcode_OpName[263] = { [PUSH_NULL] = "PUSH_NULL", [INTERPRETER_EXIT] = "INTERPRETER_EXIT", [END_FOR] = "END_FOR", + [RESUME_QUICK] = "RESUME_QUICK", + [JUMP_BACKWARD_QUICK] = "JUMP_BACKWARD_QUICK", [BINARY_OP_ADD_FLOAT] = "BINARY_OP_ADD_FLOAT", [BINARY_OP_ADD_INT] = "BINARY_OP_ADD_INT", - [BINARY_OP_ADD_UNICODE] = "BINARY_OP_ADD_UNICODE", - [BINARY_OP_INPLACE_ADD_UNICODE] = "BINARY_OP_INPLACE_ADD_UNICODE", [NOP] = "NOP", - [BINARY_OP_MULTIPLY_FLOAT] = "BINARY_OP_MULTIPLY_FLOAT", + [BINARY_OP_ADD_UNICODE] = "BINARY_OP_ADD_UNICODE", [UNARY_NEGATIVE] = "UNARY_NEGATIVE", [UNARY_NOT] = "UNARY_NOT", + [BINARY_OP_INPLACE_ADD_UNICODE] = "BINARY_OP_INPLACE_ADD_UNICODE", + [BINARY_OP_MULTIPLY_FLOAT] = "BINARY_OP_MULTIPLY_FLOAT", + [UNARY_INVERT] = "UNARY_INVERT", [BINARY_OP_MULTIPLY_INT] = "BINARY_OP_MULTIPLY_INT", [BINARY_OP_SUBTRACT_FLOAT] = "BINARY_OP_SUBTRACT_FLOAT", - [UNARY_INVERT] = "UNARY_INVERT", [BINARY_OP_SUBTRACT_INT] = "BINARY_OP_SUBTRACT_INT", [BINARY_SUBSCR_DICT] = "BINARY_SUBSCR_DICT", [BINARY_SUBSCR_GETITEM] = "BINARY_SUBSCR_GETITEM", @@ -250,21 +257,21 @@ static const char *const _PyOpcode_OpName[263] = { [BINARY_SUBSCR_TUPLE_INT] = "BINARY_SUBSCR_TUPLE_INT", [CALL_PY_EXACT_ARGS] = "CALL_PY_EXACT_ARGS", [CALL_PY_WITH_DEFAULTS] = "CALL_PY_WITH_DEFAULTS", - [CALL_BOUND_METHOD_EXACT_ARGS] = "CALL_BOUND_METHOD_EXACT_ARGS", - [CALL_BUILTIN_CLASS] = "CALL_BUILTIN_CLASS", [BINARY_SUBSCR] = "BINARY_SUBSCR", [BINARY_SLICE] = "BINARY_SLICE", [STORE_SLICE] = "STORE_SLICE", - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = "CALL_BUILTIN_FAST_WITH_KEYWORDS", - [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + [CALL_BOUND_METHOD_EXACT_ARGS] = "CALL_BOUND_METHOD_EXACT_ARGS", + [CALL_BUILTIN_CLASS] = "CALL_BUILTIN_CLASS", [GET_LEN] = "GET_LEN", [MATCH_MAPPING] = "MATCH_MAPPING", [MATCH_SEQUENCE] = "MATCH_SEQUENCE", [MATCH_KEYS] = "MATCH_KEYS", - [CALL_NO_KW_BUILTIN_FAST] = "CALL_NO_KW_BUILTIN_FAST", + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = "CALL_BUILTIN_FAST_WITH_KEYWORDS", [PUSH_EXC_INFO] = "PUSH_EXC_INFO", [CHECK_EXC_MATCH] = "CHECK_EXC_MATCH", [CHECK_EG_MATCH] = 
"CHECK_EG_MATCH", + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + [CALL_NO_KW_BUILTIN_FAST] = "CALL_NO_KW_BUILTIN_FAST", [CALL_NO_KW_BUILTIN_O] = "CALL_NO_KW_BUILTIN_O", [CALL_NO_KW_ISINSTANCE] = "CALL_NO_KW_ISINSTANCE", [CALL_NO_KW_LEN] = "CALL_NO_KW_LEN", @@ -274,8 +281,6 @@ static const char *const _PyOpcode_OpName[263] = { [CALL_NO_KW_METHOD_DESCRIPTOR_O] = "CALL_NO_KW_METHOD_DESCRIPTOR_O", [CALL_NO_KW_STR_1] = "CALL_NO_KW_STR_1", [CALL_NO_KW_TUPLE_1] = "CALL_NO_KW_TUPLE_1", - [CALL_NO_KW_TYPE_1] = "CALL_NO_KW_TYPE_1", - [COMPARE_OP_FLOAT] = "COMPARE_OP_FLOAT", [WITH_EXCEPT_START] = "WITH_EXCEPT_START", [GET_AITER] = "GET_AITER", [GET_ANEXT] = "GET_ANEXT", @@ -283,39 +288,39 @@ static const char *const _PyOpcode_OpName[263] = { [BEFORE_WITH] = "BEFORE_WITH", [END_ASYNC_FOR] = "END_ASYNC_FOR", [CLEANUP_THROW] = "CLEANUP_THROW", + [CALL_NO_KW_TYPE_1] = "CALL_NO_KW_TYPE_1", + [COMPARE_OP_FLOAT] = "COMPARE_OP_FLOAT", [COMPARE_OP_INT] = "COMPARE_OP_INT", [COMPARE_OP_STR] = "COMPARE_OP_STR", - [FOR_ITER_LIST] = "FOR_ITER_LIST", - [FOR_ITER_TUPLE] = "FOR_ITER_TUPLE", [STORE_SUBSCR] = "STORE_SUBSCR", [DELETE_SUBSCR] = "DELETE_SUBSCR", + [FOR_ITER_LIST] = "FOR_ITER_LIST", + [FOR_ITER_TUPLE] = "FOR_ITER_TUPLE", [FOR_ITER_RANGE] = "FOR_ITER_RANGE", [FOR_ITER_GEN] = "FOR_ITER_GEN", + [BB_TEST_ITER_LIST] = "BB_TEST_ITER_LIST", + [BB_TEST_ITER_TUPLE] = "BB_TEST_ITER_TUPLE", + [GET_ITER] = "GET_ITER", + [GET_YIELD_FROM_ITER] = "GET_YIELD_FROM_ITER", + [BB_TEST_ITER_RANGE] = "BB_TEST_ITER_RANGE", + [LOAD_BUILD_CLASS] = "LOAD_BUILD_CLASS", [LOAD_ATTR_CLASS] = "LOAD_ATTR_CLASS", [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", + [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", + [RETURN_GENERATOR] = "RETURN_GENERATOR", [LOAD_ATTR_INSTANCE_VALUE] = "LOAD_ATTR_INSTANCE_VALUE", [LOAD_ATTR_MODULE] = "LOAD_ATTR_MODULE", - [GET_ITER] = "GET_ITER", - [GET_YIELD_FROM_ITER] = "GET_YIELD_FROM_ITER", [LOAD_ATTR_PROPERTY] = "LOAD_ATTR_PROPERTY", - [LOAD_BUILD_CLASS] = "LOAD_BUILD_CLASS", [LOAD_ATTR_SLOT] = "LOAD_ATTR_SLOT", [LOAD_ATTR_WITH_HINT] = "LOAD_ATTR_WITH_HINT", - [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", - [RETURN_GENERATOR] = "RETURN_GENERATOR", [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", + [RETURN_VALUE] = "RETURN_VALUE", [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", + [SETUP_ANNOTATIONS] = "SETUP_ANNOTATIONS", [LOAD_CONST__LOAD_FAST] = "LOAD_CONST__LOAD_FAST", [LOAD_FAST__LOAD_CONST] = "LOAD_FAST__LOAD_CONST", [LOAD_FAST__LOAD_FAST] = "LOAD_FAST__LOAD_FAST", - [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", - [RETURN_VALUE] = "RETURN_VALUE", - [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", - [SETUP_ANNOTATIONS] = "SETUP_ANNOTATIONS", - [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", - [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", - [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", [POP_EXCEPT] = "POP_EXCEPT", [STORE_NAME] = "STORE_NAME", [DELETE_NAME] = "DELETE_NAME", @@ -338,9 +343,9 @@ static const char *const _PyOpcode_OpName[263] = { [IMPORT_NAME] = "IMPORT_NAME", [IMPORT_FROM] = "IMPORT_FROM", [JUMP_FORWARD] = "JUMP_FORWARD", - [STORE_FAST__LOAD_FAST] = "STORE_FAST__LOAD_FAST", - [STORE_FAST__STORE_FAST] = "STORE_FAST__STORE_FAST", - [STORE_SUBSCR_DICT] = "STORE_SUBSCR_DICT", + [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", + [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", + [STORE_ATTR_INSTANCE_VALUE] = 
"STORE_ATTR_INSTANCE_VALUE", [POP_JUMP_IF_FALSE] = "POP_JUMP_IF_FALSE", [POP_JUMP_IF_TRUE] = "POP_JUMP_IF_TRUE", [LOAD_GLOBAL] = "LOAD_GLOBAL", @@ -368,9 +373,9 @@ static const char *const _PyOpcode_OpName[263] = { [STORE_DEREF] = "STORE_DEREF", [DELETE_DEREF] = "DELETE_DEREF", [JUMP_BACKWARD] = "JUMP_BACKWARD", - [STORE_SUBSCR_LIST_INT] = "STORE_SUBSCR_LIST_INT", + [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", [CALL_FUNCTION_EX] = "CALL_FUNCTION_EX", - [UNPACK_SEQUENCE_LIST] = "UNPACK_SEQUENCE_LIST", + [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", [EXTENDED_ARG] = "EXTENDED_ARG", [LIST_APPEND] = "LIST_APPEND", [SET_ADD] = "SET_ADD", @@ -380,58 +385,58 @@ static const char *const _PyOpcode_OpName[263] = { [YIELD_VALUE] = "YIELD_VALUE", [RESUME] = "RESUME", [MATCH_CLASS] = "MATCH_CLASS", - [UNPACK_SEQUENCE_TUPLE] = "UNPACK_SEQUENCE_TUPLE", - [UNPACK_SEQUENCE_TWO_TUPLE] = "UNPACK_SEQUENCE_TWO_TUPLE", + [STORE_FAST__LOAD_FAST] = "STORE_FAST__LOAD_FAST", + [STORE_FAST__STORE_FAST] = "STORE_FAST__STORE_FAST", [FORMAT_VALUE] = "FORMAT_VALUE", [BUILD_CONST_KEY_MAP] = "BUILD_CONST_KEY_MAP", [BUILD_STRING] = "BUILD_STRING", - [SEND_GEN] = "SEND_GEN", - [159] = "<159>", - [160] = "<160>", - [161] = "<161>", + [STORE_SUBSCR_DICT] = "STORE_SUBSCR_DICT", + [STORE_SUBSCR_LIST_INT] = "STORE_SUBSCR_LIST_INT", + [UNPACK_SEQUENCE_LIST] = "UNPACK_SEQUENCE_LIST", + [UNPACK_SEQUENCE_TUPLE] = "UNPACK_SEQUENCE_TUPLE", [LIST_EXTEND] = "LIST_EXTEND", [SET_UPDATE] = "SET_UPDATE", [DICT_MERGE] = "DICT_MERGE", [DICT_UPDATE] = "DICT_UPDATE", - [166] = "<166>", - [167] = "<167>", - [168] = "<168>", - [169] = "<169>", - [170] = "<170>", + [UNPACK_SEQUENCE_TWO_TUPLE] = "UNPACK_SEQUENCE_TWO_TUPLE", + [SEND_GEN] = "SEND_GEN", + [BB_BRANCH] = "BB_BRANCH", + [BB_BRANCH_IF_FLAG_UNSET] = "BB_BRANCH_IF_FLAG_UNSET", + [BB_BRANCH_IF_FLAG_SET] = "BB_BRANCH_IF_FLAG_SET", [CALL] = "CALL", [KW_NAMES] = "KW_NAMES", [CALL_INTRINSIC_1] = "CALL_INTRINSIC_1", [CALL_INTRINSIC_2] = "CALL_INTRINSIC_2", - [175] = "<175>", - [176] = "<176>", - [177] = "<177>", - [178] = "<178>", - [179] = "<179>", - [180] = "<180>", - [181] = "<181>", - [182] = "<182>", - [183] = "<183>", - [184] = "<184>", - [185] = "<185>", - [186] = "<186>", - [187] = "<187>", - [188] = "<188>", - [189] = "<189>", - [190] = "<190>", - [191] = "<191>", - [192] = "<192>", - [193] = "<193>", - [194] = "<194>", - [195] = "<195>", - [196] = "<196>", - [197] = "<197>", - [198] = "<198>", - [199] = "<199>", - [200] = "<200>", - [201] = "<201>", - [202] = "<202>", - [203] = "<203>", - [204] = "<204>", + [BB_JUMP_IF_FLAG_UNSET] = "BB_JUMP_IF_FLAG_UNSET", + [BB_JUMP_IF_FLAG_SET] = "BB_JUMP_IF_FLAG_SET", + [BB_TEST_ITER] = "BB_TEST_ITER", + [BB_TEST_ITER_RANGE] = "BB_TEST_ITER_RANGE", + [BB_TEST_ITER_LIST] = "BB_TEST_ITER_LIST", + [BB_TEST_ITER_TUPLE] = "BB_TEST_ITER_TUPLE", + [BB_TEST_POP_IF_FALSE] = "BB_TEST_POP_IF_FALSE", + [BB_TEST_POP_IF_TRUE] = "BB_TEST_POP_IF_TRUE", + [BB_TEST_POP_IF_NOT_NONE] = "BB_TEST_POP_IF_NOT_NONE", + [BB_TEST_POP_IF_NONE] = "BB_TEST_POP_IF_NONE", + [BB_JUMP_BACKWARD_LAZY] = "BB_JUMP_BACKWARD_LAZY", + [BINARY_CHECK_INT] = "BINARY_CHECK_INT", + [BINARY_CHECK_FLOAT] = "BINARY_CHECK_FLOAT", + [CHECK_LIST] = "CHECK_LIST", + [BINARY_OP_ADD_INT_REST] = "BINARY_OP_ADD_INT_REST", + [BINARY_OP_ADD_FLOAT_UNBOXED] = "BINARY_OP_ADD_FLOAT_UNBOXED", + [BINARY_OP_SUBTRACT_INT_REST] = "BINARY_OP_SUBTRACT_INT_REST", + [BINARY_OP_SUBTRACT_FLOAT_UNBOXED] = "BINARY_OP_SUBTRACT_FLOAT_UNBOXED", + [BINARY_OP_MULTIPLY_INT_REST] = "BINARY_OP_MULTIPLY_INT_REST", + 
[BINARY_OP_MULTIPLY_FLOAT_UNBOXED] = "BINARY_OP_MULTIPLY_FLOAT_UNBOXED", + [BINARY_SUBSCR_LIST_INT_REST] = "BINARY_SUBSCR_LIST_INT_REST", + [STORE_SUBSCR_LIST_INT_REST] = "STORE_SUBSCR_LIST_INT_REST", + [POP_TOP_NO_DECREF] = "POP_TOP_NO_DECREF", + [UNBOX_FLOAT] = "UNBOX_FLOAT", + [BOX_FLOAT] = "BOX_FLOAT", + [COPY_NO_INCREF] = "COPY_NO_INCREF", + [LOAD_FAST_NO_INCREF] = "LOAD_FAST_NO_INCREF", + [STORE_FAST_BOXED_UNBOXED] = "STORE_FAST_BOXED_UNBOXED", + [STORE_FAST_UNBOXED_BOXED] = "STORE_FAST_UNBOXED_BOXED", + [STORE_FAST_UNBOXED_UNBOXED] = "STORE_FAST_UNBOXED_UNBOXED", [205] = "<205>", [206] = "<206>", [207] = "<207>", @@ -493,45 +498,8 @@ static const char *const _PyOpcode_OpName[263] = { }; #endif + #define EXTRA_CASES \ - case 159: \ - case 160: \ - case 161: \ - case 166: \ - case 167: \ - case 168: \ - case 169: \ - case 170: \ - case 175: \ - case 176: \ - case 177: \ - case 178: \ - case 179: \ - case 180: \ - case 181: \ - case 182: \ - case 183: \ - case 184: \ - case 185: \ - case 186: \ - case 187: \ - case 188: \ - case 189: \ - case 190: \ - case 191: \ - case 192: \ - case 193: \ - case 194: \ - case 195: \ - case 196: \ - case 197: \ - case 198: \ - case 199: \ - case 200: \ - case 201: \ - case 202: \ - case 203: \ - case 204: \ case 205: \ case 206: \ case 207: \ diff --git a/Include/opcode.h b/Include/opcode.h index 0ff84dc5a551a0..749e50e727fe37 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -123,69 +123,105 @@ extern "C" { #define JUMP_NO_INTERRUPT 261 #define LOAD_METHOD 262 #define MAX_PSEUDO_OPCODE 262 -#define BINARY_OP_ADD_FLOAT 5 -#define BINARY_OP_ADD_INT 6 -#define BINARY_OP_ADD_UNICODE 7 -#define BINARY_OP_INPLACE_ADD_UNICODE 8 -#define BINARY_OP_MULTIPLY_FLOAT 10 -#define BINARY_OP_MULTIPLY_INT 13 -#define BINARY_OP_SUBTRACT_FLOAT 14 -#define BINARY_OP_SUBTRACT_INT 16 -#define BINARY_SUBSCR_DICT 17 -#define BINARY_SUBSCR_GETITEM 18 -#define BINARY_SUBSCR_LIST_INT 19 -#define BINARY_SUBSCR_TUPLE_INT 20 -#define CALL_PY_EXACT_ARGS 21 -#define CALL_PY_WITH_DEFAULTS 22 -#define CALL_BOUND_METHOD_EXACT_ARGS 23 -#define CALL_BUILTIN_CLASS 24 -#define CALL_BUILTIN_FAST_WITH_KEYWORDS 28 -#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 29 -#define CALL_NO_KW_BUILTIN_FAST 34 -#define CALL_NO_KW_BUILTIN_O 38 -#define CALL_NO_KW_ISINSTANCE 39 -#define CALL_NO_KW_LEN 40 -#define CALL_NO_KW_LIST_APPEND 41 -#define CALL_NO_KW_METHOD_DESCRIPTOR_FAST 42 -#define CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 43 -#define CALL_NO_KW_METHOD_DESCRIPTOR_O 44 -#define CALL_NO_KW_STR_1 45 -#define CALL_NO_KW_TUPLE_1 46 -#define CALL_NO_KW_TYPE_1 47 -#define COMPARE_OP_FLOAT 48 -#define COMPARE_OP_INT 56 -#define COMPARE_OP_STR 57 -#define FOR_ITER_LIST 58 -#define FOR_ITER_TUPLE 59 -#define FOR_ITER_RANGE 62 -#define FOR_ITER_GEN 63 -#define LOAD_ATTR_CLASS 64 -#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 65 -#define LOAD_ATTR_INSTANCE_VALUE 66 -#define LOAD_ATTR_MODULE 67 -#define LOAD_ATTR_PROPERTY 70 -#define LOAD_ATTR_SLOT 72 -#define LOAD_ATTR_WITH_HINT 73 -#define LOAD_ATTR_METHOD_LAZY_DICT 76 -#define LOAD_ATTR_METHOD_NO_DICT 77 -#define LOAD_ATTR_METHOD_WITH_VALUES 78 -#define LOAD_CONST__LOAD_FAST 79 -#define LOAD_FAST__LOAD_CONST 80 -#define LOAD_FAST__LOAD_FAST 81 -#define LOAD_GLOBAL_BUILTIN 82 -#define LOAD_GLOBAL_MODULE 84 -#define STORE_ATTR_INSTANCE_VALUE 86 -#define STORE_ATTR_SLOT 87 -#define STORE_ATTR_WITH_HINT 88 -#define STORE_FAST__LOAD_FAST 111 -#define STORE_FAST__STORE_FAST 112 -#define STORE_SUBSCR_DICT 113 -#define STORE_SUBSCR_LIST_INT 141 -#define 
UNPACK_SEQUENCE_LIST 143 -#define UNPACK_SEQUENCE_TUPLE 153 -#define UNPACK_SEQUENCE_TWO_TUPLE 154 -#define SEND_GEN 158 +#define RESUME_QUICK 5 +#define JUMP_BACKWARD_QUICK 6 +#define BINARY_OP_ADD_FLOAT 7 +#define BINARY_OP_ADD_INT 8 +#define BINARY_OP_ADD_UNICODE 10 +#define BINARY_OP_INPLACE_ADD_UNICODE 13 +#define BINARY_OP_MULTIPLY_FLOAT 14 +#define BINARY_OP_MULTIPLY_INT 16 +#define BINARY_OP_SUBTRACT_FLOAT 17 +#define BINARY_OP_SUBTRACT_INT 18 +#define BINARY_SUBSCR_DICT 19 +#define BINARY_SUBSCR_GETITEM 20 +#define BINARY_SUBSCR_LIST_INT 21 +#define BINARY_SUBSCR_TUPLE_INT 22 +#define CALL_PY_EXACT_ARGS 23 +#define CALL_PY_WITH_DEFAULTS 24 +#define CALL_BOUND_METHOD_EXACT_ARGS 28 +#define CALL_BUILTIN_CLASS 29 +#define CALL_BUILTIN_FAST_WITH_KEYWORDS 34 +#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 38 +#define CALL_NO_KW_BUILTIN_FAST 39 +#define CALL_NO_KW_BUILTIN_O 40 +#define CALL_NO_KW_ISINSTANCE 41 +#define CALL_NO_KW_LEN 42 +#define CALL_NO_KW_LIST_APPEND 43 +#define CALL_NO_KW_METHOD_DESCRIPTOR_FAST 44 +#define CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 45 +#define CALL_NO_KW_METHOD_DESCRIPTOR_O 46 +#define CALL_NO_KW_STR_1 47 +#define CALL_NO_KW_TUPLE_1 48 +#define CALL_NO_KW_TYPE_1 56 +#define COMPARE_OP_FLOAT 57 +#define COMPARE_OP_INT 58 +#define COMPARE_OP_STR 59 +#define FOR_ITER_LIST 62 +#define FOR_ITER_TUPLE 63 +#define FOR_ITER_RANGE 64 +#define FOR_ITER_GEN 65 +#define LOAD_ATTR_CLASS 72 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 73 +#define LOAD_ATTR_INSTANCE_VALUE 76 +#define LOAD_ATTR_MODULE 77 +#define LOAD_ATTR_PROPERTY 78 +#define LOAD_ATTR_SLOT 79 +#define LOAD_ATTR_WITH_HINT 80 +#define LOAD_ATTR_METHOD_LAZY_DICT 81 +#define LOAD_ATTR_METHOD_NO_DICT 82 +#define LOAD_ATTR_METHOD_WITH_VALUES 84 +#define LOAD_CONST__LOAD_FAST 86 +#define LOAD_FAST__LOAD_CONST 87 +#define LOAD_FAST__LOAD_FAST 88 +#define LOAD_GLOBAL_BUILTIN 111 +#define LOAD_GLOBAL_MODULE 112 +#define STORE_ATTR_INSTANCE_VALUE 113 +#define STORE_ATTR_SLOT 141 +#define STORE_ATTR_WITH_HINT 143 +#define STORE_FAST__LOAD_FAST 153 +#define STORE_FAST__STORE_FAST 154 +#define STORE_SUBSCR_DICT 158 +#define STORE_SUBSCR_LIST_INT 159 +#define UNPACK_SEQUENCE_LIST 160 +#define UNPACK_SEQUENCE_TUPLE 161 +#define UNPACK_SEQUENCE_TWO_TUPLE 166 +#define SEND_GEN 167 #define DO_TRACING 255 +// Tier 2 interpreter ops +#define BB_BRANCH 168 +#define BB_BRANCH_IF_FLAG_UNSET 169 +#define BB_BRANCH_IF_FLAG_SET 170 +#define BB_JUMP_IF_FLAG_UNSET 175 +#define BB_JUMP_IF_FLAG_SET 176 +#define BB_TEST_ITER 177 +#define BB_TEST_ITER_RANGE 178 +#define BB_TEST_ITER_LIST 179 +#define BB_TEST_ITER_TUPLE 180 +#define BB_TEST_POP_IF_FALSE 181 +#define BB_TEST_POP_IF_TRUE 182 +#define BB_TEST_POP_IF_NOT_NONE 183 +#define BB_TEST_POP_IF_NONE 184 +#define BB_JUMP_BACKWARD_LAZY 185 +#define BINARY_CHECK_INT 186 +#define BINARY_CHECK_FLOAT 187 +#define CHECK_LIST 188 +#define BINARY_OP_ADD_INT_REST 189 +#define BINARY_OP_ADD_FLOAT_UNBOXED 190 +#define BINARY_OP_SUBTRACT_INT_REST 191 +#define BINARY_OP_SUBTRACT_FLOAT_UNBOXED 192 +#define BINARY_OP_MULTIPLY_INT_REST 193 +#define BINARY_OP_MULTIPLY_FLOAT_UNBOXED 194 +#define BINARY_SUBSCR_LIST_INT_REST 195 +#define STORE_SUBSCR_LIST_INT_REST 196 +#define POP_TOP_NO_DECREF 197 +#define UNBOX_FLOAT 198 +#define BOX_FLOAT 199 +#define COPY_NO_INCREF 200 +#define LOAD_FAST_NO_INCREF 201 +#define STORE_FAST_BOXED_UNBOXED 202 +#define STORE_FAST_UNBOXED_BOXED 203 +#define STORE_FAST_UNBOXED_UNBOXED 204 #define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\ || ((op) 
== JUMP) \ diff --git a/Lib/dis.py b/Lib/dis.py index b39b2835330135..501035389a1d81 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -13,6 +13,7 @@ _nb_ops, _specializations, _specialized_instructions, + _uops, ) __all__ = ["code_info", "dis", "disassemble", "distb", "disco", @@ -52,10 +53,32 @@ _all_opname[spec_op] = specialized _all_opmap[specialized] = spec_op +_bb_jumps = [] +_uop_hasoparg = [] +_empty_slot = [slot for slot, name in enumerate(_all_opname) if name.startswith("<")] +for uop_opcode, uop in zip(_empty_slot, _uops): + _all_opname[uop_opcode] = uop + _all_opmap[uop] = uop_opcode + if uop.startswith('BB_BRANCH') or uop.startswith('BB_JUMP'): + if uop.startswith('BB_JUMP'): + _bb_jumps.append(uop_opcode) + if uop.startswith('BB_BRANCH'): + _inline_cache_entries[uop_opcode] = 1 + _uop_hasoparg.append(uop_opcode) + deoptmap = { specialized: base for base, family in _specializations.items() for specialized in family } +_TIER2_STORE_OPS = ( + # 'LOAD_FAST_NO_INCREF', + # 'STORE_FAST_BOXED_UNBOXED', + # 'STORE_FAST_UNBOXED_BOXED', + # 'STORE_FAST_UNBOXED_UNBOXED', +) +for store_op in ([_all_opmap[op] for op in _TIER2_STORE_OPS]): + hasname.append(store_op) + hasarg.append(store_op) def _try_compile(source, name): """Attempts to compile the given source, first as an expression and then as a statement if the first approach fails. @@ -69,7 +92,8 @@ def _try_compile(source, name): c = compile(source, name, 'exec') return c -def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False): +def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, + tier2=False): """Disassemble classes, methods, functions, and other compiled objects. With no argument, disassemble the last traceback. @@ -105,7 +129,7 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False): print("Sorry:", msg, file=file) print(file=file) elif hasattr(x, 'co_code'): # Code object - _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive) + _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, tier2=tier2) elif isinstance(x, (bytes, bytearray)): # Raw bytecode _disassemble_bytes(x, file=file, show_caches=show_caches) elif isinstance(x, str): # Source code @@ -191,7 +215,9 @@ def _deoptop(op): name = _all_opname[op] return _all_opmap[deoptmap[name]] if name in deoptmap else op -def _get_code_array(co, adaptive): +def _get_code_array(co, adaptive, tier2=False): + if tier2: + return co._co_code_tier2 return co._co_code_adaptive if adaptive else co.co_code def code_info(x): @@ -334,7 +360,8 @@ def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4): return ' '.join(fields).rstrip() -def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): +def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False, + tier2=False): """Iterator for the opcodes in methods, functions or code Generates a series of Instruction named tuples giving the details of @@ -351,7 +378,7 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): line_offset = first_line - co.co_firstlineno else: line_offset = 0 - return _get_instructions_bytes(_get_code_array(co, adaptive), + return _get_instructions_bytes(_get_code_array(co, adaptive, tier2), co._varname_from_oparg, co.co_names, co.co_consts, linestarts, line_offset, @@ -425,7 +452,7 @@ def _parse_exception_table(code): return entries def _is_backward_jump(op): - return 'JUMP_BACKWARD' in 
opname[op] + return 'JUMP_BACKWARD' in opname[op] or 'JUMP_BACKWARD_QUICK' in opname[op] def _get_instructions_bytes(code, varname_from_oparg=None, names=None, co_consts=None, @@ -447,6 +474,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None, for i in range(start, end): labels.add(target) starts_line = None + ret = [] for offset, op, arg in _unpack_opargs(code): if linestarts is not None: starts_line = linestarts.get(offset, None) @@ -480,6 +508,11 @@ def _get_instructions_bytes(code, varname_from_oparg=None, elif deop in hasjabs: argval = arg*2 argrepr = "to " + repr(argval) + elif deop in _bb_jumps: + signed_arg = -arg if _is_backward_jump(deop) else arg + argval = offset + 2 + signed_arg*2 + argval += 2 * caches + argrepr = "to " + repr(argval) elif deop in hasjrel: signed_arg = -arg if _is_backward_jump(deop) else arg argval = offset + 2 + signed_arg*2 @@ -502,9 +535,9 @@ def _get_instructions_bytes(code, varname_from_oparg=None, if arg & (1< 0: if depth is not None: depth = depth - 1 @@ -548,7 +582,8 @@ def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adap print(file=file) print("Disassembly of %r:" % (x,), file=file) _disassemble_recursive( - x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive + x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, + tier2=tier2 ) def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, @@ -581,12 +616,7 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, instr.offset > 0) if new_source_line: print(file=file) - if show_caches: - is_current_instr = instr.offset == lasti - else: - # Each CACHE takes 2 bytes - is_current_instr = instr.offset <= lasti \ - <= instr.offset + 2 * _inline_cache_entries[_deoptop(instr.opcode)] + is_current_instr = instr.offset == lasti print(instr._disassemble(lineno_width, is_current_instr, offset_width), file=file) if exception_entries: @@ -619,7 +649,7 @@ def _unpack_opargs(code): op = code[i] deop = _deoptop(op) caches = _inline_cache_entries[deop] - if deop in hasarg: + if deop in hasarg or deop in _uop_hasoparg: arg = code[i+1] | extended_arg extended_arg = (arg << 8) if deop == EXTENDED_ARG else 0 # The oparg is stored as a signed integer @@ -714,7 +744,8 @@ class Bytecode: Iterating over this yields the bytecode operations as Instruction instances. 
""" - def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False): + def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, + adaptive=False, tier2=False): self.codeobj = co = _get_code_object(x) if first_line is None: self.first_line = co.co_firstlineno @@ -728,10 +759,11 @@ def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False self.exception_entries = _parse_exception_table(co) self.show_caches = show_caches self.adaptive = adaptive + self.tier2 = tier2 def __iter__(self): co = self.codeobj - return _get_instructions_bytes(_get_code_array(co, self.adaptive), + return _get_instructions_bytes(_get_code_array(co, self.adaptive, self.tier2), co._varname_from_oparg, co.co_names, co.co_consts, self._linestarts, @@ -765,7 +797,7 @@ def dis(self): else: offset = -1 with io.StringIO() as output: - _disassemble_bytes(_get_code_array(co, self.adaptive), + _disassemble_bytes(_get_code_array(co, self.adaptive, self.tier2), varname_from_oparg=co._varname_from_oparg, names=co.co_names, co_consts=co.co_consts, linestarts=self._linestarts, diff --git a/Lib/opcode.py b/Lib/opcode.py index 60670f571fdc4d..297d0001793abf 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -277,6 +277,12 @@ def pseudo_op(name, op, real_ops): ] _specializations = { + "RESUME": [ + "RESUME_QUICK", + ], + "JUMP_BACKWARD": [ + "JUMP_BACKWARD_QUICK", + ], "BINARY_OP": [ "BINARY_OP_ADD_FLOAT", "BINARY_OP_ADD_INT", @@ -323,6 +329,11 @@ def pseudo_op(name, op, real_ops): "FOR_ITER_RANGE", "FOR_ITER_GEN", ], + "BB_TEST_ITER": [ + "BB_TEST_ITER_LIST", + "BB_TEST_ITER_TUPLE", + "BB_TEST_ITER_RANGE", + ], "LOAD_ATTR": [ # These potentially push [NULL, bound method] onto the stack. "LOAD_ATTR_CLASS", @@ -398,6 +409,9 @@ def pseudo_op(name, op, real_ops): "FOR_ITER": { "counter": 1, }, + "BB_TEST_ITER": { + "counter": 1, + }, "LOAD_ATTR": { "counter": 1, "version": 2, @@ -425,3 +439,84 @@ def pseudo_op(name, op, real_ops): _inline_cache_entries = [ sum(_cache_format.get(opname[opcode], {}).values()) for opcode in range(256) ] + +_macro_ops = [ + 'BINARY_OP_ADD_INT', + 'BINARY_OP_SUBTRACT_INT', + 'BINARY_OP_MULTIPLY_INT', + 'BINARY_OP_ADD_FLOAT', + 'BINARY_SUBSCR_LIST_INT', +] +_uops = [ + # Tier 2 BB opcodes + # Frame creation + # 'BB_ENTER_FRAME', + # 'BB_EXIT_FRAME', + # Initial generic branching instruction. + 'BB_BRANCH', # When both exits have not been generated. + # The BB_BRANCH transitions to one of these two. + # This happens when the fall through is generated, but not the other branch. + 'BB_BRANCH_IF_FLAG_UNSET', # When alternate exit is not yet generated. + 'BB_BRANCH_IF_FLAG_SET', # When successor exit is not yet generated. + # When both edges are generated + 'BB_JUMP_IF_FLAG_UNSET', + 'BB_JUMP_IF_FLAG_SET', + # The final form is that once both branches are generated, we can just + # override these instructions with a generic JUMP. + + # These tests correspond to the jump instructions + # FOR_ITER's null (iterator) check + 'BB_TEST_ITER', + 'BB_TEST_ITER_RANGE', + 'BB_TEST_ITER_LIST', + 'BB_TEST_ITER_TUPLE', + # POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE + 'BB_TEST_POP_IF_FALSE', + 'BB_TEST_POP_IF_TRUE', + # POP_JUMP_IF_NOT_NONE, POP_JUMP_IF_NONE + 'BB_TEST_POP_IF_NOT_NONE', + 'BB_TEST_POP_IF_NONE', + # JUMP_BACKWARD + 'BB_JUMP_BACKWARD_LAZY', + + # Common type checks + # These instructions check that one operand is a certain type. + # Their oparg is the offset from TOS to read. 
+ # 'UNARY_CHECK_INT', + # 'UNARY_CHECK_FLOAT', + # 'UNARY_CHECK_STR', + + # These instructions check that both operands are a certain type. + # The benefit is that they save some dispatch overhead versus the + # single operand forms. + 'BINARY_CHECK_INT', + 'BINARY_CHECK_FLOAT', + 'CHECK_LIST', + + # These are guardless instructions + ## Arithmetic + 'BINARY_OP_ADD_INT_REST', + 'BINARY_OP_ADD_FLOAT_UNBOXED', + 'BINARY_OP_SUBTRACT_INT_REST', + 'BINARY_OP_SUBTRACT_FLOAT_UNBOXED', + 'BINARY_OP_MULTIPLY_INT_REST', + 'BINARY_OP_MULTIPLY_FLOAT_UNBOXED', + + # Containers + 'BINARY_SUBSCR_LIST_INT_REST', + 'STORE_SUBSCR_LIST_INT_REST', + + # Boxing / unboxing ops + 'POP_TOP_NO_DECREF', + 'UNBOX_FLOAT', + 'BOX_FLOAT', + 'COPY_NO_INCREF', + 'LOAD_FAST_NO_INCREF', + # Storing a boxed value, overwriting an unboxed local. + 'STORE_FAST_BOXED_UNBOXED', + # Storing an unboxed value, overwriting a boxed local. + 'STORE_FAST_UNBOXED_BOXED', + # Storing an unboxed value, overwriting an unboxed local. + 'STORE_FAST_UNBOXED_UNBOXED', + # The traditional STORE_FAST is storing a boxed value, overwriting a boxed local. +] diff --git a/Makefile.pre.in b/Makefile.pre.in index 74e4171b010d0f..1818cf1362e24b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -411,6 +411,7 @@ PYTHON_OBJS= \ Python/pytime.o \ Python/bootstrap_hash.o \ Python/specialize.o \ + Python/tier2.o \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ @@ -1197,7 +1198,7 @@ DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py $(FREEZE_MODULE_DEPS) $(FROZ # BEGIN: deepfreeze modules Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS) - $(PYTHON_FOR_FREEZE) $(srcdir)/Tools/build/deepfreeze.py \ + python3 $(srcdir)/Tools/build/deepfreeze.py \ Python/frozen_modules/importlib._bootstrap.h:importlib._bootstrap \ Python/frozen_modules/importlib._bootstrap_external.h:importlib._bootstrap_external \ Python/frozen_modules/zipimport.h:zipimport \ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 65b1d258fb76af..f60aea5e4d46a5 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -424,7 +424,10 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) /* derived values */ co->co_nlocalsplus = nlocalsplus; co->co_nlocals = nlocals; - co->co_framesize = nlocalsplus + con->stacksize + FRAME_SPECIALS_SIZE; + co->co_framesize = nlocalsplus + con->stacksize + FRAME_SPECIALS_SIZE + + // + this because at the end of the frame, we store the bit masks + // that indicate whether this value is unboxed or not + (nlocalsplus * sizeof(char) / sizeof(PyObject *) + 1); co->co_ncellvars = ncellvars; co->co_nfreevars = nfreevars; co->co_version = _Py_next_func_version; @@ -438,6 +441,8 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->_co_linearray_entry_size = 0; co->_co_linearray = NULL; + co->_tier2_warmup = -64; + co->_tier2_info = NULL; memcpy(_PyCode_CODE(co), PyBytes_AS_STRING(con->code), PyBytes_GET_SIZE(con->code)); int entry_point = 0; @@ -445,6 +450,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) _PyCode_CODE(co)[entry_point].op.code != RESUME) { entry_point++; } + co->_co_firsttraceable = entry_point; _PyCode_Quicken(co); notify_code_watchers(PY_CODE_EVENT_CREATE, co); @@ -870,7 +876,7 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) if (addrq < 0) { return co->co_firstlineno; } - assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); + // assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); if (co->_co_linearray) { return _PyCode_LineNumberFromArray(co, addrq / sizeof(_Py_CODEUNIT)); } @@ 
-1693,6 +1699,54 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, return co; } +static void +code_tier2_fini(PyCodeObject *co) +{ + if (co->_tier2_info == NULL) { + return; + } + // @TODO: + // Write a proper destructor for _PyTier2Info + // and it's children structures. + // Current implementation e.g., doesn't clear + // bb_data + _PyTier2Info *t2_info = co->_tier2_info; + t2_info->_entry_bb = NULL; + if (t2_info->_bb_space != NULL) { + PyMem_Free(t2_info->_bb_space); + t2_info->_bb_space = NULL; + } + + if (t2_info->backward_jump_count > 0 && + t2_info->backward_jump_offsets != NULL) { + PyMem_Free(t2_info->backward_jump_offsets); + t2_info->backward_jump_offsets = NULL; + _PyTier2BBStartTypeContextTriplet **backward_jump_target_bb_pairs = t2_info->backward_jump_target_bb_pairs; + //int backwards_jump_count = t2_info->backward_jump_count; + //for (int i = 0; i < backwards_jump_count; i++) { + // PyMem_Free(backward_jump_target_bb_pairs[i]); + //} + PyMem_Free(backward_jump_target_bb_pairs); + } + + t2_info->backward_jump_count = 0; + if (t2_info->bb_data != NULL && t2_info->bb_data_len > 0) { + PyMem_Free(t2_info->bb_data); + } + //if (t2_info->bb_data != NULL) { + // for (int i = 0; i < t2_info->bb_data_curr; i++) { + // if (t2_info->bb_data[i] != NULL) { + // _PyTier2BBMetadata *meta = t2_info->bb_data[i]; + // //_PyTier2TypeContext_Free(meta->type_context); + // //PyMem_Free(meta); + // } + // } + // PyMem_Free(t2_info->bb_data); + //} + t2_info->bb_data_len = 0; + PyMem_Free(t2_info); +} + static void code_dealloc(PyCodeObject *co) { @@ -1742,6 +1796,8 @@ code_dealloc(PyCodeObject *co) if (co->_co_linearray) { PyMem_Free(co->_co_linearray); } + code_tier2_fini(co); + co->_tier2_info = NULL; PyObject_Free(co); } @@ -1955,9 +2011,21 @@ code_getcode(PyCodeObject *code, void *closure) return _PyCode_GetCode(code); } +static PyObject * +code_getcodetier2(PyCodeObject *code, void *closure) +{ + if (code->_tier2_info == NULL) { + return PyBytes_FromStringAndSize("", 0); + } + return PyBytes_FromStringAndSize( + (const char *)code->_tier2_info->_bb_space->u_code, + code->_tier2_info->_bb_space->water_level); +} + static PyGetSetDef code_getsetlist[] = { {"co_lnotab", (getter)code_getlnotab, NULL, NULL}, {"_co_code_adaptive", (getter)code_getcodeadaptive, NULL, NULL}, + {"_co_code_tier2", (getter)code_getcodetier2, NULL, NULL}, // The following old names are kept for backward compatibility. {"co_varnames", (getter)code_getvarnames, NULL, NULL}, {"co_cellvars", (getter)code_getcellvars, NULL, NULL}, @@ -2319,6 +2387,8 @@ _PyStaticCode_Fini(PyCodeObject *co) PyMem_Free(co->_co_cached); co->_co_cached = NULL; } + code_tier2_fini(co); + co->_tier2_info = NULL; co->co_extra = NULL; if (co->co_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *)co); diff --git a/Objects/genobject.c b/Objects/genobject.c index 6316fa9865fe65..29a00f08b00eca 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -346,7 +346,7 @@ _PyGen_yf(PyGenObject *gen) return NULL; } _Py_CODEUNIT next = frame->prev_instr[1]; - if (next.op.code != RESUME || next.op.arg < 2) + if ((next.op.code != RESUME && next.op.code != RESUME_QUICK) || next.op.arg < 2) { /* Not in a yield from */ return NULL; @@ -382,7 +382,7 @@ gen_close(PyGenObject *gen, PyObject *args) /* It is possible for the previous instruction to not be a * YIELD_VALUE if the debugger has changed the lineno. 
*/ if (err == 0 && frame->prev_instr[0].op.code == YIELD_VALUE) { - assert(frame->prev_instr[1].op.code == RESUME); + assert(frame->prev_instr[1].op.code == RESUME || frame->prev_instr[1].op.code == RESUME_QUICK); int exception_handler_depth = frame->prev_instr[0].op.code; assert(exception_handler_depth > 0); /* We can safely ignore the outermost try block diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 4f39756019e692..5271008885aa34 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -229,6 +229,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 7d7c4587b9a3f3..1f2ae059d6fd1a 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -343,6 +343,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index c754b2165745ff..03d70e711c32bf 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -550,6 +550,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 90ed0602821bff..cf03edff4e10d1 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -1232,6 +1232,9 @@ Python + + Python + Python diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2fe85dfeedf47f..2bb42c2ad449e9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -86,6 +86,13 @@ dummy_func( } inst(RESUME, (--)) { + if (cframe.use_tracing == 0) { + next_instr = _PyCode_Tier2Warmup(frame, next_instr); + } + GO_TO_INSTRUCTION(RESUME_QUICK); + } + + inst(RESUME_QUICK, (--)) { assert(tstate->cframe == &cframe); assert(frame == cframe.current_frame); if (_Py_atomic_load_relaxed_int32(eval_breaker) && oparg < 2) { @@ -93,32 +100,50 @@ dummy_func( } } - inst(LOAD_CLOSURE, (-- value)) { + inst(LOAD_CLOSURE, (-- value : locals[oparg])) { /* We keep LOAD_CLOSURE so that the bytecode stays more readable. 
*/ value = GETLOCAL(oparg); ERROR_IF(value == NULL, unbound_local_error); Py_INCREF(value); } - inst(LOAD_FAST_CHECK, (-- value)) { + inst(LOAD_FAST_CHECK, (-- value : locals[oparg])) { value = GETLOCAL(oparg); ERROR_IF(value == NULL, unbound_local_error); Py_INCREF(value); } - inst(LOAD_FAST, (-- value)) { + inst(LOAD_FAST, (-- value : locals[oparg])) { value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); } - inst(LOAD_CONST, (-- value)) { + inst(LOAD_FAST_NO_INCREF, (-- value : locals[oparg])) { + value = GETLOCAL(oparg); + } + + inst(LOAD_CONST, (-- value : consts[oparg])) { value = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(value); } - inst(STORE_FAST, (value --)) { + inst(STORE_FAST, (value --), locals[oparg] = *value) { + SETLOCAL(oparg, value); + } + + inst(STORE_FAST_BOXED_UNBOXED, (value --), locals[oparg] = *value) { + SETLOCAL_NO_DECREF(oparg, value); + _PyFrame_GetUnboxedBitMask(frame)[oparg] = false; + } + + inst(STORE_FAST_UNBOXED_BOXED, (value--), locals[oparg] = *value) { SETLOCAL(oparg, value); + _PyFrame_GetUnboxedBitMask(frame)[oparg] = true; + } + + inst(STORE_FAST_UNBOXED_UNBOXED, (value--), locals[oparg] = *value) { + SETLOCAL_NO_DECREF(oparg, value); } super(LOAD_FAST__LOAD_FAST) = LOAD_FAST + LOAD_FAST; @@ -131,7 +156,10 @@ dummy_func( DECREF_INPUTS(); } - inst(PUSH_NULL, (-- res)) { + inst(POP_TOP_NO_DECREF, (value--)) { + } + + inst(PUSH_NULL, (-- res: NULL)) { res = NULL; } @@ -175,10 +203,14 @@ dummy_func( }; - inst(BINARY_OP_MULTIPLY_INT, (unused/1, left, right -- prod)) { + macro_inst(BINARY_OP_MULTIPLY_INT, (unused/1, left, right -- prod)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); + U_INST(BINARY_OP_MULTIPLY_INT_REST); + } + + u_inst(BINARY_OP_MULTIPLY_INT_REST, (left, right -- prod : PyLong_Type)) { STAT_INC(BINARY_OP, hit); prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -196,10 +228,14 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dprod, prod); } - inst(BINARY_OP_SUBTRACT_INT, (unused/1, left, right -- sub)) { + macro_inst(BINARY_OP_SUBTRACT_INT, (unused/1, left, right -- sub)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); + U_INST(BINARY_OP_SUBTRACT_INT_REST); + } + + u_inst(BINARY_OP_SUBTRACT_INT_REST, (left, right -- sub : PyLong_Type)) { STAT_INC(BINARY_OP, hit); sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -273,10 +309,67 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dsum, sum); } - inst(BINARY_OP_ADD_INT, (unused/1, left, right -- sum)) { + inst(BINARY_CHECK_FLOAT, ( + left, right + -- left_unboxed : {<<= PyFloat_Type, PyRawFloat_Type}, + right_unboxed: {<<= PyFloat_Type, PyRawFloat_Type}) + ) { + assert(cframe.use_tracing == 0); + char is_successor = PyFloat_CheckExact(left) && (Py_TYPE(left) == Py_TYPE(right)); + bb_test = BB_TEST(is_successor, 0); + + if (is_successor) { + left_unboxed = *((PyObject **)(&(((PyFloatObject *)left)->ob_fval))); + right_unboxed = *((PyObject **)(&(((PyFloatObject *)right)->ob_fval))); + DECREF_INPUTS(); + } else { + left_unboxed = left; + right_unboxed = right; + } + } + + inst(BINARY_OP_ADD_FLOAT_UNBOXED, (left, right -- sum : PyRawFloat_Type)) { + STAT_INC(BINARY_OP, hit); + double temp = *(double *)(&(left)) + *(double *)(&(right)); + 
sum = *(PyObject **)(&temp); + } + + inst(BINARY_OP_SUBTRACT_FLOAT_UNBOXED, (left, right -- sum : PyRawFloat_Type)) { + STAT_INC(BINARY_OP, hit); + double temp = *(double *)(&(left)) - *(double *)(&(right)); + sum = *(PyObject **)(&temp); + } + + inst(BINARY_OP_MULTIPLY_FLOAT_UNBOXED, (left, right -- prod : PyRawFloat_Type)) { + STAT_INC(BINARY_OP, hit); + double temp = *(double *)(&(left)) * *(double *)(&(right)); + prod = *(PyObject **)(&temp); + } + + inst(UNBOX_FLOAT, (boxed_float, unused[oparg] -- unboxed_float : PyRawFloat_Type, unused[oparg])) { + double temp = ((PyFloatObject *)boxed_float)->ob_fval; + Py_DECREF(boxed_float); + unboxed_float = (*(PyObject **)(&temp)); + } + + inst(BOX_FLOAT, (raw_float, unused[oparg] -- boxed_float : PyFloat_Type, unused[oparg])) { + boxed_float = PyFloat_FromDouble(*(double *)(&(raw_float))); + } + + macro_inst(BINARY_OP_ADD_INT, (unused/1, left, right -- sum)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); + U_INST(BINARY_OP_ADD_INT_REST); + } + + inst(BINARY_CHECK_INT, (left, right -- left : <<= PyLong_Type, right : <<= PyLong_Type)) { + assert(cframe.use_tracing == 0); + char is_successor = PyLong_CheckExact(left) && (Py_TYPE(left) == Py_TYPE(right)); + bb_test = BB_TEST(is_successor, 0); + } + + u_inst(BINARY_OP_ADD_INT_REST, (left, right -- sum : PyLong_Type)) { STAT_INC(BINARY_OP, hit); sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -339,14 +432,18 @@ dummy_func( ERROR_IF(err, error); } - inst(BINARY_SUBSCR_LIST_INT, (unused/4, list, sub -- res)) { + macro_inst(BINARY_SUBSCR_LIST_INT, (unused/4, list, sub -- res)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR); - Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + U_INST(BINARY_SUBSCR_LIST_INT_REST); + } + + u_inst(BINARY_SUBSCR_LIST_INT_REST, (list, sub -- res)) { + Py_ssize_t index = ((PyLongObject *)sub)->long_value.ob_digit[0]; DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); res = PyList_GET_ITEM(list, index); @@ -356,6 +453,11 @@ dummy_func( Py_DECREF(list); } + inst(CHECK_LIST, (container, unused[oparg] -- container : { <<= PyList_Type, PyList_Type}, unused[oparg])) { + char is_successor = PyList_CheckExact(container); + bb_test = BB_TEST(is_successor, 0); + } + inst(BINARY_SUBSCR_TUPLE_INT, (unused/4, tuple, sub -- res)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); @@ -410,7 +512,7 @@ dummy_func( DISPATCH_INLINED(new_frame); } - inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { + inst(LIST_APPEND, (list, unused[oparg-1], v -- list : PyList_Type, unused[oparg-1])) { ERROR_IF(_PyList_AppendTakeRef((PyListObject *)list, v) < 0, error); PREDICT(JUMP_BACKWARD); } @@ -448,7 +550,7 @@ dummy_func( ERROR_IF(err, error); } - inst(STORE_SUBSCR_LIST_INT, (unused/1, value, list, sub -- )) { + macro_inst(STORE_SUBSCR_LIST_INT, (unused/1, value, list, sub -- )) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(sub), STORE_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), STORE_SUBSCR); @@ -456,7 +558,12 @@ dummy_func( // Ensure nonnegative, zero-or-one-digit ints. 
DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), STORE_SUBSCR); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; - // Ensure index < len(list) + U_INST(STORE_SUBSCR_LIST_INT_REST); + } + + u_inst(STORE_SUBSCR_LIST_INT_REST, (value, list, sub -- )) { + Py_ssize_t index = ((PyLongObject *)sub)->long_value.ob_digit[0]; + /* Ensure index < len(list) */ DEOPT_IF(index >= PyList_GET_SIZE(list), STORE_SUBSCR); STAT_INC(STORE_SUBSCR, hit); @@ -878,7 +985,7 @@ dummy_func( UNPACK_SEQUENCE_LIST, }; - inst(UNPACK_SEQUENCE, (unused/1, seq -- unused[oparg])) { + inst(UNPACK_SEQUENCE, (unused/1, seq -- values[oparg])) { #if ENABLE_SPECIALIZATION _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -928,7 +1035,7 @@ dummy_func( DECREF_INPUTS(); } - inst(UNPACK_EX, (seq -- unused[oparg & 0xFF], unused, unused[oparg >> 8])) { + inst(UNPACK_EX, (seq -- unused[oparg >> 8], unused, values[oparg & 0xFF])) { int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); PyObject **top = stack_pointer + totalargs - 1; int res = unpack_iterable(tstate, seq, oparg & 0xFF, oparg >> 8, top); @@ -1145,13 +1252,13 @@ dummy_func( null = NULL; } - inst(DELETE_FAST, (--)) { + inst(DELETE_FAST, (--), locals[oparg] = NULL) { PyObject *v = GETLOCAL(oparg); ERROR_IF(v == NULL, unbound_local_error); SETLOCAL(oparg, NULL); } - inst(MAKE_CELL, (--)) { + inst(MAKE_CELL, (--), locals[oparg] = NULL) { // "initial" is probably NULL but not if it's an arg (or set // via PyFrame_LocalsToFast() before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); @@ -1239,18 +1346,18 @@ dummy_func( } } - inst(BUILD_STRING, (pieces[oparg] -- str)) { + inst(BUILD_STRING, (pieces[oparg] -- str: PyUnicode_Type)) { str = _PyUnicode_JoinArray(&_Py_STR(empty), pieces, oparg); DECREF_INPUTS(); ERROR_IF(str == NULL, error); } - inst(BUILD_TUPLE, (values[oparg] -- tup)) { + inst(BUILD_TUPLE, (values[oparg] -- tup: PyTuple_Type)) { tup = _PyTuple_FromArraySteal(values, oparg); ERROR_IF(tup == NULL, error); } - inst(BUILD_LIST, (values[oparg] -- list)) { + inst(BUILD_LIST, (values[oparg] -- list: PyList_Type)) { list = _PyList_FromArraySteal(values, oparg); ERROR_IF(list == NULL, error); } @@ -1273,13 +1380,13 @@ dummy_func( DECREF_INPUTS(); } - inst(SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1])) { + inst(SET_UPDATE, (set, unused[oparg-1], iterable -- set: PySet_Type, unused[oparg-1])) { int err = _PySet_Update(set, iterable); DECREF_INPUTS(); ERROR_IF(err < 0, error); } - inst(BUILD_SET, (values[oparg] -- set)) { + inst(BUILD_SET, (values[oparg] -- set: PySet_Type)) { set = PySet_New(NULL); if (set == NULL) goto error; @@ -1296,7 +1403,7 @@ dummy_func( } } - inst(BUILD_MAP, (values[oparg*2] -- map)) { + inst(BUILD_MAP, (values[oparg*2] -- map: PyDict_Type)) { map = _PyDict_FromItems( values, 2, values+1, 2, @@ -1350,7 +1457,7 @@ dummy_func( } } - inst(BUILD_CONST_KEY_MAP, (values[oparg], keys -- map)) { + inst(BUILD_CONST_KEY_MAP, (values[oparg], keys -- map: PyDict_Type)) { if (!PyTuple_CheckExact(keys) || PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) { _PyErr_SetString(tstate, PyExc_SystemError, @@ -1764,13 +1871,13 @@ dummy_func( Py_INCREF(res); } - inst(IS_OP, (left, right -- b)) { + inst(IS_OP, (left, right -- b: PyBool_Type)) { int res = Py_Is(left, right) ^ oparg; DECREF_INPUTS(); b = Py_NewRef(res ? 
Py_True : Py_False); } - inst(CONTAINS_OP, (left, right -- b)) { + inst(CONTAINS_OP, (left, right -- b: PyBool_Type)) { int res = PySequence_Contains(right, left); DECREF_INPUTS(); ERROR_IF(res < 0, error); @@ -1798,7 +1905,7 @@ dummy_func( } } - inst(CHECK_EXC_MATCH, (left, right -- left, b)) { + inst(CHECK_EXC_MATCH, (left, right -- left, b: PyBool_Type)) { assert(PyExceptionInstance_Check(left)); if (check_except_type_valid(tstate, right) < 0) { DECREF_INPUTS(); @@ -1828,6 +1935,11 @@ dummy_func( } inst(JUMP_BACKWARD, (--)) { + frame->f_code->_tier2_warmup++; + GO_TO_INSTRUCTION(JUMP_BACKWARD_QUICK); + } + + inst(JUMP_BACKWARD_QUICK, (--)) { assert(oparg < INSTR_OFFSET()); JUMPBY(-oparg); CHECK_EVAL_BREAKER(); @@ -1853,6 +1965,27 @@ dummy_func( } } + inst(BB_TEST_POP_IF_FALSE, (cond -- )) { + if (Py_IsTrue(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(1, 0); + } + else if (Py_IsFalse(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(0, 0); + } + else { + int err = PyObject_IsTrue(cond); + Py_DECREF(cond); + if (err == 0) { + bb_test = BB_TEST(0, 0); + } + else { + ERROR_IF(err < 0, error); + } + } + } + inst(POP_JUMP_IF_TRUE, (cond -- )) { if (Py_IsFalse(cond)) { _Py_DECREF_NO_DEALLOC(cond); @@ -1873,6 +2006,28 @@ dummy_func( } } + inst(BB_TEST_POP_IF_TRUE, (cond -- )) { + if (Py_IsFalse(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(1, 0); + } + else if (Py_IsTrue(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(0, 0); + } + else { + int err = PyObject_IsTrue(cond); + Py_DECREF(cond); + if (err > 0) { + bb_test = BB_TEST(0, 0); + } + else { + ERROR_IF(err < 0, error); + } + } + } + + inst(POP_JUMP_IF_NOT_NONE, (value -- )) { if (!Py_IsNone(value)) { DECREF_INPUTS(); @@ -1883,6 +2038,17 @@ dummy_func( } } + inst(BB_TEST_POP_IF_NOT_NONE, (value -- )) { + if (!Py_IsNone(value)) { + Py_DECREF(value); + bb_test = BB_TEST(0, 0); + } + else { + _Py_DECREF_NO_DEALLOC(value); + bb_test = BB_TEST(1, 0); + } + } + inst(POP_JUMP_IF_NONE, (value -- )) { if (Py_IsNone(value)) { _Py_DECREF_NO_DEALLOC(value); @@ -1893,6 +2059,17 @@ dummy_func( } } + inst(BB_TEST_POP_IF_NONE, (value -- )) { + if (Py_IsNone(value)) { + Py_DECREF(value); + bb_test = BB_TEST(0, 0); + } + else { + _Py_DECREF_NO_DEALLOC(value); + bb_test = BB_TEST(1, 0); + } + } + inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) { /* This bytecode is used in the `yield from` or `await` loop. * If there is an interrupt, we want it handled in the innermost @@ -1902,7 +2079,7 @@ dummy_func( JUMPBY(-oparg); } - inst(GET_LEN, (obj -- obj, len_o)) { + inst(GET_LEN, (obj -- obj, len_o: PyLong_Type)) { // PUSH(len(TOS)) Py_ssize_t len_i = PyObject_Length(obj); ERROR_IF(len_i < 0, error); @@ -1925,13 +2102,13 @@ dummy_func( } } - inst(MATCH_MAPPING, (subject -- subject, res)) { + inst(MATCH_MAPPING, (subject -- subject, res: PyBool_Type)) { int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_MAPPING; res = Py_NewRef(match ? Py_True : Py_False); PREDICT(POP_JUMP_IF_FALSE); } - inst(MATCH_SEQUENCE, (subject -- subject, res)) { + inst(MATCH_SEQUENCE, (subject -- subject, res: PyBool_Type)) { int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_SEQUENCE; res = Py_NewRef(match ? 
Py_True : Py_False); PREDICT(POP_JUMP_IF_FALSE); @@ -1998,7 +2175,7 @@ dummy_func( if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); next_instr--; - _Py_Specialize_ForIter(iter, next_instr, oparg); + _Py_Specialize_ForIter(iter, next_instr, oparg, 0); DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); @@ -2027,6 +2204,40 @@ dummy_func( // Common case: no jump, leave it to the code generator } + // FOR_ITER + inst(BB_TEST_ITER, (unused/1, iter -- iter, next)) { + #if ENABLE_SPECIALIZATION + _PyForIterCache *cache = (_PyForIterCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_ForIter(iter, next_instr, oparg, 1); + DISPATCH_SAME_OPARG(); + } + STAT_INC(BB_TEST_ITER, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); + #endif /* ENABLE_SPECIALIZATION */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + goto error; + } + else if (tstate->c_tracefunc != NULL) { + call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + } + bb_test = BB_TEST(1, 0); + } + inst(FOR_ITER_LIST, (unused/1, iter -- iter, next)) { assert(cframe.use_tracing == 0); DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); @@ -2050,6 +2261,30 @@ dummy_func( // Common case: no jump, leave it to the code generator } + inst(BB_TEST_ITER_LIST, (unused/1, iter -- iter, next)) { + assert(cframe.use_tracing == 0); + DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, BB_TEST_ITER); + _PyListIterObject *it = (_PyListIterObject *)iter; + STAT_INC(FOR_ITER, hit); + PyListObject *seq = it->it_seq; + if (seq) { + if (it->it_index < PyList_GET_SIZE(seq)) { + next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); + goto end_bb_iter_list; // End of this instruction + } + it->it_seq = NULL; + Py_DECREF(seq); + } + Py_DECREF(iter); + STACK_SHRINK(1); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + end_bb_iter_list: + // Common case: no jump, leave it to the code generator + bb_test = BB_TEST(1, 0); + } + inst(FOR_ITER_TUPLE, (unused/1, iter -- iter, next)) { assert(cframe.use_tracing == 0); _PyTupleIterObject *it = (_PyTupleIterObject *)iter; @@ -2073,6 +2308,30 @@ dummy_func( // Common case: no jump, leave it to the code generator } + inst(BB_TEST_ITER_TUPLE, (unused/1, iter -- iter, next)) { + assert(cframe.use_tracing == 0); + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, BB_TEST_ITER); + STAT_INC(FOR_ITER, hit); + PyTupleObject *seq = it->it_seq; + if (seq) { + if (it->it_index < PyTuple_GET_SIZE(seq)) { + next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); + goto end_test_iter_tuple; // End of this instruction + } + it->it_seq = NULL; + Py_DECREF(seq); + } + Py_DECREF(iter); + STACK_SHRINK(1); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + end_test_iter_tuple: + // Common case: no jump, leave it to the code generator + bb_test = BB_TEST(1, 0); + } + inst(FOR_ITER_RANGE, (unused/1, iter -- iter, next)) { assert(cframe.use_tracing == 0); _PyRangeIterObject *r = (_PyRangeIterObject *)iter; @@ -2094,6 +2353,28 @@ dummy_func( } } + inst(BB_TEST_ITER_RANGE, (unused / 1, iter -- iter, 
next)) { + assert(cframe.use_tracing == 0); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; + DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, BB_TEST_ITER); + STAT_INC(FOR_ITER, hit); + if (r->len <= 0) { + STACK_SHRINK(1); + Py_DECREF(r); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + } + long value = r->start; + r->start = value + r->step; + r->len--; + next = PyLong_FromLong(value); + if (next == NULL) { + goto error; + } + bb_test = BB_TEST(1, 0); + } + inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) { assert(cframe.use_tracing == 0); PyGenObject *gen = (PyGenObject *)iter; @@ -2179,7 +2460,7 @@ dummy_func( } } - inst(WITH_EXCEPT_START, (exit_func, lasti, unused, val -- exit_func, lasti, unused, val, res)) { + inst(WITH_EXCEPT_START, (exit_func, lasti, unused, val -- exit_func, lasti: PyLong_Type, unused, val, res)) { /* At the top of the stack are 4 values: - val: TOP = exc_info() - unused: SECOND = previous exception @@ -2956,11 +3237,16 @@ dummy_func( ERROR_IF(result == NULL, error); } - inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top: *bottom)) { assert(oparg > 0); top = Py_NewRef(bottom); } + inst(COPY_NO_INCREF, (bottom, unused[oparg - 1] -- bottom, unused[oparg - 1], top: *bottom)) { + assert(oparg > 0); + top = bottom; + } + inst(BINARY_OP, (unused/1, lhs, rhs -- res)) { #if ENABLE_SPECIALIZATION _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; @@ -2982,12 +3268,12 @@ dummy_func( } inst(SWAP, (bottom, unused[oparg-2], top -- - top, unused[oparg-2], bottom)) { + top : *top, unused[oparg-2], bottom : *bottom)) { assert(oparg >= 2); } inst(EXTENDED_ARG, (--)) { - assert(oparg); + // assert(oparg); assert(cframe.use_tracing == 0); opcode = next_instr->op.code; oparg = oparg << 8 | next_instr->op.arg; @@ -2999,6 +3285,125 @@ dummy_func( Py_UNREACHABLE(); } + // Tier 2 instructions + // Type propagator assumes this doesn't affect type context + inst(BB_BRANCH, (unused/1 --)) { + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + if (BB_TEST_IS_SUCCESSOR(bb_test)) { + // Rewrite self + _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET); + // Generate consequent. + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + 0, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback; + DISPATCH(); + } + } + else { + // Rewrite self + _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET); + // Generate alternative. + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + oparg, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback + oparg; + DISPATCH(); + } + } + // Their addresses should be the same. Because + // The first BB should be generated right after the previous one. 
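+ // (Concretely: the newly generated BB is laid out immediately after this
+ // BB_BRANCH's inline cache entries, which is what the assert below checks.)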
+ assert(next_instr + INLINE_CACHE_ENTRIES_BB_BRANCH == t2_nextinstr); + next_instr = t2_nextinstr; + DISPATCH(); + } + + inst(BB_BRANCH_IF_FLAG_UNSET, (unused/1 --)) { + if (!BB_TEST_IS_SUCCESSOR(bb_test)) { + _Py_CODEUNIT *curr = next_instr - 1; + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + oparg, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback; + } + next_instr = t2_nextinstr; + + // Rewrite self + _PyTier2_RewriteForwardJump(curr, next_instr); + DISPATCH(); + } + } + + inst(BB_JUMP_IF_FLAG_UNSET, (unused/1 --)) { + if (!BB_TEST_IS_SUCCESSOR(bb_test)) { + JUMPBY(oparg); + DISPATCH(); + } + // Fall through to next instruction. + } + + inst(BB_BRANCH_IF_FLAG_SET, (unused/1 --)) { + if (BB_TEST_IS_SUCCESSOR(bb_test)) { + _Py_CODEUNIT *curr = next_instr - 1; + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + oparg, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback; + } + next_instr = t2_nextinstr; + + // Rewrite self + _PyTier2_RewriteForwardJump(curr, next_instr); + DISPATCH(); + } + } + + inst(BB_JUMP_IF_FLAG_SET, (unused/1 --)) { + if (BB_TEST_IS_SUCCESSOR(bb_test)) { + JUMPBY(oparg); + DISPATCH(); + } + // Fall through to next instruction. + } + + // Type propagator assumes this doesn't affect type context + inst(BB_JUMP_BACKWARD_LAZY, (--)) { + _Py_CODEUNIT *curr = next_instr - 1; + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + + t2_nextinstr = _PyTier2_LocateJumpBackwardsBB( + frame, cache->bb_id_tagged, -oparg, &tier1_fallback, curr, + STACK_LEVEL()); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback; + } + next_instr = t2_nextinstr; + + // Rewrite self + _PyTier2_RewriteBackwardJump(curr, next_instr); + } + // END BYTECODES // diff --git a/Python/ceval.c b/Python/ceval.c index 7d60cf987e9c47..6cf504a91e0c35 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -725,6 +725,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int _PyInterpreterFrame entry_frame; PyObject *kwnames = NULL; // Borrowed reference. Reset by CALL instructions. + char bb_test = BB_TEST(0, 0); + /* WARNING: Because the _PyCFrame lives on the C stack, * but can be accessed from a heap allocated object (tstate) * strict stack discipline must be maintained. 
@@ -1008,7 +1010,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject **stackbase = _PyFrame_Stackbase(frame); while (stack_pointer > stackbase) { PyObject *o = POP(); - Py_XDECREF(o); + Py_XDECREF(o); } assert(STACK_LEVEL() == 0); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index c2257515a30599..1d3f15735a031a 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -68,7 +68,7 @@ lastopcode = op; \ } while (0) #else -#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++) +#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++); /* fprintf(stderr, "%d: %s\n", INSTR_OFFSET(), _PyOpcode_OpName[op]); */ #endif #if USE_COMPUTED_GOTOS @@ -140,7 +140,9 @@ GETITEM(PyObject *v, Py_ssize_t i) { /* Code access macros */ /* The integer overflow is checked by an assertion below. */ -#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(frame->f_code))) +// TODO change this calculation when interpreter is bb aware. +#define INSTR_OFFSET() ((int)(next_instr - \ + (frame->f_code->_tier2_info == NULL ? _PyCode_CODE(frame->f_code) : frame->f_code->_tier2_info->_bb_space->u_code))) #define NEXTOPARG() do { \ _Py_CODEUNIT word = *next_instr; \ opcode = word.op.code; \ @@ -249,6 +251,8 @@ GETITEM(PyObject *v, Py_ssize_t i) { #define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \ GETLOCAL(i) = value; \ Py_XDECREF(tmp); } while (0) +#define SETLOCAL_NO_DECREF(i, value) do { PyObject *tmp = GETLOCAL(i); \ + GETLOCAL(i) = value;} while (0) #define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op) diff --git a/Python/frame.c b/Python/frame.c index c2c0be30113912..1adc7a328792a5 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -136,7 +136,11 @@ _PyFrame_ClearExceptCode(_PyInterpreterFrame *frame) Py_DECREF(f); } assert(frame->stacktop >= 0); + char *unboxed_bitmask = _PyFrame_GetUnboxedBitMask(frame); for (int i = 0; i < frame->stacktop; i++) { + if (unboxed_bitmask[i]) { + continue; + } Py_XDECREF(frame->localsplus[i]); } Py_XDECREF(frame->frame_obj); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d793c1e23bc48e..9ae72b8d258798 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3,29 +3,87 @@ // Python/bytecodes.c // Do not edit! 
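+ // The UOP_*_REST macros below hold the tails of the split micro-ops (the
+ // u_inst definitions in Python/bytecodes.c). The existing specialized
+ // TARGETs invoke them, and matching standalone *_REST TARGETs with the same
+ // bodies are emitted further down.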
+ #define UOP_BINARY_OP_MULTIPLY_INT_REST() \ + do { \ + STAT_INC(BINARY_OP, hit);\ + prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right);\ + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);\ + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);\ + if (prod == NULL) goto pop_2_error;\ + } while (0) + + #define UOP_BINARY_OP_SUBTRACT_INT_REST() \ + do { \ + STAT_INC(BINARY_OP, hit);\ + sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right);\ + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);\ + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);\ + if (sub == NULL) goto pop_2_error;\ + } while (0) + + #define UOP_BINARY_OP_ADD_INT_REST() \ + do { \ + STAT_INC(BINARY_OP, hit);\ + sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right);\ + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);\ + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);\ + if (sum == NULL) goto pop_2_error;\ + } while (0) + + #define UOP_BINARY_SUBSCR_LIST_INT_REST() \ + do { \ + Py_ssize_t index = ((PyLongObject *)sub)->long_value.ob_digit[0];\ + DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR);\ + STAT_INC(BINARY_SUBSCR, hit);\ + res = PyList_GET_ITEM(list, index);\ + assert(res != NULL);\ + Py_INCREF(res);\ + _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free);\ + Py_DECREF(list);\ + } while (0) + + #define UOP_STORE_SUBSCR_LIST_INT_REST() \ + do { \ + Py_ssize_t index = ((PyLongObject *)sub)->long_value.ob_digit[0];\ + /* Ensure index < len(list) */\ + DEOPT_IF(index >= PyList_GET_SIZE(list), STORE_SUBSCR);\ + STAT_INC(STORE_SUBSCR, hit);\ +\ + PyObject *old_value = PyList_GET_ITEM(list, index);\ + PyList_SET_ITEM(list, index, value);\ + assert(old_value != NULL);\ + Py_DECREF(old_value);\ + _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free);\ + Py_DECREF(list);\ + } while (0) + TARGET(NOP) { DISPATCH(); } TARGET(RESUME) { - #line 89 "Python/bytecodes.c" + if (cframe.use_tracing == 0) { + next_instr = _PyCode_Tier2Warmup(frame, next_instr); + } + GO_TO_INSTRUCTION(RESUME_QUICK); + } + + TARGET(RESUME_QUICK) { + PREDICTED(RESUME_QUICK); assert(tstate->cframe == &cframe); assert(frame == cframe.current_frame); if (_Py_atomic_load_relaxed_int32(eval_breaker) && oparg < 2) { goto handle_eval_breaker; } - #line 18 "Python/generated_cases.c.h" DISPATCH(); } TARGET(LOAD_CLOSURE) { PyObject *value; - #line 97 "Python/bytecodes.c" /* We keep LOAD_CLOSURE so that the bytecode stays more readable. 
*/ value = GETLOCAL(oparg); if (value == NULL) goto unbound_local_error; Py_INCREF(value); - #line 29 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -33,11 +91,9 @@ TARGET(LOAD_FAST_CHECK) { PyObject *value; - #line 104 "Python/bytecodes.c" value = GETLOCAL(oparg); if (value == NULL) goto unbound_local_error; Py_INCREF(value); - #line 41 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -45,11 +101,17 @@ TARGET(LOAD_FAST) { PyObject *value; - #line 110 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - #line 53 "Python/generated_cases.c.h" + STACK_GROW(1); + stack_pointer[-1] = value; + DISPATCH(); + } + + TARGET(LOAD_FAST_NO_INCREF) { + PyObject *value; + value = GETLOCAL(oparg); STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -58,10 +120,8 @@ TARGET(LOAD_CONST) { PREDICTED(LOAD_CONST); PyObject *value; - #line 116 "Python/bytecodes.c" value = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(value); - #line 65 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -69,9 +129,30 @@ TARGET(STORE_FAST) { PyObject *value = stack_pointer[-1]; - #line 121 "Python/bytecodes.c" SETLOCAL(oparg, value); - #line 75 "Python/generated_cases.c.h" + STACK_SHRINK(1); + DISPATCH(); + } + + TARGET(STORE_FAST_BOXED_UNBOXED) { + PyObject *value = stack_pointer[-1]; + SETLOCAL_NO_DECREF(oparg, value); + _PyFrame_GetUnboxedBitMask(frame)[oparg] = false; + STACK_SHRINK(1); + DISPATCH(); + } + + TARGET(STORE_FAST_UNBOXED_BOXED) { + PyObject *value = stack_pointer[-1]; + SETLOCAL(oparg, value); + _PyFrame_GetUnboxedBitMask(frame)[oparg] = true; + STACK_SHRINK(1); + DISPATCH(); + } + + TARGET(STORE_FAST_UNBOXED_UNBOXED) { + PyObject *value = stack_pointer[-1]; + SETLOCAL_NO_DECREF(oparg, value); STACK_SHRINK(1); DISPATCH(); } @@ -81,21 +162,17 @@ PyObject *_tmp_2; { PyObject *value; - #line 110 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - #line 89 "Python/generated_cases.c.h" _tmp_2 = value; } oparg = (next_instr++)->op.arg; { PyObject *value; - #line 110 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - #line 99 "Python/generated_cases.c.h" _tmp_1 = value; } STACK_GROW(2); @@ -109,20 +186,16 @@ PyObject *_tmp_2; { PyObject *value; - #line 110 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - #line 117 "Python/generated_cases.c.h" _tmp_2 = value; } oparg = (next_instr++)->op.arg; { PyObject *value; - #line 116 "Python/bytecodes.c" value = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(value); - #line 126 "Python/generated_cases.c.h" _tmp_1 = value; } STACK_GROW(2); @@ -135,18 +208,14 @@ PyObject *_tmp_1 = stack_pointer[-1]; { PyObject *value = _tmp_1; - #line 121 "Python/bytecodes.c" SETLOCAL(oparg, value); - #line 141 "Python/generated_cases.c.h" } oparg = (next_instr++)->op.arg; { PyObject *value; - #line 110 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - #line 150 "Python/generated_cases.c.h" _tmp_1 = value; } stack_pointer[-1] = _tmp_1; @@ -158,16 +227,12 @@ PyObject *_tmp_2 = stack_pointer[-2]; { PyObject *value = _tmp_1; - #line 121 "Python/bytecodes.c" SETLOCAL(oparg, value); - #line 164 "Python/generated_cases.c.h" } oparg = (next_instr++)->op.arg; { PyObject *value = _tmp_2; - #line 121 "Python/bytecodes.c" SETLOCAL(oparg, value); - #line 171 "Python/generated_cases.c.h" } 
STACK_SHRINK(2); DISPATCH(); @@ -178,20 +243,16 @@ PyObject *_tmp_2; { PyObject *value; - #line 116 "Python/bytecodes.c" value = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(value); - #line 185 "Python/generated_cases.c.h" _tmp_2 = value; } oparg = (next_instr++)->op.arg; { PyObject *value; - #line 110 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - #line 195 "Python/generated_cases.c.h" _tmp_1 = value; } STACK_GROW(2); @@ -202,18 +263,20 @@ TARGET(POP_TOP) { PyObject *value = stack_pointer[-1]; - #line 131 "Python/bytecodes.c" - #line 207 "Python/generated_cases.c.h" Py_DECREF(value); STACK_SHRINK(1); DISPATCH(); } + TARGET(POP_TOP_NO_DECREF) { + PyObject *value = stack_pointer[-1]; + STACK_SHRINK(1); + DISPATCH(); + } + TARGET(PUSH_NULL) { PyObject *res; - #line 135 "Python/bytecodes.c" res = NULL; - #line 217 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; DISPATCH(); @@ -224,14 +287,10 @@ PyObject *_tmp_2 = stack_pointer[-2]; { PyObject *value = _tmp_1; - #line 131 "Python/bytecodes.c" - #line 229 "Python/generated_cases.c.h" Py_DECREF(value); } { PyObject *value = _tmp_2; - #line 131 "Python/bytecodes.c" - #line 235 "Python/generated_cases.c.h" Py_DECREF(value); } STACK_SHRINK(2); @@ -241,13 +300,9 @@ TARGET(UNARY_NEGATIVE) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 141 "Python/bytecodes.c" res = PyNumber_Negative(value); - #line 247 "Python/generated_cases.c.h" Py_DECREF(value); - #line 143 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; - #line 251 "Python/generated_cases.c.h" stack_pointer[-1] = res; DISPATCH(); } @@ -255,11 +310,8 @@ TARGET(UNARY_NOT) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 147 "Python/bytecodes.c" int err = PyObject_IsTrue(value); - #line 261 "Python/generated_cases.c.h" Py_DECREF(value); - #line 149 "Python/bytecodes.c" if (err < 0) goto pop_1_error; if (err == 0) { res = Py_True; @@ -268,7 +320,6 @@ res = Py_False; } Py_INCREF(res); - #line 272 "Python/generated_cases.c.h" stack_pointer[-1] = res; DISPATCH(); } @@ -276,13 +327,9 @@ TARGET(UNARY_INVERT) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 160 "Python/bytecodes.c" res = PyNumber_Invert(value); - #line 282 "Python/generated_cases.c.h" Py_DECREF(value); - #line 162 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; - #line 286 "Python/generated_cases.c.h" stack_pointer[-1] = res; DISPATCH(); } @@ -291,19 +338,27 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *prod; - #line 179 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); + UOP_BINARY_OP_MULTIPLY_INT_REST(); + STACK_SHRINK(1); + stack_pointer[-1] = prod; + next_instr += 1; + DISPATCH(); + } + + TARGET(BINARY_OP_MULTIPLY_INT_REST) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + PyObject *prod; STAT_INC(BINARY_OP, hit); prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (prod == NULL) goto pop_2_error; - #line 304 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = prod; - next_instr += 1; DISPATCH(); } @@ -311,7 +366,6 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *prod; - #line 190 "Python/bytecodes.c" assert(cframe.use_tracing == 0); 
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); @@ -319,7 +373,6 @@ double dprod = ((PyFloatObject *)left)->ob_fval * ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dprod, prod); - #line 323 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = prod; next_instr += 1; @@ -330,19 +383,27 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sub; - #line 200 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); + UOP_BINARY_OP_SUBTRACT_INT_REST(); + STACK_SHRINK(1); + stack_pointer[-1] = sub; + next_instr += 1; + DISPATCH(); + } + + TARGET(BINARY_OP_SUBTRACT_INT_REST) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + PyObject *sub; STAT_INC(BINARY_OP, hit); sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (sub == NULL) goto pop_2_error; - #line 343 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sub; - next_instr += 1; DISPATCH(); } @@ -350,14 +411,12 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sub; - #line 211 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); double dsub = ((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dsub, sub); - #line 361 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sub; next_instr += 1; @@ -368,7 +427,6 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 220 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -377,7 +435,6 @@ _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res == NULL) goto pop_2_error; - #line 381 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -387,7 +444,6 @@ TARGET(BINARY_OP_INPLACE_ADD_UNICODE) { PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; - #line 237 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -415,7 +471,6 @@ if (*target_local == NULL) goto pop_2_error; // The STORE_FAST is already done. 
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP + 1); - #line 419 "Python/generated_cases.c.h" STACK_SHRINK(2); DISPATCH(); } @@ -424,7 +479,6 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sum; - #line 267 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -432,30 +486,123 @@ double dsum = ((PyFloatObject *)left)->ob_fval + ((PyFloatObject *)right)->ob_fval; DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dsum, sum); - #line 436 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sum; next_instr += 1; DISPATCH(); } + TARGET(BINARY_CHECK_FLOAT) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + PyObject *left_unboxed; + PyObject *right_unboxed; + assert(cframe.use_tracing == 0); + char is_successor = PyFloat_CheckExact(left) && (Py_TYPE(left) == Py_TYPE(right)); + bb_test = BB_TEST(is_successor, 0); + + if (is_successor) { + left_unboxed = *((PyObject **)(&(((PyFloatObject *)left)->ob_fval))); + right_unboxed = *((PyObject **)(&(((PyFloatObject *)right)->ob_fval))); + Py_DECREF(left); + Py_DECREF(right); + } else { + left_unboxed = left; + right_unboxed = right; + } + stack_pointer[-1] = right_unboxed; + stack_pointer[-2] = left_unboxed; + DISPATCH(); + } + + TARGET(BINARY_OP_ADD_FLOAT_UNBOXED) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + PyObject *sum; + STAT_INC(BINARY_OP, hit); + double temp = *(double *)(&(left)) + *(double *)(&(right)); + sum = *(PyObject **)(&temp); + STACK_SHRINK(1); + stack_pointer[-1] = sum; + DISPATCH(); + } + + TARGET(BINARY_OP_SUBTRACT_FLOAT_UNBOXED) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + PyObject *sum; + STAT_INC(BINARY_OP, hit); + double temp = *(double *)(&(left)) - *(double *)(&(right)); + sum = *(PyObject **)(&temp); + STACK_SHRINK(1); + stack_pointer[-1] = sum; + DISPATCH(); + } + + TARGET(BINARY_OP_MULTIPLY_FLOAT_UNBOXED) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + PyObject *prod; + STAT_INC(BINARY_OP, hit); + double temp = *(double *)(&(left)) * *(double *)(&(right)); + prod = *(PyObject **)(&temp); + STACK_SHRINK(1); + stack_pointer[-1] = prod; + DISPATCH(); + } + + TARGET(UNBOX_FLOAT) { + PyObject *boxed_float = stack_pointer[-(1 + oparg)]; + PyObject *unboxed_float; + double temp = ((PyFloatObject *)boxed_float)->ob_fval; + Py_DECREF(boxed_float); + unboxed_float = (*(PyObject **)(&temp)); + stack_pointer[-(1 + oparg)] = unboxed_float; + DISPATCH(); + } + + TARGET(BOX_FLOAT) { + PyObject *raw_float = stack_pointer[-(1 + oparg)]; + PyObject *boxed_float; + boxed_float = PyFloat_FromDouble(*(double *)(&(raw_float))); + stack_pointer[-(1 + oparg)] = boxed_float; + DISPATCH(); + } + TARGET(BINARY_OP_ADD_INT) { PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sum; - #line 277 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); + UOP_BINARY_OP_ADD_INT_REST(); + STACK_SHRINK(1); + stack_pointer[-1] = sum; + next_instr += 1; + DISPATCH(); + } + + TARGET(BINARY_CHECK_INT) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + assert(cframe.use_tracing == 0); + char is_successor = PyLong_CheckExact(left) && (Py_TYPE(left) == Py_TYPE(right)); + bb_test = BB_TEST(is_successor, 0); + DISPATCH(); + } + + 
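+ // Guard-free tail of BINARY_OP_ADD_INT (see the u_inst split in
+ // Python/bytecodes.c); presumably selected by the tier-2 code generator once
+ // BINARY_CHECK_INT has already established that both operands are exact
+ // ints, so no DEOPT_IF is needed here.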
TARGET(BINARY_OP_ADD_INT_REST) { + PyObject *right = stack_pointer[-1]; + PyObject *left = stack_pointer[-2]; + PyObject *sum; STAT_INC(BINARY_OP, hit); sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); if (sum == NULL) goto pop_2_error; - #line 456 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sum; - next_instr += 1; DISPATCH(); } @@ -465,7 +612,6 @@ PyObject *sub = stack_pointer[-1]; PyObject *container = stack_pointer[-2]; PyObject *res; - #line 296 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -478,12 +624,9 @@ DECREMENT_ADAPTIVE_COUNTER(cache->counter); #endif /* ENABLE_SPECIALIZATION */ res = PyObject_GetItem(container, sub); - #line 482 "Python/generated_cases.c.h" Py_DECREF(container); Py_DECREF(sub); - #line 309 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 487 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 4; @@ -495,7 +638,6 @@ PyObject *start = stack_pointer[-2]; PyObject *container = stack_pointer[-3]; PyObject *res; - #line 313 "Python/bytecodes.c" PyObject *slice = _PyBuildSlice_ConsumeRefs(start, stop); // Can't use ERROR_IF() here, because we haven't // DECREF'ed container yet, and we still own slice. @@ -508,7 +650,6 @@ } Py_DECREF(container); if (res == NULL) goto pop_3_error; - #line 512 "Python/generated_cases.c.h" STACK_SHRINK(2); stack_pointer[-1] = res; DISPATCH(); @@ -519,7 +660,6 @@ PyObject *start = stack_pointer[-2]; PyObject *container = stack_pointer[-3]; PyObject *v = stack_pointer[-4]; - #line 328 "Python/bytecodes.c" PyObject *slice = _PyBuildSlice_ConsumeRefs(start, stop); int err; if (slice == NULL) { @@ -532,7 +672,6 @@ Py_DECREF(v); Py_DECREF(container); if (err) goto pop_4_error; - #line 536 "Python/generated_cases.c.h" STACK_SHRINK(4); DISPATCH(); } @@ -541,14 +680,24 @@ PyObject *sub = stack_pointer[-1]; PyObject *list = stack_pointer[-2]; PyObject *res; - #line 343 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR); - Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + UOP_BINARY_SUBSCR_LIST_INT_REST(); + STACK_SHRINK(1); + stack_pointer[-1] = res; + next_instr += 4; + DISPATCH(); + } + + TARGET(BINARY_SUBSCR_LIST_INT_REST) { + PyObject *sub = stack_pointer[-1]; + PyObject *list = stack_pointer[-2]; + PyObject *res; + Py_ssize_t index = ((PyLongObject *)sub)->long_value.ob_digit[0]; DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); res = PyList_GET_ITEM(list, index); @@ -556,10 +705,15 @@ Py_INCREF(res); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(list); - #line 560 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 4; + DISPATCH(); + } + + TARGET(CHECK_LIST) { + PyObject *container = stack_pointer[-(1 + oparg)]; + char is_successor = PyList_CheckExact(container); + bb_test = BB_TEST(is_successor, 0); DISPATCH(); } @@ -567,7 +721,6 @@ PyObject *sub = stack_pointer[-1]; PyObject *tuple = stack_pointer[-2]; PyObject *res; - #line 360 "Python/bytecodes.c" assert(cframe.use_tracing == 0); 
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); @@ -582,7 +735,6 @@ Py_INCREF(res); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(tuple); - #line 586 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 4; @@ -593,7 +745,6 @@ PyObject *sub = stack_pointer[-1]; PyObject *dict = stack_pointer[-2]; PyObject *res; - #line 377 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); @@ -602,14 +753,11 @@ if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); } - #line 606 "Python/generated_cases.c.h" Py_DECREF(dict); Py_DECREF(sub); - #line 386 "Python/bytecodes.c" if (true) goto pop_2_error; } Py_INCREF(res); // Do this before DECREF'ing dict, sub - #line 613 "Python/generated_cases.c.h" Py_DECREF(dict); Py_DECREF(sub); STACK_SHRINK(1); @@ -623,7 +771,6 @@ PyObject *container = stack_pointer[-2]; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t func_version = read_u16(&next_instr[3].cache); - #line 393 "Python/bytecodes.c" PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); @@ -642,15 +789,12 @@ new_frame->localsplus[1] = sub; JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); DISPATCH_INLINED(new_frame); - #line 646 "Python/generated_cases.c.h" } TARGET(LIST_APPEND) { PyObject *v = stack_pointer[-1]; PyObject *list = stack_pointer[-(2 + (oparg-1))]; - #line 414 "Python/bytecodes.c" if (_PyList_AppendTakeRef((PyListObject *)list, v) < 0) goto pop_1_error; - #line 654 "Python/generated_cases.c.h" STACK_SHRINK(1); PREDICT(JUMP_BACKWARD); DISPATCH(); @@ -659,13 +803,9 @@ TARGET(SET_ADD) { PyObject *v = stack_pointer[-1]; PyObject *set = stack_pointer[-(2 + (oparg-1))]; - #line 419 "Python/bytecodes.c" int err = PySet_Add(set, v); - #line 665 "Python/generated_cases.c.h" Py_DECREF(v); - #line 421 "Python/bytecodes.c" if (err) goto pop_1_error; - #line 669 "Python/generated_cases.c.h" STACK_SHRINK(1); PREDICT(JUMP_BACKWARD); DISPATCH(); @@ -678,7 +818,6 @@ PyObject *container = stack_pointer[-2]; PyObject *v = stack_pointer[-3]; uint16_t counter = read_u16(&next_instr[0].cache); - #line 432 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { assert(cframe.use_tracing == 0); @@ -694,13 +833,10 @@ #endif /* ENABLE_SPECIALIZATION */ /* container[sub] = v */ int err = PyObject_SetItem(container, sub, v); - #line 698 "Python/generated_cases.c.h" Py_DECREF(v); Py_DECREF(container); Py_DECREF(sub); - #line 448 "Python/bytecodes.c" if (err) goto pop_3_error; - #line 704 "Python/generated_cases.c.h" STACK_SHRINK(3); next_instr += 1; DISPATCH(); @@ -710,7 +846,6 @@ PyObject *sub = stack_pointer[-1]; PyObject *list = stack_pointer[-2]; PyObject *value = stack_pointer[-3]; - #line 452 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(sub), STORE_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), STORE_SUBSCR); @@ -718,7 +853,18 @@ // Ensure nonnegative, zero-or-one-digit ints. 
DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), STORE_SUBSCR); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; - // Ensure index < len(list) + UOP_STORE_SUBSCR_LIST_INT_REST(); + STACK_SHRINK(3); + next_instr += 1; + DISPATCH(); + } + + TARGET(STORE_SUBSCR_LIST_INT_REST) { + PyObject *sub = stack_pointer[-1]; + PyObject *list = stack_pointer[-2]; + PyObject *value = stack_pointer[-3]; + Py_ssize_t index = ((PyLongObject *)sub)->long_value.ob_digit[0]; + /* Ensure index < len(list) */ DEOPT_IF(index >= PyList_GET_SIZE(list), STORE_SUBSCR); STAT_INC(STORE_SUBSCR, hit); @@ -728,9 +874,7 @@ Py_DECREF(old_value); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(list); - #line 732 "Python/generated_cases.c.h" STACK_SHRINK(3); - next_instr += 1; DISPATCH(); } @@ -738,14 +882,12 @@ PyObject *sub = stack_pointer[-1]; PyObject *dict = stack_pointer[-2]; PyObject *value = stack_pointer[-3]; - #line 472 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyDict_CheckExact(dict), STORE_SUBSCR); STAT_INC(STORE_SUBSCR, hit); int err = _PyDict_SetItem_Take2((PyDictObject *)dict, sub, value); Py_DECREF(dict); if (err) goto pop_3_error; - #line 749 "Python/generated_cases.c.h" STACK_SHRINK(3); next_instr += 1; DISPATCH(); @@ -754,15 +896,11 @@ TARGET(DELETE_SUBSCR) { PyObject *sub = stack_pointer[-1]; PyObject *container = stack_pointer[-2]; - #line 481 "Python/bytecodes.c" /* del container[sub] */ int err = PyObject_DelItem(container, sub); - #line 761 "Python/generated_cases.c.h" Py_DECREF(container); Py_DECREF(sub); - #line 484 "Python/bytecodes.c" if (err) goto pop_2_error; - #line 766 "Python/generated_cases.c.h" STACK_SHRINK(2); DISPATCH(); } @@ -770,14 +908,10 @@ TARGET(CALL_INTRINSIC_1) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 488 "Python/bytecodes.c" assert(oparg <= MAX_INTRINSIC_1); res = _PyIntrinsics_UnaryFunctions[oparg](tstate, value); - #line 777 "Python/generated_cases.c.h" Py_DECREF(value); - #line 491 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; - #line 781 "Python/generated_cases.c.h" stack_pointer[-1] = res; DISPATCH(); } @@ -786,15 +920,11 @@ PyObject *value1 = stack_pointer[-1]; PyObject *value2 = stack_pointer[-2]; PyObject *res; - #line 495 "Python/bytecodes.c" assert(oparg <= MAX_INTRINSIC_2); res = _PyIntrinsics_BinaryFunctions[oparg](tstate, value2, value1); - #line 793 "Python/generated_cases.c.h" Py_DECREF(value2); Py_DECREF(value1); - #line 498 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 798 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; DISPATCH(); @@ -802,7 +932,6 @@ TARGET(RAISE_VARARGS) { PyObject **args = (stack_pointer - oparg); - #line 502 "Python/bytecodes.c" PyObject *cause = NULL, *exc = NULL; switch (oparg) { case 2: @@ -820,12 +949,10 @@ break; } if (true) { STACK_SHRINK(oparg); goto error; } - #line 824 "Python/generated_cases.c.h" } TARGET(INTERPRETER_EXIT) { PyObject *retval = stack_pointer[-1]; - #line 522 "Python/bytecodes.c" assert(frame == &entry_frame); assert(_PyFrame_IsIncomplete(frame)); STACK_SHRINK(1); // Since we're not going to DISPATCH() @@ -837,12 +964,10 @@ assert(!_PyErr_Occurred(tstate)); _Py_LeaveRecursiveCallTstate(tstate); return retval; - #line 841 "Python/generated_cases.c.h" } TARGET(RETURN_VALUE) { PyObject *retval = stack_pointer[-1]; - #line 536 "Python/bytecodes.c" STACK_SHRINK(1); assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -856,11 +981,9 @@ _PyEvalFrameClearAndPop(tstate, 
dying); _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 860 "Python/generated_cases.c.h" } TARGET(RETURN_CONST) { - #line 552 "Python/bytecodes.c" PyObject *retval = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(retval); assert(EMPTY()); @@ -875,13 +998,11 @@ _PyEvalFrameClearAndPop(tstate, dying); _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 879 "Python/generated_cases.c.h" } TARGET(GET_AITER) { PyObject *obj = stack_pointer[-1]; PyObject *iter; - #line 569 "Python/bytecodes.c" unaryfunc getter = NULL; PyTypeObject *type = Py_TYPE(obj); @@ -894,16 +1015,12 @@ "'async for' requires an object with " "__aiter__ method, got %.100s", type->tp_name); - #line 898 "Python/generated_cases.c.h" Py_DECREF(obj); - #line 582 "Python/bytecodes.c" if (true) goto pop_1_error; } iter = (*getter)(obj); - #line 905 "Python/generated_cases.c.h" Py_DECREF(obj); - #line 587 "Python/bytecodes.c" if (iter == NULL) goto pop_1_error; if (Py_TYPE(iter)->tp_as_async == NULL || @@ -916,7 +1033,6 @@ Py_DECREF(iter); if (true) goto pop_1_error; } - #line 920 "Python/generated_cases.c.h" stack_pointer[-1] = iter; DISPATCH(); } @@ -924,7 +1040,6 @@ TARGET(GET_ANEXT) { PyObject *aiter = stack_pointer[-1]; PyObject *awaitable; - #line 602 "Python/bytecodes.c" unaryfunc getter = NULL; PyObject *next_iter = NULL; PyTypeObject *type = Py_TYPE(aiter); @@ -968,7 +1083,6 @@ } } - #line 972 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = awaitable; PREDICT(LOAD_CONST); @@ -979,16 +1093,13 @@ PREDICTED(GET_AWAITABLE); PyObject *iterable = stack_pointer[-1]; PyObject *iter; - #line 649 "Python/bytecodes.c" iter = _PyCoro_GetAwaitableIter(iterable); if (iter == NULL) { format_awaitable_error(tstate, Py_TYPE(iterable), oparg); } - #line 990 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 656 "Python/bytecodes.c" if (iter != NULL && PyCoro_CheckExact(iter)) { PyObject *yf = _PyGen_yf((PyGenObject*)iter); @@ -1006,7 +1117,6 @@ if (iter == NULL) goto pop_1_error; - #line 1010 "Python/generated_cases.c.h" stack_pointer[-1] = iter; PREDICT(LOAD_CONST); DISPATCH(); @@ -1017,7 +1127,6 @@ PyObject *v = stack_pointer[-1]; PyObject *receiver = stack_pointer[-2]; PyObject *retval; - #line 682 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PySendCache *cache = (_PySendCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -1053,7 +1162,6 @@ assert(retval != NULL); } Py_DECREF(v); - #line 1057 "Python/generated_cases.c.h" stack_pointer[-1] = retval; next_instr += 1; DISPATCH(); @@ -1062,7 +1170,6 @@ TARGET(SEND_GEN) { PyObject *v = stack_pointer[-1]; PyObject *receiver = stack_pointer[-2]; - #line 720 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyGenObject *gen = (PyGenObject *)receiver; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && @@ -1078,12 +1185,10 @@ tstate->exc_info = &gen->gi_exc_state; JUMPBY(INLINE_CACHE_ENTRIES_SEND + oparg); DISPATCH_INLINED(gen_frame); - #line 1082 "Python/generated_cases.c.h" } TARGET(YIELD_VALUE) { PyObject *retval = stack_pointer[-1]; - #line 738 "Python/bytecodes.c" // NOTE: It's important that YIELD_VALUE never raises an exception! // The compiler treats any exception raised here as a failed close() // or throw() call. 
@@ -1102,15 +1207,12 @@ frame->prev_instr -= frame->yield_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 1106 "Python/generated_cases.c.h" } TARGET(POP_EXCEPT) { PyObject *exc_value = stack_pointer[-1]; - #line 759 "Python/bytecodes.c" _PyErr_StackItem *exc_info = tstate->exc_info; Py_XSETREF(exc_info->exc_value, exc_value); - #line 1114 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } @@ -1118,7 +1220,6 @@ TARGET(RERAISE) { PyObject *exc = stack_pointer[-1]; PyObject **values = (stack_pointer - (1 + oparg)); - #line 764 "Python/bytecodes.c" assert(oparg >= 0 && oparg <= 2); if (oparg) { PyObject *lasti = values[0]; @@ -1136,26 +1237,21 @@ Py_INCREF(exc); _PyErr_SetRaisedException(tstate, exc); goto exception_unwind; - #line 1140 "Python/generated_cases.c.h" } TARGET(END_ASYNC_FOR) { PyObject *exc = stack_pointer[-1]; PyObject *awaitable = stack_pointer[-2]; - #line 784 "Python/bytecodes.c" assert(exc && PyExceptionInstance_Check(exc)); if (PyErr_GivenExceptionMatches(exc, PyExc_StopAsyncIteration)) { - #line 1149 "Python/generated_cases.c.h" Py_DECREF(awaitable); Py_DECREF(exc); - #line 787 "Python/bytecodes.c" } else { Py_INCREF(exc); _PyErr_SetRaisedException(tstate, exc); goto exception_unwind; } - #line 1159 "Python/generated_cases.c.h" STACK_SHRINK(2); DISPATCH(); } @@ -1166,23 +1262,19 @@ PyObject *sub_iter = stack_pointer[-3]; PyObject *none; PyObject *value; - #line 796 "Python/bytecodes.c" assert(throwflag); assert(exc_value && PyExceptionInstance_Check(exc_value)); if (PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration)) { value = Py_NewRef(((PyStopIterationObject *)exc_value)->value); - #line 1175 "Python/generated_cases.c.h" Py_DECREF(sub_iter); Py_DECREF(last_sent_val); Py_DECREF(exc_value); - #line 801 "Python/bytecodes.c" none = Py_NewRef(Py_None); } else { _PyErr_SetRaisedException(tstate, Py_NewRef(exc_value)); goto exception_unwind; } - #line 1186 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = value; stack_pointer[-2] = none; @@ -1191,9 +1283,7 @@ TARGET(LOAD_ASSERTION_ERROR) { PyObject *value; - #line 810 "Python/bytecodes.c" value = Py_NewRef(PyExc_AssertionError); - #line 1197 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -1201,7 +1291,6 @@ TARGET(LOAD_BUILD_CLASS) { PyObject *bc; - #line 814 "Python/bytecodes.c" if (PyDict_CheckExact(BUILTINS())) { bc = _PyDict_GetItemWithError(BUILTINS(), &_Py_ID(__build_class__)); @@ -1223,7 +1312,6 @@ if (true) goto error; } } - #line 1227 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = bc; DISPATCH(); @@ -1231,33 +1319,26 @@ TARGET(STORE_NAME) { PyObject *v = stack_pointer[-1]; - #line 838 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); PyObject *ns = LOCALS(); int err; if (ns == NULL) { _PyErr_Format(tstate, PyExc_SystemError, "no locals found when storing %R", name); - #line 1242 "Python/generated_cases.c.h" Py_DECREF(v); - #line 845 "Python/bytecodes.c" if (true) goto pop_1_error; } if (PyDict_CheckExact(ns)) err = PyDict_SetItem(ns, name, v); else err = PyObject_SetItem(ns, name, v); - #line 1251 "Python/generated_cases.c.h" Py_DECREF(v); - #line 852 "Python/bytecodes.c" if (err) goto pop_1_error; - #line 1255 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(DELETE_NAME) { - #line 856 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); PyObject *ns = LOCALS(); int err; @@ -1274,7 +1355,6 @@ name); goto error; } - #line 1278 
"Python/generated_cases.c.h" DISPATCH(); } @@ -1282,7 +1362,7 @@ PREDICTED(UNPACK_SEQUENCE); static_assert(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE == 1, "incorrect cache size"); PyObject *seq = stack_pointer[-1]; - #line 882 "Python/bytecodes.c" + PyObject **values = stack_pointer - (1); #if ENABLE_SPECIALIZATION _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -1296,11 +1376,8 @@ #endif /* ENABLE_SPECIALIZATION */ PyObject **top = stack_pointer + oparg - 1; int res = unpack_iterable(tstate, seq, oparg, -1, top); - #line 1300 "Python/generated_cases.c.h" Py_DECREF(seq); - #line 896 "Python/bytecodes.c" if (res == 0) goto pop_1_error; - #line 1304 "Python/generated_cases.c.h" STACK_SHRINK(1); STACK_GROW(oparg); next_instr += 1; @@ -1310,14 +1387,12 @@ TARGET(UNPACK_SEQUENCE_TWO_TUPLE) { PyObject *seq = stack_pointer[-1]; PyObject **values = stack_pointer - (1); - #line 900 "Python/bytecodes.c" DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); DEOPT_IF(PyTuple_GET_SIZE(seq) != 2, UNPACK_SEQUENCE); assert(oparg == 2); STAT_INC(UNPACK_SEQUENCE, hit); values[0] = Py_NewRef(PyTuple_GET_ITEM(seq, 1)); values[1] = Py_NewRef(PyTuple_GET_ITEM(seq, 0)); - #line 1321 "Python/generated_cases.c.h" Py_DECREF(seq); STACK_SHRINK(1); STACK_GROW(oparg); @@ -1328,7 +1403,6 @@ TARGET(UNPACK_SEQUENCE_TUPLE) { PyObject *seq = stack_pointer[-1]; PyObject **values = stack_pointer - (1); - #line 910 "Python/bytecodes.c" DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); DEOPT_IF(PyTuple_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE); STAT_INC(UNPACK_SEQUENCE, hit); @@ -1336,7 +1410,6 @@ for (int i = oparg; --i >= 0; ) { *values++ = Py_NewRef(items[i]); } - #line 1340 "Python/generated_cases.c.h" Py_DECREF(seq); STACK_SHRINK(1); STACK_GROW(oparg); @@ -1347,7 +1420,6 @@ TARGET(UNPACK_SEQUENCE_LIST) { PyObject *seq = stack_pointer[-1]; PyObject **values = stack_pointer - (1); - #line 921 "Python/bytecodes.c" DEOPT_IF(!PyList_CheckExact(seq), UNPACK_SEQUENCE); DEOPT_IF(PyList_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE); STAT_INC(UNPACK_SEQUENCE, hit); @@ -1355,7 +1427,6 @@ for (int i = oparg; --i >= 0; ) { *values++ = Py_NewRef(items[i]); } - #line 1359 "Python/generated_cases.c.h" Py_DECREF(seq); STACK_SHRINK(1); STACK_GROW(oparg); @@ -1365,16 +1436,13 @@ TARGET(UNPACK_EX) { PyObject *seq = stack_pointer[-1]; - #line 932 "Python/bytecodes.c" + PyObject **values = stack_pointer - (1) + 1 + (oparg >> 8); int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); PyObject **top = stack_pointer + totalargs - 1; int res = unpack_iterable(tstate, seq, oparg & 0xFF, oparg >> 8, top); - #line 1373 "Python/generated_cases.c.h" Py_DECREF(seq); - #line 936 "Python/bytecodes.c" if (res == 0) goto pop_1_error; - #line 1377 "Python/generated_cases.c.h" - STACK_GROW((oparg & 0xFF) + (oparg >> 8)); + STACK_GROW((oparg >> 8) + (oparg & 0xFF)); DISPATCH(); } @@ -1384,7 +1452,6 @@ PyObject *owner = stack_pointer[-1]; PyObject *v = stack_pointer[-2]; uint16_t counter = read_u16(&next_instr[0].cache); - #line 947 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { assert(cframe.use_tracing == 0); @@ -1401,12 +1468,9 @@ #endif /* ENABLE_SPECIALIZATION */ PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err = PyObject_SetAttr(owner, name, v); - #line 1405 "Python/generated_cases.c.h" Py_DECREF(v); Py_DECREF(owner); - #line 964 "Python/bytecodes.c" if (err) goto pop_2_error; - #line 1410 "Python/generated_cases.c.h" STACK_SHRINK(2); 
next_instr += 4; DISPATCH(); @@ -1414,34 +1478,25 @@ TARGET(DELETE_ATTR) { PyObject *owner = stack_pointer[-1]; - #line 968 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err = PyObject_SetAttr(owner, name, (PyObject *)NULL); - #line 1421 "Python/generated_cases.c.h" Py_DECREF(owner); - #line 971 "Python/bytecodes.c" if (err) goto pop_1_error; - #line 1425 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(STORE_GLOBAL) { PyObject *v = stack_pointer[-1]; - #line 975 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err = PyDict_SetItem(GLOBALS(), name, v); - #line 1435 "Python/generated_cases.c.h" Py_DECREF(v); - #line 978 "Python/bytecodes.c" if (err) goto pop_1_error; - #line 1439 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(DELETE_GLOBAL) { - #line 982 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err; err = PyDict_DelItem(GLOBALS(), name); @@ -1453,13 +1508,11 @@ } goto error; } - #line 1457 "Python/generated_cases.c.h" DISPATCH(); } TARGET(LOAD_NAME) { PyObject *v; - #line 996 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); PyObject *locals = LOCALS(); if (locals == NULL) { @@ -1518,7 +1571,6 @@ } } } - #line 1522 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = v; DISPATCH(); @@ -1529,7 +1581,6 @@ static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); PyObject *null = NULL; PyObject *v; - #line 1063 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -1582,7 +1633,6 @@ } } null = NULL; - #line 1586 "Python/generated_cases.c.h" STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = v; @@ -1596,7 +1646,6 @@ PyObject *res; uint16_t index = read_u16(&next_instr[1].cache); uint16_t version = read_u16(&next_instr[2].cache); - #line 1118 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); PyDictObject *dict = (PyDictObject *)GLOBALS(); @@ -1608,7 +1657,6 @@ Py_INCREF(res); STAT_INC(LOAD_GLOBAL, hit); null = NULL; - #line 1612 "Python/generated_cases.c.h" STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -1623,7 +1671,6 @@ uint16_t index = read_u16(&next_instr[1].cache); uint16_t mod_version = read_u16(&next_instr[2].cache); uint16_t bltn_version = read_u16(&next_instr[3].cache); - #line 1132 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); DEOPT_IF(!PyDict_CheckExact(BUILTINS()), LOAD_GLOBAL); @@ -1638,7 +1685,6 @@ Py_INCREF(res); STAT_INC(LOAD_GLOBAL, hit); null = NULL; - #line 1642 "Python/generated_cases.c.h" STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -1648,16 +1694,13 @@ } TARGET(DELETE_FAST) { - #line 1149 "Python/bytecodes.c" PyObject *v = GETLOCAL(oparg); if (v == NULL) goto unbound_local_error; SETLOCAL(oparg, NULL); - #line 1656 "Python/generated_cases.c.h" DISPATCH(); } TARGET(MAKE_CELL) { - #line 1155 "Python/bytecodes.c" // "initial" is probably NULL but not if it's an arg (or set // via PyFrame_LocalsToFast() before MAKE_CELL has run). 
PyObject *initial = GETLOCAL(oparg); @@ -1666,12 +1709,10 @@ goto resume_with_error; } SETLOCAL(oparg, cell); - #line 1670 "Python/generated_cases.c.h" DISPATCH(); } TARGET(DELETE_DEREF) { - #line 1166 "Python/bytecodes.c" PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); // Can't use ERROR_IF here. @@ -1682,13 +1723,11 @@ } PyCell_SET(cell, NULL); Py_DECREF(oldobj); - #line 1686 "Python/generated_cases.c.h" DISPATCH(); } TARGET(LOAD_CLASSDEREF) { PyObject *value; - #line 1179 "Python/bytecodes.c" PyObject *name, *locals = LOCALS(); assert(locals); assert(oparg >= 0 && oparg < frame->f_code->co_nlocalsplus); @@ -1720,7 +1759,6 @@ } Py_INCREF(value); } - #line 1724 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -1728,7 +1766,6 @@ TARGET(LOAD_DEREF) { PyObject *value; - #line 1213 "Python/bytecodes.c" PyObject *cell = GETLOCAL(oparg); value = PyCell_GET(cell); if (value == NULL) { @@ -1736,7 +1773,6 @@ if (true) goto error; } Py_INCREF(value); - #line 1740 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -1744,18 +1780,15 @@ TARGET(STORE_DEREF) { PyObject *v = stack_pointer[-1]; - #line 1223 "Python/bytecodes.c" PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); PyCell_SET(cell, v); Py_XDECREF(oldobj); - #line 1753 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(COPY_FREE_VARS) { - #line 1230 "Python/bytecodes.c" /* Copy closure variables to free variables */ PyCodeObject *co = frame->f_code; assert(PyFunction_Check(frame->f_funcobj)); @@ -1766,22 +1799,17 @@ PyObject *o = PyTuple_GET_ITEM(closure, i); frame->localsplus[offset + i] = Py_NewRef(o); } - #line 1770 "Python/generated_cases.c.h" DISPATCH(); } TARGET(BUILD_STRING) { PyObject **pieces = (stack_pointer - oparg); PyObject *str; - #line 1243 "Python/bytecodes.c" str = _PyUnicode_JoinArray(&_Py_STR(empty), pieces, oparg); - #line 1779 "Python/generated_cases.c.h" for (int _i = oparg; --_i >= 0;) { Py_DECREF(pieces[_i]); } - #line 1245 "Python/bytecodes.c" if (str == NULL) { STACK_SHRINK(oparg); goto error; } - #line 1785 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = str; @@ -1791,10 +1819,8 @@ TARGET(BUILD_TUPLE) { PyObject **values = (stack_pointer - oparg); PyObject *tup; - #line 1249 "Python/bytecodes.c" tup = _PyTuple_FromArraySteal(values, oparg); if (tup == NULL) { STACK_SHRINK(oparg); goto error; } - #line 1798 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = tup; @@ -1804,10 +1830,8 @@ TARGET(BUILD_LIST) { PyObject **values = (stack_pointer - oparg); PyObject *list; - #line 1254 "Python/bytecodes.c" list = _PyList_FromArraySteal(values, oparg); if (list == NULL) { STACK_SHRINK(oparg); goto error; } - #line 1811 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = list; @@ -1817,7 +1841,6 @@ TARGET(LIST_EXTEND) { PyObject *iterable = stack_pointer[-1]; PyObject *list = stack_pointer[-(2 + (oparg-1))]; - #line 1259 "Python/bytecodes.c" PyObject *none_val = _PyList_Extend((PyListObject *)list, iterable); if (none_val == NULL) { if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) && @@ -1828,13 +1851,10 @@ "Value after * must be an iterable, not %.200s", Py_TYPE(iterable)->tp_name); } - #line 1832 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 1270 "Python/bytecodes.c" if (true) goto pop_1_error; } Py_DECREF(none_val); - #line 1838 "Python/generated_cases.c.h" Py_DECREF(iterable); 
STACK_SHRINK(1); DISPATCH(); @@ -1843,13 +1863,9 @@ TARGET(SET_UPDATE) { PyObject *iterable = stack_pointer[-1]; PyObject *set = stack_pointer[-(2 + (oparg-1))]; - #line 1277 "Python/bytecodes.c" int err = _PySet_Update(set, iterable); - #line 1849 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 1279 "Python/bytecodes.c" if (err < 0) goto pop_1_error; - #line 1853 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } @@ -1857,7 +1873,6 @@ TARGET(BUILD_SET) { PyObject **values = (stack_pointer - oparg); PyObject *set; - #line 1283 "Python/bytecodes.c" set = PySet_New(NULL); if (set == NULL) goto error; @@ -1872,7 +1887,6 @@ Py_DECREF(set); if (true) { STACK_SHRINK(oparg); goto error; } } - #line 1876 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = set; @@ -1882,7 +1896,6 @@ TARGET(BUILD_MAP) { PyObject **values = (stack_pointer - oparg*2); PyObject *map; - #line 1300 "Python/bytecodes.c" map = _PyDict_FromItems( values, 2, values+1, 2, @@ -1890,13 +1903,10 @@ if (map == NULL) goto error; - #line 1894 "Python/generated_cases.c.h" for (int _i = oparg*2; --_i >= 0;) { Py_DECREF(values[_i]); } - #line 1308 "Python/bytecodes.c" if (map == NULL) { STACK_SHRINK(oparg*2); goto error; } - #line 1900 "Python/generated_cases.c.h" STACK_SHRINK(oparg*2); STACK_GROW(1); stack_pointer[-1] = map; @@ -1904,7 +1914,6 @@ } TARGET(SETUP_ANNOTATIONS) { - #line 1312 "Python/bytecodes.c" int err; PyObject *ann_dict; if (LOCALS() == NULL) { @@ -1944,7 +1953,6 @@ Py_DECREF(ann_dict); } } - #line 1948 "Python/generated_cases.c.h" DISPATCH(); } @@ -1952,7 +1960,6 @@ PyObject *keys = stack_pointer[-1]; PyObject **values = (stack_pointer - (1 + oparg)); PyObject *map; - #line 1354 "Python/bytecodes.c" if (!PyTuple_CheckExact(keys) || PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) { _PyErr_SetString(tstate, PyExc_SystemError, @@ -1962,14 +1969,11 @@ map = _PyDict_FromItems( &PyTuple_GET_ITEM(keys, 0), 1, values, 1, oparg); - #line 1966 "Python/generated_cases.c.h" for (int _i = oparg; --_i >= 0;) { Py_DECREF(values[_i]); } Py_DECREF(keys); - #line 1364 "Python/bytecodes.c" if (map == NULL) { STACK_SHRINK(oparg); goto pop_1_error; } - #line 1973 "Python/generated_cases.c.h" STACK_SHRINK(oparg); stack_pointer[-1] = map; DISPATCH(); @@ -1977,7 +1981,6 @@ TARGET(DICT_UPDATE) { PyObject *update = stack_pointer[-1]; - #line 1368 "Python/bytecodes.c" PyObject *dict = PEEK(oparg + 1); // update is still on the stack if (PyDict_Update(dict, update) < 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_AttributeError)) { @@ -1985,12 +1988,9 @@ "'%.200s' object is not a mapping", Py_TYPE(update)->tp_name); } - #line 1989 "Python/generated_cases.c.h" Py_DECREF(update); - #line 1376 "Python/bytecodes.c" if (true) goto pop_1_error; } - #line 1994 "Python/generated_cases.c.h" Py_DECREF(update); STACK_SHRINK(1); DISPATCH(); @@ -1998,17 +1998,13 @@ TARGET(DICT_MERGE) { PyObject *update = stack_pointer[-1]; - #line 1382 "Python/bytecodes.c" PyObject *dict = PEEK(oparg + 1); // update is still on the stack if (_PyDict_MergeEx(dict, update, 2) < 0) { format_kwargs_error(tstate, PEEK(3 + oparg), update); - #line 2007 "Python/generated_cases.c.h" Py_DECREF(update); - #line 1387 "Python/bytecodes.c" if (true) goto pop_1_error; } - #line 2012 "Python/generated_cases.c.h" Py_DECREF(update); STACK_SHRINK(1); PREDICT(CALL_FUNCTION_EX); @@ -2018,13 +2014,11 @@ TARGET(MAP_ADD) { PyObject *value = stack_pointer[-1]; PyObject *key = stack_pointer[-2]; - #line 1394 "Python/bytecodes.c" PyObject *dict = 
PEEK(oparg + 2); // key, value are still on the stack assert(PyDict_CheckExact(dict)); /* dict[key] = value */ // Do not DECREF INPUTS because the function steals the references if (_PyDict_SetItem_Take2((PyDictObject *)dict, key, value) != 0) goto pop_2_error; - #line 2028 "Python/generated_cases.c.h" STACK_SHRINK(2); PREDICT(JUMP_BACKWARD); DISPATCH(); @@ -2036,7 +2030,6 @@ PyObject *owner = stack_pointer[-1]; PyObject *res2 = NULL; PyObject *res; - #line 1417 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyAttrCache *cache = (_PyAttrCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -2071,9 +2064,7 @@ NULL | meth | arg1 | ... | argN */ - #line 2075 "Python/generated_cases.c.h" Py_DECREF(owner); - #line 1452 "Python/bytecodes.c" if (meth == NULL) goto pop_1_error; res2 = NULL; res = meth; @@ -2082,12 +2073,9 @@ else { /* Classic, pushes one value. */ res = PyObject_GetAttr(owner, name); - #line 2086 "Python/generated_cases.c.h" Py_DECREF(owner); - #line 1461 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; } - #line 2091 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } @@ -2101,7 +2089,6 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1466 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); @@ -2115,7 +2102,6 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2119 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2130,7 +2116,6 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1483 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; @@ -2144,7 +2129,6 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2148 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2159,7 +2143,6 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1500 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); @@ -2187,7 +2170,6 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2191 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2202,7 +2184,6 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1531 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); @@ -2213,7 +2194,6 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2217 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 
1 : 0)); stack_pointer[-1] = res; @@ -2228,7 +2208,6 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 1545 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyType_Check(cls), LOAD_ATTR); @@ -2241,7 +2220,6 @@ res = descr; assert(res != NULL); Py_INCREF(res); - #line 2245 "Python/generated_cases.c.h" Py_DECREF(cls); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2255,7 +2233,6 @@ uint32_t type_version = read_u32(&next_instr[1].cache); uint32_t func_version = read_u32(&next_instr[3].cache); PyObject *fget = read_obj(&next_instr[5].cache); - #line 1561 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(tstate->interp->eval_frame, LOAD_ATTR); @@ -2279,7 +2256,6 @@ new_frame->localsplus[0] = owner; JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); DISPATCH_INLINED(new_frame); - #line 2283 "Python/generated_cases.c.h" } TARGET(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN) { @@ -2287,7 +2263,6 @@ uint32_t type_version = read_u32(&next_instr[1].cache); uint32_t func_version = read_u32(&next_instr[3].cache); PyObject *getattribute = read_obj(&next_instr[5].cache); - #line 1587 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(tstate->interp->eval_frame, LOAD_ATTR); PyTypeObject *cls = Py_TYPE(owner); @@ -2313,7 +2288,6 @@ new_frame->localsplus[1] = Py_NewRef(name); JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); DISPATCH_INLINED(new_frame); - #line 2317 "Python/generated_cases.c.h" } TARGET(STORE_ATTR_INSTANCE_VALUE) { @@ -2321,7 +2295,6 @@ PyObject *value = stack_pointer[-2]; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1615 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); @@ -2340,7 +2313,6 @@ Py_DECREF(old_value); } Py_DECREF(owner); - #line 2344 "Python/generated_cases.c.h" STACK_SHRINK(2); next_instr += 4; DISPATCH(); @@ -2351,7 +2323,6 @@ PyObject *value = stack_pointer[-2]; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t hint = read_u16(&next_instr[3].cache); - #line 1636 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); @@ -2391,7 +2362,6 @@ /* PEP 509 */ dict->ma_version_tag = new_version; Py_DECREF(owner); - #line 2395 "Python/generated_cases.c.h" STACK_SHRINK(2); next_instr += 4; DISPATCH(); @@ -2402,7 +2372,6 @@ PyObject *value = stack_pointer[-2]; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1678 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); @@ -2413,7 +2382,6 @@ *(PyObject **)addr = value; Py_XDECREF(old_value); Py_DECREF(owner); - #line 2417 "Python/generated_cases.c.h" STACK_SHRINK(2); next_instr += 4; DISPATCH(); @@ -2425,7 +2393,6 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 1698 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -2439,12 +2406,9 @@ #endif /* ENABLE_SPECIALIZATION */ assert((oparg >> 4) <= Py_GE); res = PyObject_RichCompare(left, right, oparg>>4); - #line 2443 "Python/generated_cases.c.h" Py_DECREF(left); Py_DECREF(right); - #line 1712 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 2448 "Python/generated_cases.c.h" STACK_SHRINK(1); 
stack_pointer[-1] = res; next_instr += 1; @@ -2455,7 +2419,6 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 1716 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); @@ -2468,7 +2431,6 @@ _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); res = (sign_ish & oparg) ? Py_True : Py_False; Py_INCREF(res); - #line 2472 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -2479,7 +2441,6 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 1732 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); @@ -2496,7 +2457,6 @@ _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); res = (sign_ish & oparg) ? Py_True : Py_False; Py_INCREF(res); - #line 2500 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -2507,7 +2467,6 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 1752 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); @@ -2521,7 +2480,6 @@ assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS); res = ((COMPARISON_NOT_EQUALS + eq) & oparg) ? Py_True : Py_False; Py_INCREF(res); - #line 2525 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -2532,14 +2490,10 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *b; - #line 1768 "Python/bytecodes.c" int res = Py_Is(left, right) ^ oparg; - #line 2538 "Python/generated_cases.c.h" Py_DECREF(left); Py_DECREF(right); - #line 1770 "Python/bytecodes.c" b = Py_NewRef(res ? Py_True : Py_False); - #line 2543 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = b; DISPATCH(); @@ -2549,15 +2503,11 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *b; - #line 1774 "Python/bytecodes.c" int res = PySequence_Contains(right, left); - #line 2555 "Python/generated_cases.c.h" Py_DECREF(left); Py_DECREF(right); - #line 1776 "Python/bytecodes.c" if (res < 0) goto pop_2_error; b = Py_NewRef((res^oparg) ? 
Py_True : Py_False); - #line 2561 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = b; DISPATCH(); @@ -2568,12 +2518,9 @@ PyObject *exc_value = stack_pointer[-2]; PyObject *rest; PyObject *match; - #line 1781 "Python/bytecodes.c" if (check_except_star_type_valid(tstate, match_type) < 0) { - #line 2574 "Python/generated_cases.c.h" Py_DECREF(exc_value); Py_DECREF(match_type); - #line 1783 "Python/bytecodes.c" if (true) goto pop_2_error; } @@ -2581,10 +2528,8 @@ rest = NULL; int res = exception_group_match(exc_value, match_type, &match, &rest); - #line 2585 "Python/generated_cases.c.h" Py_DECREF(exc_value); Py_DECREF(match_type); - #line 1791 "Python/bytecodes.c" if (res < 0) goto pop_2_error; assert((match == NULL) == (rest == NULL)); @@ -2593,7 +2538,6 @@ if (!Py_IsNone(match)) { PyErr_SetExcInfo(NULL, Py_NewRef(match), NULL); } - #line 2597 "Python/generated_cases.c.h" stack_pointer[-1] = match; stack_pointer[-2] = rest; DISPATCH(); @@ -2603,21 +2547,15 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *b; - #line 1802 "Python/bytecodes.c" assert(PyExceptionInstance_Check(left)); if (check_except_type_valid(tstate, right) < 0) { - #line 2610 "Python/generated_cases.c.h" Py_DECREF(right); - #line 1805 "Python/bytecodes.c" if (true) goto pop_1_error; } int res = PyErr_GivenExceptionMatches(left, right); - #line 2617 "Python/generated_cases.c.h" Py_DECREF(right); - #line 1810 "Python/bytecodes.c" b = Py_NewRef(res ? Py_True : Py_False); - #line 2621 "Python/generated_cases.c.h" stack_pointer[-1] = b; DISPATCH(); } @@ -2626,15 +2564,11 @@ PyObject *fromlist = stack_pointer[-1]; PyObject *level = stack_pointer[-2]; PyObject *res; - #line 1814 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); res = import_name(tstate, frame, name, fromlist, level); - #line 2633 "Python/generated_cases.c.h" Py_DECREF(level); Py_DECREF(fromlist); - #line 1817 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 2638 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; DISPATCH(); @@ -2643,29 +2577,29 @@ TARGET(IMPORT_FROM) { PyObject *from = stack_pointer[-1]; PyObject *res; - #line 1821 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); res = import_from(tstate, from, name); if (res == NULL) goto error; - #line 2651 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; DISPATCH(); } TARGET(JUMP_FORWARD) { - #line 1827 "Python/bytecodes.c" JUMPBY(oparg); - #line 2660 "Python/generated_cases.c.h" DISPATCH(); } TARGET(JUMP_BACKWARD) { PREDICTED(JUMP_BACKWARD); - #line 1831 "Python/bytecodes.c" + frame->f_code->_tier2_warmup++; + GO_TO_INSTRUCTION(JUMP_BACKWARD_QUICK); + } + + TARGET(JUMP_BACKWARD_QUICK) { + PREDICTED(JUMP_BACKWARD_QUICK); assert(oparg < INSTR_OFFSET()); JUMPBY(-oparg); - #line 2669 "Python/generated_cases.c.h" CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -2673,7 +2607,6 @@ TARGET(POP_JUMP_IF_FALSE) { PREDICTED(POP_JUMP_IF_FALSE); PyObject *cond = stack_pointer[-1]; - #line 1837 "Python/bytecodes.c" if (Py_IsTrue(cond)) { _Py_DECREF_NO_DEALLOC(cond); } @@ -2683,9 +2616,7 @@ } else { int err = PyObject_IsTrue(cond); - #line 2687 "Python/generated_cases.c.h" Py_DECREF(cond); - #line 1847 "Python/bytecodes.c" if (err == 0) { JUMPBY(oparg); } @@ -2693,14 +2624,36 @@ if (err < 0) goto pop_1_error; } } - #line 2697 "Python/generated_cases.c.h" + STACK_SHRINK(1); + DISPATCH(); + } + + TARGET(BB_TEST_POP_IF_FALSE) { + PyObject *cond = stack_pointer[-1]; + if 
(Py_IsTrue(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(1, 0); + } + else if (Py_IsFalse(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(0, 0); + } + else { + int err = PyObject_IsTrue(cond); + Py_DECREF(cond); + if (err == 0) { + bb_test = BB_TEST(0, 0); + } + else { + if (err < 0) goto pop_1_error; + } + } STACK_SHRINK(1); DISPATCH(); } TARGET(POP_JUMP_IF_TRUE) { PyObject *cond = stack_pointer[-1]; - #line 1857 "Python/bytecodes.c" if (Py_IsFalse(cond)) { _Py_DECREF_NO_DEALLOC(cond); } @@ -2710,9 +2663,7 @@ } else { int err = PyObject_IsTrue(cond); - #line 2714 "Python/generated_cases.c.h" Py_DECREF(cond); - #line 1867 "Python/bytecodes.c" if (err > 0) { JUMPBY(oparg); } @@ -2720,67 +2671,106 @@ if (err < 0) goto pop_1_error; } } - #line 2724 "Python/generated_cases.c.h" + STACK_SHRINK(1); + DISPATCH(); + } + + TARGET(BB_TEST_POP_IF_TRUE) { + PyObject *cond = stack_pointer[-1]; + if (Py_IsFalse(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(1, 0); + } + else if (Py_IsTrue(cond)) { + _Py_DECREF_NO_DEALLOC(cond); + bb_test = BB_TEST(0, 0); + } + else { + int err = PyObject_IsTrue(cond); + Py_DECREF(cond); + if (err > 0) { + bb_test = BB_TEST(0, 0); + } + else { + if (err < 0) goto pop_1_error; + } + } STACK_SHRINK(1); DISPATCH(); } TARGET(POP_JUMP_IF_NOT_NONE) { PyObject *value = stack_pointer[-1]; - #line 1877 "Python/bytecodes.c" if (!Py_IsNone(value)) { - #line 2733 "Python/generated_cases.c.h" Py_DECREF(value); - #line 1879 "Python/bytecodes.c" JUMPBY(oparg); } else { _Py_DECREF_NO_DEALLOC(value); } - #line 2741 "Python/generated_cases.c.h" + STACK_SHRINK(1); + DISPATCH(); + } + + TARGET(BB_TEST_POP_IF_NOT_NONE) { + PyObject *value = stack_pointer[-1]; + if (!Py_IsNone(value)) { + Py_DECREF(value); + bb_test = BB_TEST(0, 0); + } + else { + _Py_DECREF_NO_DEALLOC(value); + bb_test = BB_TEST(1, 0); + } STACK_SHRINK(1); DISPATCH(); } TARGET(POP_JUMP_IF_NONE) { PyObject *value = stack_pointer[-1]; - #line 1887 "Python/bytecodes.c" if (Py_IsNone(value)) { _Py_DECREF_NO_DEALLOC(value); JUMPBY(oparg); } else { - #line 2754 "Python/generated_cases.c.h" Py_DECREF(value); - #line 1893 "Python/bytecodes.c" } - #line 2758 "Python/generated_cases.c.h" + STACK_SHRINK(1); + DISPATCH(); + } + + TARGET(BB_TEST_POP_IF_NONE) { + PyObject *value = stack_pointer[-1]; + if (Py_IsNone(value)) { + Py_DECREF(value); + bb_test = BB_TEST(0, 0); + } + else { + _Py_DECREF_NO_DEALLOC(value); + bb_test = BB_TEST(1, 0); + } STACK_SHRINK(1); DISPATCH(); } TARGET(JUMP_BACKWARD_NO_INTERRUPT) { - #line 1897 "Python/bytecodes.c" /* This bytecode is used in the `yield from` or `await` loop. * If there is an interrupt, we want it handled in the innermost * generator or coroutine, so we deliberately do not check it here. * (see bpo-30039). */ JUMPBY(-oparg); - #line 2771 "Python/generated_cases.c.h" DISPATCH(); } TARGET(GET_LEN) { PyObject *obj = stack_pointer[-1]; PyObject *len_o; - #line 1906 "Python/bytecodes.c" // PUSH(len(TOS)) Py_ssize_t len_i = PyObject_Length(obj); if (len_i < 0) goto error; len_o = PyLong_FromSsize_t(len_i); if (len_o == NULL) goto error; - #line 2784 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = len_o; DISPATCH(); @@ -2791,16 +2781,13 @@ PyObject *type = stack_pointer[-2]; PyObject *subject = stack_pointer[-3]; PyObject *attrs; - #line 1914 "Python/bytecodes.c" // Pop TOS and TOS1. Set TOS to a tuple of attributes on success, or // None on failure. 
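The new BB_TEST_POP_IF_* cases above mirror the POP_JUMP_IF_* instructions, but instead of jumping they record the outcome in `bb_test`, which the BB_BRANCH / BB_JUMP_IF_FLAG_* instruction terminating the tier 2 basic block later inspects. The BB_TEST and BB_TEST_IS_SUCCESSOR macros live in the tier 2 headers, which this excerpt does not include; the sketch below is only one plausible encoding, chosen to be consistent with the BB_TEST(1, 0), BB_TEST(0, 0) and BB_TEST(0, 2) calls visible in these handlers:

```c
#include <assert.h>

/* Hypothetical encoding, not the project's actual macros. Bit 0 records
 * whether the block's direct successor (the fall-through path) should run;
 * the remaining bits carry an auxiliary code such as "iterator exhausted". */
#define BB_TEST(is_successor, aux)    (((aux) << 1) | ((is_successor) & 1))
#define BB_TEST_IS_SUCCESSOR(bb_test) ((bb_test) & 1)

int main(void)
{
    assert(BB_TEST_IS_SUCCESSOR(BB_TEST(1, 0)));   /* condition fell through */
    assert(!BB_TEST_IS_SUCCESSOR(BB_TEST(0, 0)));  /* branch would have been taken */
    assert(!BB_TEST_IS_SUCCESSOR(BB_TEST(0, 2)));  /* loop ended, see BB_TEST_ITER */
    return 0;
}
```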
assert(PyTuple_CheckExact(names)); attrs = match_class(tstate, subject, type, oparg, names); - #line 2800 "Python/generated_cases.c.h" Py_DECREF(subject); Py_DECREF(type); Py_DECREF(names); - #line 1919 "Python/bytecodes.c" if (attrs) { assert(PyTuple_CheckExact(attrs)); // Success! } @@ -2808,7 +2795,6 @@ if (_PyErr_Occurred(tstate)) goto pop_3_error; attrs = Py_NewRef(Py_None); // Failure! } - #line 2812 "Python/generated_cases.c.h" STACK_SHRINK(2); stack_pointer[-1] = attrs; DISPATCH(); @@ -2817,10 +2803,8 @@ TARGET(MATCH_MAPPING) { PyObject *subject = stack_pointer[-1]; PyObject *res; - #line 1929 "Python/bytecodes.c" int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_MAPPING; res = Py_NewRef(match ? Py_True : Py_False); - #line 2824 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; PREDICT(POP_JUMP_IF_FALSE); @@ -2830,10 +2814,8 @@ TARGET(MATCH_SEQUENCE) { PyObject *subject = stack_pointer[-1]; PyObject *res; - #line 1935 "Python/bytecodes.c" int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_SEQUENCE; res = Py_NewRef(match ? Py_True : Py_False); - #line 2837 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; PREDICT(POP_JUMP_IF_FALSE); @@ -2844,11 +2826,9 @@ PyObject *keys = stack_pointer[-1]; PyObject *subject = stack_pointer[-2]; PyObject *values_or_none; - #line 1941 "Python/bytecodes.c" // On successful match, PUSH(values). Otherwise, PUSH(None). values_or_none = match_keys(tstate, subject, keys); if (values_or_none == NULL) goto error; - #line 2852 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = values_or_none; DISPATCH(); @@ -2857,14 +2837,10 @@ TARGET(GET_ITER) { PyObject *iterable = stack_pointer[-1]; PyObject *iter; - #line 1947 "Python/bytecodes.c" /* before: [obj]; after [getiter(obj)] */ iter = PyObject_GetIter(iterable); - #line 2864 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 1950 "Python/bytecodes.c" if (iter == NULL) goto pop_1_error; - #line 2868 "Python/generated_cases.c.h" stack_pointer[-1] = iter; DISPATCH(); } @@ -2872,7 +2848,6 @@ TARGET(GET_YIELD_FROM_ITER) { PyObject *iterable = stack_pointer[-1]; PyObject *iter; - #line 1954 "Python/bytecodes.c" /* before: [obj]; after [getiter(obj)] */ if (PyCoro_CheckExact(iterable)) { /* `iterable` is a coroutine */ @@ -2895,11 +2870,8 @@ if (iter == NULL) { goto error; } - #line 2899 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 1977 "Python/bytecodes.c" } - #line 2903 "Python/generated_cases.c.h" stack_pointer[-1] = iter; PREDICT(LOAD_CONST); DISPATCH(); @@ -2910,13 +2882,12 @@ static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 1996 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyForIterCache *cache = (_PyForIterCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); next_instr--; - _Py_Specialize_ForIter(iter, next_instr, oparg); + _Py_Specialize_ForIter(iter, next_instr, oparg, 0); DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); @@ -2943,7 +2914,46 @@ DISPATCH(); } // Common case: no jump, leave it to the code generator - #line 2947 "Python/generated_cases.c.h" + STACK_GROW(1); + stack_pointer[-1] = next; + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_TEST_ITER) { + PREDICTED(BB_TEST_ITER); + PyObject *iter = stack_pointer[-1]; + PyObject *next; + #if ENABLE_SPECIALIZATION + _PyForIterCache *cache = (_PyForIterCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + 
assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_ForIter(iter, next_instr, oparg, 1); + DISPATCH_SAME_OPARG(); + } + STAT_INC(BB_TEST_ITER, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); + #endif /* ENABLE_SPECIALIZATION */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + goto error; + } + else if (tstate->c_tracefunc != NULL) { + call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + } + bb_test = BB_TEST(1, 0); STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -2953,7 +2963,6 @@ TARGET(FOR_ITER_LIST) { PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 2031 "Python/bytecodes.c" assert(cframe.use_tracing == 0); DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); _PyListIterObject *it = (_PyListIterObject *)iter; @@ -2974,7 +2983,36 @@ DISPATCH(); end_for_iter_list: // Common case: no jump, leave it to the code generator - #line 2978 "Python/generated_cases.c.h" + STACK_GROW(1); + stack_pointer[-1] = next; + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_TEST_ITER_LIST) { + PyObject *iter = stack_pointer[-1]; + PyObject *next; + assert(cframe.use_tracing == 0); + DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, BB_TEST_ITER); + _PyListIterObject *it = (_PyListIterObject *)iter; + STAT_INC(FOR_ITER, hit); + PyListObject *seq = it->it_seq; + if (seq) { + if (it->it_index < PyList_GET_SIZE(seq)) { + next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); + goto end_bb_iter_list; // End of this instruction + } + it->it_seq = NULL; + Py_DECREF(seq); + } + Py_DECREF(iter); + STACK_SHRINK(1); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + end_bb_iter_list: + // Common case: no jump, leave it to the code generator + bb_test = BB_TEST(1, 0); STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -2984,7 +3022,6 @@ TARGET(FOR_ITER_TUPLE) { PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 2054 "Python/bytecodes.c" assert(cframe.use_tracing == 0); _PyTupleIterObject *it = (_PyTupleIterObject *)iter; DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER); @@ -3005,7 +3042,36 @@ DISPATCH(); end_for_iter_tuple: // Common case: no jump, leave it to the code generator - #line 3009 "Python/generated_cases.c.h" + STACK_GROW(1); + stack_pointer[-1] = next; + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_TEST_ITER_TUPLE) { + PyObject *iter = stack_pointer[-1]; + PyObject *next; + assert(cframe.use_tracing == 0); + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, BB_TEST_ITER); + STAT_INC(FOR_ITER, hit); + PyTupleObject *seq = it->it_seq; + if (seq) { + if (it->it_index < PyTuple_GET_SIZE(seq)) { + next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); + goto end_test_iter_tuple; // End of this instruction + } + it->it_seq = NULL; + Py_DECREF(seq); + } + Py_DECREF(iter); + STACK_SHRINK(1); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + end_test_iter_tuple: + // Common case: no jump, leave it to the code generator + bb_test = BB_TEST(1, 0); STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -3015,7 +3081,6 @@ TARGET(FOR_ITER_RANGE) { PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 2077 
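FOR_ITER and BB_TEST_ITER now pass an extra trailing argument to `_Py_Specialize_ForIter` (0 and 1 respectively). The specializer itself is not part of this excerpt; presumably the flag tells it which opcode family to write back, so that a specialised loop header stays in the tier 2 form. A minimal sketch of that idea follows, assuming CPython's internal headers and using a hypothetical function name in place of the real one in `Python/specialize.c`:

```c
/* Sketch only; the real specializer may differ in structure and checks.
 * bb_variant != 0 means we are specialising the tier 2 BB_TEST_ITER form. */
static void
sketch_specialize_for_iter(PyObject *iter, _Py_CODEUNIT *instr, int bb_variant)
{
    if (PyList_CheckExact(iter)) {
        _py_set_opcode(instr, bb_variant ? BB_TEST_ITER_LIST : FOR_ITER_LIST);
    }
    else if (PyTuple_CheckExact(iter)) {
        _py_set_opcode(instr, bb_variant ? BB_TEST_ITER_TUPLE : FOR_ITER_TUPLE);
    }
    else if (PyRange_Check(iter)) {
        _py_set_opcode(instr, bb_variant ? BB_TEST_ITER_RANGE : FOR_ITER_RANGE);
    }
    /* generators and the generic fallback are elided */
}
```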
"Python/bytecodes.c" assert(cframe.use_tracing == 0); _PyRangeIterObject *r = (_PyRangeIterObject *)iter; DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER); @@ -3034,7 +3099,34 @@ if (next == NULL) { goto error; } - #line 3038 "Python/generated_cases.c.h" + STACK_GROW(1); + stack_pointer[-1] = next; + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_TEST_ITER_RANGE) { + PyObject *iter = stack_pointer[-1]; + PyObject *next; + assert(cframe.use_tracing == 0); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; + DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, BB_TEST_ITER); + STAT_INC(FOR_ITER, hit); + if (r->len <= 0) { + STACK_SHRINK(1); + Py_DECREF(r); + bb_test = BB_TEST(0, 2); + JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + DISPATCH(); + } + long value = r->start; + r->start = value + r->step; + r->len--; + next = PyLong_FromLong(value); + if (next == NULL) { + goto error; + } + bb_test = BB_TEST(1, 0); STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -3043,7 +3135,6 @@ TARGET(FOR_ITER_GEN) { PyObject *iter = stack_pointer[-1]; - #line 2098 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyGenObject *gen = (PyGenObject *)iter; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); @@ -3058,14 +3149,12 @@ JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg); assert(next_instr->op.code == END_FOR); DISPATCH_INLINED(gen_frame); - #line 3062 "Python/generated_cases.c.h" } TARGET(BEFORE_ASYNC_WITH) { PyObject *mgr = stack_pointer[-1]; PyObject *exit; PyObject *res; - #line 2115 "Python/bytecodes.c" PyObject *enter = _PyObject_LookupSpecial(mgr, &_Py_ID(__aenter__)); if (enter == NULL) { if (!_PyErr_Occurred(tstate)) { @@ -3088,16 +3177,13 @@ Py_DECREF(enter); goto error; } - #line 3092 "Python/generated_cases.c.h" Py_DECREF(mgr); - #line 2138 "Python/bytecodes.c" res = _PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); if (true) goto pop_1_error; } - #line 3101 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; stack_pointer[-2] = exit; @@ -3109,7 +3195,6 @@ PyObject *mgr = stack_pointer[-1]; PyObject *exit; PyObject *res; - #line 2148 "Python/bytecodes.c" /* pop the context manager, push its __exit__ and the * value returned from calling its __enter__ */ @@ -3135,16 +3220,13 @@ Py_DECREF(enter); goto error; } - #line 3139 "Python/generated_cases.c.h" Py_DECREF(mgr); - #line 2174 "Python/bytecodes.c" res = _PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); if (true) goto pop_1_error; } - #line 3148 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; stack_pointer[-2] = exit; @@ -3156,7 +3238,6 @@ PyObject *lasti = stack_pointer[-3]; PyObject *exit_func = stack_pointer[-4]; PyObject *res; - #line 2183 "Python/bytecodes.c" /* At the top of the stack are 4 values: - val: TOP = exc_info() - unused: SECOND = previous exception @@ -3177,7 +3258,6 @@ res = PyObject_Vectorcall(exit_func, stack + 1, 3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); if (res == NULL) goto error; - #line 3181 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; DISPATCH(); @@ -3186,7 +3266,6 @@ TARGET(PUSH_EXC_INFO) { PyObject *new_exc = stack_pointer[-1]; PyObject *prev_exc; - #line 2206 "Python/bytecodes.c" _PyErr_StackItem *exc_info = tstate->exc_info; if (exc_info->exc_value != NULL) { prev_exc = exc_info->exc_value; @@ -3196,7 +3275,6 @@ } assert(PyExceptionInstance_Check(new_exc)); exc_info->exc_value = Py_NewRef(new_exc); - #line 3200 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = new_exc; 
stack_pointer[-2] = prev_exc; @@ -3210,7 +3288,6 @@ uint32_t type_version = read_u32(&next_instr[1].cache); uint32_t keys_version = read_u32(&next_instr[3].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 2218 "Python/bytecodes.c" /* Cached method object */ assert(cframe.use_tracing == 0); PyTypeObject *self_cls = Py_TYPE(self); @@ -3228,7 +3305,6 @@ assert(_PyType_HasFeature(Py_TYPE(res2), Py_TPFLAGS_METHOD_DESCRIPTOR)); res = self; assert(oparg & 1); - #line 3232 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } @@ -3242,7 +3318,6 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 2238 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *self_cls = Py_TYPE(self); DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_ATTR); @@ -3253,7 +3328,6 @@ res2 = Py_NewRef(descr); res = self; assert(oparg & 1); - #line 3257 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } @@ -3267,7 +3341,6 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 2251 "Python/bytecodes.c" assert(cframe.use_tracing == 0); PyTypeObject *self_cls = Py_TYPE(self); DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_ATTR); @@ -3282,7 +3355,6 @@ res2 = Py_NewRef(descr); res = self; assert(oparg & 1); - #line 3286 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } @@ -3291,11 +3363,9 @@ } TARGET(KW_NAMES) { - #line 2268 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg < PyTuple_GET_SIZE(frame->f_code->co_consts)); kwnames = GETITEM(frame->f_code->co_consts, oparg); - #line 3299 "Python/generated_cases.c.h" DISPATCH(); } @@ -3306,7 +3376,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2304 "Python/bytecodes.c" int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -3378,7 +3447,6 @@ Py_DECREF(args[i]); } if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3382 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3390,7 +3458,6 @@ TARGET(CALL_BOUND_METHOD_EXACT_ARGS) { PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; - #line 2382 "Python/bytecodes.c" DEOPT_IF(method != NULL, CALL); DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL); STAT_INC(CALL, hit); @@ -3400,7 +3467,6 @@ PEEK(oparg + 2) = Py_NewRef(meth); // method Py_DECREF(callable); GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); - #line 3404 "Python/generated_cases.c.h" } TARGET(CALL_PY_EXACT_ARGS) { @@ -3409,7 +3475,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; uint32_t func_version = read_u32(&next_instr[1].cache); - #line 2394 "Python/bytecodes.c" assert(kwnames == NULL); DEOPT_IF(tstate->interp->eval_frame, CALL); int is_meth = method != NULL; @@ -3434,7 +3499,6 @@ STACK_SHRINK(oparg + 2); JUMPBY(INLINE_CACHE_ENTRIES_CALL); DISPATCH_INLINED(new_frame); - #line 3438 "Python/generated_cases.c.h" } TARGET(CALL_PY_WITH_DEFAULTS) { @@ -3443,7 +3507,6 @@ PyObject *method = stack_pointer[-(2 + 
oparg)]; uint32_t func_version = read_u32(&next_instr[1].cache); uint16_t min_args = read_u16(&next_instr[3].cache); - #line 2421 "Python/bytecodes.c" assert(kwnames == NULL); DEOPT_IF(tstate->interp->eval_frame, CALL); int is_meth = method != NULL; @@ -3473,7 +3536,6 @@ STACK_SHRINK(oparg + 2); JUMPBY(INLINE_CACHE_ENTRIES_CALL); DISPATCH_INLINED(new_frame); - #line 3477 "Python/generated_cases.c.h" } TARGET(CALL_NO_KW_TYPE_1) { @@ -3481,7 +3543,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *null = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2453 "Python/bytecodes.c" assert(kwnames == NULL); assert(cframe.use_tracing == 0); assert(oparg == 1); @@ -3492,7 +3553,6 @@ res = Py_NewRef(Py_TYPE(obj)); Py_DECREF(obj); Py_DECREF(&PyType_Type); // I.e., callable - #line 3496 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3505,7 +3565,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *null = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2466 "Python/bytecodes.c" assert(kwnames == NULL); assert(cframe.use_tracing == 0); assert(oparg == 1); @@ -3517,7 +3576,6 @@ Py_DECREF(arg); Py_DECREF(&PyUnicode_Type); // I.e., callable if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3521 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3531,7 +3589,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *null = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2481 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg == 1); DEOPT_IF(null != NULL, CALL); @@ -3542,7 +3599,6 @@ Py_DECREF(arg); Py_DECREF(&PyTuple_Type); // I.e., tuple if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3546 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3556,7 +3612,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2495 "Python/bytecodes.c" int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -3578,7 +3633,6 @@ } Py_DECREF(tp); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3582 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3592,7 +3646,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2520 "Python/bytecodes.c" assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ assert(kwnames == NULL); @@ -3621,7 +3674,6 @@ Py_DECREF(arg); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3625 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3635,7 +3687,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2552 "Python/bytecodes.c" assert(cframe.use_tracing == 0); /* Builtin METH_FASTCALL functions, without keywords */ assert(kwnames == NULL); @@ -3668,7 +3719,6 @@ 'invalid'). In those cases an exception is set, so we must handle it. 
*/ - #line 3672 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3682,7 +3732,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2588 "Python/bytecodes.c" assert(cframe.use_tracing == 0); /* Builtin METH_FASTCALL | METH_KEYWORDS functions */ int is_meth = method != NULL; @@ -3715,7 +3764,6 @@ } Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3719 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3729,7 +3777,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2624 "Python/bytecodes.c" assert(cframe.use_tracing == 0); assert(kwnames == NULL); /* len(o) */ @@ -3755,7 +3802,6 @@ Py_DECREF(callable); Py_DECREF(arg); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3759 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3768,7 +3814,6 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2652 "Python/bytecodes.c" assert(cframe.use_tracing == 0); assert(kwnames == NULL); /* isinstance(o, o2) */ @@ -3796,7 +3841,6 @@ Py_DECREF(cls); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3800 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3808,7 +3852,6 @@ PyObject **args = (stack_pointer - oparg); PyObject *self = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; - #line 2683 "Python/bytecodes.c" assert(cframe.use_tracing == 0); assert(kwnames == NULL); assert(oparg == 1); @@ -3827,14 +3870,12 @@ JUMPBY(INLINE_CACHE_ENTRIES_CALL + 1); assert(next_instr[-1].op.code == POP_TOP); DISPATCH(); - #line 3831 "Python/generated_cases.c.h" } TARGET(CALL_NO_KW_METHOD_DESCRIPTOR_O) { PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2704 "Python/bytecodes.c" assert(kwnames == NULL); int is_meth = method != NULL; int total_args = oparg; @@ -3865,7 +3906,6 @@ Py_DECREF(arg); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3869 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3878,7 +3918,6 @@ PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2738 "Python/bytecodes.c" int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -3907,7 +3946,6 @@ } Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3911 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3920,7 +3958,6 @@ PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2770 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg == 0 || oparg == 1); int is_meth = method != NULL; @@ -3949,7 +3986,6 @@ Py_DECREF(self); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3953 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -3962,7 +3998,6 @@ PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2802 "Python/bytecodes.c" assert(kwnames 
== NULL); int is_meth = method != NULL; int total_args = oparg; @@ -3990,7 +4025,6 @@ } Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3994 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; @@ -4005,7 +4039,6 @@ PyObject *callargs = stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))]; PyObject *func = stack_pointer[-(2 + ((oparg & 1) ? 1 : 0))]; PyObject *result; - #line 2833 "Python/bytecodes.c" if (oparg & 1) { // DICT_MERGE is called before this opcode if there are kwargs. // It converts all dict subtypes in kwargs into regular dicts. @@ -4024,15 +4057,12 @@ assert(PyTuple_CheckExact(callargs)); result = do_call_core(tstate, func, callargs, kwargs, cframe.use_tracing); - #line 4028 "Python/generated_cases.c.h" Py_DECREF(func); Py_DECREF(callargs); Py_XDECREF(kwargs); - #line 2852 "Python/bytecodes.c" assert(PEEK(3 + (oparg & 1)) == NULL); if (result == NULL) { STACK_SHRINK(((oparg & 1) ? 1 : 0)); goto pop_3_error; } - #line 4036 "Python/generated_cases.c.h" STACK_SHRINK(((oparg & 1) ? 1 : 0)); STACK_SHRINK(2); stack_pointer[-1] = result; @@ -4047,7 +4077,6 @@ PyObject *kwdefaults = (oparg & 0x02) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0))] : NULL; PyObject *defaults = (oparg & 0x01) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x01) ? 1 : 0))] : NULL; PyObject *func; - #line 2863 "Python/bytecodes.c" PyFunctionObject *func_obj = (PyFunctionObject *) PyFunction_New(codeobj, GLOBALS()); @@ -4076,14 +4105,12 @@ func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; func = (PyObject *)func_obj; - #line 4080 "Python/generated_cases.c.h" STACK_SHRINK(((oparg & 0x01) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x08) ? 1 : 0)); stack_pointer[-1] = func; DISPATCH(); } TARGET(RETURN_GENERATOR) { - #line 2894 "Python/bytecodes.c" assert(PyFunction_Check(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); @@ -4104,7 +4131,6 @@ frame = cframe.current_frame = prev; _PyFrame_StackPush(frame, (PyObject *)gen); goto resume_frame; - #line 4108 "Python/generated_cases.c.h" } TARGET(BUILD_SLICE) { @@ -4112,15 +4138,11 @@ PyObject *stop = stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))]; PyObject *start = stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))]; PyObject *slice; - #line 2917 "Python/bytecodes.c" slice = PySlice_New(start, stop, step); - #line 4118 "Python/generated_cases.c.h" Py_DECREF(start); Py_DECREF(stop); Py_XDECREF(step); - #line 2919 "Python/bytecodes.c" if (slice == NULL) { STACK_SHRINK(((oparg == 3) ? 1 : 0)); goto pop_2_error; } - #line 4124 "Python/generated_cases.c.h" STACK_SHRINK(((oparg == 3) ? 1 : 0)); STACK_SHRINK(1); stack_pointer[-1] = slice; @@ -4131,7 +4153,6 @@ PyObject *fmt_spec = ((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? stack_pointer[-((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))] : NULL; PyObject *value = stack_pointer[-(1 + (((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))]; PyObject *result; - #line 2923 "Python/bytecodes.c" /* Handles f-string value formatting. */ PyObject *(*conv_fn)(PyObject *); int which_conversion = oparg & FVC_MASK; @@ -4166,7 +4187,6 @@ Py_DECREF(value); Py_XDECREF(fmt_spec); if (result == NULL) { STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 
1 : 0)); goto pop_1_error; } - #line 4170 "Python/generated_cases.c.h" STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); stack_pointer[-1] = result; DISPATCH(); @@ -4175,10 +4195,18 @@ TARGET(COPY) { PyObject *bottom = stack_pointer[-(1 + (oparg-1))]; PyObject *top; - #line 2960 "Python/bytecodes.c" assert(oparg > 0); top = Py_NewRef(bottom); - #line 4182 "Python/generated_cases.c.h" + STACK_GROW(1); + stack_pointer[-1] = top; + DISPATCH(); + } + + TARGET(COPY_NO_INCREF) { + PyObject *bottom = stack_pointer[-(1 + (oparg - 1))]; + PyObject *top; + assert(oparg > 0); + top = bottom; STACK_GROW(1); stack_pointer[-1] = top; DISPATCH(); @@ -4190,7 +4218,6 @@ PyObject *rhs = stack_pointer[-1]; PyObject *lhs = stack_pointer[-2]; PyObject *res; - #line 2965 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -4206,12 +4233,9 @@ assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops)); assert(binary_ops[oparg]); res = binary_ops[oparg](lhs, rhs); - #line 4210 "Python/generated_cases.c.h" Py_DECREF(lhs); Py_DECREF(rhs); - #line 2981 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 4215 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -4221,27 +4245,146 @@ TARGET(SWAP) { PyObject *top = stack_pointer[-1]; PyObject *bottom = stack_pointer[-(2 + (oparg-2))]; - #line 2986 "Python/bytecodes.c" assert(oparg >= 2); - #line 4227 "Python/generated_cases.c.h" stack_pointer[-1] = bottom; stack_pointer[-(2 + (oparg-2))] = top; DISPATCH(); } TARGET(EXTENDED_ARG) { - #line 2990 "Python/bytecodes.c" - assert(oparg); + // assert(oparg); assert(cframe.use_tracing == 0); opcode = next_instr->op.code; oparg = oparg << 8 | next_instr->op.arg; PRE_DISPATCH_GOTO(); DISPATCH_GOTO(); - #line 4241 "Python/generated_cases.c.h" } TARGET(CACHE) { - #line 2999 "Python/bytecodes.c" Py_UNREACHABLE(); - #line 4247 "Python/generated_cases.c.h" + } + + TARGET(BB_BRANCH) { + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + if (BB_TEST_IS_SUCCESSOR(bb_test)) { + // Rewrite self + _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET); + // Generate consequent. + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + 0, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback; + DISPATCH(); + } + } + else { + // Rewrite self + _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET); + // Generate alternative. + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + oparg, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback + oparg; + DISPATCH(); + } + } + // Their addresses should be the same. Because + // The first BB should be generated right after the previous one. 
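BB_BRANCH above is the conditional exit of a tier 2 basic block: on its first execution it reads `bb_test`, asks `_PyTier2_GenerateNextBB` for the side that was actually taken, and patches its own opcode to BB_BRANCH_IF_FLAG_UNSET or BB_BRANCH_IF_FLAG_SET (shown next), so the untaken side is only generated if control ever goes that way. The toy interpreter below is not the project's code; it only demonstrates the self-patching dispatch pattern in isolation:

```c
#include <stdio.h>

/* Toy self-patching dispatch, illustrating the BB_BRANCH lifecycle only. */
enum { OP_BRANCH_LAZY, OP_BRANCH_IF_SET, OP_BRANCH_IF_UNSET, OP_END };

static void run(unsigned char *code, int flag)
{
    for (unsigned char *pc = code; *pc != OP_END; pc++) {
        switch (*pc) {
        case OP_BRANCH_LAZY:
            /* First execution: specialise on the direction actually taken. */
            *pc = flag ? OP_BRANCH_IF_SET : OP_BRANCH_IF_UNSET;
            printf("generated block for flag=%d and patched opcode\n", flag);
            break;
        case OP_BRANCH_IF_SET:
            puts(flag ? "fast path (flag set)"
                      : "cold side reached: would generate it lazily now");
            break;
        case OP_BRANCH_IF_UNSET:
            puts(flag ? "cold side reached: would generate it lazily now"
                      : "fast path (flag unset)");
            break;
        }
    }
}

int main(void)
{
    unsigned char code[] = { OP_BRANCH_LAZY, OP_END };
    run(code, 1);   /* patches OP_BRANCH_LAZY into OP_BRANCH_IF_SET */
    run(code, 1);   /* later hot runs skip the generator entirely */
    run(code, 0);   /* a diverging run exposes the still-missing side */
    return 0;
}
```

In the real handlers the patching is done with `_py_set_opcode` and `_PyTier2_RewriteForwardJump`, and the freshly generated block is entered by updating `next_instr`, as the surrounding cases show.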
+ assert(next_instr + INLINE_CACHE_ENTRIES_BB_BRANCH == t2_nextinstr); + next_instr = t2_nextinstr; + DISPATCH(); + } + + TARGET(BB_BRANCH_IF_FLAG_UNSET) { + if (!BB_TEST_IS_SUCCESSOR(bb_test)) { + _Py_CODEUNIT *curr = next_instr - 1; + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + oparg, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback; + DISPATCH(); + } + next_instr = t2_nextinstr; + + // Rewrite self + _PyTier2_RewriteForwardJump(curr, next_instr); + DISPATCH(); + } + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_JUMP_IF_FLAG_UNSET) { + if (!BB_TEST_IS_SUCCESSOR(bb_test)) { + JUMPBY(oparg); + DISPATCH(); + } + // Fall through to next instruction. + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_BRANCH_IF_FLAG_SET) { + if (BB_TEST_IS_SUCCESSOR(bb_test)) { + _Py_CODEUNIT *curr = next_instr - 1; + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + + t2_nextinstr = _PyTier2_GenerateNextBB( + frame, cache->bb_id_tagged, next_instr - 1, + oparg, &tier1_fallback, bb_test); + if (t2_nextinstr == NULL) { + // Fall back to tier 1. + next_instr = tier1_fallback; + DISPATCH(); + } + next_instr = t2_nextinstr; + + // Rewrite self + _PyTier2_RewriteForwardJump(curr, next_instr); + DISPATCH(); + } + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_JUMP_IF_FLAG_SET) { + if (BB_TEST_IS_SUCCESSOR(bb_test)) { + JUMPBY(oparg); + DISPATCH(); + } + // Fall through to next instruction. + next_instr += 1; + DISPATCH(); + } + + TARGET(BB_JUMP_BACKWARD_LAZY) { + _Py_CODEUNIT *curr = next_instr - 1; + _Py_CODEUNIT *t2_nextinstr = NULL; + _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr; + _Py_CODEUNIT *tier1_fallback = NULL; + + t2_nextinstr = _PyTier2_LocateJumpBackwardsBB( + frame, cache->bb_id_tagged, -oparg, &tier1_fallback, curr, + STACK_LEVEL()); + if (t2_nextinstr == NULL) { + // Fall back to tier 1.
+ next_instr = tier1_fallback; + } + next_instr = t2_nextinstr; + + // Rewrite self + _PyTier2_RewriteBackwardJump(curr, next_instr); + DISPATCH(); } diff --git a/Python/makeopcodetargets.py b/Python/makeopcodetargets.py index 33a4b4a76a1253..c5043542fb592d 100755 --- a/Python/makeopcodetargets.py +++ b/Python/makeopcodetargets.py @@ -41,6 +41,10 @@ def write_contents(f): while targets[next_op] != '_unknown_opcode': next_op += 1 targets[next_op] = "TARGET_%s" % opname + for opname in opcode._uops: + while targets[next_op] != '_unknown_opcode': + next_op += 1 + targets[next_op] = "TARGET_%s" % opname f.write("static void *opcode_targets[256] = {\n") f.write(",\n".join([" &&%s" % s for s in targets])) f.write("\n};\n") diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 347a84dad46351..c62dd4c48b7c03 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -13,16 +13,26 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case RESUME: return 0; + case RESUME_QUICK: + return 0; case LOAD_CLOSURE: return 0; case LOAD_FAST_CHECK: return 0; case LOAD_FAST: return 0; + case LOAD_FAST_NO_INCREF: + return 0; case LOAD_CONST: return 0; case STORE_FAST: return 1; + case STORE_FAST_BOXED_UNBOXED: + return 1; + case STORE_FAST_UNBOXED_BOXED: + return 1; + case STORE_FAST_UNBOXED_UNBOXED: + return 1; case LOAD_FAST__LOAD_FAST: return 0+0; case LOAD_FAST__LOAD_CONST: @@ -35,6 +45,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0+0; case POP_TOP: return 1; + case POP_TOP_NO_DECREF: + return 1; case PUSH_NULL: return 0; case END_FOR: @@ -47,10 +59,14 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case BINARY_OP_MULTIPLY_INT: return 2; + case BINARY_OP_MULTIPLY_INT_REST: + return 2; case BINARY_OP_MULTIPLY_FLOAT: return 2; case BINARY_OP_SUBTRACT_INT: return 2; + case BINARY_OP_SUBTRACT_INT_REST: + return 2; case BINARY_OP_SUBTRACT_FLOAT: return 2; case BINARY_OP_ADD_UNICODE: @@ -59,8 +75,24 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 2; case BINARY_OP_ADD_FLOAT: return 2; + case BINARY_CHECK_FLOAT: + return 2; + case BINARY_OP_ADD_FLOAT_UNBOXED: + return 2; + case BINARY_OP_SUBTRACT_FLOAT_UNBOXED: + return 2; + case BINARY_OP_MULTIPLY_FLOAT_UNBOXED: + return 2; + case UNBOX_FLOAT: + return oparg + 1; + case BOX_FLOAT: + return oparg + 1; case BINARY_OP_ADD_INT: return 2; + case BINARY_CHECK_INT: + return 2; + case BINARY_OP_ADD_INT_REST: + return 2; case BINARY_SUBSCR: return 2; case BINARY_SLICE: @@ -69,6 +101,10 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 4; case BINARY_SUBSCR_LIST_INT: return 2; + case BINARY_SUBSCR_LIST_INT_REST: + return 2; + case CHECK_LIST: + return oparg + 1; case BINARY_SUBSCR_TUPLE_INT: return 2; case BINARY_SUBSCR_DICT: @@ -83,6 +119,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 3; case STORE_SUBSCR_LIST_INT: return 3; + case STORE_SUBSCR_LIST_INT_REST: + return 3; case STORE_SUBSCR_DICT: return 3; case DELETE_SUBSCR: @@ -237,14 +275,24 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case JUMP_BACKWARD: return 0; + case JUMP_BACKWARD_QUICK: + return 0; case POP_JUMP_IF_FALSE: return 1; + case BB_TEST_POP_IF_FALSE: + return 1; case POP_JUMP_IF_TRUE: return 1; + case BB_TEST_POP_IF_TRUE: + return 1; case POP_JUMP_IF_NOT_NONE: return 1; + case BB_TEST_POP_IF_NOT_NONE: + return 1; case POP_JUMP_IF_NONE: return 1; + case BB_TEST_POP_IF_NONE: + return 1; case JUMP_BACKWARD_NO_INTERRUPT: return 0; case 
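The stack-effect tables in this hunk also register the new unboxed-value opcodes (UNBOX_FLOAT, BOX_FLOAT, the *_UNBOXED arithmetic and the *_NO_INCREF / *_NO_DECREF stack operations). Their implementations are not part of this excerpt; the snippet below only illustrates the general idea of keeping a raw C double in a stack slot so that copying or discarding it needs no reference counting. Names are hypothetical and a 64-bit slot is assumed:

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>

typedef uint64_t stack_slot;   /* stands in for a 64-bit value-stack entry */

/* Reinterpret a double's bits as a slot and back: no heap object exists,
 * so a copy (cf. COPY_NO_INCREF) or a pop (cf. POP_TOP_NO_DECREF) is just
 * a plain machine-word move. */
static stack_slot unbox_double(double x) { stack_slot s; memcpy(&s, &x, sizeof s); return s; }
static double     box_double(stack_slot s) { double x; memcpy(&x, &s, sizeof x); return x; }

int main(void)
{
    stack_slot a = unbox_double(1.5);
    stack_slot b = a;                               /* bit copy, no INCREF */
    printf("%g\n", box_double(a) + box_double(b));  /* prints 3 */
    return 0;
}
```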
GET_LEN: @@ -263,12 +311,20 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case FOR_ITER: return 1; + case BB_TEST_ITER: + return 1; case FOR_ITER_LIST: return 1; + case BB_TEST_ITER_LIST: + return 1; case FOR_ITER_TUPLE: return 1; + case BB_TEST_ITER_TUPLE: + return 1; case FOR_ITER_RANGE: return 1; + case BB_TEST_ITER_RANGE: + return 1; case FOR_ITER_GEN: return 1; case BEFORE_ASYNC_WITH: @@ -335,6 +391,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return (((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0) + 1; case COPY: return (oparg-1) + 1; + case COPY_NO_INCREF: + return (oparg - 1) + 1; case BINARY_OP: return 2; case SWAP: @@ -343,6 +401,18 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case CACHE: return 0; + case BB_BRANCH: + return 0; + case BB_BRANCH_IF_FLAG_UNSET: + return 0; + case BB_JUMP_IF_FLAG_UNSET: + return 0; + case BB_BRANCH_IF_FLAG_SET: + return 0; + case BB_JUMP_IF_FLAG_SET: + return 0; + case BB_JUMP_BACKWARD_LAZY: + return 0; default: return -1; } @@ -359,16 +429,26 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case RESUME: return 0; + case RESUME_QUICK: + return 0; case LOAD_CLOSURE: return 1; case LOAD_FAST_CHECK: return 1; case LOAD_FAST: return 1; + case LOAD_FAST_NO_INCREF: + return 1; case LOAD_CONST: return 1; case STORE_FAST: return 0; + case STORE_FAST_BOXED_UNBOXED: + return 0; + case STORE_FAST_UNBOXED_BOXED: + return 0; + case STORE_FAST_UNBOXED_UNBOXED: + return 0; case LOAD_FAST__LOAD_FAST: return 1+1; case LOAD_FAST__LOAD_CONST: @@ -381,6 +461,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1+1; case POP_TOP: return 0; + case POP_TOP_NO_DECREF: + return 0; case PUSH_NULL: return 1; case END_FOR: @@ -393,10 +475,14 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case BINARY_OP_MULTIPLY_INT: return 1; + case BINARY_OP_MULTIPLY_INT_REST: + return 1; case BINARY_OP_MULTIPLY_FLOAT: return 1; case BINARY_OP_SUBTRACT_INT: return 1; + case BINARY_OP_SUBTRACT_INT_REST: + return 1; case BINARY_OP_SUBTRACT_FLOAT: return 1; case BINARY_OP_ADD_UNICODE: @@ -405,8 +491,24 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case BINARY_OP_ADD_FLOAT: return 1; + case BINARY_CHECK_FLOAT: + return 2; + case BINARY_OP_ADD_FLOAT_UNBOXED: + return 1; + case BINARY_OP_SUBTRACT_FLOAT_UNBOXED: + return 1; + case BINARY_OP_MULTIPLY_FLOAT_UNBOXED: + return 1; + case UNBOX_FLOAT: + return oparg + 1; + case BOX_FLOAT: + return oparg + 1; case BINARY_OP_ADD_INT: return 1; + case BINARY_CHECK_INT: + return 2; + case BINARY_OP_ADD_INT_REST: + return 1; case BINARY_SUBSCR: return 1; case BINARY_SLICE: @@ -415,6 +517,10 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case BINARY_SUBSCR_LIST_INT: return 1; + case BINARY_SUBSCR_LIST_INT_REST: + return 1; + case CHECK_LIST: + return oparg + 1; case BINARY_SUBSCR_TUPLE_INT: return 1; case BINARY_SUBSCR_DICT: @@ -429,6 +535,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case STORE_SUBSCR_LIST_INT: return 0; + case STORE_SUBSCR_LIST_INT_REST: + return 0; case STORE_SUBSCR_DICT: return 0; case DELETE_SUBSCR: @@ -482,7 +590,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { case UNPACK_SEQUENCE_LIST: return oparg; case UNPACK_EX: - return (oparg & 0xFF) + (oparg >> 8) + 1; + return (oparg >> 8) + (oparg & 0xFF) + 1; case STORE_ATTR: return 0; case DELETE_ATTR: @@ -583,14 +691,24 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 
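Every new opcode needs an entry in both `_PyOpcode_num_popped` and `_PyOpcode_num_pushed`. A consumer that wants the net effect on the value stack (for example, anything reconstructing stack depth at a basic-block boundary) would presumably combine the two as sketched below, using only the functions declared in this header:

```c
#include <limits.h>
#include <stdbool.h>

/* Declared in Python/opcode_metadata.h (above); both return -1 for an
 * opcode they do not know about. */
int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
int _PyOpcode_num_pushed(int opcode, int oparg, bool jump);

/* Sketch: net change in stack depth caused by one instruction. */
static int
net_stack_effect(int opcode, int oparg, bool jump)
{
    int popped = _PyOpcode_num_popped(opcode, oparg, jump);
    int pushed = _PyOpcode_num_pushed(opcode, oparg, jump);
    if (popped < 0 || pushed < 0) {
        return INT_MIN;   /* metadata unavailable for this opcode */
    }
    return pushed - popped;
}
```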
0; case JUMP_BACKWARD: return 0; + case JUMP_BACKWARD_QUICK: + return 0; case POP_JUMP_IF_FALSE: return 0; + case BB_TEST_POP_IF_FALSE: + return 0; case POP_JUMP_IF_TRUE: return 0; + case BB_TEST_POP_IF_TRUE: + return 0; case POP_JUMP_IF_NOT_NONE: return 0; + case BB_TEST_POP_IF_NOT_NONE: + return 0; case POP_JUMP_IF_NONE: return 0; + case BB_TEST_POP_IF_NONE: + return 0; case JUMP_BACKWARD_NO_INTERRUPT: return 0; case GET_LEN: @@ -609,12 +727,20 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case FOR_ITER: return 2; + case BB_TEST_ITER: + return 2; case FOR_ITER_LIST: return 2; + case BB_TEST_ITER_LIST: + return 2; case FOR_ITER_TUPLE: return 2; + case BB_TEST_ITER_TUPLE: + return 2; case FOR_ITER_RANGE: return 2; + case BB_TEST_ITER_RANGE: + return 2; case FOR_ITER_GEN: return 2; case BEFORE_ASYNC_WITH: @@ -681,6 +807,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case COPY: return (oparg-1) + 2; + case COPY_NO_INCREF: + return (oparg - 1) + 2; case BINARY_OP: return 1; case SWAP: @@ -689,6 +817,18 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case CACHE: return 0; + case BB_BRANCH: + return 0; + case BB_BRANCH_IF_FLAG_UNSET: + return 0; + case BB_JUMP_IF_FLAG_UNSET: + return 0; + case BB_BRANCH_IF_FLAG_SET: + return 0; + case BB_JUMP_IF_FLAG_SET: + return 0; + case BB_JUMP_BACKWARD_LAZY: + return 0; default: return -1; } @@ -706,35 +846,53 @@ extern const struct opcode_metadata _PyOpcode_opcode_metadata[256]; #else const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [NOP] = { true, INSTR_FMT_IX }, - [RESUME] = { true, INSTR_FMT_IB }, + [RESUME] = { true, INSTR_FMT_IX }, + [RESUME_QUICK] = { true, INSTR_FMT_IB }, [LOAD_CLOSURE] = { true, INSTR_FMT_IB }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB }, [LOAD_FAST] = { true, INSTR_FMT_IB }, + [LOAD_FAST_NO_INCREF] = { true, INSTR_FMT_IB }, [LOAD_CONST] = { true, INSTR_FMT_IB }, [STORE_FAST] = { true, INSTR_FMT_IB }, + [STORE_FAST_BOXED_UNBOXED] = { true, INSTR_FMT_IB }, + [STORE_FAST_UNBOXED_BOXED] = { true, INSTR_FMT_IB }, + [STORE_FAST_UNBOXED_UNBOXED] = { true, INSTR_FMT_IB }, [LOAD_FAST__LOAD_FAST] = { true, INSTR_FMT_IBIB }, [LOAD_FAST__LOAD_CONST] = { true, INSTR_FMT_IBIB }, [STORE_FAST__LOAD_FAST] = { true, INSTR_FMT_IBIB }, [STORE_FAST__STORE_FAST] = { true, INSTR_FMT_IBIB }, [LOAD_CONST__LOAD_FAST] = { true, INSTR_FMT_IBIB }, [POP_TOP] = { true, INSTR_FMT_IX }, + [POP_TOP_NO_DECREF] = { true, INSTR_FMT_IX }, [PUSH_NULL] = { true, INSTR_FMT_IX }, [END_FOR] = { true, INSTR_FMT_IB }, [UNARY_NEGATIVE] = { true, INSTR_FMT_IX }, [UNARY_NOT] = { true, INSTR_FMT_IX }, [UNARY_INVERT] = { true, INSTR_FMT_IX }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC }, + [BINARY_OP_MULTIPLY_INT_REST] = { true, INSTR_FMT_IX }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC }, [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC }, + [BINARY_OP_SUBTRACT_INT_REST] = { true, INSTR_FMT_IX }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX }, [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC }, + [BINARY_CHECK_FLOAT] = { true, INSTR_FMT_IX }, + [BINARY_OP_ADD_FLOAT_UNBOXED] = { true, INSTR_FMT_IX }, + [BINARY_OP_SUBTRACT_FLOAT_UNBOXED] = { true, INSTR_FMT_IX }, + [BINARY_OP_MULTIPLY_FLOAT_UNBOXED] = { true, INSTR_FMT_IX }, + [UNBOX_FLOAT] = { true, INSTR_FMT_IB }, + [BOX_FLOAT] = { true, INSTR_FMT_IB }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC }, + [BINARY_CHECK_INT] 
= { true, INSTR_FMT_IX }, + [BINARY_OP_ADD_INT_REST] = { true, INSTR_FMT_IX }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC000 }, [BINARY_SLICE] = { true, INSTR_FMT_IX }, [STORE_SLICE] = { true, INSTR_FMT_IX }, [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC000 }, + [BINARY_SUBSCR_LIST_INT_REST] = { true, INSTR_FMT_IX }, + [CHECK_LIST] = { true, INSTR_FMT_IB }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC000 }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC000 }, [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC000 }, @@ -742,6 +900,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [SET_ADD] = { true, INSTR_FMT_IB }, [STORE_SUBSCR] = { true, INSTR_FMT_IXC }, [STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC }, + [STORE_SUBSCR_LIST_INT_REST] = { true, INSTR_FMT_IX }, [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC }, [DELETE_SUBSCR] = { true, INSTR_FMT_IX }, [CALL_INTRINSIC_1] = { true, INSTR_FMT_IB }, @@ -818,11 +977,16 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [IMPORT_NAME] = { true, INSTR_FMT_IB }, [IMPORT_FROM] = { true, INSTR_FMT_IB }, [JUMP_FORWARD] = { true, INSTR_FMT_IB }, - [JUMP_BACKWARD] = { true, INSTR_FMT_IB }, + [JUMP_BACKWARD] = { true, INSTR_FMT_IX }, + [JUMP_BACKWARD_QUICK] = { true, INSTR_FMT_IB }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IB }, + [BB_TEST_POP_IF_FALSE] = { true, INSTR_FMT_IX }, [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IB }, + [BB_TEST_POP_IF_TRUE] = { true, INSTR_FMT_IX }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IB }, + [BB_TEST_POP_IF_NOT_NONE] = { true, INSTR_FMT_IX }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IB }, + [BB_TEST_POP_IF_NONE] = { true, INSTR_FMT_IX }, [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB }, [GET_LEN] = { true, INSTR_FMT_IX }, [MATCH_CLASS] = { true, INSTR_FMT_IB }, @@ -832,9 +996,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [GET_ITER] = { true, INSTR_FMT_IX }, [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX }, [FOR_ITER] = { true, INSTR_FMT_IBC }, + [BB_TEST_ITER] = { true, INSTR_FMT_IBC }, [FOR_ITER_LIST] = { true, INSTR_FMT_IBC }, + [BB_TEST_ITER_LIST] = { true, INSTR_FMT_IXC }, [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC }, + [BB_TEST_ITER_TUPLE] = { true, INSTR_FMT_IXC }, [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC }, + [BB_TEST_ITER_RANGE] = { true, INSTR_FMT_IXC }, [FOR_ITER_GEN] = { true, INSTR_FMT_IBC }, [BEFORE_ASYNC_WITH] = { true, INSTR_FMT_IX }, [BEFORE_WITH] = { true, INSTR_FMT_IX }, @@ -868,9 +1036,16 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [BUILD_SLICE] = { true, INSTR_FMT_IB }, [FORMAT_VALUE] = { true, INSTR_FMT_IB }, [COPY] = { true, INSTR_FMT_IB }, + [COPY_NO_INCREF] = { true, INSTR_FMT_IB }, [BINARY_OP] = { true, INSTR_FMT_IBC }, [SWAP] = { true, INSTR_FMT_IB }, [EXTENDED_ARG] = { true, INSTR_FMT_IB }, [CACHE] = { true, INSTR_FMT_IX }, + [BB_BRANCH] = { true, INSTR_FMT_IBC }, + [BB_BRANCH_IF_FLAG_UNSET] = { true, INSTR_FMT_IBC }, + [BB_JUMP_IF_FLAG_UNSET] = { true, INSTR_FMT_IBC }, + [BB_BRANCH_IF_FLAG_SET] = { true, INSTR_FMT_IBC }, + [BB_JUMP_IF_FLAG_SET] = { true, INSTR_FMT_IBC }, + [BB_JUMP_BACKWARD_LAZY] = { true, INSTR_FMT_IB }, }; #endif diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c502471bcd17b6..55e0cd2a690948 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -4,17 +4,19 @@ static void *opcode_targets[256] = { &&TARGET_PUSH_NULL, &&TARGET_INTERPRETER_EXIT, &&TARGET_END_FOR, + &&TARGET_RESUME_QUICK, + &&TARGET_JUMP_BACKWARD_QUICK, &&TARGET_BINARY_OP_ADD_FLOAT, 
&&TARGET_BINARY_OP_ADD_INT, - &&TARGET_BINARY_OP_ADD_UNICODE, - &&TARGET_BINARY_OP_INPLACE_ADD_UNICODE, &&TARGET_NOP, - &&TARGET_BINARY_OP_MULTIPLY_FLOAT, + &&TARGET_BINARY_OP_ADD_UNICODE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, + &&TARGET_BINARY_OP_INPLACE_ADD_UNICODE, + &&TARGET_BINARY_OP_MULTIPLY_FLOAT, + &&TARGET_UNARY_INVERT, &&TARGET_BINARY_OP_MULTIPLY_INT, &&TARGET_BINARY_OP_SUBTRACT_FLOAT, - &&TARGET_UNARY_INVERT, &&TARGET_BINARY_OP_SUBTRACT_INT, &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_BINARY_SUBSCR_GETITEM, @@ -22,21 +24,21 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_CALL_PY_EXACT_ARGS, &&TARGET_CALL_PY_WITH_DEFAULTS, - &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS, - &&TARGET_CALL_BUILTIN_CLASS, &&TARGET_BINARY_SUBSCR, &&TARGET_BINARY_SLICE, &&TARGET_STORE_SLICE, - &&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS, - &&TARGET_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, + &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS, + &&TARGET_CALL_BUILTIN_CLASS, &&TARGET_GET_LEN, &&TARGET_MATCH_MAPPING, &&TARGET_MATCH_SEQUENCE, &&TARGET_MATCH_KEYS, - &&TARGET_CALL_NO_KW_BUILTIN_FAST, + &&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS, &&TARGET_PUSH_EXC_INFO, &&TARGET_CHECK_EXC_MATCH, &&TARGET_CHECK_EG_MATCH, + &&TARGET_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, + &&TARGET_CALL_NO_KW_BUILTIN_FAST, &&TARGET_CALL_NO_KW_BUILTIN_O, &&TARGET_CALL_NO_KW_ISINSTANCE, &&TARGET_CALL_NO_KW_LEN, @@ -46,8 +48,6 @@ static void *opcode_targets[256] = { &&TARGET_CALL_NO_KW_METHOD_DESCRIPTOR_O, &&TARGET_CALL_NO_KW_STR_1, &&TARGET_CALL_NO_KW_TUPLE_1, - &&TARGET_CALL_NO_KW_TYPE_1, - &&TARGET_COMPARE_OP_FLOAT, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -55,39 +55,39 @@ static void *opcode_targets[256] = { &&TARGET_BEFORE_WITH, &&TARGET_END_ASYNC_FOR, &&TARGET_CLEANUP_THROW, + &&TARGET_CALL_NO_KW_TYPE_1, + &&TARGET_COMPARE_OP_FLOAT, &&TARGET_COMPARE_OP_INT, &&TARGET_COMPARE_OP_STR, - &&TARGET_FOR_ITER_LIST, - &&TARGET_FOR_ITER_TUPLE, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, + &&TARGET_FOR_ITER_LIST, + &&TARGET_FOR_ITER_TUPLE, &&TARGET_FOR_ITER_RANGE, &&TARGET_FOR_ITER_GEN, + &&TARGET_BB_TEST_ITER_LIST, + &&TARGET_BB_TEST_ITER_TUPLE, + &&TARGET_GET_ITER, + &&TARGET_GET_YIELD_FROM_ITER, + &&TARGET_BB_TEST_ITER_RANGE, + &&TARGET_LOAD_BUILD_CLASS, &&TARGET_LOAD_ATTR_CLASS, &&TARGET_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, + &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_RETURN_GENERATOR, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_GET_ITER, - &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_LOAD_ATTR_PROPERTY, - &&TARGET_LOAD_BUILD_CLASS, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_ASSERTION_ERROR, - &&TARGET_RETURN_GENERATOR, &&TARGET_LOAD_ATTR_METHOD_LAZY_DICT, &&TARGET_LOAD_ATTR_METHOD_NO_DICT, + &&TARGET_RETURN_VALUE, &&TARGET_LOAD_ATTR_METHOD_WITH_VALUES, + &&TARGET_SETUP_ANNOTATIONS, &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_LOAD_GLOBAL_BUILTIN, - &&TARGET_RETURN_VALUE, - &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_SETUP_ANNOTATIONS, - &&TARGET_STORE_ATTR_INSTANCE_VALUE, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -110,9 +110,9 @@ static void *opcode_targets[256] = { &&TARGET_IMPORT_NAME, &&TARGET_IMPORT_FROM, &&TARGET_JUMP_FORWARD, - &&TARGET_STORE_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, - &&TARGET_STORE_SUBSCR_DICT, + &&TARGET_LOAD_GLOBAL_BUILTIN, + &&TARGET_LOAD_GLOBAL_MODULE, + 
&&TARGET_STORE_ATTR_INSTANCE_VALUE, &&TARGET_POP_JUMP_IF_FALSE, &&TARGET_POP_JUMP_IF_TRUE, &&TARGET_LOAD_GLOBAL, @@ -140,9 +140,9 @@ static void *opcode_targets[256] = { &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, &&TARGET_JUMP_BACKWARD, - &&TARGET_STORE_SUBSCR_LIST_INT, + &&TARGET_STORE_ATTR_SLOT, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_UNPACK_SEQUENCE_LIST, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, @@ -152,58 +152,58 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_VALUE, &&TARGET_RESUME, &&TARGET_MATCH_CLASS, - &&TARGET_UNPACK_SEQUENCE_TUPLE, - &&TARGET_UNPACK_SEQUENCE_TWO_TUPLE, + &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, - &&TARGET_SEND_GEN, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_STORE_SUBSCR_DICT, + &&TARGET_STORE_SUBSCR_LIST_INT, + &&TARGET_UNPACK_SEQUENCE_LIST, + &&TARGET_UNPACK_SEQUENCE_TUPLE, &&TARGET_LIST_EXTEND, &&TARGET_SET_UPDATE, &&TARGET_DICT_MERGE, &&TARGET_DICT_UPDATE, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_UNPACK_SEQUENCE_TWO_TUPLE, + &&TARGET_SEND_GEN, + &&TARGET_BB_BRANCH, + &&TARGET_BB_BRANCH_IF_FLAG_UNSET, + &&TARGET_BB_BRANCH_IF_FLAG_SET, &&TARGET_CALL, &&TARGET_KW_NAMES, &&TARGET_CALL_INTRINSIC_1, &&TARGET_CALL_INTRINSIC_2, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_BB_JUMP_IF_FLAG_UNSET, + &&TARGET_BB_JUMP_IF_FLAG_SET, + &&TARGET_BB_TEST_ITER, + &&TARGET_BB_TEST_ITER_RANGE, + &&TARGET_BB_TEST_ITER_LIST, + &&TARGET_BB_TEST_ITER_TUPLE, + &&TARGET_BB_TEST_POP_IF_FALSE, + &&TARGET_BB_TEST_POP_IF_TRUE, + &&TARGET_BB_TEST_POP_IF_NOT_NONE, + &&TARGET_BB_TEST_POP_IF_NONE, + &&TARGET_BB_JUMP_BACKWARD_LAZY, + &&TARGET_BINARY_CHECK_INT, + &&TARGET_BINARY_CHECK_FLOAT, + &&TARGET_CHECK_LIST, + &&TARGET_BINARY_OP_ADD_INT_REST, + &&TARGET_BINARY_OP_ADD_FLOAT_UNBOXED, + &&TARGET_BINARY_OP_SUBTRACT_INT_REST, + &&TARGET_BINARY_OP_SUBTRACT_FLOAT_UNBOXED, + &&TARGET_BINARY_OP_MULTIPLY_INT_REST, + &&TARGET_BINARY_OP_MULTIPLY_FLOAT_UNBOXED, + &&TARGET_BINARY_SUBSCR_LIST_INT_REST, + &&TARGET_STORE_SUBSCR_LIST_INT_REST, + &&TARGET_POP_TOP_NO_DECREF, + &&TARGET_UNBOX_FLOAT, + &&TARGET_BOX_FLOAT, + &&TARGET_COPY_NO_INCREF, + &&TARGET_LOAD_FAST_NO_INCREF, + &&TARGET_STORE_FAST_BOXED_UNBOXED, + &&TARGET_STORE_FAST_UNBOXED_BOXED, + &&TARGET_STORE_FAST_UNBOXED_UNBOXED, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index dd5b22dd2346c5..b9e450c74efffd 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2135,37 +2135,37 @@ int #endif void -_Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg) +_Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg, char is_bb) { assert(ENABLE_SPECIALIZATION); assert(_PyOpcode_Caches[FOR_ITER] == 
INLINE_CACHE_ENTRIES_FOR_ITER); _PyForIterCache *cache = (_PyForIterCache *)(instr + 1); PyTypeObject *tp = Py_TYPE(iter); if (tp == &PyListIter_Type) { - instr->op.code = FOR_ITER_LIST; + instr->op.code = is_bb ? BB_TEST_ITER_LIST : FOR_ITER_LIST; goto success; } else if (tp == &PyTupleIter_Type) { - instr->op.code = FOR_ITER_TUPLE; + instr->op.code = is_bb ? BB_TEST_ITER_TUPLE : FOR_ITER_TUPLE; goto success; } else if (tp == &PyRangeIter_Type) { - instr->op.code = FOR_ITER_RANGE; + instr->op.code = is_bb ? BB_TEST_ITER_RANGE : FOR_ITER_RANGE; goto success; } - else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { - assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR); - instr->op.code = FOR_ITER_GEN; - goto success; - } - SPECIALIZATION_FAIL(FOR_ITER, + //else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { + // assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR); + // instr->op.code = FOR_ITER_GEN; + // goto success; + //} + SPECIALIZATION_FAIL(is_bb ? BB_TEST_ITER : FOR_ITER, _PySpecialization_ClassifyIterator(iter)); - STAT_INC(FOR_ITER, failure); - instr->op.code = FOR_ITER; + STAT_INC(is_bb ? BB_TEST_ITER : FOR_ITER, failure); + instr->op.code = is_bb ? BB_TEST_ITER : FOR_ITER; cache->counter = adaptive_counter_backoff(cache->counter); return; success: - STAT_INC(FOR_ITER, success); + STAT_INC(is_bb ? BB_TEST_ITER : FOR_ITER, success); cache->counter = adaptive_counter_cooldown(); } diff --git a/Python/tier2.c b/Python/tier2.c new file mode 100644 index 00000000000000..847939d19f1e6b --- /dev/null +++ b/Python/tier2.c @@ -0,0 +1,2929 @@ +#include "Python.h" +#include "stdlib.h" +#include "pycore_code.h" +#include "pycore_frame.h" +#include "pycore_opcode.h" +#include "pycore_pystate.h" +#include "pycore_long.h" +#include "stdbool.h" + +#include "opcode.h" + +#define BB_DEBUG 0 +#define TYPEPROP_DEBUG 0 +// Max typed version basic blocks per basic block +#define MAX_BB_VERSIONS 10 + +#define OVERALLOCATE_FACTOR 6 + + +/* Dummy types used by the types propagator */ + +// Represents a 64-bit unboxed double +PyTypeObject PyRawFloat_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "rawfloat", + sizeof(PyFloatObject), +}; + +// Represents a PyLong that fits in a 64-bit long. +PyTypeObject PySmallInt_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "smallint", + sizeof(PyFloatObject), +}; + + +static inline int IS_SCOPE_EXIT_OPCODE(int opcode); + + +////////// TYPE CONTEXT FUNCTIONS + +/** + * @brief Allocates and initializes the type context for a code object. + * @param co The code object the type context belongs to. + * @return The newly-created type context. +*/ +static _PyTier2TypeContext * +initialize_type_context(const PyCodeObject *co) +{ + +#if TYPEPROP_DEBUG + fprintf(stderr, " [*] Initialize type context\n"); +#endif + + int nlocals = co->co_nlocals; + int nstack = co->co_stacksize; + + _Py_TYPENODE_t *type_locals = PyMem_Malloc(nlocals * sizeof(_Py_TYPENODE_t)); + if (type_locals == NULL) { + return NULL; + } + _Py_TYPENODE_t *type_stack = PyMem_Malloc(nstack * sizeof(_Py_TYPENODE_t)); + if (type_stack == NULL) { + PyMem_Free(type_locals); + return NULL; + } + + // Initialize to unknown type. 
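+ // Every local and stack slot starts out as _Py_TYPENODE_NULLROOT, a root node carrying no type;
+ // typenode_get_type() returns NULL for such a node, which the rest of the propagator treats as "type unknown".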
+ for (int i = 0; i < nlocals; i++) { + type_locals[i] = _Py_TYPENODE_NULLROOT; + } + for (int i = 0; i < nstack; i++) { + type_stack[i] = _Py_TYPENODE_NULLROOT; + } + + _PyTier2TypeContext *type_context = PyMem_Malloc(sizeof(_PyTier2TypeContext)); + if (type_context == NULL) { + PyMem_Free(type_locals); + PyMem_Free(type_stack); + return NULL; + } + type_context->type_locals_len = nlocals; + type_context->type_stack_len = nstack; + type_context->type_locals = type_locals; + type_context->type_stack = type_stack; + type_context->type_stack_ptr = type_stack; // init ptr at start of stack + return type_context; +} + +/** + * @brief Does a deepcopy of a type context and all its nodes. + * @param type_context The type context to copy. + * @return Newly copied type context. +*/ +static _PyTier2TypeContext * +_PyTier2TypeContext_Copy(const _PyTier2TypeContext *type_context) +{ + +#if TYPEPROP_DEBUG + fprintf(stderr, " [*] Copying type context\n"); + static void print_typestack(const _PyTier2TypeContext * type_context); + print_typestack(type_context); +#endif + + _Py_TYPENODE_t *orig_type_locals = type_context->type_locals; + _Py_TYPENODE_t *orig_type_stack = type_context->type_stack; + int nlocals = type_context->type_locals_len; + int nstack = type_context->type_stack_len; + + _Py_TYPENODE_t *type_locals = PyMem_Malloc(nlocals * sizeof(_Py_TYPENODE_t)); + if (type_locals == NULL) { + return NULL; + } + _Py_TYPENODE_t *type_stack = PyMem_Malloc(nstack * sizeof(_Py_TYPENODE_t)); + if (type_stack == NULL) { + PyMem_Free(type_locals); + return NULL; + } + + _PyTier2TypeContext *new_type_context = PyMem_Malloc(sizeof(_PyTier2TypeContext)); + if (new_type_context == NULL) { + PyMem_Free(type_locals); + PyMem_Free(type_stack); + return NULL; + } + + for (int i = 0; i < nlocals; i++) { + _Py_TYPENODE_t node = type_context->type_locals[i]; + switch _Py_TYPENODE_GET_TAG(node) + { + case TYPE_ROOT: + type_locals[i] = node; + break; + case TYPE_REF: { + // Check if part of locals + _Py_TYPENODE_t *parent = (_Py_TYPENODE_t *)_Py_TYPENODE_CLEAR_TAG(node); + + // Check if part of locals + int offset_locals = (int)(parent - type_context->type_locals); + if (0 <= offset_locals && offset_locals < nlocals) { + type_locals[i] = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)(&type_locals[offset_locals])); + } + // Is part of stack + else { + int offset_stack = (int)(parent - type_context->type_stack); +#if TYPEPROP_DEBUG + assert(0 <= offset_stack && offset_stack < nstack); +#endif + type_locals[i] = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)(&type_stack[offset_stack])); + } + break; + } + default: + Py_UNREACHABLE(); + } + } + + for (int i = 0; i < nstack; i++) { + _Py_TYPENODE_t node = type_context->type_stack[i]; + switch _Py_TYPENODE_GET_TAG(node) + { + case TYPE_ROOT: + type_stack[i] = node; + break; + case TYPE_REF: { + // Check if part of locals + _Py_TYPENODE_t *parent = (_Py_TYPENODE_t *)_Py_TYPENODE_CLEAR_TAG(node); + + // Check if part of locals + int plocals = (int)(parent - type_context->type_locals); + if (0 <= plocals && plocals < nlocals) { + type_stack[i] = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)(&type_locals[plocals])); + } + // Is part of stack + else { + int offset_stack = (int)(parent - type_context->type_stack); +#if TYPEPROP_DEBUG + assert(0 <= offset_stack && offset_stack < nstack); +#endif + type_stack[i] = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)(&type_stack[offset_stack])); + } + break; + } + default: + Py_UNREACHABLE(); + } + } + + new_type_context->type_locals_len = nlocals; + 
new_type_context->type_stack_len = nstack; + new_type_context->type_locals = type_locals; + new_type_context->type_stack = type_stack; + new_type_context->type_stack_ptr = type_stack - type_context->type_stack + type_context->type_stack_ptr; + return new_type_context; +} + +/** + * @brief Destructor for a type context. + * @param type_context The type context to destroy/free. +*/ +void +_PyTier2TypeContext_Free(_PyTier2TypeContext *type_context) +{ + +#if TYPEPROP_DEBUG + fprintf(stderr, " [*] Freeing type context\n"); +#endif + + PyMem_Free(type_context->type_locals); + PyMem_Free(type_context->type_stack); + PyMem_Free(type_context); +} + +static _Py_TYPENODE_t* +__typenode_get_rootptr(_Py_TYPENODE_t ref) +{ + uintptr_t tag; + _Py_TYPENODE_t *ref_ptr; + do { + ref_ptr = (_Py_TYPENODE_t *)(_Py_TYPENODE_CLEAR_TAG(ref)); + ref = *ref_ptr; + tag = _Py_TYPENODE_GET_TAG(ref); + } while (tag != TYPE_ROOT); + return ref_ptr; +} + +/** + * @brief Gets the actual PyTypeObject* that a type node points to. + * @param node The type propagator node to look up. + * @return The referenced PyTypeObject*. +*/ +static PyTypeObject* +typenode_get_type(_Py_TYPENODE_t node) +{ + uintptr_t tag = _Py_TYPENODE_GET_TAG(node); + switch (tag) { + case TYPE_ROOT: { + PyTypeObject *ret = (PyTypeObject *)_Py_TYPENODE_CLEAR_TAG(node); + return ret; + } + case TYPE_REF: { + _Py_TYPENODE_t *root_ref = __typenode_get_rootptr(node); + PyTypeObject *ret = (PyTypeObject *)_Py_TYPENODE_CLEAR_TAG(*root_ref); + return ret; + } + default: + Py_UNREACHABLE(); + } +} + +/** + * @brief Performs TYPE_SET operation. dst tree becomes part of src tree + * + * If src_is_new is set, src is interpreted as a TYPE_ROOT + * not part of the type_context. Otherwise, it is interpreted as a pointer + * to a _Py_TYPENODE_t. + * + * If src_is_new: + * Overwrites the root of the dst tree with the src node + * else: + * Makes the root of the dst tree a TYPE_REF to src + * + * @param src Source node + * @param dst Destination node + * @param src_is_new true if src is not part of the type_context +*/ +static void +__type_propagate_TYPE_SET( + _Py_TYPENODE_t *src, _Py_TYPENODE_t *dst, bool src_is_new) +{ + +#ifdef Py_DEBUG + // If `src_is_new` is set: + // - `src` doesn't belong inside the type context yet. + // - `src` has to be a TYPE_ROOT + // - `src` is to be interpreted as a _Py_TYPENODE_t + if (src_is_new) { + assert(_Py_TYPENODE_GET_TAG((_Py_TYPENODE_t)src) == TYPE_ROOT); + } +#endif + + uintptr_t tag = _Py_TYPENODE_GET_TAG(*dst); + switch (tag) { + case TYPE_ROOT: { + if (!src_is_new) { + // Make dst a reference to src + *dst = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)src); + break; + } + // Make dst the src + *dst = (_Py_TYPENODE_t)src; + break; + } + case TYPE_REF: { + _Py_TYPENODE_t *rootref = __typenode_get_rootptr(*dst); + if (!src_is_new) { + // Traverse up to the root of dst, make root a reference to src + *rootref = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)src); + break; + } + // Make root of dst the src + *rootref = (_Py_TYPENODE_t)src; + break; + } + default: + Py_UNREACHABLE(); + } +} + +/** + * @brief Performs TYPE_OVERWRITE operation. dst node gets overwritten by src node + * + * If src_is_new is set, src is interpreted as a TYPE_ROOT + * not part of the type_context. Otherwise, it is interpreted as a pointer + * to a _Py_TYPENODE_t. 
+ * + * If src_is_new: + * Removes dst node from its tree (+fixes all the references to dst) + * Overwrites the dst node with the src node + * else: + * Removes dst node from its tree (+fixes all the references to dst) + * Makes the dst node a TYPE_REF to src + * + * @param type_context Type context to modify + * @param src Source node + * @param dst Destination node + * @param src_is_new true if src is not part of the type_context +*/ +static void +__type_propagate_TYPE_OVERWRITE( + _PyTier2TypeContext *type_context, + _Py_TYPENODE_t* src, _Py_TYPENODE_t* dst, bool src_is_new) +{ + +#ifdef Py_DEBUG + // See: __type_propagate_TYPE_SET + if (src_is_new) { + assert(_Py_TYPENODE_GET_TAG((_Py_TYPENODE_t)src) == TYPE_ROOT); + } +#endif + + uintptr_t tag = _Py_TYPENODE_GET_TAG(*dst); + switch (tag) { + case TYPE_ROOT: { + + _Py_TYPENODE_t old_dst = *dst; + if (!src_is_new) { + // Make dst a reference to src + *dst = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)src); + } + else { + // Make dst the src + *dst = (_Py_TYPENODE_t)src; + } + + /* Pick one child of dst and make that the new root of the dst tree */ + + // Children of dst will have this form + _Py_TYPENODE_t child_test = _Py_TYPENODE_MAKE_REF( + _Py_TYPENODE_CLEAR_TAG((_Py_TYPENODE_t)dst)); + // Will be initialised to the first child we find (ptr to the new root) + _Py_TYPENODE_t *new_root_ptr = NULL; + + // Search locals for children + int nlocals = type_context->type_locals_len; + for (int i = 0; i < nlocals; i++) { + _Py_TYPENODE_t *node_ptr = &(type_context->type_locals[i]); + if (*node_ptr == child_test) { + if (new_root_ptr == NULL) { + // First child encountered! initialise root + new_root_ptr = node_ptr; + *node_ptr = old_dst; + } + else { + // Not the first child encountered, point it to the new root + *node_ptr = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)new_root_ptr); + } + } + } + + // Search stack for children + int nstack = type_context->type_stack_len; + for (int i = 0; i < nstack; i++) { + _Py_TYPENODE_t *node_ptr = &(type_context->type_stack[i]); + if (*node_ptr == child_test) { + if (new_root_ptr == NULL) { + // First child encountered! initialise root + new_root_ptr = node_ptr; + *node_ptr = old_dst; + } + else { + // Not the first child encountered, point it to the new root + *node_ptr = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)new_root_ptr); + } + } + } + break; + } + case TYPE_REF: { + + // Remember dst's parent before overwriting dst + _Py_TYPENODE_t old_dst = *dst; + if (!src_is_new) { + // Make dst a reference to src + *dst = _Py_TYPENODE_MAKE_REF((_Py_TYPENODE_t)src); + } + else { + // Make dst the src + *dst = (_Py_TYPENODE_t)src; + } + + /* Make all children of dst a reference to the parent of dst */ + + // Children of dst will have this form + _Py_TYPENODE_t child_test = _Py_TYPENODE_MAKE_REF( + _Py_TYPENODE_CLEAR_TAG((_Py_TYPENODE_t)dst)); + + // Search locals for children + int nlocals = type_context->type_locals_len; + for (int i = 0; i < nlocals; i++) { + _Py_TYPENODE_t *node_ptr = &(type_context->type_locals[i]); + if (*node_ptr == child_test) { + // Is a child of dst. Point it to the parent of dst + *node_ptr = old_dst; + } + } + + // Search stack for children + int nstack = type_context->type_stack_len; + for (int i = 0; i < nstack; i++) { + _Py_TYPENODE_t *node_ptr = &(type_context->type_stack[i]); + if (*node_ptr == child_test) { + // Is a child of dst. Point it to the parent of dst + *node_ptr = old_dst; + } + } + break; + } + default: + Py_UNREACHABLE(); + } +} + +/** + * @brief Performs TYPE_SWAP operation.
dst node and src node swap positions + * + * src and dst are assumed to already be within the type context + * + * If src and dst are the same tree + * Do nothing + * else: + * Fix all references of dst to point to src and vice versa + * + * @param type_context Type context to modify + * @param src Source node + * @param dst Destination node + * +*/ +static void +__type_propagate_TYPE_SWAP( + _PyTier2TypeContext *type_context, + _Py_TYPENODE_t *src, _Py_TYPENODE_t *dst) +{ + // Check if they are the same tree + _Py_TYPENODE_t *srcrootref = src; + _Py_TYPENODE_t *dstrootref = dst; + uintptr_t dsttag = _Py_TYPENODE_GET_TAG(*dst); + uintptr_t srctag = _Py_TYPENODE_GET_TAG(*src); + switch (dsttag) { + case TYPE_REF: dstrootref = __typenode_get_rootptr(*dst); + case TYPE_ROOT: + switch (srctag) { + case TYPE_REF: srcrootref = __typenode_get_rootptr(*src); + case TYPE_ROOT: + if (srcrootref == dstrootref) { + // Same tree, no point swapping + return; + } + break; + default: + Py_UNREACHABLE(); + } + break; + default: + Py_UNREACHABLE(); + } + + // src and dst are different tree, + // Make all children of src be children of dst and vice versa + + _Py_TYPENODE_t src_child_test = _Py_TYPENODE_MAKE_REF( + _Py_TYPENODE_CLEAR_TAG((_Py_TYPENODE_t)src)); + _Py_TYPENODE_t dst_child_test = _Py_TYPENODE_MAKE_REF( + _Py_TYPENODE_CLEAR_TAG((_Py_TYPENODE_t)dst)); + + // Search locals for children + int nlocals = type_context->type_locals_len; + for (int i = 0; i < nlocals; i++) { + _Py_TYPENODE_t *node_ptr = &(type_context->type_locals[i]); + if (*node_ptr == src_child_test) { + *node_ptr = dst_child_test; + } + else if (*node_ptr == dst_child_test) { + *node_ptr = src_child_test; + } + } + + // Search stack for children + int nstack = type_context->type_stack_len; + for (int i = 0; i < nstack; i++) { + _Py_TYPENODE_t *node_ptr = &(type_context->type_stack[i]); + if (*node_ptr == src_child_test) { + *node_ptr = dst_child_test; + } + else if (*node_ptr == dst_child_test) { + *node_ptr = src_child_test; + } + } + + // Finally, actually swap the nodes + *src ^= *dst; + *dst ^= *src; + *src ^= *dst; +} + +/** + * @brief Shrink a type stack by `idx` entries. + * @param type_stackptr The pointer to one after the top of type stack. + * @param idx The number of entries to shrink the stack by. +*/ +static void +__type_stack_shrink(_Py_TYPENODE_t **type_stackptr, int idx) +{ + // TODO: + // If we don't touch the stack elements + // when shrinking, we need to check for references + // on these elements. + // Otherwise, if we NULL these elements, we need to refactor + // the type propagator to perform shrinking last. + //while (idx--) { + // __type_propagate_TYPE_OVERWRITE( + // type_context, + // (_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, *type_stackptr, + // true); + // *type_stackptr -= 1; + //} + *type_stackptr -= idx; +} + +#if TYPEPROP_DEBUG + +/** + * @brief Print the entries in a type context (along with locals). + * @param type_context The type context to display. 
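+ *
+ * The output reflects the node graph: each entry prints as its resolved type name ("?" if unknown),
+ * TYPE_REF nodes additionally print "->locals[i]" or "->stack[i]" for the node they reference,
+ * and "." marks where the type stack pointer currently sits.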
+*/ +static void +print_typestack(const _PyTier2TypeContext *type_context) +{ + _Py_TYPENODE_t *type_stack = type_context->type_stack; + _Py_TYPENODE_t *type_locals = type_context->type_locals; + _Py_TYPENODE_t *type_stackptr = type_context->type_stack_ptr; + + int nstack_use = (int)(type_stackptr - type_stack); + int nstack = type_context->type_stack_len; + int nlocals = type_context->type_locals_len; + + int plocals = 0; + int pstack = 0; + bool is_local = false; + + fprintf(stderr, " Stack: %p: [", type_stack); + for (int i = 0; i < nstack; i++) { + _Py_TYPENODE_t node = type_stack[i]; + PyTypeObject *type = typenode_get_type(node); + _Py_TYPENODE_t tag = _Py_TYPENODE_GET_TAG(node); + + if (tag == TYPE_REF) { + _Py_TYPENODE_t *parent = (_Py_TYPENODE_t *)(_Py_TYPENODE_CLEAR_TAG(node)); + plocals = (int)(parent - type_context->type_locals); + pstack = (int)(parent - type_context->type_stack); + is_local = (0 <= plocals) && (plocals < nlocals); + if (!is_local) { + assert((0 <= pstack) && (pstack < nstack)); + } + } + + fprintf(stderr, "%s%s", + i == nstack_use ? "." : " ", + type == NULL ? "?" : type->tp_name); + if (tag == TYPE_REF) { + fprintf(stderr, "%s%d]", + is_local ? "->locals[" : "->stack[", + is_local ? plocals : pstack); + } + } + fprintf(stderr, "]\n"); + + fprintf(stderr, " Locals %p: [", type_locals); + for (int i = 0; i < nlocals; i++) { + _Py_TYPENODE_t node = type_locals[i]; + PyTypeObject *type = typenode_get_type(node); + _Py_TYPENODE_t tag = _Py_TYPENODE_GET_TAG(node); + + if (tag == TYPE_REF) { + _Py_TYPENODE_t *parent = (_Py_TYPENODE_t *)(_Py_TYPENODE_CLEAR_TAG(node)); + plocals = (int)(parent - type_context->type_locals); + pstack = (int)(parent - type_context->type_stack); + is_local = (0 <= plocals) && (plocals < nlocals); + if (!is_local) { + assert((0 <= pstack) && (pstack < nstack)); + } + } + + fprintf(stderr, " %s", + type == NULL ? "?" : type->tp_name); + if (tag == TYPE_REF) { + fprintf(stderr, "%s%d]", + is_local ? "->locals[" : "->stack[", + is_local ? plocals : pstack); + } + } + fprintf(stderr, "]\n"); +} +#endif + + +/** + * @brief Type propagate across a single instruction + * @param opcode The instruction opcode. + * @param oparg The instruction oparg. + * @param type_context The current type context in the basic block. + * @param consts The co_consts array of the code object that holds the constants. 
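+ *
+ * Most per-opcode transfer functions live in the generated tier2_typepropagator.c.h; only SWAP is
+ * handled inline below. For example (illustrative), a LOAD_CONST case would push TYPECONST_GET(oparg)
+ * onto the type stack, and a STORE_FAST case would overwrite type_locals[oparg] with the top node.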
+*/ +static void +type_propagate( + int opcode, int oparg, + _PyTier2TypeContext *type_context, + const PyObject *consts) +{ + _Py_TYPENODE_t *type_stack = type_context->type_stack; + _Py_TYPENODE_t *type_locals = type_context->type_locals; + _Py_TYPENODE_t **type_stackptr = &type_context->type_stack_ptr; + +#define TARGET(op) case op: +#define TYPESTACK_PEEK(idx) (&((*type_stackptr)[-(idx)])) +#define TYPELOCALS_GET(idx) (&(type_locals[idx])) + +// Get the type of the const and make into a TYPENODE ROOT +#define TYPECONST_GET(idx) _Py_TYPENODE_MAKE_ROOT( \ + (_Py_TYPENODE_t)Py_TYPE( \ + PyTuple_GET_ITEM(consts, idx))) + +#define TYPE_SET(src, dst, flag) __type_propagate_TYPE_SET((src), (dst), (flag)) +#define TYPE_OVERWRITE(src, dst, flag) __type_propagate_TYPE_OVERWRITE(type_context, (src), (dst), (flag)) +#define TYPE_SWAP(src, dst) __type_propagate_TYPE_SWAP(type_context, (src), (dst)) + +#define STACK_GROW(idx) *type_stackptr += (idx) + +// Stack shrinking has to NULL the nodes +#define STACK_SHRINK(idx) __type_stack_shrink(type_stackptr, (idx)) + +#if TYPEPROP_DEBUG + fprintf(stderr, " [-] Type stack bef: %llu\n", (uint64_t)(*type_stackptr - type_stack)); +#ifdef Py_DEBUG + fprintf(stderr, " [-] Type propagating across: %s : %d\n", _PyOpcode_OpName[opcode], oparg); +#endif +#endif + + switch (opcode) { +#include "tier2_typepropagator.c.h" + TARGET(SWAP) { + _Py_TYPENODE_t *top = TYPESTACK_PEEK(1); + _Py_TYPENODE_t * bottom = TYPESTACK_PEEK(2 + (oparg - 2)); + TYPE_SWAP(top, bottom); + break; + } + default: +#ifdef Py_DEBUG + fprintf(stderr, "Unsupported opcode in type propagator: %s : %d\n", _PyOpcode_OpName[opcode], oparg); +#else + fprintf(stderr, "Unsupported opcode in type propagator: %d\n", opcode); +#endif + Py_UNREACHABLE(); + } + +#if TYPEPROP_DEBUG + fprintf(stderr, " [-] Type stack aft: %llu\n", (uint64_t)(*type_stackptr - type_stack)); + print_typestack(type_context); +#endif + +#undef TARGET +#undef STACK_ADJUST +#undef STACK_GROW +#undef STACK_SHRINK +#undef TYPECONST_GET +} + + +////////// BB SPACE FUNCTIONS + +/** + * @brief Creates the overallocated array for the BBs. + * @param space_to_alloc How much space to allocate. + * @return A new space that we can write basic block instructions to. +*/ +static _PyTier2BBSpace * +_PyTier2_CreateBBSpace(Py_ssize_t space_to_alloc) +{ + _PyTier2BBSpace *bb_space = PyMem_Malloc(space_to_alloc + sizeof(_PyTier2BBSpace)); + if (bb_space == NULL) { + return NULL; + } + bb_space->water_level = 0; + bb_space->max_capacity = space_to_alloc; + return bb_space; +} + +/** + * @brief Checks if there's enough space in the basic block space for space_requested. + * @param co The code object's tier2 basic block space to check + * @param space_requested The amount of extra space you need. + * @return The space of the code object after checks. +*/ +static _PyTier2BBSpace * +_PyTier2_BBSpaceCheckAndReallocIfNeeded(PyCodeObject *co, Py_ssize_t space_requested) +{ + assert(co->_tier2_info != NULL); + assert(co->_tier2_info->_bb_space != NULL); + _PyTier2BBSpace *curr = co->_tier2_info->_bb_space; + // Over max capacity + if (curr->water_level + space_requested > curr->max_capacity) { + // Note: overallocate + Py_ssize_t new_size = sizeof(_PyTier2BBSpace) + (curr->water_level + space_requested) * 2; +#if BB_DEBUG + fprintf(stderr, "Space requested: %lld, Allocating new BB of size %lld\n", (int64_t)space_requested, (int64_t)new_size); +#endif + // @TODO We can't Realloc, we actually need to do the linked list method. 
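+ // (A plain PyMem_Realloc is not safe here: it may move the buffer, and pointers such as
+ // _PyTier2BBMetadata->tier2_start point directly into u_code, so they would be left dangling.)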
+ Py_UNREACHABLE(); + } + // We have enough space. Don't do anything. + return curr; +} + +//// BB METADATA FUNCTIONS + +/** + * @brief Allocate the metadata associated with a basic block. + * The metadata contains things like the type context at the end of the basic block. + * + * @param co The code object this basic block belongs to. + * @param tier2_start The start of the tier 2 code (start of the basic block). + * @param tier1_end The end of the tier 1 code this basic block points to. + * @param type_context The type context associated with this basic block. + * @return Newly allocated metadata for this basic block. + * +*/ +static _PyTier2BBMetadata * +allocate_bb_metadata(PyCodeObject *co, _Py_CODEUNIT *tier2_start, + _Py_CODEUNIT *tier1_end, + _PyTier2TypeContext *type_context) +{ + _PyTier2BBMetadata *metadata = PyMem_Malloc(sizeof(_PyTier2BBMetadata)); + if (metadata == NULL) { + return NULL; + } + + metadata->tier2_start = tier2_start; + metadata->tier1_end = tier1_end; + metadata->type_context = type_context; + return metadata; +} + + +/** + * @brief Writes BB metadata to code object's tier2info bb_data field. + * @param co The code object whose metadata we should write to. + * @param metadata The metadata to write. + * @return 0 on success, 1 on error. +*/ +static int +write_bb_metadata(PyCodeObject *co, _PyTier2BBMetadata *metadata) +{ + assert(co->_tier2_info != NULL); + // Not enough space left in bb_data, allocate new one. + if (co->_tier2_info->bb_data == NULL || + co->_tier2_info->bb_data_curr >= co->_tier2_info->bb_data_len) { + int new_len = (co->_tier2_info->bb_data_len + 1) * 2; + Py_ssize_t new_space = new_len * sizeof(_PyTier2BBMetadata *); + // Overflow + if (new_len < 0) { + return 1; + } + _PyTier2BBMetadata **new_data = PyMem_Realloc(co->_tier2_info->bb_data, new_space); + if (new_data == NULL) { + return 1; + } + co->_tier2_info->bb_data = new_data; + co->_tier2_info->bb_data_len = new_len; + } + int id = co->_tier2_info->bb_data_curr; + co->_tier2_info->bb_data[id] = metadata; + metadata->id = id; + co->_tier2_info->bb_data_curr++; +#if BB_DEBUG + fprintf(stderr, "Creating a BB Metadata with ID %d\n", id); +#endif + return 0; +} + +/** + * @brief Allocate BB metadata, then write it. + * Use this instead of `allocate_bb_metadata`. + * + * @param co The code object the metadata belongs to. + * @param tier2_start The start of the tier 2 code (start of the basic block). + * @param tier1_end The end of the tier 1 code this basic block points to. + * @param type_context The type context associated with this basic block. + * @return Newly allocated metadata for this basic block. + * +*/ +static _PyTier2BBMetadata * +_PyTier2_AllocateBBMetaData(PyCodeObject *co, _Py_CODEUNIT *tier2_start, + _Py_CODEUNIT *tier1_end, + _PyTier2TypeContext *type_context) +{ + _PyTier2BBMetadata *meta = allocate_bb_metadata(co, + tier2_start, tier1_end, type_context); + if (meta == NULL) { + return NULL; + } + if (write_bb_metadata(co, meta)) { + PyMem_Free(meta); + return NULL; + } + + return meta; + +} + +/* Opcode detection functions. Keep in sync with compile.c and dis! */ + +/** + * @brief C equivalent of dis.hasjabs + * @param opcode Opcode of the instruction. + * @return Whether this is an absolute jump. +*/ +static inline int +IS_JABS_OPCODE(int opcode) +{ + return 0; +} + +/** + * @brief C equivalent of dis.hasjrel + * @param opcode Opcode of the instruction. + * @return Whether this is a relative jump.
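+ * e.g. IS_JREL_OPCODE(JUMP_FORWARD) and IS_JREL_OPCODE(FOR_ITER) return 1,
+ * while IS_JREL_OPCODE(LOAD_FAST) returns 0.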
+*/ +static inline int +IS_JREL_OPCODE(int opcode) +{ + switch (opcode) { + case FOR_ITER: + case JUMP_FORWARD: + // These two tend to be after a COMPARE_OP + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + case SEND: + case POP_JUMP_IF_NOT_NONE: + case POP_JUMP_IF_NONE: + case JUMP_BACKWARD_QUICK: + case JUMP_BACKWARD_NO_INTERRUPT: + case JUMP_BACKWARD: + return 1; + default: + return 0; + + } +} + +/** + * @brief Checks if this is a backwards jump instruction. + * @param opcode Opcode of the instruction. + * @return Whether this is a backwards jump. +*/ +static inline int +IS_JUMP_BACKWARDS_OPCODE(int opcode) +{ + return opcode == JUMP_BACKWARD_NO_INTERRUPT || + opcode == JUMP_BACKWARD || + opcode == JUMP_BACKWARD_QUICK; +} + + +/** + * @brief C equivalent of dis.hasjrel || dis.hasjabs + * @param opcode Opcode of the instruction. + * @return Whether this is a jump instruction. +*/ +static inline int +IS_JUMP_OPCODE(int opcode) +{ + return IS_JREL_OPCODE(opcode) || IS_JABS_OPCODE(opcode); +} + + +/** + * @brief Checks whether the opcode is a scope exit. + * @param opcode Opcode of the instruction. + * @return Whether this is a scope exit. +*/ +static inline int +IS_SCOPE_EXIT_OPCODE(int opcode) +{ + switch (opcode) { + case RETURN_VALUE: + case RETURN_CONST: + case RAISE_VARARGS: + case RERAISE: + case INTERPRETER_EXIT: + return 1; + default: + return 0; + } +} + +// KEEP IN SYNC WITH COMPILE.c!!!! +/** + * @brief Checks whether the opcode terminates a basic block. + * @param opcode Opcode of the instruction. + * @return Whether this is the end of a basic block. +*/ +static int +IS_TERMINATOR_OPCODE(int opcode) +{ + return IS_JUMP_OPCODE(opcode) || IS_SCOPE_EXIT_OPCODE(opcode); +} + +/** + * @brief Opcodes that we can't handle at the moment. If we see them, ditch tier 2 attempts. + * @param opcode Opcode of the instruction. + * @param nextop The opcode of the following instruction. + * @return Whether this opcode is forbidden. +*/ +static inline int +IS_FORBIDDEN_OPCODE(int opcode, int nextop) +{ + switch (opcode) { + // Modifying containers + case LIST_EXTEND: + case SET_UPDATE: + case DICT_UPDATE: + // f-strings + case FORMAT_VALUE: + // Type hinting + case SETUP_ANNOTATIONS: + // Context manager + case BEFORE_WITH: + // Generators and coroutines + case SEND: + case YIELD_VALUE: + case GET_AITER: + case GET_ANEXT: + case BEFORE_ASYNC_WITH: + case END_ASYNC_FOR: + // Raise keyword + case RAISE_VARARGS: + // Exceptions, we could support these theoretically. + // Just too much work for now + case PUSH_EXC_INFO: + case RERAISE: + case POP_EXCEPT: + case CHECK_EXC_MATCH: + case CLEANUP_THROW: + // Closures + case LOAD_DEREF: + case LOAD_CLASSDEREF: + case MAKE_CELL: + // DELETE_FAST + case DELETE_FAST: + // Pattern matching + case MATCH_MAPPING: + case MATCH_SEQUENCE: + case MATCH_KEYS: + return 1; + // Two simultaneous EXTENDED_ARG + case EXTENDED_ARG: + return nextop == EXTENDED_ARG; + default: + return 0; + } +} + +/** + * @brief Decides what values we need to rebox. + * + * This function automatically emits rebox instructions if needed. + * + * @param write_curr Instruction write buffer. + * @param type_context The type context to base our decisions on. + * @param num_elements How many stack entries and thus how far from the TOS we want to rebox. + * @return Pointer to new instruction write buffer end. 
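+ *
+ * Illustrative example: with an unboxed float sitting one below a boxed int on the type stack and
+ * num_elements == 2, this emits a single BOX_FLOAT with oparg 1 for the float and leaves the
+ * already-boxed int untouched.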
+ * +*/ +static inline _Py_CODEUNIT * +rebox_stack(_Py_CODEUNIT *write_curr, + _PyTier2TypeContext *type_context, int num_elements) +{ + for (int i = 0; i < num_elements; i++) { + _Py_TYPENODE_t *curr = type_context->type_stack_ptr - 1 - i; + if (typenode_get_type(*curr) == &PyRawFloat_Type) { + write_curr->op.code = BOX_FLOAT; + write_curr->op.arg = i; + write_curr++; + type_propagate(BOX_FLOAT, i, type_context, NULL); + } + } + return write_curr; +} + +/** + * @brief Emit CACHE entries for an instruction. + * NOTE: THIS DOES NOT PRESERVE PREVIOUS CACHE INFORMATION. + * THUS THIS INITIALIZES A CLEAN SLATE. + * + * @param write_curr Tier 2 instruction write buffer. + * @param cache_entries Number of cache entries to emit. + * @return Pointer to end of tier 2 instruction write buffer. + * +*/ +static inline _Py_CODEUNIT * +emit_cache_entries(_Py_CODEUNIT *write_curr, int cache_entries) +{ + for (int i = 0; i < cache_entries; i++) { + _py_set_opcode(write_curr, CACHE); + write_curr++; + } + return write_curr; +} + +#define BB_ID(bb_id_raw) (bb_id_raw >> 1) +#define BB_IS_TYPE_BRANCH(bb_id_raw) (bb_id_raw & 1) +#define MAKE_TAGGED_BB_ID(bb_id, type_branch) (bb_id << 1 | type_branch) + +/** + * @brief Write a BB's ID to a CACHE entry. + * @param cache The CACHE entry to write to. + * @param bb_id The BB's ID. + * @param is_type_guard Whether the BB ends with a type guard. + * +*/ +static inline void +write_bb_id(_PyBBBranchCache *cache, int bb_id, bool is_type_guard) { + assert((uint16_t)(bb_id) == bb_id); + // Make sure MSB is unset, because we need to shift it. + assert((bb_id & 0x8000) == 0); + cache->bb_id_tagged = MAKE_TAGGED_BB_ID((uint16_t)bb_id, is_type_guard); +} + +/** + * @brief The order/hierarchy to emit type guards. + * + * NEED TO ADD TO THIS EVERY TIME WE ADD A NEW ONE. +*/ +static int type_guard_ladder[256] = { + -1, + BINARY_CHECK_FLOAT, + BINARY_CHECK_INT, + -1, + CHECK_LIST, + -1, +}; + +/** + * @brief Type guard to index in the ladder. + * + * KEEP IN SYNC WITH INDEX IN type_guard_ladder +*/ +static int type_guard_to_index[256] = { + [BINARY_CHECK_FLOAT] = 1, + [BINARY_CHECK_INT] = 2, + [CHECK_LIST] = 4, +}; + + +/** + * @brief Emit a type guard. + * @param write_curr The tier 2 instruction write buffer. + * @param guard_opcode The opcode of the type guard. + * @param guard_oparg The oparg of the type guard. + * @param bb_id The BB ID of the current BB we're writing to. + * @return Pointer to new end of the tier 2 instruction write buffer. +*/ +static inline _Py_CODEUNIT * +emit_type_guard(_Py_CODEUNIT *write_curr, int guard_opcode, int guard_oparg, int bb_id) +{ +#if BB_DEBUG && defined(Py_DEBUG) + fprintf(stderr, "emitted type guard %p %s\n", write_curr, + _PyOpcode_OpName[guard_opcode]); +#endif + write_curr->op.code = guard_opcode; + write_curr->op.arg = guard_oparg & 0xFF; + write_curr++; + + write_curr->op.code = NOP; + write_curr->op.arg = 0; + write_curr++; + + write_curr->op.code = BB_BRANCH; + write_curr->op.arg = 0; + write_curr++; + _PyBBBranchCache *cache = (_PyBBBranchCache *)write_curr; + write_curr = emit_cache_entries(write_curr, INLINE_CACHE_ENTRIES_BB_BRANCH); + write_bb_id(cache, bb_id, true); + return write_curr; +} + +/** + * @brief Converts the tier 1 branch bytecode to tier 2 branch bytecode. + * + * This converts sequence of instructions like + * POP_JUMP_IF_FALSE + * to + * BB_TEST_POP_IF_FALSE + * BB_BRANCH + * CACHE (bb_id of the current BB << 1 | is_type_branch)* + * + * @param type_context The type_context of the current BB. 
+ * @param write_curr The tier 2 instruction write buffer. + * @param branch The tier 1 branch instruction to convert. + * @param bb_id The BB_ID of the current BB. + * @param oparg Oparg of the branch instruction (respects EXTENDED_ARGS). + * @return The updated tier 2 instruction write buffer end. + * +*/ +static inline _Py_CODEUNIT * +emit_logical_branch(_PyTier2TypeContext *type_context, _Py_CODEUNIT *write_curr, + _Py_CODEUNIT branch, int bb_id, int oparg) +{ + int opcode; + // @TODO handle JUMP_BACKWARDS and JUMP_BACKWARDS_NO_INTERRUPT + switch (_PyOpcode_Deopt[_Py_OPCODE(branch)]) { + case JUMP_BACKWARD_QUICK: + case JUMP_BACKWARD: + // The initial backwards jump needs to find the right basic block. + // Subsequent jumps don't need to check this anymore. They can just + // jump directly with JUMP_BACKWARD. + opcode = BB_JUMP_BACKWARD_LAZY; + // v BB_JUMP_BACKWARD_LAZY has nothing to propagate + // type_propagate(opcode, oparg, type_context, NULL); + break; + case FOR_ITER: + opcode = BB_TEST_ITER; + // This inst has conditional stack effect according to whether the branch is taken. + // This inst sets the `gen_bb_requires_pop` flag to handle stack effect of this opcode in BB_BRANCH + break; + case POP_JUMP_IF_FALSE: + opcode = BB_TEST_POP_IF_FALSE; + type_propagate(opcode, oparg, type_context, NULL); + break; + case POP_JUMP_IF_TRUE: + opcode = BB_TEST_POP_IF_TRUE; + type_propagate(opcode, oparg, type_context, NULL); + break; + case POP_JUMP_IF_NOT_NONE: + opcode = BB_TEST_POP_IF_NOT_NONE; + type_propagate(opcode, oparg, type_context, NULL); + break; + case POP_JUMP_IF_NONE: + opcode = BB_TEST_POP_IF_NONE; + type_propagate(opcode, oparg, type_context, NULL); + break; + default: + // Honestly shouldn't happen because branches that + // we can't handle are in IS_FORBIDDEN_OPCODE +#if BB_DEBUG + fprintf(stderr, + "emit_logical_branch unreachable opcode %d\n", _Py_OPCODE(branch)); +#endif + Py_UNREACHABLE(); + } + assert(oparg <= 0XFFFF); + bool requires_extended_arg = oparg > 0xFF; + // Backwards jumps should be handled specially. + if (opcode == BB_JUMP_BACKWARD_LAZY) { +#if BB_DEBUG + fprintf(stderr, "emitted backwards jump %p %d\n", write_curr, + _Py_OPCODE(branch)); +#endif + // Just in case, can be swapped out with an EXTENDED_ARG + _py_set_opcode(write_curr, requires_extended_arg ? EXTENDED_ARG : NOP); + write_curr->op.arg = (oparg >> 8) & 0xFF; + write_curr++; + // We don't need to recalculate the backward jump, because that only needs to be done + // when it locates the next BB in JUMP_BACKWARD_LAZY. + _py_set_opcode(write_curr, BB_JUMP_BACKWARD_LAZY); + write_curr->op.arg = oparg & 0xFF; + write_curr++; + _PyBBBranchCache *cache = (_PyBBBranchCache *)write_curr; + write_curr = emit_cache_entries(write_curr, INLINE_CACHE_ENTRIES_BB_BRANCH); + write_bb_id(cache, bb_id, false); + return write_curr; + } + // FOR_ITER is also a special jump + else if (opcode == BB_TEST_ITER) { +#if BB_DEBUG + fprintf(stderr, "emitted iter branch %p %d\n", write_curr, + _Py_OPCODE(branch)); +#endif + // The oparg of FOR_ITER is a little special, the actual jump has to jump over + // its own cache entries, the oparg, -1 to tell it to start generating from the + // END_FOR. However, at runtime, we will skip this END_FOR. + // NOTE: IF YOU CHANGE ANY OF THE INSTRUCTIONS BELOW, MAKE SURE + // TO UPDATE THE CALCULATION OF OPARG. THIS IS EXTREMELY IMPORTANT. + oparg = INLINE_CACHE_ENTRIES_FOR_ITER + oparg; + requires_extended_arg = oparg > 0xFF; + _py_set_opcode(write_curr, requires_extended_arg ? 
EXTENDED_ARG : NOP); + write_curr->op.arg = (oparg >> 8) & 0xFF; + write_curr++; + _py_set_opcode(write_curr, BB_TEST_ITER); + write_curr->op.arg = oparg & 0xFF; + write_curr++; + // Initialize adaptive interpreter counter + write_curr->cache = adaptive_counter_warmup(); + write_curr = emit_cache_entries(write_curr, INLINE_CACHE_ENTRIES_FOR_ITER); + type_propagate(BB_TEST_ITER, oparg, type_context, NULL); + _py_set_opcode(write_curr, requires_extended_arg ? EXTENDED_ARG : NOP); + write_curr->op.arg = (oparg >> 8) & 0xFF; + write_curr++; + _py_set_opcode(write_curr, BB_BRANCH); + write_curr->op.arg = oparg & 0xFF; + write_curr++; + _PyBBBranchCache *cache = (_PyBBBranchCache *)write_curr; + write_curr = emit_cache_entries(write_curr, INLINE_CACHE_ENTRIES_BB_BRANCH); + write_bb_id(cache, bb_id, false); + return write_curr; + } + else { +#if BB_DEBUG + fprintf(stderr, "emitted logical branch %p %d\n", write_curr, + _Py_OPCODE(branch)); +#endif + _py_set_opcode(write_curr, requires_extended_arg ? EXTENDED_ARG : NOP); + write_curr->op.arg = (oparg >> 8) & 0xFF; + write_curr++; + _py_set_opcode(write_curr, opcode); + write_curr->op.arg = oparg & 0xFF; + write_curr++; + _py_set_opcode(write_curr, requires_extended_arg ? EXTENDED_ARG : NOP); + write_curr->op.arg = (oparg >> 8) & 0xFF; + write_curr++; + _py_set_opcode(write_curr, BB_BRANCH); + write_curr->op.arg = oparg & 0xFF; + write_curr++; + _PyBBBranchCache *cache = (_PyBBBranchCache *)write_curr; + write_curr = emit_cache_entries(write_curr, INLINE_CACHE_ENTRIES_BB_BRANCH); + write_bb_id(cache, bb_id, false); + return write_curr; + } +} + +/** + * @brief Emits the exit of a scope. + * @param write_curr The tier 2 instruction write buffer. + * @param exit The tier 1 exit instruction. + * @param type_context The BB's type context. + * @return The updated tier 2 instruction write buffer end. +*/ +static inline _Py_CODEUNIT * +emit_scope_exit(_Py_CODEUNIT *write_curr, _Py_CODEUNIT exit, + _PyTier2TypeContext *type_context) +{ + switch (_Py_OPCODE(exit)) { + case RETURN_VALUE: + write_curr = rebox_stack(write_curr, type_context, 1); + *write_curr = exit; + write_curr++; + return write_curr; + case RETURN_CONST: + case INTERPRETER_EXIT: +#if BB_DEBUG + fprintf(stderr, "emitted scope exit\n"); +#endif + //// @TODO we can propagate and chain BBs across call boundaries + //// Thanks to CPython's inlined call frames. + //_py_set_opcode(write_curr, BB_EXIT_FRAME); + *write_curr = exit; + write_curr++; + return write_curr; + default: + // The rest are forbidden. +#if BB_DEBUG + fprintf(stderr, "emit_scope_exit unreachable %d\n", _Py_OPCODE(exit)); +#endif + Py_UNREACHABLE(); + } +} + +/** + * @brief Emit a single instruction. (Respects EXTENDED_ARG). + * @param write_curr The tier 2 instruction write buffer. + * @param opcode The instruction's opcode. + * @param oparg The instruction's oparg. + * @return The updated tier 2 instruction write buffer end. +*/ +static inline _Py_CODEUNIT * +emit_i(_Py_CODEUNIT *write_curr, int opcode, int oparg) +{ + if (oparg > 0xFF) { + _py_set_opcode(write_curr, EXTENDED_ARG); + write_curr->op.arg = (oparg >> 8) & 0xFF; + write_curr++; + } + _py_set_opcode(write_curr, opcode); + write_curr->op.arg = oparg & 0xFF; + write_curr++; + return write_curr; +} + + +/** + * @brief Copy over cache entries, preserving their information. + * Note: we're copying over the actual caches to preserve information! + * This way instructions that we can't type propagate over still stay + * optimized. 
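+ * For example, an already-specialized instruction keeps its warmed-up counter and cached data,
+ * instead of getting the freshly initialized entries that emit_cache_entries() would produce.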
+ * + * @param write_curr The tier 2 instruction write buffer + * @param cache The tier 1 CACHE to copy. + * @param n_entries How many CACHE entries to copy. + * @return The updated tier 2 instruction write buffer end. +*/ +static inline _Py_CODEUNIT * +copy_cache_entries(_Py_CODEUNIT *write_curr, _Py_CODEUNIT *cache, int n_entries) +{ + for (int i = 0; i < n_entries; i++) { + *write_curr = *cache; + cache++; + write_curr++; + } + return write_curr; +} + + +/** + * @brief Checks if the current instruction is a backwards jump target. + * @param co The code object the instruction belongs to. + * @param curr The current instruction to check. + * @return Yes/No. +*/ +static int +IS_BACKWARDS_JUMP_TARGET(PyCodeObject *co, _Py_CODEUNIT *curr) +{ + assert(co->_tier2_info != NULL); + int backward_jump_count = co->_tier2_info->backward_jump_count; + int *backward_jump_offsets = co->_tier2_info->backward_jump_offsets; + _Py_CODEUNIT *start = _PyCode_CODE(co); + // TODO: CHANGE TO BINARY SEARCH WHEN i > 40. For smaller values, linear search is quicker. + for (int i = 0; i < backward_jump_count; i++) { + if (curr == start + backward_jump_offsets[i]) { + return 1; + } + } + return 0; +} + +/** + * @brief Adds BB metadata to the jump 2D array that a tier 2 code object contains. + * This happens when a BB is a backwards jump target. + * + * @param t2_info Tier 2 info of that code object. + * @param meta The BB metadata to add. + * @param backwards_jump_target Offset (in number of codeunits) from start of code object where + * the backwards jump target is located. + * + * @param starting_context The type context at the start of the jump target BB. + * @param tier1_start The tier 1 starting instruction of the jump target BB. + * @return 1 for error, 0 for success. +*/ +static inline int +add_metadata_to_jump_2d_array(_PyTier2Info *t2_info, _PyTier2BBMetadata *meta, + int backwards_jump_target, _PyTier2TypeContext *starting_context, + _Py_CODEUNIT *tier1_start) +{ + // Locate where to insert the BB ID + int backward_jump_offset_index = 0; + bool found = false; + for (; backward_jump_offset_index < t2_info->backward_jump_count; + backward_jump_offset_index++) { + if (t2_info->backward_jump_offsets[backward_jump_offset_index] == + backwards_jump_target) { + found = true; + break; + } + } + assert(found); + int jump_i = 0; + found = false; + for (; jump_i < MAX_BB_VERSIONS; jump_i++) { + if (t2_info->backward_jump_target_bb_pairs[backward_jump_offset_index][jump_i].id == + -1) { + t2_info->backward_jump_target_bb_pairs[backward_jump_offset_index][jump_i].id = + meta->id; + t2_info->backward_jump_target_bb_pairs[backward_jump_offset_index][jump_i].start_type_context = starting_context; + t2_info->backward_jump_target_bb_pairs[backward_jump_offset_index][jump_i].tier1_start = tier1_start; + found = true; + break; + } + } + // Out of basic blocks versions. + if (!found) { + return 1; + } + assert(found); + return 0; +} + +/** + * @brief Infers the correct BINARY_OP to use. This is where we choose to emit + * more efficient arithmetic instructions. + * + * This converts sequence of instructions like + * BINARY_OP (ADD) + * to + * BINARY_CHECK_INT + * BB_BRANCH + * CACHE (bb_id of the current BB << 1 | is_type_branch) + * // The BINARY_ADD then goes to the next BB + * + * @param t2_start Start of the current basic block. + * @param oparg Oparg of the BINARY_OP. + * @param needs_guard Signals to the caller whether they should emit a type guard. 
+ * @param prev_type_guard The previous basic block's ending type guard (this is + * required for the ladder of types). + * + * @param raw_op The tier 0/1 BINARY_OP. + * @param write_curr Tier 2 instruction write buffer. + * @param type_context Current type context to base our decisions on. + * @param bb_id The current BB's ID. + * @return Updated tier 2 instruction write buffer end. +*/ +static inline _Py_CODEUNIT * +infer_BINARY_OP( + _Py_CODEUNIT *t2_start, + int oparg, + bool *needs_guard, + _Py_CODEUNIT *prev_type_guard, + _Py_CODEUNIT raw_op, + _Py_CODEUNIT *write_curr, + _PyTier2TypeContext *type_context, + int bb_id) +{ + assert(oparg == NB_ADD || oparg == NB_SUBTRACT || oparg == NB_MULTIPLY); + bool is_first_instr = (write_curr == t2_start); + *needs_guard = false; + PyTypeObject *right = typenode_get_type(type_context->type_stack_ptr[-1]); + PyTypeObject *left = typenode_get_type(type_context->type_stack_ptr[-2]); + if (left == &PyLong_Type) { + if (right == &PyLong_Type) { + int opcode = oparg == NB_ADD + ? BINARY_OP_ADD_INT_REST + : oparg == NB_SUBTRACT + ? BINARY_OP_SUBTRACT_INT_REST + : oparg == NB_MULTIPLY + ? BINARY_OP_MULTIPLY_INT_REST + : (Py_UNREACHABLE(), 1); + write_curr->op.code = opcode; + write_curr++; + type_propagate(opcode, 0, type_context, NULL); + return write_curr; + } + } + if ((left == &PyRawFloat_Type || left == &PyFloat_Type) && + (right == &PyRawFloat_Type || right == &PyFloat_Type)) { + int opcode = oparg == NB_ADD + ? BINARY_OP_ADD_FLOAT_UNBOXED + : oparg == NB_SUBTRACT + ? BINARY_OP_SUBTRACT_FLOAT_UNBOXED + : oparg == NB_MULTIPLY + ? BINARY_OP_MULTIPLY_FLOAT_UNBOXED + : (Py_UNREACHABLE(), 1); + if (right == &PyFloat_Type) { + write_curr->op.code = UNBOX_FLOAT; + write_curr->op.arg = 0; + write_curr++; + type_propagate(UNBOX_FLOAT, 0, type_context, NULL); + } + if (left == &PyFloat_Type) { + write_curr->op.code = UNBOX_FLOAT; + write_curr->op.arg = 1; + write_curr++; + type_propagate(UNBOX_FLOAT, 1, type_context, NULL); + } + write_curr->op.code = opcode; + write_curr++; + type_propagate(opcode, 0, type_context, NULL); + return write_curr; + } + // Unknown, time to emit the chain of guards. + // No type guard before this, or it's not the first in the new BB. + // First in new BB usually indicates it's already part of a pre-existing ladder. + if (prev_type_guard == NULL || !is_first_instr) { + write_curr = rebox_stack(write_curr, type_context, 2); + *needs_guard = true; + return emit_type_guard(write_curr, BINARY_CHECK_FLOAT, 0, bb_id); + } + else { + int next_guard = type_guard_ladder[ + type_guard_to_index[prev_type_guard->op.code] + 1]; + if (next_guard != -1) { + write_curr = rebox_stack(write_curr, type_context, 2); + *needs_guard = true; + return emit_type_guard(write_curr, next_guard, 0, bb_id); + } + // End of ladder, fall through + } + return NULL; +} + +/** + * @brief Infers the correct BINARY_SUBSCR to use. This is where we choose to emit + * more efficient container instructions. + * + * @param t2_start Start of the current basic block. + * @param oparg Oparg of the BINARY_OP. + * @param needs_guard Signals to the caller whether they should emit a type guard. + * @param prev_type_guard The previous basic block's ending type guard (this is + * required for the ladder of types). + * + * @param raw_op The tier 0/1 BINARY_OP. + * @param write_curr Tier 2 instruction write buffer. + * @param type_context Current type context to base our decisions on. + * @param bb_id The current BB's ID. 
+ * @param store Whether it's a store instruction (STORE_SUBSCR) or not (BINARY_SUBSCR).
+ * @return Updated tier 2 instruction write buffer end.
+*/
+static inline _Py_CODEUNIT *
+infer_BINARY_SUBSCR(
+    _Py_CODEUNIT *t2_start,
+    int oparg,
+    bool *needs_guard,
+    _Py_CODEUNIT *prev_type_guard,
+    _Py_CODEUNIT raw_op,
+    _Py_CODEUNIT *write_curr,
+    _PyTier2TypeContext *type_context,
+    int bb_id,
+    bool store)
+{
+    assert(oparg == NB_ADD || oparg == NB_SUBTRACT || oparg == NB_MULTIPLY);
+    bool is_first_instr = (write_curr == t2_start);
+    *needs_guard = false;
+    PyTypeObject *sub = typenode_get_type(type_context->type_stack_ptr[-1]);
+    PyTypeObject *container = typenode_get_type(type_context->type_stack_ptr[-2]);
+    if (container == &PyList_Type) {
+        if (sub == &PySmallInt_Type) {
+            int opcode = store
+                ? STORE_SUBSCR_LIST_INT_REST : BINARY_SUBSCR_LIST_INT_REST;
+            write_curr->op.code = opcode;
+            write_curr++;
+            type_propagate(opcode, 0, type_context, NULL);
+            return write_curr;
+        }
+    }
+    // Unknown, time to emit the chain of guards.
+    // No type guard before this, or it's not the first in the new BB.
+    // First in new BB usually indicates it's already part of a pre-existing ladder.
+    if (prev_type_guard == NULL || !is_first_instr) {
+        write_curr = rebox_stack(write_curr, type_context, 2);
+        *needs_guard = true;
+        return emit_type_guard(write_curr, CHECK_LIST, 1, bb_id);
+    }
+    else {
+        int next_guard = type_guard_ladder[
+            type_guard_to_index[prev_type_guard->op.code] + 1];
+        if (next_guard != -1) {
+            write_curr = rebox_stack(write_curr, type_context, store ? 3 : 2);
+            *needs_guard = true;
+            return emit_type_guard(write_curr, next_guard, 1, bb_id);
+        }
+        // End of ladder, fall through
+    }
+    return NULL;
+}
+
+/**
+ * @brief Whether this is an unboxed type.
+ * @param t The type to check.
+ * @return Yes/No.
+*/
+static inline bool
+is_unboxed_type(PyTypeObject *t)
+{
+    return t == &PyRawFloat_Type;
+}
+
+
+/**
+ * @brief Detects a BB from the current instruction start to the end of the first basic block it sees, then emits the instructions into the BB space.
+ *
+ * Instructions emitted depend on the type_context.
+ * For example, if it sees a BINARY_OP (ADD) instruction and it knows the two operands are already of
+ * type PyLongObject, a BINARY_OP_ADD_INT_REST will be emitted without any type checks.
+ *
+ * However, if one of the operands is unknown, a logical chain of CHECK instructions will be
+ * emitted, and the basic block will end at the first of the chain.
+ * Note: a BB end also includes a type guard.
+ *
+ * @param co The code object we're optimizing.
+ * @param bb_space The BB space of the code object to write to.
+ * @param prev_type_guard The type guard that ended the previous basic block (if present).
+ * @param tier1_start The tier 1 instructions to start referring from.
+ * @param starting_type_context The starting type context for this new basic block.
+ * @return A new tier 2 basic block.
+*/ +_PyTier2BBMetadata * +_PyTier2_Code_DetectAndEmitBB( + PyCodeObject *co, + _PyTier2BBSpace *bb_space, + _Py_CODEUNIT *prev_type_guard, + _Py_CODEUNIT *tier1_start, + // starting_type_context will be modified in this function, + // do make a copy if needed before calling this function + _PyTier2TypeContext *starting_type_context) +{ + assert( + prev_type_guard == NULL || + prev_type_guard->op.code == BINARY_CHECK_INT || + prev_type_guard->op.code == BINARY_CHECK_FLOAT || + prev_type_guard->op.code == CHECK_LIST + ); +#define END() goto end; +#define JUMPBY(x) i += x + 1; +#define DISPATCH() write_i = emit_i(write_i, specop, curr->op.arg); \ + write_i = copy_cache_entries(write_i, curr+1, caches); \ + i += caches; \ + type_propagate(opcode, oparg, starting_type_context, consts); \ + continue; + +#define DISPATCH_REBOX(x) write_i = rebox_stack(write_i, starting_type_context, x); \ + write_i = emit_i(write_i, specop, curr->op.arg); \ + write_i = copy_cache_entries(write_i, curr+1, caches); \ + i += caches; \ + type_propagate(opcode, oparg, starting_type_context, consts); \ + continue; +#define DISPATCH_GOTO() goto dispatch_opcode; +#define TYPECONST_GET_RAWTYPE(idx) Py_TYPE(PyTuple_GET_ITEM(consts, idx)) +#define GET_CONST(idx) PyTuple_GET_ITEM(consts, idx) + + assert(co->_tier2_info != NULL); + // There are only two cases that a BB ends. + // 1. If there's a branch instruction / scope exit. + // 2. If there's a type guard. + bool needs_guard = 0; + + _PyTier2BBMetadata *meta = NULL; + _PyTier2BBMetadata *temp_meta = NULL; + _PyTier2BBMetadata *jump_end_meta = NULL; + + _PyTier2Info *t2_info = co->_tier2_info; + PyObject *consts = co->co_consts; + _Py_CODEUNIT *t2_start = (_Py_CODEUNIT *)(((char *)bb_space->u_code) + bb_space->water_level); + _Py_CODEUNIT *write_i = t2_start; + int tos = -1; + + // For handling of backwards jumps + bool starts_with_backwards_jump_target = false; + int backwards_jump_target_offset = -1; + bool virtual_start = false; + _PyTier2TypeContext *start_type_context_copy = NULL; + _Py_CODEUNIT *virtual_tier1_start = NULL; + + // A meta-interpreter for types. + Py_ssize_t i = (tier1_start - _PyCode_CODE(co)); + for (; i < Py_SIZE(co); i++) { + _Py_CODEUNIT *curr = _PyCode_CODE(co) + i; + _Py_CODEUNIT *next_instr = curr + 1; + int specop = _Py_OPCODE(*curr); + int opcode = _PyOpcode_Deopt[specop]; + int oparg = _Py_OPARG(*curr); + int caches = _PyOpcode_Caches[opcode]; + + // Just because an instruction requires a guard doesn't mean it's the end of a BB. + // We need to check whether we can eliminate the guard based on the current type context. + + dispatch_opcode: +#if TYPEPROP_DEBUG + fprintf(stderr, "offset: %Id\n", curr - _PyCode_CODE(co)); +#endif + switch (opcode) { + case RESUME: + opcode = specop = RESUME_QUICK; + DISPATCH(); + case END_FOR: + // Assert that we are the start of a BB + assert(t2_start == write_i); + // Though we want to emit this, we don't want to start execution from END_FOR. + // So we tell the BB to skip over it. + t2_start++; + DISPATCH(); + case POP_TOP: { + // Read-only, only for us to inspect the types. DO NOT MODIFY HERE. + // ONLY THE TYPES PROPAGATOR SHOULD MODIFY THEIR INTERNAL VALUES. + _Py_TYPENODE_t **type_stackptr = &starting_type_context->type_stack_ptr; + PyTypeObject *pop = typenode_get_type(*TYPESTACK_PEEK(1)); + // Writing unboxed val to a boxed val. + if (is_unboxed_type(pop)) { + opcode = specop = POP_TOP_NO_DECREF; + } + DISPATCH(); + } + case COPY: { + // Read-only, only for us to inspect the types. DO NOT MODIFY HERE. 
+ // ONLY THE TYPES PROPAGATOR SHOULD MODIFY THEIR INTERNAL VALUES. + _Py_TYPENODE_t **type_stackptr = &starting_type_context->type_stack_ptr; + PyTypeObject *pop = typenode_get_type(*TYPESTACK_PEEK(1 + (oparg - 1))); + // Writing unboxed val to a boxed val. + if (is_unboxed_type(pop)) { + opcode = specop = COPY_NO_INCREF; + } + DISPATCH(); + } + case LOAD_CONST: { + PyTypeObject *typ = TYPECONST_GET_RAWTYPE(oparg); + if (typ == &PyFloat_Type) { + write_i = emit_i(write_i, LOAD_CONST, curr->op.arg); + type_propagate(LOAD_CONST, oparg, starting_type_context, consts); + write_i->op.code = UNBOX_FLOAT; + write_i->op.arg = 0; + write_i++; + type_propagate(UNBOX_FLOAT, 0, starting_type_context, consts); + continue; + } + else if (typ == &PyLong_Type) { + // We break our own rules for more efficient code here. + // NOTE: THIS MODIFIES THE TYPE CONTEXT. + if (_PyLong_IsNonNegativeCompact((PyLongObject *)GET_CONST(oparg))) { + write_i = emit_i(write_i, LOAD_CONST, curr->op.arg); + + // Type propagate + _PyTier2TypeContext *type_context = starting_type_context; + _Py_TYPENODE_t **type_stackptr = &type_context->type_stack_ptr; + *type_stackptr += 1; + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PySmallInt_Type), TYPESTACK_PEEK(1), true); + continue; + } + } + DISPATCH(); + } + case LOAD_FAST: { + // Read-only, only for us to inspect the types. DO NOT MODIFY HERE. + // ONLY THE TYPES PROPAGATOR SHOULD MODIFY THEIR INTERNAL VALUES. + _Py_TYPENODE_t *type_locals = starting_type_context->type_locals; + // Writing unboxed val to a boxed val. + PyTypeObject *local = typenode_get_type(*TYPELOCALS_GET(oparg)); + if (is_unboxed_type(local)) { + opcode = specop = LOAD_FAST_NO_INCREF; + } + else { + if (local == &PyFloat_Type) { + write_i->op.code = LOAD_FAST; + write_i->op.arg = oparg; + write_i++; + type_propagate(LOAD_FAST, + oparg, starting_type_context, consts); + write_i->op.code = UNBOX_FLOAT; + write_i->op.arg = 0; + write_i++; + type_propagate(UNBOX_FLOAT, 0, starting_type_context, consts); + write_i->op.code = STORE_FAST_UNBOXED_BOXED; + write_i->op.arg = oparg; + write_i++; + type_propagate(STORE_FAST_UNBOXED_BOXED, + oparg, starting_type_context, consts); + write_i->op.code = LOAD_FAST_NO_INCREF; + write_i->op.arg = oparg; + write_i++; + type_propagate(LOAD_FAST_NO_INCREF, + oparg, starting_type_context, consts); + continue; + } + opcode = specop = LOAD_FAST; + } + DISPATCH(); + } + case LOAD_FAST_CHECK: { + // Read-only, only for us to inspect the types. DO NOT MODIFY HERE. + // ONLY THE TYPES PROPAGATOR SHOULD MODIFY THEIR INTERNAL VALUES. + _Py_TYPENODE_t *type_locals = starting_type_context->type_locals; + // Writing unboxed val to a boxed val. 
+ PyTypeObject *local = typenode_get_type(*TYPELOCALS_GET(oparg)); + if (is_unboxed_type(local)) { + opcode = specop = LOAD_FAST_NO_INCREF; + } + else { + if (local == &PyFloat_Type) { + write_i->op.code = LOAD_FAST; + write_i->op.arg = oparg; + write_i++; + type_propagate(LOAD_FAST, + oparg, starting_type_context, consts); + write_i->op.code = UNBOX_FLOAT; + write_i->op.arg = 0; + write_i++; + type_propagate(UNBOX_FLOAT, 0, starting_type_context, consts); + write_i->op.code = STORE_FAST_UNBOXED_BOXED; + write_i->op.arg = oparg; + write_i++; + type_propagate(STORE_FAST_UNBOXED_BOXED, + oparg, starting_type_context, consts); + write_i->op.code = LOAD_FAST_NO_INCREF; + write_i->op.arg = oparg; + write_i++; + type_propagate(LOAD_FAST_NO_INCREF, + oparg, starting_type_context, consts); + continue; + } + opcode = specop = LOAD_FAST_CHECK; + } + DISPATCH(); + } + case STORE_FAST: { + // Read-only, only for us to inspect the types. DO NOT MODIFY HERE. + // ONLY THE TYPES PROPAGATOR SHOULD MODIFY THEIR INTERNAL VALUES. + _Py_TYPENODE_t *type_locals = starting_type_context->type_locals; + _Py_TYPENODE_t **type_stackptr = &starting_type_context->type_stack_ptr; + PyTypeObject *local = typenode_get_type(*TYPESTACK_PEEK(1)); + PyTypeObject *store = typenode_get_type(*TYPELOCALS_GET(oparg)); + // Writing unboxed val to a boxed val. + if (is_unboxed_type(local)) { + if (!is_unboxed_type(store)) { + opcode = specop = STORE_FAST_UNBOXED_BOXED; + } + else { + opcode = specop = STORE_FAST_UNBOXED_UNBOXED; + } + } + else { + if (is_unboxed_type(store)) { + opcode = specop = STORE_FAST_BOXED_UNBOXED; + } + else { + opcode = specop = STORE_FAST; + } + } + DISPATCH(); + } + // Need to handle reboxing at these boundaries. + case CALL: + DISPATCH_REBOX(oparg + 2); + case BUILD_MAP: + DISPATCH_REBOX(oparg * 2); + case BUILD_STRING: + case BUILD_LIST: + DISPATCH_REBOX(oparg); + case BINARY_OP: + if (oparg == NB_ADD || oparg == NB_SUBTRACT || oparg == NB_MULTIPLY) { + // Add operation. Need to check if we can infer types. + _Py_CODEUNIT *possible_next = infer_BINARY_OP(t2_start, + oparg, &needs_guard, + prev_type_guard, + *curr, + write_i, starting_type_context, + co->_tier2_info->bb_data_curr); + if (possible_next == NULL) { + DISPATCH_REBOX(2); + } + write_i = possible_next; + if (needs_guard) { + // Point to the same instruction, because in this BB we emit + // the guard. + // The next BB emits the instruction. + i--; + END(); + } + i += caches; + continue; + } + DISPATCH_REBOX(2); + case BINARY_SUBSCR: { + _Py_CODEUNIT *possible_next = infer_BINARY_SUBSCR( + t2_start, oparg, &needs_guard, + prev_type_guard, + *curr, + write_i, starting_type_context, + co->_tier2_info->bb_data_curr, false); + if (possible_next == NULL) { + DISPATCH_REBOX(2); + } + write_i = possible_next; + if (needs_guard) { + // Point to the same instruction, because in this BB we emit + // the guard. + // The next BB emits the instruction. + i--; + END(); + } + i += caches; + continue; + } + case STORE_SUBSCR: { + _Py_CODEUNIT *possible_next = infer_BINARY_SUBSCR( + t2_start, oparg, &needs_guard, + prev_type_guard, + *curr, + write_i, starting_type_context, + co->_tier2_info->bb_data_curr, true); + if (possible_next == NULL) { + DISPATCH_REBOX(3); + } + write_i = possible_next; + if (needs_guard) { + // Point to the same instruction, because in this BB we emit + // the guard. + // The next BB emits the instruction. 
+ i--; + END(); + } + i += caches; + continue; + } + case LOAD_ATTR: + case CALL_INTRINSIC_1: + case UNARY_NEGATIVE: + case UNARY_NOT: + case UNARY_INVERT: + case GET_LEN: + case UNPACK_SEQUENCE: + DISPATCH_REBOX(1); + case CALL_INTRINSIC_2: + case BINARY_SLICE: + DISPATCH_REBOX(2); + case STORE_SLICE: + DISPATCH_REBOX(4); + default: +#if BB_DEBUG && !TYPEPROP_DEBUG + fprintf(stderr, "offset: %Id\n", curr - _PyCode_CODE(co)); +#endif + // This should be the end of another basic block, or the start of a new. + // Start of a new basic block, just ignore and continue. + if (virtual_start) { +#if BB_DEBUG + fprintf(stderr, "Emitted virtual start of basic block\n"); +#endif + starts_with_backwards_jump_target = true; + virtual_start = false; + start_type_context_copy = _PyTier2TypeContext_Copy(starting_type_context); + if (start_type_context_copy == NULL) { + _PyTier2TypeContext_Free(starting_type_context); + return NULL; + } + goto fall_through; + } + if (IS_BACKWARDS_JUMP_TARGET(co, curr)) { +#if BB_DEBUG + fprintf(stderr, "Encountered a backward jump target\n"); +#endif +#if TYPEPROP_DEBUG + print_typestack(starting_type_context); +#endif + // Else, create a virtual end to the basic block. + // But generate the block after that so it can fall through. + i--; + _PyTier2TypeContext *type_context_copy = _PyTier2TypeContext_Copy(starting_type_context); + if (type_context_copy == NULL) { + return NULL; + } + meta = _PyTier2_AllocateBBMetaData(co, + t2_start, _PyCode_CODE(co) + i, type_context_copy); + if (meta == NULL) { + _PyTier2TypeContext_Free(type_context_copy); + return NULL; + } + bb_space->water_level += (write_i - t2_start) * sizeof(_Py_CODEUNIT); + // Reset all our values + t2_start = write_i; + i++; + virtual_tier1_start = _PyCode_CODE(co) + i; + backwards_jump_target_offset = (int)(curr - _PyCode_CODE(co)); + virtual_start = true; + + if (opcode == EXTENDED_ARG) { + // Note: EXTENDED_ARG could be a jump target!!!!! + specop = next_instr->op.code; + opcode = _PyOpcode_Deopt[specop]; + caches = _PyOpcode_Caches[opcode]; + oparg = oparg << 8 | next_instr->op.arg; + curr++; + next_instr++; + i += 1; + DISPATCH_GOTO(); + } + // Don't change opcode or oparg, let us handle it again. + DISPATCH_GOTO(); + } + fall_through: + // These are definitely the end of a basic block. + if (IS_SCOPE_EXIT_OPCODE(opcode)) { + // Emit the scope exit instruction. + write_i = emit_scope_exit(write_i, *curr, starting_type_context); + END(); + } + + // Jumps may be the end of a basic block if they are conditional (a branch). + if (IS_JUMP_OPCODE(opcode)) { + // Unconditional forward jump... continue with the BB without writing the jump. + if (opcode == JUMP_FORWARD) { + // JUMP offset (oparg) + current instruction + cache entries + JUMPBY(oparg); + continue; + } + // Get the BB ID without incrementing it. + // AllocateBBMetaData will increment. + write_i = emit_logical_branch(starting_type_context, write_i, *curr, + co->_tier2_info->bb_data_curr, oparg); + i += caches; + END(); + } + if (opcode == EXTENDED_ARG) { + // Note: EXTENDED_ARG could be a jump target!!!!! 
+ specop = next_instr->op.code; + opcode = _PyOpcode_Deopt[specop]; + caches = _PyOpcode_Caches[opcode]; + oparg = oparg << 8 | next_instr->op.arg; + curr++; + next_instr++; + i += 1; + DISPATCH_GOTO(); + } + DISPATCH(); + } + + } +end: + // Create the tier 2 BB + + temp_meta = _PyTier2_AllocateBBMetaData(co, t2_start, + // + 1 because we want to start with the NEXT instruction for the scan + _PyCode_CODE(co) + i + 1, starting_type_context); + if (temp_meta == NULL) { + _PyTier2TypeContext_Free(starting_type_context); + return NULL; + } + // We need to return the first block to enter into. If there is already a block generated + // before us, then we use that instead of the most recent block. + if (meta == NULL) { + meta = temp_meta; + } + if (starts_with_backwards_jump_target) { + // Add the basic block to the jump ids + assert(start_type_context_copy != NULL); + assert(virtual_tier1_start != NULL); + if (add_metadata_to_jump_2d_array(t2_info, temp_meta, + backwards_jump_target_offset, start_type_context_copy, + virtual_tier1_start) < 0) { + PyMem_Free(meta); + if (meta != temp_meta) { + PyMem_Free(temp_meta); + } + _PyTier2TypeContext_Free(starting_type_context); + return NULL; + } + } + // Tell BB space the number of bytes we wrote. + // -1 becaues write_i points to the instruction AFTER the end + bb_space->water_level += (write_i - t2_start) * sizeof(_Py_CODEUNIT); +#if BB_DEBUG + fprintf(stderr, "Generated BB T2 Start: %p, T1 offset: %zu\n", meta->tier2_start, + meta->tier1_end - _PyCode_CODE(co)); +#endif + return meta; + +} + + +////////// _PyTier2Info FUNCTIONS + +static int +compare_ints(const void *a, const void *b) +{ + return *(int *)a - *(int *)b; +} + +/** + * @brief Allocates the 2D array required to store information about backwards jump targets. + * @param backwards_jump_count How many backwards jump targets there are. + * @param backward_jump_target_bb_pairs Triplet information required about the backward jump target. + * @return 0 on success 1 on error. +*/ +static int +allocate_jump_offset_2d_array(int backwards_jump_count, + _PyTier2BBStartTypeContextTriplet **backward_jump_target_bb_pairs) +{ + int done = 0; + for (int i = 0; i < backwards_jump_count; i++) { + _PyTier2BBStartTypeContextTriplet *pair = + PyMem_Malloc(sizeof(_PyTier2BBStartTypeContextTriplet) * MAX_BB_VERSIONS); + if (pair == NULL) { + goto error; + } + for (int i = 0; i < MAX_BB_VERSIONS; i++) { + pair[i].id = -1; + } + done++; + backward_jump_target_bb_pairs[i] = pair; + } + return 0; +error: + for (int i = 0; i < done; i++) { + PyMem_Free(backward_jump_target_bb_pairs[i]); + } + return 1; +} + + +/** + * @brief Populates the backwards jump target offset array for a code object. + * @param co The code object to populate. + * @return Returns 1 on error, 0 on success. +*/ +static int +_PyCode_Tier2FillJumpTargets(PyCodeObject *co) +{ + assert(co->_tier2_info != NULL); + // Count all the backwards jump targets. + Py_ssize_t backwards_jump_count = 0; + for (Py_ssize_t i = 0; i < Py_SIZE(co); i++) { + _Py_CODEUNIT *instr_ptr = _PyCode_CODE(co) + i; + _Py_CODEUNIT instr = *instr_ptr; + int opcode = _PyOpcode_Deopt[_Py_OPCODE(instr)]; + backwards_jump_count += IS_JUMP_BACKWARDS_OPCODE(opcode); + i += _PyOpcode_Caches[opcode]; + } + + // Impossibly big. + if (backwards_jump_count != (int)backwards_jump_count) { + return 1; + } + + // Find all the jump target instructions + // Don't allocate a zero byte space as this may be undefined behavior. 
+ if (backwards_jump_count == 0) { + co->_tier2_info->backward_jump_offsets = NULL; + // Successful (no jump targets)! + co->_tier2_info->backward_jump_count = (int)backwards_jump_count; + return 0; + } + int *backward_jump_offsets = PyMem_Malloc(backwards_jump_count * sizeof(int)); + if (backward_jump_offsets == NULL) { + return 1; + } + _PyTier2BBStartTypeContextTriplet **backward_jump_target_bb_pairs = + PyMem_Malloc(backwards_jump_count * sizeof(_PyTier2BBStartTypeContextTriplet *)); + if (backward_jump_target_bb_pairs == NULL) { + PyMem_Free(backward_jump_offsets); + return 1; + } + if (allocate_jump_offset_2d_array((int)backwards_jump_count, + backward_jump_target_bb_pairs)) { + PyMem_Free(backward_jump_offsets); + PyMem_Free(backward_jump_target_bb_pairs); + return 1; + } + + _Py_CODEUNIT *start = _PyCode_CODE(co); + int curr_i = 0; + int oparg = 0; + for (Py_ssize_t i = 0; i < Py_SIZE(co); i++) { + _Py_CODEUNIT *curr = start + i; + int opcode = _PyOpcode_Deopt[curr->op.code]; + oparg = curr->op.arg; + dispatch_same_oparg: + if (IS_JUMP_BACKWARDS_OPCODE(opcode)) { + // + 1 because it's calculated from nextinstr (see JUMPBY in ceval.c) + _Py_CODEUNIT *target = curr + 1 - oparg; +#if BB_DEBUG + fprintf(stderr, "jump target opcode is %d\n", _Py_OPCODE(*target)); +#endif + // (in terms of offset from start of co_code_adaptive) + backward_jump_offsets[curr_i] = (int)(target - start); + curr_i++; + } + else if (opcode == EXTENDED_ARG) { + oparg = oparg << 8 | (curr+1)->op.arg; + opcode = _PyOpcode_Deopt[(curr+1)->op.code]; + i++; + curr++; + goto dispatch_same_oparg; + } + i += _PyOpcode_Caches[opcode]; + } + assert(curr_i == backwards_jump_count); + qsort(backward_jump_offsets, backwards_jump_count, + sizeof(int), compare_ints); + // Deduplicate + for (int i = 0; i < backwards_jump_count - 1; i++) { + for (int x = i + 1; x < backwards_jump_count; x++) { + if (backward_jump_offsets[i] == backward_jump_offsets[x]) { + backward_jump_offsets[x] = -1; + } + } + } + qsort(backward_jump_offsets, backwards_jump_count, + sizeof(int), compare_ints); +#if BB_DEBUG + fprintf(stderr, "BACKWARD JUMP COUNT : %Id\n", backwards_jump_count); + fprintf(stderr, "BACKWARD JUMP TARGET OFFSETS (FROM START OF CODE): "); + for (Py_ssize_t i = 0; i < backwards_jump_count; i++) { + fprintf(stderr, "%d ,", backward_jump_offsets[i]); + } + fprintf(stderr, "\n"); +#endif + co->_tier2_info->backward_jump_count = (int)backwards_jump_count; + co->_tier2_info->backward_jump_offsets = backward_jump_offsets; + co->_tier2_info->backward_jump_target_bb_pairs = backward_jump_target_bb_pairs; + return 0; +} + + +/** + * @brief Initializes the tier 2 info of a code object. + * @param co The code object. + * @return The newly allocated tier 2 info. 
+*/ +static _PyTier2Info * +_PyTier2Info_Initialize(PyCodeObject *co) +{ + assert(co->_tier2_info == NULL); + _PyTier2Info *t2_info = PyMem_Malloc(sizeof(_PyTier2Info)); + if (t2_info == NULL) { + return NULL; + } + + t2_info->backward_jump_count = 0; + t2_info->backward_jump_offsets = NULL; + + // Initialize BB data array + t2_info->bb_data_len = 0; + t2_info->bb_data = NULL; + t2_info->bb_data_curr = 0; + Py_ssize_t bb_data_len = (Py_SIZE(co) / 5 + 1); + assert((int)bb_data_len == bb_data_len); + _PyTier2BBMetadata **bb_data = PyMem_Calloc(bb_data_len, sizeof(_PyTier2BBMetadata *)); + if (bb_data == NULL) { + PyMem_Free(t2_info); + return NULL; + } + t2_info->bb_data_len = (int)bb_data_len; + t2_info->bb_data = bb_data; + co->_tier2_info = t2_info; + + return t2_info; +} + +////////// OVERALL TIER2 FUNCTIONS + + +/** + * @brief Whether the opcode is optimizable. + * + * We use simple heuristics to determine if there are operations we can optimize. + * Specifically, we are looking for the presence of PEP 659 (tier 1) + * specialized forms of bytecode, because this indicates that it's a known form. + * + * ADD MORE HERE AS WE GO ALONG. + * + * @param opcode The opcode of the instruction. + * @param oparg The oparg of the instruction. + * @return Yes/No. +*/ +static inline int +IS_OPTIMIZABLE_OPCODE(int opcode, int oparg) +{ + switch (_PyOpcode_Deopt[opcode]) { + case BINARY_OP: + switch (oparg) { + case NB_SUBTRACT: + case NB_MULTIPLY: + case NB_ADD: + // We want a specialised form, not the generic BINARY_OP. + return opcode != _PyOpcode_Deopt[opcode]; + default: + return 0; + } + default: + return 0; + } +} + +/** + * @brief Single scan to replace RESUME and JUMP_BACKWARD instructions to faster + * variants so they stop warming up the tier 2. + * @param co The code object to optimize. +*/ +static inline void +replace_resume_and_jump_backwards(PyCodeObject *co) +{ + for (Py_ssize_t i = 0; i < Py_SIZE(co); i++) { + _Py_CODEUNIT *instr_ptr = _PyCode_CODE(co) + i; + _Py_CODEUNIT instr = *instr_ptr; + int opcode = _PyOpcode_Deopt[_Py_OPCODE(instr)]; + int oparg = _Py_OPARG(instr); + switch (opcode) { + case RESUME: + _py_set_opcode(instr_ptr, RESUME_QUICK); + break; + case JUMP_BACKWARD: + _py_set_opcode(instr_ptr, JUMP_BACKWARD_QUICK); + break; + } + i += _PyOpcode_Caches[opcode]; + } +} + +/** + * @brief Initializes the tier 2 of a code object. Called upon first transition from tier 1 + * to tier 2, when a code object is deemed hot. + * + * 1. Initialize whatever we need. + * 2. Create the entry BB. + * 3. Jump into that BB. + * @param frame The current executing frame. + * @param next_instr The next instruction of said frame. + * @return The next instruction (tier 2) to execute. +*/ +static _Py_CODEUNIT * +_PyCode_Tier2Initialize(_PyInterpreterFrame *frame, _Py_CODEUNIT *next_instr) +{ + assert(_Py_OPCODE(*(next_instr - 1)) == RESUME); + PyCodeObject *co = frame->f_code; + // Replace all the RESUME and JUMP_BACKWARDS so that it doesn't waste time again. + replace_resume_and_jump_backwards(co); + // Impossibly big. + if ((int)Py_SIZE(co) != Py_SIZE(co)) { + return NULL; + } + // First check for forbidden opcodes that we currently can't handle. + int optimizable = 0; + for (Py_ssize_t curr = 0; curr < Py_SIZE(co); curr++) { + _Py_CODEUNIT *curr_instr = _PyCode_CODE(co) + curr; + int deopt = _PyOpcode_Deopt[_Py_OPCODE(*curr_instr)]; + int next = curr < Py_SIZE(co) - 1 + ? 
_PyOpcode_Deopt[(curr_instr + 1)->op.code]
+            : 255;
+        if (IS_FORBIDDEN_OPCODE(deopt, next)) {
+#if BB_DEBUG
+#ifdef Py_DEBUG
+            fprintf(stderr, "FORBIDDEN OPCODE %s\n", _PyOpcode_OpName[_Py_OPCODE(*curr_instr)]);
+#else
+            fprintf(stderr, "FORBIDDEN OPCODE %d\n", _Py_OPCODE(*curr_instr));
+#endif
+#endif
+            return NULL;
+        }
+        optimizable |= IS_OPTIMIZABLE_OPCODE(_Py_OPCODE(*curr_instr), _Py_OPARG(*curr_instr));
+        // Skip the cache entries
+        curr += _PyOpcode_Caches[deopt];
+    }
+
+    if (!optimizable) {
+#if BB_DEBUG
+        fprintf(stderr, "NOT OPTIMIZABLE\n");
+#endif
+        return NULL;
+    }
+
+    _PyTier2Info *t2_info = _PyTier2Info_Initialize(co);
+    if (t2_info == NULL) {
+        return NULL;
+    }
+
+#if BB_DEBUG
+    fprintf(stderr, "INITIALIZING\n");
+#endif
+
+    Py_ssize_t space_to_alloc = (_PyCode_NBYTES(co)) * OVERALLOCATE_FACTOR;
+
+    _PyTier2BBSpace *bb_space = _PyTier2_CreateBBSpace(space_to_alloc);
+    if (bb_space == NULL) {
+        PyMem_Free(t2_info);
+        return NULL;
+    }
+    if (_PyCode_Tier2FillJumpTargets(co)) {
+        goto cleanup;
+    }
+
+    t2_info->_bb_space = bb_space;
+
+    _PyTier2TypeContext *type_context = initialize_type_context(co);
+    if (type_context == NULL) {
+        goto cleanup;
+    }
+    _PyTier2BBMetadata *meta = _PyTier2_Code_DetectAndEmitBB(
+        co, bb_space, NULL,
+        _PyCode_CODE(co), type_context);
+    if (meta == NULL) {
+        _PyTier2TypeContext_Free(type_context);
+        goto cleanup;
+    }
+#if BB_DEBUG
+    fprintf(stderr, "ENTRY BB END IS: %d\n", (int)(meta->tier1_end - _PyCode_CODE(co)));
+#endif
+
+
+    t2_info->_entry_bb = meta;
+
+    // SET THE FRAME INFO
+    frame->prev_instr = meta->tier2_start - 1;
+    // Set the starting instruction to the entry BB.
+    // frame->prev_instr = bb_ptr->u_code - 1;
+    return meta->tier2_start;
+
+cleanup:
+    PyMem_Free(t2_info);
+    PyMem_Free(bb_space);
+    return NULL;
+}
+
+////////// CEVAL FUNCTIONS
+
+/**
+ * @brief Tier 2 warmup counter.
+ * @param frame Current executing frame.
+ * @param next_instr The next instruction that frame is executing.
+ * @return The next instruction that should be executed (no change if the code object
+ * is not hot enough).
+*/
+_Py_CODEUNIT *
+_PyCode_Tier2Warmup(_PyInterpreterFrame *frame, _Py_CODEUNIT *next_instr)
+{
+    PyCodeObject *code = frame->f_code;
+    if (code->_tier2_warmup != 0) {
+        code->_tier2_warmup++;
+        if (code->_tier2_warmup >= 0) {
+            assert(code->_tier2_info == NULL);
+            // If it fails, due to lack of memory or whatever,
+            // just fall back to the tier 1 interpreter.
+            _Py_CODEUNIT *next = _PyCode_Tier2Initialize(frame, next_instr);
+            if (next != NULL) {
+                return next;
+            }
+        }
+    }
+    return next_instr;
+}
+
+/**
+ * @brief Generates the next BB with a given type context.
+ *
+ * @param frame The current executing frame.
+ * @param bb_id_tagged The tagged version of the BB_ID (see macros above to understand).
+ * @param curr_executing_instr The current executing instruction in that frame.
+ * @param jumpby How many instructions to jump by before we start scanning what to generate.
+ * @param tier1_fallback Signals the tier 1 instruction to fall back to should generation fail.
+ * @param bb_flag Whether to generate the consequent or alternative BB.
+ * @param type_context_copy A given type context to start with.
+ * @param custom_tier1_end Custom tier 1 instruction to fall back to should we fail.
+ * @return The new BB's metadata.
+*/
+_PyTier2BBMetadata *
+_PyTier2_GenerateNextBBMetaWithTypeContext(
+    _PyInterpreterFrame *frame,
+    uint16_t bb_id_tagged,
+    _Py_CODEUNIT *curr_executing_instr,
+    int jumpby,
+    _Py_CODEUNIT **tier1_fallback,
+    char bb_flag,
+    _PyTier2TypeContext *type_context_copy,
+    _Py_CODEUNIT *custom_tier1_end)
+{
+    PyCodeObject *co = frame->f_code;
+    assert(co->_tier2_info != NULL);
+    assert(BB_ID(bb_id_tagged) <= co->_tier2_info->bb_data_curr);
+    _PyTier2BBMetadata *meta = co->_tier2_info->bb_data[BB_ID(bb_id_tagged)];
+    _Py_CODEUNIT *tier1_end = custom_tier1_end == NULL
+        ? meta->tier1_end + jumpby : custom_tier1_end;
+    *tier1_fallback = tier1_end;
+    // Be a pessimist and assume we need to write the entire rest of code into the BB.
+    // The size of the BB generated will definitely be equal to or smaller than this.
+    _PyTier2BBSpace *space = _PyTier2_BBSpaceCheckAndReallocIfNeeded(
+        frame->f_code,
+        _PyCode_NBYTES(co) -
+        (tier1_end - _PyCode_CODE(co)) * sizeof(_Py_CODEUNIT));
+    if (space == NULL) {
+        // DEOPTIMIZE TO TIER 1?
+        return NULL;
+    }
+
+    int n_required_pop = BB_TEST_GET_N_REQUIRES_POP(bb_flag);
+    if (n_required_pop) {
+        __type_stack_shrink(&(type_context_copy->type_stack_ptr), n_required_pop);
+    }
+    // Type branches directly precede the BB branch instruction.
+    // It's always:
+    // TYPE_BRANCH
+    // NOP
+    // BB_BRANCH
+    _Py_CODEUNIT *prev_type_guard = BB_IS_TYPE_BRANCH(bb_id_tagged)
+        ? curr_executing_instr - 2 : NULL;
+    if (BB_TEST_IS_SUCCESSOR(bb_flag) && prev_type_guard != NULL) {
+        // Propagate the type guard information.
+#if TYPEPROP_DEBUG && defined(Py_DEBUG)
+        fprintf(stderr,
+            " [-] Previous predicate BB ended with a type guard: %s\n",
+            _PyOpcode_OpName[prev_type_guard->op.code]);
+#endif
+        type_propagate(prev_type_guard->op.code,
+            prev_type_guard->op.arg, type_context_copy, NULL);
+    }
+    _PyTier2BBMetadata *metadata = _PyTier2_Code_DetectAndEmitBB(
+        frame->f_code, space,
+        prev_type_guard,
+        tier1_end,
+        type_context_copy);
+    if (metadata == NULL) {
+        _PyTier2TypeContext_Free(type_context_copy);
+        return NULL;
+    }
+    return metadata;
+}
+
+/**
+ * @brief Generates the next BB, with an automatically inferred type context.
+ * @param frame The current executing frame.
+ * @param bb_id_tagged The tagged version of the BB_ID (see macros above to understand).
+ * @param curr_executing_instr The current executing instruction in that frame.
+ * @param jumpby How many instructions to jump by before we start scanning what to generate.
+ * @param tier1_fallback Signals the tier 1 instruction to fall back to should generation fail.
+ * @param bb_flag Whether to generate the consequent or alternative BB.
+ * @return The new BB's metadata.
+*/
+_PyTier2BBMetadata *
+_PyTier2_GenerateNextBBMeta(
+    _PyInterpreterFrame *frame,
+    uint16_t bb_id_tagged,
+    _Py_CODEUNIT *curr_executing_instr,
+    int jumpby,
+    _Py_CODEUNIT **tier1_fallback,
+    char bb_flag)
+{
+    _PyTier2BBMetadata *meta = frame->f_code->_tier2_info->bb_data[BB_ID(bb_id_tagged)];
+
+    // Get type_context of previous BB
+    _PyTier2TypeContext *type_context = meta->type_context;
+    // Make a copy of the type context
+    _PyTier2TypeContext *type_context_copy = _PyTier2TypeContext_Copy(type_context);
+    if (type_context_copy == NULL) {
+        return NULL;
+    }
+
+    _PyTier2BBMetadata *next = _PyTier2_GenerateNextBBMetaWithTypeContext(
+        frame,
+        bb_id_tagged,
+        curr_executing_instr,
+        jumpby,
+        tier1_fallback,
+        bb_flag,
+        type_context_copy,
+        NULL
+    );
+
+    if (next == NULL) {
+        PyMem_Free(type_context_copy);
+        return NULL;
+    }
+    return next;
+}
+
+/**
+ * @brief Lazily generates successive BBs when required.
+ * The first basic block created will always be directly after the current tier 2 code.
+ * The second basic block created will always require a jump.
+ *
+ * @param frame The current executing frame.
+ * @param bb_id_tagged The tagged version of the BB_ID (see macros above to understand).
+ * @param curr_executing_instr The current executing instruction in that frame.
+ * @param jumpby How many instructions to jump by before we start scanning what to generate.
+ * @param tier1_fallback Signals the tier 1 instruction to fall back to should generation fail.
+ * @param bb_flag Whether to generate the consequent or alternative BB.
+ * @return The next tier 2 instruction to execute.
+*/
+_Py_CODEUNIT *
+_PyTier2_GenerateNextBB(
+    _PyInterpreterFrame *frame,
+    uint16_t bb_id_tagged,
+    _Py_CODEUNIT *curr_executing_instr,
+    int jumpby,
+    _Py_CODEUNIT **tier1_fallback,
+    char bb_flag)
+{
+    _PyTier2BBMetadata *metadata = _PyTier2_GenerateNextBBMeta(
+        frame,
+        bb_id_tagged,
+        curr_executing_instr,
+        jumpby,
+        tier1_fallback,
+        bb_flag);
+    if (metadata == NULL) {
+        return NULL;
+    }
+    return metadata->tier2_start;
+}
+
+/**
+ * @brief Calculates the difference between two type contexts.
+ * @param ctx1 The base type context.
+ * @param ctx2 The type context to compare with.
+ * @return A non-negative number indicating the distance between the two contexts.
+ * Incompatible type contexts return INT_MAX.
+*/
+static int
+diff_typecontext(_PyTier2TypeContext *ctx1, _PyTier2TypeContext *ctx2)
+{
+    assert(ctx1 != NULL);
+    assert(ctx2 != NULL);
+#if BB_DEBUG
+    fprintf(stderr, " [*] Diffing type contexts\n");
+    static void print_typestack(const _PyTier2TypeContext * type_context);
+    print_typestack(ctx1);
+    print_typestack(ctx2);
+#endif
+    assert(ctx1->type_locals_len == ctx2->type_locals_len);
+    assert(ctx1->type_stack_len == ctx2->type_stack_len);
+    int stack_elems1 = (int)(ctx1->type_stack_ptr - ctx1->type_stack);
+    int stack_elems2 = (int)(ctx2->type_stack_ptr - ctx2->type_stack);
+    assert(stack_elems1 == stack_elems2);
+
+    int diff = 0;
+    // Check the difference in the type locals
+    for (int i = 0; i < ctx1->type_locals_len; i++) {
+        PyTypeObject *a = typenode_get_type(ctx1->type_locals[i]);
+        PyTypeObject *b = typenode_get_type(ctx2->type_locals[i]);
+        // We allow type widening but not narrowing or conversion/casts.
+        // 1. Int -> Int (bueno, diff + 0)
+        // 2. Int -> Unknown/NULL (bueno, diff + 1)
+        // 3. Unknown -> Int (no bueno)
+        // 4. Int -> Float (no bueno)
+        // 5. Unboxed type -> Unknown/Boxed type (no bueno)
+
+        // Case 3. Narrowing operation (not allowed).
+ if (a == NULL && b != NULL) { + return INT_MAX; + } + + // Case 4. Incompatible type conversion. + if (a != b && b != NULL) { + return INT_MAX; + } + + // Case 5. Boxed to unboxed conversion. + if (is_unboxed_type(a) && a != b) { + return INT_MAX; + } + + // Case 1 and 2. Diff increases if 2. + diff += (a != b); + } + + // Check the difference in the type stack. + for (int i = 0; i < stack_elems1; i++) { + // Exact same as above. + PyTypeObject *a = typenode_get_type(ctx1->type_stack[i]); + PyTypeObject *b = typenode_get_type(ctx2->type_stack[i]); + + if (a == NULL && b != NULL) { + return INT_MAX; + } + + if (a != b && b != NULL) { + return INT_MAX; + } + + // Case 5. Boxed to unboxed conversion. + if (is_unboxed_type(a) && a != b) { + return INT_MAX; + } + + diff += (a != b); + } + return diff; +} + +/** + * @brief Locate the BB corresponding to a backwards jump target. Matches also the type context. + * If it fails to find a matching type context, a new backwards jump BB is generated with + * more specific type context. + * + * @param frame The current executing frame. + * @param bb_id_tagged The tagged version of the BB_ID (see macros above to understand). + * @param jumpby How many instructions away is the backwards jump target. + * @param tier1_fallback Signals the tier 1 instruction to fall back to should generation fail. + * @param curr Current executing instruction + * @param stacklevel The stack level of the operand stack. + * @return The next tier 2 instruction to execute. +*/ +_Py_CODEUNIT * +_PyTier2_LocateJumpBackwardsBB(_PyInterpreterFrame *frame, uint16_t bb_id_tagged, int jumpby, + _Py_CODEUNIT **tier1_fallback, + _Py_CODEUNIT *curr, int stacklevel) +{ + PyCodeObject *co = frame->f_code; + assert(co->_tier2_info != NULL); + assert(BB_ID(bb_id_tagged) <= co->_tier2_info->bb_data_curr); + _PyTier2BBMetadata *meta = co->_tier2_info->bb_data[BB_ID(bb_id_tagged)]; +#ifdef Py_DEBUG + // We assert that there are as many items on the operand stack as there are on the + // saved type stack. + Py_ssize_t typestack_level = meta->type_context->type_stack_ptr - meta->type_context->type_stack; + assert(typestack_level == stacklevel); +#endif + // The jump target + _Py_CODEUNIT *tier1_jump_target = meta->tier1_end + jumpby; + *tier1_fallback = tier1_jump_target; + // Be a pessimist and assume we need to write the entire rest of code into the BB. + // The size of the BB generated will definitely be equal to or smaller than this. + _PyTier2BBSpace *space = _PyTier2_BBSpaceCheckAndReallocIfNeeded( + frame->f_code, + _PyCode_NBYTES(co) - + (tier1_jump_target - _PyCode_CODE(co)) * sizeof(_Py_CODEUNIT)); + if (space == NULL) { + // DEOPTIMIZE TO TIER 1? 
+ return NULL; + } + + // Get type_context of previous BB + _PyTier2TypeContext *curr_type_context = meta->type_context; + // Now, find the matching BB + _PyTier2Info *t2_info = co->_tier2_info; + int jump_offset = (int)(tier1_jump_target - _PyCode_CODE(co)); + int matching_bb_id = -1; + int candidate_bb_id = -1; + int min_diff = INT_MAX; + int jump_offset_id = -1; + _Py_CODEUNIT *candidate_bb_tier1_start = NULL; + +#if BB_DEBUG + fprintf(stderr, "finding jump target: %d\n", jump_offset); +#endif + for (int i = 0; i < t2_info->backward_jump_count; i++) { +#if BB_DEBUG + fprintf(stderr, "jump offset checked: %d\n", t2_info->backward_jump_offsets[i]); +#endif + if (t2_info->backward_jump_offsets[i] == jump_offset) { + jump_offset_id = i; + for (int x = 0; x < MAX_BB_VERSIONS; x++) { + int target_bb_id = t2_info->backward_jump_target_bb_pairs[i][x].id; + if (target_bb_id >= 0) { + candidate_bb_id = target_bb_id; + candidate_bb_tier1_start = t2_info->backward_jump_target_bb_pairs[i][x].tier1_start; +#if BB_DEBUG + fprintf(stderr, "candidate jump target BB ID: %d\n", + candidate_bb_id); +#endif + int diff = diff_typecontext(curr_type_context, + t2_info->backward_jump_target_bb_pairs[i][x].start_type_context); + if (diff < min_diff) { + min_diff = diff; + matching_bb_id = target_bb_id; + } + } + } + break; + } + } + assert(jump_offset_id >= 0); + assert(candidate_bb_id >= 0); + assert(candidate_bb_tier1_start != NULL); + // We couldn't find a matching BB to jump to. Time to generate our own. + // This also requires rewriting our backwards jump to a forward jump later. + if (matching_bb_id == -1) { + // We should use the type context occuring at the end of the loop. + _PyTier2TypeContext *copied = _PyTier2TypeContext_Copy(curr_type_context); + if (copied == NULL) { + return NULL; + } + _PyTier2TypeContext *second_copy = _PyTier2TypeContext_Copy(curr_type_context); + if (second_copy == NULL) { + return NULL; + } + _PyTier2BBMetadata *meta = _PyTier2_GenerateNextBBMetaWithTypeContext( + frame, MAKE_TAGGED_BB_ID(candidate_bb_id, 0), + NULL, + 0, + tier1_fallback, + 0, + copied, + candidate_bb_tier1_start); + if (meta == NULL) { + _PyTier2TypeContext_Free(copied); + _PyTier2TypeContext_Free(second_copy); + return NULL; + } + // Store the metadata in the jump ids. + assert(t2_info->backward_jump_offsets[jump_offset_id] == jump_offset); + bool found = false; + for (int x = 0; x < MAX_BB_VERSIONS; x++) { + int target_bb_id = t2_info->backward_jump_target_bb_pairs[jump_offset_id][x].id; + // Write to an available space + if (target_bb_id < 0) { + t2_info->backward_jump_target_bb_pairs[jump_offset_id][x].id = meta->id; + t2_info->backward_jump_target_bb_pairs[jump_offset_id][x].start_type_context = second_copy; + t2_info->backward_jump_target_bb_pairs[jump_offset_id][x].tier1_start = candidate_bb_tier1_start; + found = true; + break; + } + } + assert(found); + return meta->tier2_start; + } + assert(matching_bb_id >= 0); + assert(matching_bb_id <= t2_info->bb_data_curr); +#if BB_DEBUG + fprintf(stderr, "Found jump target BB ID: %d\n", matching_bb_id); +#endif + _PyTier2BBMetadata *target_metadata = t2_info->bb_data[matching_bb_id]; + return target_metadata->tier2_start; +} + + +/** + * @brief Rewrites the BB_BRANCH_IF* instructions to a forward jump. 
+ * At generation of the second outgoing edge (basic block), the instructions look like this:
+ * BB_TEST_POP_IF_TRUE
+ * BB_BRANCH_IF_FLAG_SET
+ * CACHE
+ *
+ * Since both edges are now generated, we want to rewrite it to:
+ *
+ * BB_TEST_POP_IF_TRUE
+ * BB_JUMP_IF_FLAG_SET
+ * CACHE (will be converted to EXTENDED_ARG if we need a bigger jump)
+ *
+ * Backwards jumps are handled by another function.
+ *
+ * @param bb_branch The BB_BRANCH_IF* instruction to rewrite (the branch that selects the consequent/alternative BB).
+ * @param target The jump target.
+*/
+void
+_PyTier2_RewriteForwardJump(_Py_CODEUNIT *bb_branch, _Py_CODEUNIT *target)
+{
+    int branch = _Py_OPCODE(*bb_branch);
+    assert(branch == BB_BRANCH_IF_FLAG_SET ||
+        branch == BB_BRANCH_IF_FLAG_UNSET);
+    _Py_CODEUNIT *write_curr = bb_branch - 1;
+    // -1 because the PC is auto incremented
+    int oparg = (int)(target - bb_branch - 1);
+    assert(oparg > 0);
+    bool requires_extended = oparg > 0xFF;
+    assert(oparg <= 0xFFFF);
+    if (requires_extended) {
+        _py_set_opcode(write_curr, EXTENDED_ARG);
+        write_curr->op.arg = (oparg >> 8) & 0xFF;
+        write_curr++;
+    }
+    else {
+        _py_set_opcode(write_curr, NOP);
+        write_curr++;
+    }
+    _py_set_opcode(write_curr,
+        branch == BB_BRANCH_IF_FLAG_SET ? BB_JUMP_IF_FLAG_SET : BB_JUMP_IF_FLAG_UNSET);
+    write_curr->op.arg = oparg & 0xFF;
+    write_curr++;
+}
+
+
+/**
+ * @brief Rewrites a BB_JUMP_BACKWARD_LAZY to a more efficient, standard JUMP_BACKWARD_QUICK.
+ *
+ * Before:
+ *
+ * EXTENDED_ARG/NOP
+ * BB_JUMP_BACKWARD_LAZY
+ * CACHE
+ *
+ * After:
+ *
+ * EXTENDED_ARG (if needed, else NOP)
+ * JUMP_BACKWARD_QUICK
+ * END_FOR
+ *
+ * @param jump_backward_lazy The backwards jump instruction.
+ * @param target The target we're jumping to.
+*/
+void
+_PyTier2_RewriteBackwardJump(_Py_CODEUNIT *jump_backward_lazy, _Py_CODEUNIT *target)
+{
+    _Py_CODEUNIT *write_curr = jump_backward_lazy - 1;
+    _Py_CODEUNIT *prev = jump_backward_lazy - 1;
+    assert(_Py_OPCODE(*jump_backward_lazy) == BB_JUMP_BACKWARD_LAZY);
+    assert(_Py_OPCODE(*prev) == EXTENDED_ARG ||
+        _Py_OPCODE(*prev) == NOP);
+
+    // +1 because we increment the PC before JUMPBY
+    int oparg = (int)(target - (jump_backward_lazy + 1));
+    assert(oparg != 0);
+    // Is backwards jump.
+    bool is_backwards_jump = oparg < 0;
+    if (is_backwards_jump) {
+        oparg = -oparg;
+    }
+    assert(oparg > 0);
+    assert(oparg <= 0xFFFF);
+
+    bool requires_extended = oparg > 0xFF;
+    if (requires_extended) {
+        _py_set_opcode(write_curr, EXTENDED_ARG);
+        write_curr->op.arg = (oparg >> 8) & 0xFF;
+        write_curr++;
+    }
+    else {
+        _py_set_opcode(write_curr, NOP);
+        write_curr++;
+    }
+    _py_set_opcode(write_curr, is_backwards_jump
+        ? JUMP_BACKWARD_QUICK
+        : JUMP_FORWARD);
+    write_curr->op.arg = oparg & 0xFF;
+    write_curr++;
+    _py_set_opcode(write_curr, END_FOR);
+    write_curr++;
+    return;
+}
+
+#undef TYPESTACK_PEEK
+#undef TYPESTACK_POKE
+#undef TYPELOCALS_SET
+#undef TYPELOCALS_GET
+#undef TYPE_SET
+#undef TYPE_OVERWRITE
+#undef GET_CONST
diff --git a/Python/tier2_typepropagator.c.h b/Python/tier2_typepropagator.c.h new file mode 100644 index 00000000000000..cdab65faf0c314 --- /dev/null +++ b/Python/tier2_typepropagator.c.h @@ -0,0 +1,1047 @@
+// This file is generated by Tools/cases_generator/generate_cases.py @TODO: make this a separate argument
+// from:
+// Python/bytecodes.c
+// Do not edit!
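+//
+// A rough overview of the conventions in the generated cases below (inferred from their
+// use in Python/tier2.c, so treat the details as a sketch rather than a specification):
+// each TARGET mirrors the type-stack effect of its bytecode instead of executing it.
+// STACK_GROW/STACK_SHRINK appear to adjust the type stack pointer, TYPESTACK_PEEK(n) and
+// TYPELOCALS_GET(i) address type nodes on the stack and in the locals, and
+// TYPE_OVERWRITE/TYPE_SET record either a concrete root type (e.g. &PyLong_Type) or
+// _Py_TYPENODE_NULLROOT when the result type is unknown (see the matching #undef block
+// at the end of Python/tier2.c).
+//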
+ + TARGET(NOP) { + break; + } + + TARGET(RESUME) { + break; + } + + TARGET(RESUME_QUICK) { + break; + } + + TARGET(LOAD_CLOSURE) { + STACK_GROW(1); + TYPE_OVERWRITE(TYPELOCALS_GET(oparg), TYPESTACK_PEEK(1), false); + break; + } + + TARGET(LOAD_FAST_CHECK) { + STACK_GROW(1); + TYPE_OVERWRITE(TYPELOCALS_GET(oparg), TYPESTACK_PEEK(1), false); + break; + } + + TARGET(LOAD_FAST) { + STACK_GROW(1); + TYPE_OVERWRITE(TYPELOCALS_GET(oparg), TYPESTACK_PEEK(1), false); + break; + } + + TARGET(LOAD_FAST_NO_INCREF) { + STACK_GROW(1); + TYPE_OVERWRITE(TYPELOCALS_GET(oparg), TYPESTACK_PEEK(1), false); + break; + } + + TARGET(LOAD_CONST) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)TYPECONST_GET(oparg), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(STORE_FAST) { + _Py_TYPENODE_t *value = TYPESTACK_PEEK(1); + TYPE_OVERWRITE(value, TYPELOCALS_GET(oparg), false); + STACK_SHRINK(1); + break; + } + + TARGET(STORE_FAST_BOXED_UNBOXED) { + _Py_TYPENODE_t *value = TYPESTACK_PEEK(1); + TYPE_OVERWRITE(value, TYPELOCALS_GET(oparg), false); + STACK_SHRINK(1); + break; + } + + TARGET(STORE_FAST_UNBOXED_BOXED) { + _Py_TYPENODE_t *value = TYPESTACK_PEEK(1); + TYPE_OVERWRITE(value, TYPELOCALS_GET(oparg), false); + STACK_SHRINK(1); + break; + } + + TARGET(STORE_FAST_UNBOXED_UNBOXED) { + _Py_TYPENODE_t *value = TYPESTACK_PEEK(1); + TYPE_OVERWRITE(value, TYPELOCALS_GET(oparg), false); + STACK_SHRINK(1); + break; + } + + TARGET(POP_TOP) { + STACK_SHRINK(1); + break; + } + + TARGET(POP_TOP_NO_DECREF) { + STACK_SHRINK(1); + break; + } + + TARGET(PUSH_NULL) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(END_FOR) { + { + STACK_SHRINK(1); + } + { + STACK_SHRINK(1); + } + break; + } + + TARGET(UNARY_NEGATIVE) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(UNARY_NOT) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(UNARY_INVERT) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_MULTIPLY_INT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_MULTIPLY_INT_REST) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyLong_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_MULTIPLY_FLOAT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_SUBTRACT_INT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_SUBTRACT_INT_REST) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyLong_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_SUBTRACT_FLOAT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_ADD_UNICODE) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_INPLACE_ADD_UNICODE) { + STACK_SHRINK(2); + break; + } + + TARGET(BINARY_OP_ADD_FLOAT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_CHECK_FLOAT) { + 
TYPE_SET((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyFloat_Type), TYPESTACK_PEEK(1), true); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyRawFloat_Type), TYPESTACK_PEEK(1), true); + TYPE_SET((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyFloat_Type), TYPESTACK_PEEK(2), true); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyRawFloat_Type), TYPESTACK_PEEK(2), true); + break; + } + + TARGET(BINARY_OP_ADD_FLOAT_UNBOXED) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyRawFloat_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_SUBTRACT_FLOAT_UNBOXED) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyRawFloat_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_OP_MULTIPLY_FLOAT_UNBOXED) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyRawFloat_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(UNBOX_FLOAT) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyRawFloat_Type), TYPESTACK_PEEK(1 + oparg), true); + break; + } + + TARGET(BOX_FLOAT) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyFloat_Type), TYPESTACK_PEEK(1 + oparg), true); + break; + } + + TARGET(BINARY_OP_ADD_INT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_CHECK_INT) { + TYPE_SET((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyLong_Type), TYPESTACK_PEEK(1), true); + TYPE_SET((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyLong_Type), TYPESTACK_PEEK(2), true); + break; + } + + TARGET(BINARY_OP_ADD_INT_REST) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyLong_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_SUBSCR) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_SLICE) { + STACK_SHRINK(2); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(STORE_SLICE) { + STACK_SHRINK(4); + break; + } + + TARGET(BINARY_SUBSCR_LIST_INT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_SUBSCR_LIST_INT_REST) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CHECK_LIST) { + TYPE_SET((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyList_Type), TYPESTACK_PEEK(1 + oparg), true); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyList_Type), TYPESTACK_PEEK(1 + oparg), true); + break; + } + + TARGET(BINARY_SUBSCR_TUPLE_INT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_SUBSCR_DICT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BINARY_SUBSCR_GETITEM) { + STACK_SHRINK(1); + break; + } + + TARGET(LIST_APPEND) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyList_Type), TYPESTACK_PEEK(1 + (oparg-1)), true); + break; + } + + TARGET(SET_ADD) { + STACK_SHRINK(1); + break; + } + + TARGET(STORE_SUBSCR) { + 
STACK_SHRINK(3); + break; + } + + TARGET(STORE_SUBSCR_LIST_INT) { + STACK_SHRINK(3); + break; + } + + TARGET(STORE_SUBSCR_LIST_INT_REST) { + STACK_SHRINK(3); + break; + } + + TARGET(STORE_SUBSCR_DICT) { + STACK_SHRINK(3); + break; + } + + TARGET(DELETE_SUBSCR) { + STACK_SHRINK(2); + break; + } + + TARGET(CALL_INTRINSIC_1) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_INTRINSIC_2) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(INTERPRETER_EXIT) { + STACK_SHRINK(1); + break; + } + + TARGET(RETURN_VALUE) { + STACK_SHRINK(1); + break; + } + + TARGET(RETURN_CONST) { + break; + } + + TARGET(GET_AITER) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(GET_ANEXT) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(GET_AWAITABLE) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(END_ASYNC_FOR) { + STACK_SHRINK(2); + break; + } + + TARGET(CLEANUP_THROW) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(2), true); + break; + } + + TARGET(LOAD_ASSERTION_ERROR) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(LOAD_BUILD_CLASS) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(STORE_NAME) { + STACK_SHRINK(1); + break; + } + + TARGET(DELETE_NAME) { + break; + } + + TARGET(UNPACK_SEQUENCE) { + STACK_SHRINK(1); + STACK_GROW(oparg); + for (int i = 0; i < (oparg); i++) {TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(oparg - i), true);} + break; + } + + TARGET(UNPACK_SEQUENCE_TWO_TUPLE) { + STACK_SHRINK(1); + STACK_GROW(oparg); + for (int i = 0; i < (oparg); i++) {TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(oparg - i), true);} + break; + } + + TARGET(UNPACK_SEQUENCE_TUPLE) { + STACK_SHRINK(1); + STACK_GROW(oparg); + for (int i = 0; i < (oparg); i++) {TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(oparg - i), true);} + break; + } + + TARGET(UNPACK_SEQUENCE_LIST) { + STACK_SHRINK(1); + STACK_GROW(oparg); + for (int i = 0; i < (oparg); i++) {TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(oparg - i), true);} + break; + } + + TARGET(UNPACK_EX) { + STACK_GROW((oparg >> 8) + (oparg & 0xFF)); + for (int i = 0; i < (oparg & 0xFF); i++) {TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK((oparg & 0xFF) - i), true);} + break; + } + + TARGET(STORE_ATTR) { + STACK_SHRINK(2); + break; + } + + TARGET(DELETE_ATTR) { + STACK_SHRINK(1); + break; + } + + TARGET(STORE_GLOBAL) { + STACK_SHRINK(1); + break; + } + + TARGET(DELETE_GLOBAL) { + break; + } + + TARGET(LOAD_NAME) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(LOAD_GLOBAL) { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 
1 : 0)), true); } + break; + } + + TARGET(LOAD_GLOBAL_MODULE) { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_GLOBAL_BUILTIN) { + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(DELETE_DEREF) { + break; + } + + TARGET(LOAD_CLASSDEREF) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(STORE_DEREF) { + STACK_SHRINK(1); + break; + } + + TARGET(COPY_FREE_VARS) { + break; + } + + TARGET(BUILD_STRING) { + STACK_SHRINK(oparg); + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyUnicode_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BUILD_TUPLE) { + STACK_SHRINK(oparg); + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyTuple_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BUILD_LIST) { + STACK_SHRINK(oparg); + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyList_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(LIST_EXTEND) { + STACK_SHRINK(1); + break; + } + + TARGET(SET_UPDATE) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PySet_Type), TYPESTACK_PEEK(1 + (oparg-1)), true); + break; + } + + TARGET(BUILD_SET) { + STACK_SHRINK(oparg); + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PySet_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BUILD_MAP) { + STACK_SHRINK(oparg*2); + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyDict_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(SETUP_ANNOTATIONS) { + break; + } + + TARGET(BUILD_CONST_KEY_MAP) { + STACK_SHRINK(oparg); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyDict_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(DICT_UPDATE) { + STACK_SHRINK(1); + break; + } + + TARGET(DICT_MERGE) { + STACK_SHRINK(1); + break; + } + + TARGET(MAP_ADD) { + STACK_SHRINK(2); + break; + } + + TARGET(LOAD_ATTR) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_INSTANCE_VALUE) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_MODULE) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_WITH_HINT) { + STACK_GROW(((oparg & 1) ? 
1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_SLOT) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_CLASS) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_PROPERTY) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + TARGET(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + break; + } + + TARGET(STORE_ATTR_INSTANCE_VALUE) { + STACK_SHRINK(2); + break; + } + + TARGET(STORE_ATTR_WITH_HINT) { + STACK_SHRINK(2); + break; + } + + TARGET(STORE_ATTR_SLOT) { + STACK_SHRINK(2); + break; + } + + TARGET(COMPARE_OP) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(COMPARE_OP_FLOAT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(COMPARE_OP_INT) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(COMPARE_OP_STR) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(IS_OP) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyBool_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CONTAINS_OP) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyBool_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CHECK_EG_MATCH) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(2), true); + break; + } + + TARGET(CHECK_EXC_MATCH) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyBool_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(IMPORT_NAME) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(IMPORT_FROM) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(JUMP_FORWARD) { + break; + } + + TARGET(JUMP_BACKWARD) { + break; + } + + TARGET(JUMP_BACKWARD_QUICK) { + break; + } + + TARGET(POP_JUMP_IF_FALSE) { + STACK_SHRINK(1); + break; + } + + TARGET(BB_TEST_POP_IF_FALSE) { + STACK_SHRINK(1); + break; + } + + TARGET(POP_JUMP_IF_TRUE) { + STACK_SHRINK(1); + break; + } + + TARGET(BB_TEST_POP_IF_TRUE) { + STACK_SHRINK(1); + break; + } + + TARGET(POP_JUMP_IF_NOT_NONE) { + STACK_SHRINK(1); + break; + } + + TARGET(BB_TEST_POP_IF_NOT_NONE) { + STACK_SHRINK(1); + break; + } + + TARGET(POP_JUMP_IF_NONE) { + STACK_SHRINK(1); + break; + } + + TARGET(BB_TEST_POP_IF_NONE) { + STACK_SHRINK(1); + break; + } + + TARGET(JUMP_BACKWARD_NO_INTERRUPT) { + break; + } + + TARGET(GET_LEN) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t 
*)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&PyLong_Type), TYPESTACK_PEEK(1), true); + break; + } + + TARGET(MATCH_CLASS) { + STACK_SHRINK(2); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(GET_ITER) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(GET_YIELD_FROM_ITER) { + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BB_TEST_ITER) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(FOR_ITER_LIST) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BB_TEST_ITER_LIST) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(FOR_ITER_TUPLE) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BB_TEST_ITER_TUPLE) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(FOR_ITER_RANGE) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(BB_TEST_ITER_RANGE) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(FOR_ITER_GEN) { + STACK_GROW(1); + break; + } + + TARGET(BEFORE_ASYNC_WITH) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(2), true); + break; + } + + TARGET(BEFORE_WITH) { + STACK_GROW(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(2), true); + break; + } + + TARGET(LOAD_ATTR_METHOD_WITH_VALUES) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_METHOD_NO_DICT) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 1 : 0)), true); } + break; + } + + TARGET(LOAD_ATTR_METHOD_LAZY_DICT) { + STACK_GROW(((oparg & 1) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + if (oparg & 1) { TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1 + ((oparg & 1) ? 
1 : 0)), true); } + break; + } + + TARGET(KW_NAMES) { + break; + } + + TARGET(CALL) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_BOUND_METHOD_EXACT_ARGS) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + TARGET(CALL_PY_EXACT_ARGS) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + TARGET(CALL_PY_WITH_DEFAULTS) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + TARGET(CALL_NO_KW_TYPE_1) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_STR_1) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_TUPLE_1) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_BUILTIN_CLASS) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_BUILTIN_O) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_BUILTIN_FAST) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_BUILTIN_FAST_WITH_KEYWORDS) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_LEN) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_ISINSTANCE) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_LIST_APPEND) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + break; + } + + TARGET(CALL_NO_KW_METHOD_DESCRIPTOR_O) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_NO_KW_METHOD_DESCRIPTOR_FAST) { + STACK_SHRINK(oparg); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CALL_FUNCTION_EX) { + STACK_SHRINK(((oparg & 1) ? 1 : 0)); + STACK_SHRINK(2); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(MAKE_FUNCTION) { + STACK_SHRINK(((oparg & 0x01) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x08) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(RETURN_GENERATOR) { + break; + } + + TARGET(BUILD_SLICE) { + STACK_SHRINK(((oparg == 3) ? 
1 : 0)); + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(FORMAT_VALUE) { + STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(COPY) { + _Py_TYPENODE_t *bottom = TYPESTACK_PEEK(1 + (oparg-1)); + STACK_GROW(1); + TYPE_OVERWRITE(bottom, TYPESTACK_PEEK(1), false); + break; + } + + TARGET(COPY_NO_INCREF) { + _Py_TYPENODE_t *bottom = TYPESTACK_PEEK(1 + (oparg - 1)); + STACK_GROW(1); + TYPE_OVERWRITE(bottom, TYPESTACK_PEEK(1), false); + break; + } + + TARGET(BINARY_OP) { + STACK_SHRINK(1); + TYPE_OVERWRITE((_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT, TYPESTACK_PEEK(1), true); + break; + } + + TARGET(CACHE) { + break; + } + + TARGET(BB_BRANCH) { + break; + } + + TARGET(BB_BRANCH_IF_FLAG_UNSET) { + break; + } + + TARGET(BB_JUMP_IF_FLAG_UNSET) { + break; + } + + TARGET(BB_BRANCH_IF_FLAG_SET) { + break; + } + + TARGET(BB_JUMP_IF_FLAG_SET) { + break; + } + + TARGET(BB_JUMP_BACKWARD_LAZY) { + break; + } diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index ec808526b7bbb7..b22d5b75139d80 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -260,7 +260,8 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.field(code, "co_posonlyargcount") self.field(code, "co_kwonlyargcount") # The following should remain in sync with _PyFrame_NumSlotsForCodeObject - self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,") + self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE" + f" + ({len(localsplusnames)} * sizeof(char) / sizeof(PyObject *) + 1),") self.field(code, "co_stacksize") self.field(code, "co_firstlineno") self.write(f".co_nlocalsplus = {len(localsplusnames)},") @@ -277,6 +278,8 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.write(f".co_linetable = {co_linetable},") self.write(f"._co_cached = NULL,") self.write("._co_linearray = NULL,") + self.write("._tier2_warmup = -64,") + self.write("._tier2_info = NULL,") self.write(f".co_code_adaptive = {co_code_adaptive},") for i, op in enumerate(code.co_code[::2]): if op == RESUME: diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py index 9b2112f7f5f31d..a8f3d5460f11f7 100644 --- a/Tools/build/generate_opcode_h.py +++ b/Tools/build/generate_opcode_h.py @@ -105,14 +105,28 @@ def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/interna specialized_opmap[name] = next_op opname_including_specialized[next_op] = name used[next_op] = True + specialized_opmap['DO_TRACING'] = 255 opname_including_specialized[255] = 'DO_TRACING' used[255] = True + # The Tier 2 ops + next_op = 1 + uop_opmap = {} + # Add microops + for name in opcode['_uops']: + while used[next_op]: + next_op += 1 + uop_opmap[name] = next_op + opname_including_specialized[next_op] = name + used[next_op] = True + with open(outfile, 'w') as fobj, open(internaloutfile, 'w') as iobj: fobj.write(header) iobj.write(internal_header) + # Tier 1 opcodes + for name in opname: if name in opmap: op = opmap[name] @@ -126,8 +140,13 @@ def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/interna if op == MAX_PSEUDO_OPCODE: fobj.write(DEFINE.format("MAX_PSEUDO_OPCODE", MAX_PSEUDO_OPCODE)) - for name, op in specialized_opmap.items(): + if name not in uop_opmap: + fobj.write(DEFINE.format(name, op)) + + # 
Tier 2 opcodes + fobj.write("// Tier 2 interpreter ops\n") + for name, op in uop_opmap.items(): fobj.write(DEFINE.format(name, op)) iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n") @@ -177,6 +196,7 @@ def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/interna fobj.write(f"#define ENABLE_SPECIALIZATION {int(ENABLE_SPECIALIZATION)}") iobj.write("\n") + # Tier 1 opnames iobj.write("#ifdef Py_DEBUG\n") iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n") for op, name in enumerate(opname_including_specialized): @@ -186,6 +206,8 @@ def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/interna iobj.write("};\n") iobj.write("#endif\n") + iobj.write("\n") + iobj.write("\n") iobj.write("#define EXTRA_CASES \\\n") for i, flag in enumerate(used): diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index a0bba65545d4f8..caa224493ae5d9 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -13,19 +13,36 @@ import sys import typing +from enum import Enum, auto + import lexer as lx import parser from parser import StackEffect +from parser import TypeSrcLiteral, TypeSrcConst, TypeSrcLocals, TypeSrcStackInput +from parser import LocalEffect HERE = os.path.dirname(__file__) ROOT = os.path.join(HERE, "../..") THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, posixpath.sep) DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c")) + +# Tier 1 interpreter DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h")) DEFAULT_METADATA_OUTPUT = os.path.relpath( os.path.join(ROOT, "Python/opcode_metadata.h") ) + +# Tier 2 interpreter +TIER2_MACRO_TO_MICRO_MAP_OUTPUT = os.path.relpath( + os.path.join(ROOT, "Include/internal/pycore_opcode_macro_to_micro.h") +) + +# Tier 2 type propagator +TIER2_TYPE_PROPAGATOR_OUTPUT = os.path.relpath( + os.path.join(ROOT, "Python/tier2_typepropagator.c.h") +) + BEGIN_MARKER = "// BEGIN BYTECODES //" END_MARKER = "// END BYTECODES //" RE_PREDICTED = ( @@ -34,6 +51,28 @@ UNUSED = "unused" BITS_PER_CODE_UNIT = 16 +TYPE_PROPAGATOR_FORBIDDEN = [ + # Type propagator shouldn't see these + "FOR_ITER", + "SWAP", + # Not supported + "SEND", + "SEND_GEN", + "YIELD_VALUE", + "RAISE_VARARGS", + "PUSH_EXC_INFO", + "RERAISE", + "POP_EXCEPT", + "LOAD_DEREF", + "MAKE_CELL", + "DELETE_FAST", + "MATCH_MAPPING", + "MATCH_SEQUENCE", + "MATCH_KEYS", + "EXTENDED_ARG", + "WITH_EXCEPT_START", +] + arg_parser = argparse.ArgumentParser( description="Generate the code for the interpreter switch.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -51,6 +90,14 @@ "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)" ) +arg_parser.add_argument( + "-u", + action="store_true", + help=f"Generate macro to micro instruction map instead," + f" along with the type for uop type guards" + f"changes output default to {TIER2_MACRO_TO_MICRO_MAP_OUTPUT}", +) + def effect_size(effect: StackEffect) -> tuple[int, str]: """Return the 'size' impact of a stack effect. 
@@ -109,6 +156,7 @@ class Formatter: stream: typing.TextIO prefix: str + postfix: str emit_line_directives: bool = False lineno: int # Next line number, 1-based filename: str # Slightly improved stream.filename @@ -120,6 +168,7 @@ def __init__( ) -> None: self.stream = stream self.prefix = " " * indent + self.postfix = "" self.emit_line_directives = emit_line_directives self.lineno = 1 filename = os.path.relpath(self.stream.name, ROOT) @@ -141,7 +190,7 @@ def write_raw(self, s: str) -> None: def emit(self, arg: str) -> None: if arg: - self.write_raw(f"{self.prefix}{arg}\n") + self.write_raw(f"{self.prefix}{arg}{self.postfix}\n") else: self.write_raw("\n") @@ -230,7 +279,7 @@ class Instruction: # Parts of the underlying instruction definition inst: parser.InstDef - kind: typing.Literal["inst", "op", "legacy"] # Legacy means no (input -- output) + kind: parser.INST_KINDS name: str block: parser.Block block_text: list[str] # Block.text, less curlies, less PREDICT() calls @@ -243,6 +292,7 @@ class Instruction: cache_effects: list[parser.CacheEffect] input_effects: list[StackEffect] output_effects: list[StackEffect] + local_effects: LocalEffect | None unmoved_names: frozenset[str] instr_fmt: str @@ -266,6 +316,7 @@ def __init__(self, inst: parser.InstDef): effect for effect in inst.inputs if isinstance(effect, StackEffect) ] self.output_effects = inst.outputs # For consistency/completeness + self.local_effects = inst.localeffect unmoved_names: set[str] = set() for ieffect, oeffect in zip(self.input_effects, self.output_effects): if ieffect.name == oeffect.name: @@ -396,7 +447,10 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None for line in self.block_text: out.set_lineno(self.block_line + offset, filename) offset += 1 - if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line): + if m := re.match(r"(\s*)U_INST\((.+)\);\s*$", line): + space, label = m.groups() + out.emit(f"UOP_{label}();") + elif m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line): space, cond, label = m.groups() space = extra + space # ERROR_IF() must pop the inputs from the stack. 
@@ -418,7 +472,7 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None f"{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n" ) else: - out.write_raw(f"{space}if ({cond}) goto {label};\n") + out.write_raw(f"{space}if ({cond}) goto {label};{out.postfix}\n") elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line): out.reset_lineno() space = extra + m.group(1) @@ -435,9 +489,163 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None decref = "XDECREF" if ieff.cond else "DECREF" out.write_raw(f"{space}Py_{decref}({ieff.name});\n") else: - out.write_raw(extra + line) + out.write_raw(extra + line.rstrip("\n") + out.postfix + "\n") out.reset_lineno() + def write_typeprop(self, out: Formatter) -> None: + """Write one instruction's type propagation rules""" + + # TODO: Add SWAP to DSL + + need_to_declare = [] + # Stack input is used in local effect + if self.local_effects and \ + isinstance(val := self.local_effects.value, TypeSrcStackInput): + need_to_declare.append(val.name) + # Stack input is used in output effect + for oeffect in self.output_effects: + if not (typ := oeffect.type_annotation): continue + ops = typ.ops + for op in ops: + if not isinstance(src := op.src, TypeSrcStackInput): continue + if oeffect.name in self.unmoved_names and oeffect.name == src.name: + print( + f"Warn: {self.name} type annotation for {oeffect.name} will be ignored " + "as it is unmoved") + continue + need_to_declare.append(src.name) + + # Write input stack effect variable declarations and initializations + ieffects = list(reversed(self.input_effects)) + usable_for_local_effect = {} + all_input_effect_names = {} + for i, ieffect in enumerate(ieffects): + + if ieffect.name not in need_to_declare: continue + + isize = string_effect_size( + list_effect_size([ieff for ieff in ieffects[: i + 1]]) + ) + all_input_effect_names[ieffect.name] = (ieffect, i) + dst = StackEffect(ieffect.name, "_Py_TYPENODE_t *") + if ieffect.size: + # TODO: Support more cases as needed + raise Exception("Type propagation across sized input effect not implemented") + elif ieffect.cond: + src = StackEffect(f"({ieffect.cond}) ? 
TYPESTACK_PEEK({isize}) : NULL", "_Py_TYPENODE_t *") + else: + usable_for_local_effect[ieffect.name] = ieffect + src = StackEffect(f"TYPESTACK_PEEK({isize})", "_Py_TYPENODE_t *") + out.declare(dst, src) + + # Write localarr effect + if self.local_effects: + + idx = self.local_effects.index + val = self.local_effects.value + + typ_op = "TYPE_OVERWRITE" + dst = f"TYPELOCALS_GET({idx})" + match val: + case TypeSrcLiteral(name=valstr): + if valstr == "NULL": + src = "(_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT" + flag = "true" + else: + src = f"(_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&{valstr})" + flag = "true" + case TypeSrcStackInput(name=valstr): + assert valstr in usable_for_local_effect, \ + "`cond` and `size` stackvar not supported for localeffect" + src = valstr + flag = "false" + # TODO: Support more cases as needed + case TypeSrcConst(): + raise Exception("Not implemented") + case TypeSrcLocals(): + raise Exception("Not implemented") + case _: + typing.assert_never(val) + out.emit(f"{typ_op}({src}, {dst}, {flag});") + + # Update stack size + out.stack_adjust( + 0, + [ieff for ieff in self.input_effects], + [oeff for oeff in self.output_effects], + ) + + # Stack effect + oeffects = list(reversed(self.output_effects)) + for i, oeffect in enumerate(oeffects): + osize = string_effect_size( + list_effect_size([oeff for oeff in oeffects[: i + 1]]) + ) + dst = f"TYPESTACK_PEEK({osize})" + + # Check if it's even used + if oeffect.name == UNUSED: continue + + # For now assume OVERWRITE with NULL + if oeffect.size: + op = "TYPE_OVERWRITE" + src = "(_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT" + flag = "true" + dst = f"TYPESTACK_PEEK({osize} - i)" + opstr = "".join([ + f"for (int i = 0; i < ({oeffect.size}); i++) {{" + f"{op}({src}, {dst}, {flag});" + f"}}" + ]) + out.emit(opstr) + continue + + # Check if there's type info + if typ := oeffect.type_annotation: + for op in typ.ops: + match op.src: + case TypeSrcLiteral(literal=valstr): + if valstr == "NULL": + src = "(_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT" + flag = "true" + else: + src = f"(_Py_TYPENODE_t *)_Py_TYPENODE_MAKE_ROOT((_Py_TYPENODE_t)&{valstr})" + flag = "true" + case TypeSrcStackInput(name=valstr): + assert valstr in need_to_declare + assert oeffect.name not in self.unmoved_names + src = valstr + flag = "false" + case TypeSrcConst(index=idx): + src = f"(_Py_TYPENODE_t *)TYPECONST_GET({idx})" + flag = "true" + case TypeSrcLocals(index=idx): + src = f"TYPELOCALS_GET({idx})" + flag = "false" + case _: + typing.assert_never(op.src) + + opstr = f"{op.op}({src}, {dst}, {flag})" + if oeffect.cond: + out.emit(f"if ({oeffect.cond}) {{ {opstr}; }}") + else: + out.emit(f"{opstr};") + continue + + # Don't touch unmoved stack vars + if oeffect.name in self.unmoved_names: + continue + + # Just output null + typ_op = "TYPE_OVERWRITE" + src = "(_Py_TYPENODE_t *)_Py_TYPENODE_NULLROOT" + flag = "true" + opstr = f"{typ_op}({src}, {dst}, {flag})" + if oeffect.cond: + out.emit(f"if ({oeffect.cond}) {{ {opstr}; }}") + else: + out.emit(f"{opstr};") + InstructionOrCacheEffect = Instruction | parser.CacheEffect StackEffectMapping = list[tuple[StackEffect, StackEffect]] @@ -463,6 +671,10 @@ def write_body(self, out: Formatter, cache_adjust: int) -> None: for var, oeffect in self.output_mapping: out.assign(var, oeffect) + def write_typeprop(self, out: Formatter) -> None: + with out.block(""): + self.instr.write_typeprop(out) + @dataclasses.dataclass class SuperOrMacroInstruction: @@ -536,6 +748,8 @@ def error(self, msg: str, node: parser.Node) -> None: 
super_instrs: dict[str, SuperInstruction] macros: dict[str, parser.Macro] macro_instrs: dict[str, MacroInstruction] + macro_instdefs: list[parser.InstDef] + u_insts: list[parser.InstDef] families: dict[str, parser.Family] def parse(self) -> None: @@ -549,6 +763,9 @@ def parse(self) -> None: self.instrs = {} self.supers = {} self.macros = {} + self.macro_instrs = {} + self.macro_instdefs = [] + self.u_insts = [] self.families = {} instrs_idx: dict[str, int] = dict() @@ -615,6 +832,10 @@ def parse_file(self, filename: str, instrs_idx: dict[str, int]) -> None: self.instrs[name] = Instruction(thing) instrs_idx[name] = len(self.everything) self.everything.append(thing) + if thing.kind == "macro_inst": + self.macro_instdefs.append(thing) + elif thing.kind == "u_inst": + self.u_insts.append(thing) case parser.Super(name): self.supers[name] = thing self.everything.append(thing) @@ -751,6 +972,8 @@ def analyze_supers_and_macros(self) -> None: self.super_instrs[name] = self.analyze_super(super) for name, macro in self.macros.items(): self.macro_instrs[name] = self.analyze_macro(macro) + for macro_instdef in self.macro_instdefs: + self.analyze_macro_instdefs(macro_instdef) def analyze_super(self, super: parser.Super) -> SuperInstruction: components = self.check_super_components(super) @@ -795,6 +1018,11 @@ def analyze_macro(self, macro: parser.Macro) -> MacroInstruction: macro.name, stack, initial_sp, final_sp, format, macro, parts ) + def analyze_macro_instdefs(self, macro_def: parser.InstDef): + for uop in macro_def.u_insts: + if uop not in self.instrs: + self.error(f"Unknown instruction {uop} in {macro_def!r}", macro_def) + def analyze_instruction( self, instr: Instruction, stack: list[StackEffect], sp: int ) -> tuple[Component, int]: @@ -949,6 +1177,34 @@ def write_function( write_function("pushed", pushed_data) self.out.emit("") + def write_typepropagator(self) -> None: + """Write the type propagator""" + + with open(self.output_filename, "w") as f: + # Write provenance header + f.write(f"// This file is generated by {THIS} @TODO: make this a seperate argument\n") + f.write(self.from_source_files()) + f.write(f"// Do not edit!\n") + + # Create formatter + self.out = Formatter(f, 8) + + for thing in self.everything: + if thing.name in TYPE_PROPAGATOR_FORBIDDEN: + continue + match thing: + case parser.InstDef(kind=kind, name=name): + match kind: + case "op": pass + case _: + self.write_instr_typeprop(self.instrs[name]) + case parser.Super(name=name): + self.write_super_typeprop(self.super_instrs[name]) + case parser.Macro(name=name): + self.write_macro_typeprop(self.macro_instrs[name]) + case _: + typing.assert_never(thing) + def from_source_files(self) -> str: paths = "\n// ".join( os.path.relpath(filename, ROOT).replace(os.path.sep, posixpath.sep) @@ -1010,8 +1266,9 @@ def write_metadata(self) -> None: case OverriddenInstructionPlaceHolder(): continue case parser.InstDef(): - if thing.kind != "op": - self.write_metadata_for_inst(self.instrs[thing.name]) + if thing.kind == "op": + continue + self.write_metadata_for_inst(self.instrs[thing.name]) case parser.Super(): self.write_metadata_for_super(self.super_instrs[thing.name]) case parser.Macro(): @@ -1056,14 +1313,29 @@ def write_instructions(self) -> None: n_instrs = 0 n_supers = 0 n_macros = 0 + + # Single pass to hoist all the u_instructions to the top. 
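+        # (These u_insts become C `#define UOP_<NAME>()` macros via write_u_inst_as_c_macro(),
+        #  so they must be emitted before any macro_inst body that expands them with U_INST().)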
for thing in self.everything: match thing: case OverriddenInstructionPlaceHolder(): self.write_overridden_instr_place_holder(thing) case parser.InstDef(): - if thing.kind != "op": - n_instrs += 1 - self.write_instr(self.instrs[thing.name]) + if thing.kind == "u_inst": + self.write_u_inst_as_c_macro( + self.instrs[thing.name]) + case _: + pass + + # Everything else + for thing in self.everything: + match thing: + case parser.InstDef(): + match thing.kind: + case "op": + pass + case _: + n_instrs += 1 + self.write_instr(self.instrs[thing.name]) case parser.Super(): n_supers += 1 self.write_super(self.super_instrs[thing.name]) @@ -1101,6 +1373,16 @@ def write_instr(self, instr: Instruction) -> None: self.out.emit("CHECK_EVAL_BREAKER();") self.out.emit(f"DISPATCH();") + def write_u_inst_as_c_macro(self, instr: Instruction) -> None: + name = instr.name + self.out.emit("") + self.out.emit(f"#define UOP_{name}() \\") + self.out.emit("do { \\") + self.out.postfix = "\\" + instr.write_body(self.out, 0) + self.out.postfix = "" + self.out.emit("} while (0)") + def write_super(self, sup: SuperInstruction) -> None: """Write code for a super-instruction.""" with self.wrap_super_or_macro(sup): @@ -1142,6 +1424,31 @@ def write_macro(self, mac: MacroInstruction) -> None: f'{cache_adjust}, "incorrect cache size");' ) + def write_instr_typeprop(self, instr: Instruction) -> None: + name = instr.name + self.out.emit("") + with self.out.block(f"TARGET({name})"): + instr.write_typeprop(self.out) + self.out.emit("break;") + + def write_super_typeprop(self, sup: SuperInstruction) -> None: + # TODO: Support super instructions + # Currently not support because of the need for NEXTOPARG + ... + + def write_macro_typeprop(self, mac: MacroInstruction) -> None: + # TODO: Make the code emitted more efficient by + # combining stack effect + name = mac.name + self.out.emit("") + with self.out.block(f"TARGET({name})"): + for comp in mac.parts: + if not isinstance(comp, Component): continue + comp.write_typeprop(self.out) + self.out.emit("break;") + + + @contextlib.contextmanager def wrap_super_or_macro(self, up: SuperOrMacroInstruction): """Shared boilerplate for super- and macro instructions.""" @@ -1238,7 +1545,6 @@ def variable_used(node: parser.Node, name: str) -> bool: token.kind == "IDENTIFIER" and token.text == name for token in node.tokens ) - def main(): """Parse command line, parse input, analyze, write output.""" args = arg_parser.parse_args() # Prints message and sys.exit(2) on error @@ -1253,6 +1559,12 @@ def main(): sys.exit(f"Found {a.errors} errors") a.write_instructions() # Raises OSError if output can't be written a.write_metadata() + # a.output_filename = TIER2_MACRO_TO_MICRO_MAP_OUTPUT + # a.write_macromap_and_typedata() + + # Quick hack. 
@TODO refactor + a.output_filename = TIER2_TYPE_PROPAGATOR_OUTPUT + a.write_typepropagator() if __name__ == "__main__": diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 7bf45a350bc84b..5c87a2b85a2dd6 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -1,8 +1,9 @@ """Parser for bytecodes.inst.""" from dataclasses import dataclass, field -from typing import NamedTuple, Callable, TypeVar, Literal +from typing import NamedTuple, Callable, TypeVar, Literal, get_args, TypeAlias +import re import lexer as lx from plexer import PLexer @@ -67,14 +68,54 @@ class Block(Node): pass +@dataclass +class TypeSrcLiteral(Node): + literal: str + + +@dataclass +class TypeSrcConst(Node): + index: str + + +@dataclass +class TypeSrcLocals(Node): + index: str + + +@dataclass +class TypeSrcStackInput(Node): + name: str + + +TypeSrc: TypeAlias = ( + TypeSrcLiteral + | TypeSrcConst + | TypeSrcLocals + | TypeSrcStackInput +) + +@dataclass +class TypeOperation(Node): + op: Literal["TYPE_SET", "TYPE_OVERWRITE"] + src: TypeSrc + +@dataclass +class TypeAnnotation(Node): + ops: tuple[TypeOperation] + @dataclass class StackEffect(Node): name: str type: str = "" # Optional `:type` + type_annotation: TypeAnnotation | None = None # Default is None cond: str = "" # Optional `if (cond)` size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond + def __eq__(self, other: 'StackEffect') -> bool: + return self.name == other.name + @dataclass class Expression(Node): @@ -87,6 +128,12 @@ class CacheEffect(Node): size: int +@dataclass +class LocalEffect(Node): + index: str + value: TypeSrc + + @dataclass class OpName(Node): name: str @@ -97,25 +144,48 @@ class OpName(Node): UOp = OpName | CacheEffect +# Note: A mapping of macro_inst -> u_inst+ is created later. +INST_KINDS: TypeAlias = Literal[ + # Legacy means no (inputs -- outputs) + "legacy", + # This generates an instruction definition in the tier 1 and 2 interpreter. + "inst", + # This is a pseudo instruction used only internally by the cases generator. + "op", + # This generates an instruction definition strictly only in the + # tier 1 interpreter. + "macro_inst", + # This generates an instruction definition strictly only in the + # tier 2 interpreter. 
+ "u_inst", +] + +# Remove legacy +INST_LABELS: tuple[INST_KINDS] = get_args(INST_KINDS)[1:] + + @dataclass class InstHeader(Node): override: bool register: bool - kind: Literal["inst", "op", "legacy"] # Legacy means no (inputs -- outputs) + kind: INST_KINDS name: str inputs: list[InputEffect] outputs: list[OutputEffect] + localeffect: LocalEffect | None = None @dataclass class InstDef(Node): override: bool register: bool - kind: Literal["inst", "op", "legacy"] + kind: INST_KINDS name: str inputs: list[InputEffect] outputs: list[OutputEffect] block: Block + u_insts: list[str] + localeffect: LocalEffect | None = None @dataclass @@ -129,7 +199,6 @@ class Macro(Node): name: str uops: list[UOp] - @dataclass class Family(Node): name: str @@ -153,8 +222,14 @@ def definition(self) -> InstDef | Super | Macro | Family | None: def inst_def(self) -> InstDef | None: if hdr := self.inst_header(): if block := self.block(): + u_insts = [] + if hdr.kind == "macro_inst": + for line in block.text.splitlines(): + if m := re.match(r"(\s*)U_INST\((.+)\);\s*$", line): + space, label = m.groups() + u_insts.append(label) return InstDef( - hdr.override, hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block + hdr.override, hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block, u_insts, hdr.localeffect ) raise self.make_syntax_error("Expected block") return None @@ -167,7 +242,7 @@ def inst_header(self) -> InstHeader | None: # TODO: Make INST a keyword in the lexer. override = bool(self.expect(lx.OVERRIDE)) register = bool(self.expect(lx.REGISTER)) - if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"): + if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in INST_LABELS: if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)): name = tkn.text if self.expect(lx.COMMA): @@ -175,6 +250,11 @@ def inst_header(self) -> InstHeader | None: if self.expect(lx.RPAREN): if (tkn := self.peek()) and tkn.kind == lx.LBRACE: return InstHeader(override, register, kind, name, inp, outp) + elif self.expect(lx.COMMA): + leffect = self.local_effect() + if self.expect(lx.RPAREN): + if (tkn := self.peek()) and tkn.kind == lx.LBRACE: + return InstHeader(override, register, kind, name, inp, outp, leffect) elif self.expect(lx.RPAREN) and kind == "inst": # No legacy stack effect if kind is "op". return InstHeader(override, register, "legacy", name, [], []) @@ -205,7 +285,13 @@ def inputs(self) -> list[InputEffect] | None: @contextual def input(self) -> InputEffect | None: - return self.cache_effect() or self.stack_effect() + if r := self.cache_effect(): + return r + r = self.stack_effect() + if r is None: return r + assert r.type_annotation is None, \ + "Type annotations aren't allowed in input stack effect." 
+ return r def outputs(self) -> list[OutputEffect] | None: # output (, output)* @@ -242,9 +328,12 @@ def stack_effect(self) -> StackEffect | None: # IDENTIFIER [':' IDENTIFIER] ['if' '(' expression ')'] # | IDENTIFIER '[' expression ']' if tkn := self.expect(lx.IDENTIFIER): - type_text = "" + _type = "" + has_type_annotation = False + type_annotation = None if self.expect(lx.COLON): - type_text = self.require(lx.IDENTIFIER).text.strip() + has_type_annotation = True + type_annotation = self.stackvar_typeannotation() cond_text = "" if self.expect(lx.IF): self.require(lx.LPAREN) @@ -254,14 +343,81 @@ def stack_effect(self) -> StackEffect | None: cond_text = cond.text.strip() size_text = "" if self.expect(lx.LBRACKET): - if type_text or cond_text: + # TODO: Support type annotation for size output + if has_type_annotation or cond_text: raise self.make_syntax_error("Unexpected [") if not (size := self.expression()): raise self.make_syntax_error("Expected expression") self.require(lx.RBRACKET) - type_text = "PyObject **" + _type = "PyObject **" size_text = size.text.strip() - return StackEffect(tkn.text, type_text, cond_text, size_text) + return StackEffect(tkn.text, _type, type_annotation, cond_text, size_text) + + @contextual + def stackvar_typesrc(self) -> TypeSrc | None: + if id := self.expect(lx.IDENTIFIER): + idstr = id.text.strip() + if not self.expect(lx.LBRACKET): + return TypeSrcLiteral(idstr) + if idstr not in ["locals", "consts"]: return + if id := self.expect(lx.IDENTIFIER): + index = id.text.strip() + self.require(lx.RBRACKET) + if idstr == "locals": + return TypeSrcLocals(index) + return TypeSrcConst(index) + elif self.expect(lx.TIMES): + id = self.require(lx.IDENTIFIER) + return TypeSrcStackInput(id.text.strip()) + + @contextual + def stackvar_typeoperation(self) -> TypeOperation | None: + if self.expect(lx.LSHIFTEQUAL): + src = self.stackvar_typesrc() + if src is None: return None + return TypeOperation("TYPE_SET", src) + src = self.stackvar_typesrc() + if src is None: return None + return TypeOperation("TYPE_OVERWRITE", src) + + @contextual + def stackvar_typeannotation(self) -> TypeAnnotation | None: + ops = [] + if self.expect(lx.LBRACE): + while True: + typ = self.stackvar_typeoperation() + ops.append(typ) + if typ is None: return None + if self.expect(lx.RBRACE): + break + self.require(lx.COMMA) + else: + typ = self.stackvar_typeoperation() + if typ is None: return None + ops.append(typ) + return TypeAnnotation(tuple(ops)) + + @contextual + def local_effect(self) -> LocalEffect | None: + if tok := self.expect(lx.IDENTIFIER): + if tok.text.strip() != "locals": + return + self.require(lx.LBRACKET) + if id := self.expect(lx.IDENTIFIER): + index = id.text.strip() + self.require(lx.RBRACKET) + self.require(lx.EQUALS) + if self.expect(lx.TIMES): # stackvar + value = self.require(lx.IDENTIFIER).text.strip() + return LocalEffect( + index, + TypeSrcStackInput(value) + ) + value = self.require(lx.IDENTIFIER).text.strip() + return LocalEffect( + index, + TypeSrcLiteral(value) + ) @contextual def expression(self) -> Expression | None: @@ -344,6 +500,16 @@ def uop(self) -> UOp | None: else: return OpName(tkn.text) + def u_insts(self) -> list[str] | None: + if tkn := self.expect(lx.IDENTIFIER): + u_insts = [tkn.text] + while self.expect(lx.PLUS): + if tkn := self.expect(lx.IDENTIFIER): + u_insts.append(tkn.text) + else: + raise self.make_syntax_error("Expected op name") + return u_insts + @contextual def family_def(self) -> Family | None: if (tkn := self.expect(lx.IDENTIFIER)) and 
tkn.text == "family": diff --git a/bm_float_unboxed.py b/bm_float_unboxed.py new file mode 100644 index 00000000000000..0c79fcd6311c8e --- /dev/null +++ b/bm_float_unboxed.py @@ -0,0 +1,17 @@ +import time + +def f(a, b, loops=200_000_000): + z = a + b + for _ in range(loops): + z + z + z + z + z + z + z + z + z + z + z + z + z + z + z + z + +# Warmup +f(1.0 , 2.0, 64) +f(1.0 , 2.0, 64) + +# Running the actual benchmark + +print("Starting benchmark...") +start = time.perf_counter() +f(1.0 , 2.0) +print("Time taken is", time.perf_counter() - start, "s") \ No newline at end of file diff --git a/bm_nbody.py b/bm_nbody.py new file mode 100644 index 00000000000000..f9642bb9480318 --- /dev/null +++ b/bm_nbody.py @@ -0,0 +1,149 @@ +""" +N-body benchmark from the Computer Language Benchmarks Game. + +This is intended to support Unladen Swallow's pyperf.py. Accordingly, it has been +modified from the Shootout version: +- Accept standard Unladen Swallow benchmark options. +- Run report_energy()/advance() in a loop. +- Reimplement itertools.combinations() to work with older Python versions. + +Pulled from: +http://benchmarksgame.alioth.debian.org/u64q/program.php?test=nbody&lang=python3&id=1 + +Contributed by Kevin Carson. +Modified by Tupteq, Fredrik Johansson, and Daniel Nanz. +""" + +import time + +__contact__ = "collinwinter@google.com (Collin Winter)" +DEFAULT_ITERATIONS = 100 +DEFAULT_REFERENCE = 'sun' + + +def combinations(l): + """Pure-Python implementation of itertools.combinations(l, 2).""" + result = [] + for x in range(len(l) - 1): + ls = l[x + 1:] + for y in ls: + result.append((l[x], y)) + return result + + +PI = 3.14159265358979323 +SOLAR_MASS = 4 * PI * PI +DAYS_PER_YEAR = 365.24 + +BODIES = { + 'sun': ([0.0, 0.0, 0.0], [0.0, 0.0, 0.0], SOLAR_MASS), + + 'jupiter': ([4.84143144246472090e+00, + -1.16032004402742839e+00, + -1.03622044471123109e-01], + [1.66007664274403694e-03 * DAYS_PER_YEAR, + 7.69901118419740425e-03 * DAYS_PER_YEAR, + -6.90460016972063023e-05 * DAYS_PER_YEAR], + 9.54791938424326609e-04 * SOLAR_MASS), + + 'saturn': ([8.34336671824457987e+00, + 4.12479856412430479e+00, + -4.03523417114321381e-01], + [-2.76742510726862411e-03 * DAYS_PER_YEAR, + 4.99852801234917238e-03 * DAYS_PER_YEAR, + 2.30417297573763929e-05 * DAYS_PER_YEAR], + 2.85885980666130812e-04 * SOLAR_MASS), + + 'uranus': ([1.28943695621391310e+01, + -1.51111514016986312e+01, + -2.23307578892655734e-01], + [2.96460137564761618e-03 * DAYS_PER_YEAR, + 2.37847173959480950e-03 * DAYS_PER_YEAR, + -2.96589568540237556e-05 * DAYS_PER_YEAR], + 4.36624404335156298e-05 * SOLAR_MASS), + + 'neptune': ([1.53796971148509165e+01, + -2.59193146099879641e+01, + 1.79258772950371181e-01], + [2.68067772490389322e-03 * DAYS_PER_YEAR, + 1.62824170038242295e-03 * DAYS_PER_YEAR, + -9.51592254519715870e-05 * DAYS_PER_YEAR], + 5.15138902046611451e-05 * SOLAR_MASS)} + + +SYSTEM = list(BODIES.values()) +PAIRS = combinations(SYSTEM) + + +def advance(dt, n, bodies=SYSTEM, pairs=PAIRS): + for i in range(n): + for (([x1, y1, z1], v1, m1), + ([x2, y2, z2], v2, m2)) in pairs: + dx = x1 - x2 + dy = y1 - y2 + dz = z1 - z2 + mag = dt * ((dx * dx + dy * dy + dz * dz) ** (-1.5)) + b1m = m1 * mag + b2m = m2 * mag + v1[0] -= dx * b2m + v1[1] -= dy * b2m + v1[2] -= dz * b2m + v2[0] += dx * b1m + v2[1] += dy * b1m + v2[2] += dz * b1m + for (r, [vx, vy, vz], m) in bodies: + r[0] += dt * vx + r[1] += dt * vy + r[2] += dt * vz + + +def report_energy(bodies=SYSTEM, pairs=PAIRS, e=0.0): + for (((x1, y1, z1), v1, m1), + ((x2, y2, z2), v2, m2)) in pairs: + dx = 
x1 - x2 + dy = y1 - y2 + dz = z1 - z2 + e -= (m1 * m2) / ((dx * dx + dy * dy + dz * dz) ** 0.5) + for (r, [vx, vy, vz], m) in bodies: + e += m * (vx * vx + vy * vy + vz * vz) / 2. + return e + + +def offset_momentum(ref, bodies=SYSTEM, px=0.0, py=0.0, pz=0.0): + for (r, [vx, vy, vz], m) in bodies: + px -= vx * m + py -= vy * m + pz -= vz * m + (r, v, m) = ref + v[0] = px / m + v[1] = py / m + v[2] = pz / m + + +def bench_nbody(loops, reference, iterations): + # Set up global state + offset_momentum(BODIES[reference]) + + range_it = range(loops) + t0 = time.perf_counter() + + for _ in range_it: + # report_energy() + advance(0.01, iterations) + # report_energy() + + return time.perf_counter() - t0 + + +def add_cmdline_args(cmd, args): + cmd.extend(("--iterations", str(args.iterations))) + + +if __name__ == '__main__': + # Warmup + bench_nbody(128, DEFAULT_REFERENCE, 128) + # Showtime + print("Starting benchmark...") + taken = bench_nbody(DEFAULT_ITERATIONS, DEFAULT_REFERENCE, 50_000) + print("Time taken is", taken, "s") + \ No newline at end of file diff --git a/report/CPython_Tier_2_LBBV_Report_For_Repo.pdf b/report/CPython_Tier_2_LBBV_Report_For_Repo.pdf new file mode 100644 index 00000000000000..9b018fc84fec2d Binary files /dev/null and b/report/CPython_Tier_2_LBBV_Report_For_Repo.pdf differ diff --git a/tier2_results/cpython/7703def37e4fa7d25c3d23756de8f527daa4e165_bm_float_unboxed.txt b/tier2_results/cpython/7703def37e4fa7d25c3d23756de8f527daa4e165_bm_float_unboxed.txt new file mode 100644 index 00000000000000..6d7260a34045bd --- /dev/null +++ b/tier2_results/cpython/7703def37e4fa7d25c3d23756de8f527daa4e165_bm_float_unboxed.txt @@ -0,0 +1,2 @@ +Starting benchmark... +Time taken is 21.911415401999875 s diff --git a/tier2_results/cpython/7703def37e4fa7d25c3d23756de8f527daa4e165_bm_nbody.txt b/tier2_results/cpython/7703def37e4fa7d25c3d23756de8f527daa4e165_bm_nbody.txt new file mode 100644 index 00000000000000..20a86e19604e7b --- /dev/null +++ b/tier2_results/cpython/7703def37e4fa7d25c3d23756de8f527daa4e165_bm_nbody.txt @@ -0,0 +1,2 @@ +Starting benchmark... +Time taken is 30.735117989000173 s diff --git a/tier2_results/pylbbv/3d44cb4dcbda80c750e292a9598cfddb41a92aff_bm_float_unboxed.txt b/tier2_results/pylbbv/3d44cb4dcbda80c750e292a9598cfddb41a92aff_bm_float_unboxed.txt new file mode 100644 index 00000000000000..09c519cefe8e5e --- /dev/null +++ b/tier2_results/pylbbv/3d44cb4dcbda80c750e292a9598cfddb41a92aff_bm_float_unboxed.txt @@ -0,0 +1,2 @@ +Starting benchmark... +Time taken is 13.180637167999976 s diff --git a/tier2_results/pylbbv/3d44cb4dcbda80c750e292a9598cfddb41a92aff_bm_nbody.txt b/tier2_results/pylbbv/3d44cb4dcbda80c750e292a9598cfddb41a92aff_bm_nbody.txt new file mode 100644 index 00000000000000..6192c2d72646ee --- /dev/null +++ b/tier2_results/pylbbv/3d44cb4dcbda80c750e292a9598cfddb41a92aff_bm_nbody.txt @@ -0,0 +1,2 @@ +Starting benchmark... 
+Time taken is 30.82709504000013 s diff --git a/tier2_test.py b/tier2_test.py new file mode 100644 index 00000000000000..50de7692fcbf70 --- /dev/null +++ b/tier2_test.py @@ -0,0 +1,423 @@ +import dis + +print("Begin tests...") + +######### +# Utils # +######### + +def trigger_tier2(f, args): + for _ in range(64): + f(*args) + +def writeinst(opc:str, arg:int=0): + + "Makes life easier in writing python bytecode" + + nb = max(1,-(-arg.bit_length()//8)) + ab = arg.to_bytes(nb, 'big') + ext_arg = dis._all_opmap['EXTENDED_ARG'] + inst = bytearray() + for i in range(nb-1): + inst.append(ext_arg) + inst.append(ab[i]) + inst.append(dis._all_opmap[opc]) + inst.append(ab[-1]) + + return bytes(inst) + + +################################################ +# Type prop tests: TYPE_SET and TYPE_OVERWRITE # +################################################ + +def test_typeprop1(a): + # Dummy code won't be ran + return a+(a+(a+a)) + +bytecode = b"".join([ + # Tests TYPE_SET and TYPE_OVERWRITE + writeinst("RESUME", 0), + writeinst("LOAD_FAST", 0), + writeinst("COPY", 1), + writeinst("COPY", 1), + writeinst("BINARY_OP", 0), + writeinst("CACHE", 0), # For tier1 + writeinst("BINARY_OP", 0), + writeinst("CACHE", 0), # For tier1 + writeinst("RETURN_VALUE", 0) +]) + +# Switch to bytecode +test_typeprop1.__code__ = test_typeprop1.__code__.replace(co_code=bytecode) + +trigger_tier2(test_typeprop1, (0,)) +expected = [ + "RESUME_QUICK", + "LOAD_FAST", # Load locals + "COPY", + "COPY", # Copy variable on stack + # All stack variables part of the tree + "BINARY_CHECK_FLOAT", + "NOP", # Space for an EXTENDED_ARG if needed + "BB_BRANCH_IF_FLAG_SET", + + # This should let the typeprop know all the locals and stack be int + # TYPE_SET + # Locals: [int] + # Stack : [int->locals[0], int->stack[0], int->stack[1]] + "BINARY_CHECK_INT", + "NOP", + "BB_BRANCH_IF_FLAG_UNSET", # Fallthrough! 
+ + # Should propagate the result as int + # TYPE_OVERWRITE + # Locals: [int] + # Stack : [int->locals[0], int] + "BINARY_OP_ADD_INT_REST", + + # There should be no more guards here + # if the type propagator is working + "BINARY_OP_ADD_INT_REST", + "RETURN_VALUE" +] +insts = dis.get_instructions(test_typeprop1, tier2=True) +for x,y in zip(insts, expected): + assert x.opname == y + +################################################ +# Type prop tests: TYPE_SWAP # +################################################ + +bytecode = b"".join([ + # Tests TYPE_SWAP + writeinst("RESUME", 0), + writeinst("LOAD_FAST", 0), # float + writeinst("LOAD_FAST", 1), # int + writeinst("SWAP", 2), # Stack: [int, float] + + writeinst("COPY", 1), + # Should generate the FLOAT specialisation + writeinst("BINARY_OP", 0), + writeinst("CACHE", 0), # For tier1 + + writeinst("SWAP", 2), # [float, int] + writeinst("COPY", 1), + # Should generate the INT specialisation + writeinst("BINARY_OP", 0), + writeinst("CACHE", 0), # For tier1 + + # float + int + writeinst("BINARY_OP", 0), + writeinst("CACHE", 0), # For tier1 + writeinst("RETURN_VALUE", 0) +]) + +def test_typeprop2(a,b): + # Dummy code won't be ran + return a+(a+(a+a)) + +# Switch to bytecode +test_typeprop2.__code__ = test_typeprop2.__code__.replace(co_code=bytecode) +test_typeprop2(0.1,1) + +trigger_tier2(test_typeprop2, (0.1,1)) +expected = [ + "RESUME_QUICK", + "LOAD_FAST", + "LOAD_FAST", + "SWAP", + "COPY", + + # Should gen specialised float + "BINARY_CHECK_FLOAT", + "NOP", + "BB_BRANCH_IF_FLAG_UNSET", + "BINARY_OP_ADD_FLOAT_UNBOXED", + "SWAP", + "COPY", + + # Ladder of types guards + "BINARY_CHECK_FLOAT", + "NOP", + "BB_BRANCH_IF_FLAG_SET", + + # Should gen specialised int + "BINARY_CHECK_INT", + "NOP", + "BB_BRANCH_IF_FLAG_UNSET", + "BINARY_OP_ADD_INT_REST", + # Don't care about the rest of the insts +] +insts = dis.get_instructions(test_typeprop2, tier2=True) +# Assert the value is correct +assert abs(test_typeprop2(0.1,1) - 2.2) < 0.001 +for x,y in zip(insts, expected): + assert x.opname == y + + +####################################### +# Tests for: Type guard # +# + Float unboxing # +# + Jump rewriting test # +# + Tier2 guard stability # +####################################### + +def test_guard_elimination(a,b): + x = b + y = b + # First a+x should inform the type prop that + # `a`, `x`, `b` and `y` are int + # So guard should be eliminated in (a+x) + y + return a + x + y + +trigger_tier2(test_guard_elimination, (0,0)) +expected = [ + # From tier1 bytecode + "RESUME_QUICK", + "LOAD_FAST", + "STORE_FAST", + "LOAD_FAST", + "STORE_FAST", + "LOAD_FAST", + "LOAD_FAST", + + "BINARY_CHECK_FLOAT", # First ladder check + "NOP", + "BB_BRANCH_IF_FLAG_SET", + "BINARY_CHECK_INT", # Second ladder check + "NOP", + "BB_BRANCH_IF_FLAG_UNSET", # Fall through! 
+ + "BINARY_OP_ADD_INT_REST", # a+x + "LOAD_FAST", + "BINARY_OP_ADD_INT_REST", # (a+x) + y (guard eliminated) + "RETURN_VALUE" +] +insts = dis.get_instructions(test_guard_elimination, tier2=True) +for x,y in zip(insts, expected): + assert x.opname == y + +# We only wanna test the stability of the first type guards +# later on +first_guard_test_until = insts[-1].offset + +# Trigger generation of other branch +test_guard_elimination(0.1, 0.1) +insts = dis.get_instructions(test_guard_elimination, tier2=True) +expected = [ + # From tier1 bytecode + "RESUME_QUICK", + "LOAD_FAST", + "STORE_FAST", + "LOAD_FAST", + "STORE_FAST", + "LOAD_FAST", + "LOAD_FAST", + + "BINARY_CHECK_FLOAT", # First ladder check + "NOP", + "BB_JUMP_IF_FLAG_SET", # Rewrite to jump to float case + "POP_TOP", # Pop result + + # The same as above + "BINARY_CHECK_INT", + "NOP", + "BB_BRANCH_IF_FLAG_UNSET", + "BINARY_OP_ADD_INT_REST", + "LOAD_FAST", + "BINARY_OP_ADD_INT_REST", + "RETURN_VALUE", + + # Float case + "BINARY_OP_ADD_FLOAT_UNBOXED", # Unbox + "LOAD_FAST", + "UNBOX_FLOAT", # Unbox local + "STORE_FAST_UNBOXED_BOXED", # Store unboxed float into local + "LOAD_FAST_NO_INCREF", # Load (unboxed) local again + "BINARY_OP_ADD_FLOAT_UNBOXED", # No type guard here + "BOX_FLOAT", # Box to return + "RETURN_VALUE" +] + +test_guard_elimination(1,1) +for x,y in zip(insts, expected): + assert x.opname == y + +# Perform other polymorphism stuff +# We've not implemented type guard elimination +# For these mixed types (e.g., float+int) +# So these will generate more type guards with the same +# mechanisms as above. +# So codegen wise tier2 takes a while to stabilise +assert (test_guard_elimination(1,0.1) - 1.2) < 0.001 +assert (test_guard_elimination(0.1,1) - 2.1) < 0.001 +assert (test_guard_elimination(.4,.5) - 1.4) < 0.001 +assert test_guard_elimination(2,3) == 8 + +# At this point all cases should be generated +# so check if the generated cases are the same +expected = dis.get_instructions(test_guard_elimination, tier2=True) +test_guard_elimination(-192,203) +test_guard_elimination(2.3, 12) +test_guard_elimination(324, 0.12) +test_guard_elimination(0.12,32.1) +insts = dis.get_instructions(test_guard_elimination, tier2=True) + +# Make sure the first type guard is stable +for x,y in zip(insts, expected): + if x.offset >= first_guard_test_until: + break + assert x.opname == y.opname + + +############################## +# Test: Backward jump offset # +############################## + +def test_backwards_jump(a): + for i in range(64): + a = i + a + return a + +# Trigger only one JUMP_BACKWARD_QUICK +# i.e., perfect specialisation the first time +trigger_tier2(test_backwards_jump, (0,)) + +# Make sure it looped 64 times +assert test_backwards_jump(7) == 2023 # <-- Hi! 
~ Jules + +# Make sure it jumped to the correct spot +insts = dis.get_instructions(test_backwards_jump, tier2=True) +backwards_jump = next(x for x in insts if x.opname == "JUMP_BACKWARD_QUICK") +instidx, jmp_target = next((i,x) for i,x in enumerate(insts) if x.offset == backwards_jump.argval) +assert jmp_target.opname == "NOP" # Space for an EXTENDED_ARG +assert insts[instidx + 1].opname == "BB_TEST_ITER_RANGE" # The loop predicate + + +###################### +# Test: Loop peeling # +###################### + +def test_loop_peeling(a): + for i in range(64): + a = float(i) + a + return a + +# This triggers loop peeling, because +# the first iteration `a` type is int +# and the 2nd iteration `a` type is float +# This should triger a JUMP_FORWARD in place of +# a JUMP_BACKWARD_QUICK +trigger_tier2(test_loop_peeling, (0,)) + +# Make sure it looped 64 times +assert abs(test_loop_peeling(7) - 2023) < 0.001 + +# Make sure the JUMP_FORWARD jumped correctly +insts = dis.get_instructions(test_loop_peeling, tier2=True) +forwards_jump = next(x for x in insts if x.opname == "JUMP_FORWARD") +instidx, jmp_target = next((i,x) for i,x in enumerate(insts) if x.offset == forwards_jump.argval) +assert jmp_target.opname == "NOP" # Space for an EXTENDED_ARG +assert insts[instidx + 1].opname == "BB_TEST_ITER_RANGE" # The loop predicate + +# We also need to make sure JUMP_FORWARD +# jumped into the float-specialised loop body +endidx, _ = next( + (i,x) for i,x in enumerate(insts) + if (x.opname == "JUMP_BACKWARD_QUICK" and x.offset > jmp_target.offset)) +# Check for existence of float-specialised instruction in loop body +assert any(1 for _ in + filter(lambda i: i.opname == 'BINARY_OP_ADD_FLOAT_UNBOXED', insts[instidx:endidx])) + + +################################## +# Test: Container specialisation # +################################## + +def test_container(l): + l[2] = l[0] + l[1] + + +trigger_tier2(test_container, ([1,2,3,4],)) +insts = dis.get_instructions(test_container, tier2=True) +expected = [ + "RESUME_QUICK", + "LOAD_FAST", + "LOAD_CONST", + + "CHECK_LIST", + "NOP", + "BB_BRANCH_IF_FLAG_UNSET", # Fallthrough! 
+ + # Type prop from const array: No type guard needed + "BINARY_SUBSCR_LIST_INT_REST", + "LOAD_FAST", + "LOAD_CONST", + # CHECK_LIST should eliminate the type guard here + "BINARY_SUBSCR_LIST_INT_REST", + + # We haven't implemented type prop into container types + # so these checks should get generated + "BINARY_CHECK_FLOAT", + "NOP", + "BB_BRANCH_IF_FLAG_SET", + "BINARY_CHECK_INT", + "NOP", + "BB_BRANCH_IF_FLAG_UNSET", + "BINARY_OP_ADD_INT_REST", + + "LOAD_FAST", + "LOAD_CONST", + # CHECK_LIST should eliminate the type guard here + "STORE_SUBSCR_LIST_INT_REST", + "RETURN_CONST", +] +for x,y in zip(insts, expected): + assert x.opname == y + +#################################################### +# Tests for: Tier 2 BB_TEST_ITER specialisation # +#################################################### + +lst = [1, 2, 3] +def test_iter_list(a): + for i in lst: + a = i + a + return a + +# Trigger only one JUMP_BACKWARD_QUICK +# i.e., perfect specialisation the first time +trigger_tier2(test_iter_list, (0,)) + +# Make sure it looped 64 times +assert test_iter_list(0) == 6 + +# Make sure it jumped to the correct spot +insts = dis.get_instructions(test_iter_list, tier2=True) +backwards_jump = next(x for x in insts if x.opname == "JUMP_BACKWARD_QUICK") +instidx, jmp_target = next((i,x) for i,x in enumerate(insts) if x.offset == backwards_jump.argval) +assert jmp_target.opname == "NOP" # Space for an EXTENDED_ARG +assert insts[instidx + 1].opname == "BB_TEST_ITER_LIST" # The loop predicate + + +def test_iter_tuple(a): + for i in (1, 2, 3): + a = i + a + return a + +# Trigger only one JUMP_BACKWARD_QUICK +# i.e., perfect specialisation the first time +trigger_tier2(test_iter_tuple, (0,)) + +# Make sure it looped 64 times +assert test_iter_tuple(0) == 6 + +# Make sure it jumped to the correct spot +insts = dis.get_instructions(test_iter_tuple, tier2=True) +backwards_jump = next(x for x in insts if x.opname == "JUMP_BACKWARD_QUICK") +instidx, jmp_target = next((i,x) for i,x in enumerate(insts) if x.offset == backwards_jump.argval) +assert jmp_target.opname == "NOP" # Space for an EXTENDED_ARG +assert insts[instidx + 1].opname == "BB_TEST_ITER_TUPLE" # The loop predicate + +print("Tests completed") \ No newline at end of file
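
Note: the `Tools/cases_generator/parser.py` changes above extend the instruction-definition DSL with output type annotations and local effects. Below is a minimal illustrative sketch of the accepted surface syntax, inferred from `stack_effect()`, `stackvar_typeannotation()` and `local_effect()`; the instruction names and bodies are hypothetical examples, not definitions taken from this patch. Brace-grouped annotation lists and the `<<=` form (which selects TYPE_SET instead of TYPE_OVERWRITE) are also accepted by the parser.

```c
// Output annotated with a concrete type: the generated type propagator
// emits TYPE_OVERWRITE with a TYPE_ROOT node for PyLong_Type.
inst(EXAMPLE_ADD_INT, (left, right -- res: PyLong_Type)) {
    /* body elided */
}

// Local effect: after this instruction, locals[oparg] shares the type
// node of the consumed stack input `value`.
inst(EXAMPLE_STORE_FAST, (value --), locals[oparg] = *value) {
    /* body elided */
}
```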