Skip to content

bpo-44525: Specialize CALL_FUNCTION for C function calls #26934

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 55 commits into from
Oct 19, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
5e73b74
WIP: Specialize CALL_FUNCTION for builtins
Fidget-Spinner Jun 26, 2021
1539105
fix some GCC compilation warnings
Fidget-Spinner Jun 26, 2021
68e5451
hopefully fix the segfaults
Fidget-Spinner Jun 26, 2021
1d841b0
Rename to CALL_CFUNCTION and generalize to all c functions
Fidget-Spinner Jun 27, 2021
f41b623
fix formatting, remove redundant check
Fidget-Spinner Jun 27, 2021
de520bd
goto fail rather than return -1
Fidget-Spinner Jun 28, 2021
0e0a3a4
Create 2021-06-28-22-23-59.bpo-44525.sSvUKG.rst
Fidget-Spinner Jun 28, 2021
65de42d
Apply easier suggestions from Mark's review
Fidget-Spinner Jun 29, 2021
685557f
Only specialize METH_FASTCALL and METH_O
Fidget-Spinner Jun 30, 2021
5baa936
Update 2021-06-28-22-23-59.bpo-44525.sSvUKG.rst
Fidget-Spinner Jun 30, 2021
bc69360
turn off specialization stats flag
Fidget-Spinner Jun 30, 2021
8671a60
Apply suggestions by Mark
Fidget-Spinner Jul 3, 2021
a8b8b4f
reduce diff
Fidget-Spinner Jul 3, 2021
736d9af
use PyMapping_HasKeyString since PyDict_GetItemString is discouraged
Fidget-Spinner Jul 3, 2021
2e3195d
fix reference leak
Fidget-Spinner Jul 3, 2021
d8b3a09
remove unused variable, add more specialization fails
Fidget-Spinner Jul 3, 2021
25b002c
don't allow specialized function calls when tracing
Fidget-Spinner Jul 3, 2021
557e4bc
deopt when tracing
Fidget-Spinner Jul 3, 2021
e554d64
Merge remote-tracking branch 'upstream/main' into call_function_speci…
Fidget-Spinner Jul 8, 2021
ea0d432
apply mark's comments
Fidget-Spinner Jul 8, 2021
97749b7
change deopts to asserts
Fidget-Spinner Jul 8, 2021
feb966a
add blank lines between each case
Fidget-Spinner Jul 13, 2021
9a5a407
Remove CALL_CFUNCTION_FAST
Fidget-Spinner Jul 13, 2021
4dafd8d
Apply Mark's suggestions
Fidget-Spinner Jul 14, 2021
84d2367
delete useless comment, add back useful one
Fidget-Spinner Jul 14, 2021
12a5333
remove complicated checks for classes
Fidget-Spinner Jul 14, 2021
b99f65c
apply suggestions by Mark
Fidget-Spinner Jul 15, 2021
5239342
actually move it into the block this time
Fidget-Spinner Jul 15, 2021
c4d6ca3
Merge remote-tracking branch 'upstream/main' into call_function_speci…
Fidget-Spinner Jul 15, 2021
7250329
Regen opcodes
Fidget-Spinner Jul 15, 2021
c649070
move type earlier
Fidget-Spinner Jul 15, 2021
5dfce16
change to assert
Fidget-Spinner Jul 16, 2021
0da5ed2
increment unquickened stats
Fidget-Spinner Jul 16, 2021
40b919f
Re-add CALL_FUNCTION_BUILTIN_FAST
Fidget-Spinner Jul 16, 2021
75e3540
add check for C methods
Fidget-Spinner Jul 16, 2021
3e2766c
Merge remote-tracking branch 'upstream/main' into call_function_speci…
Fidget-Spinner Jul 27, 2021
c06f5b8
Merge remote-tracking branch 'upstream/main' into call_function_speci…
Fidget-Spinner Sep 15, 2021
3c1129a
fix build errors
Fidget-Spinner Sep 15, 2021
226c591
Add CALL_FUNCTION_LEN
Fidget-Spinner Sep 17, 2021
2dc2738
add CALL_FUNCTION_ISINSTANCE
Fidget-Spinner Sep 17, 2021
7bd4338
fix specialization stats
Fidget-Spinner Sep 17, 2021
e2aada7
Merge remote-tracking branch 'upstream/main' into call_function_speci…
Fidget-Spinner Sep 17, 2021
f8c0957
Merge remote-tracking branch 'upstream/main' into call_function_speci…
Fidget-Spinner Oct 18, 2021
2500ab6
Refactor
Fidget-Spinner Oct 18, 2021
08ef4d8
convert to static
Fidget-Spinner Oct 18, 2021
41f6fa6
fix news and formatting
Fidget-Spinner Oct 18, 2021
8b113d1
remove typo
Fidget-Spinner Oct 18, 2021
9642df5
remove nit
Fidget-Spinner Oct 18, 2021
8a74cff
fix wrong return code
Fidget-Spinner Oct 18, 2021
907c5cb
partly address code review
Fidget-Spinner Oct 18, 2021
3e09485
Exclude function if not collecting stats
Fidget-Spinner Oct 19, 2021
b28d85c
check for error first
Fidget-Spinner Oct 19, 2021
617424b
Record cache hit earlier
Fidget-Spinner Oct 19, 2021
e73b69f
fix isinstance bug
Fidget-Spinner Oct 19, 2021
f191720
apply suggestions from review: move up cache hits
Fidget-Spinner Oct 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ typedef struct {
uint32_t builtin_keys_version;
} _PyLoadGlobalCache;

/* Cache entry used by the CALL_FUNCTION instruction family.
 *
 * It stores the C function pointer that a call site was specialized for.
 * The specializer writes `cfunc`; the specialized instruction compares it
 * with the callable's current function pointer and de-optimizes when they
 * differ.
 */
typedef struct {
union {
PyCFunction cfunc;
/* TODO: add a func_version field for Python function calls. */
uint64_t _; /* Unnamed-purpose filler: forces the union to be 8 bytes wide even where pointers are 32-bit */
};
} _PyCallFunctionCache;

/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
Expand All @@ -68,6 +76,7 @@ typedef union {
_PyAdaptiveEntry adaptive;
_PyLoadAttrCache load_attr;
_PyLoadGlobalCache load_global;
_PyCallFunctionCache call_function;
} SpecializedCacheEntry;

#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
Expand Down Expand Up @@ -319,10 +328,25 @@ cache_backoff(_PyAdaptiveEntry *entry) {
entry->counter = BACKOFF;
}

/* Calling conventions of the C function pointer behind a PyCFunction.
 * See https://docs.python.org/3/c-api/structures.html#implementing-functions-and-methods
 *
 * The specializer stores one of these values in a cache entry and the
 * specialized call instruction switches on it. Valid kinds are strictly
 * positive, so 0 can never be mistaken for a real kind.
 */
typedef enum {
    PYCFUNCTION                     = 1,  /* METH_VARARGS */
    PYCFUNCTION_O                   = 2,  /* METH_O */
    PYCFUNCTION_NOARGS              = 3,  /* METH_NOARGS */
    PYCFUNCTION_WITH_KEYWORDS       = 4,  /* METH_VARARGS | METH_KEYWORDS */
    _PYCFUNCTION_FAST               = 5,  /* METH_FASTCALL */
    _PYCFUNCTION_FAST_WITH_KEYWORDS = 6,  /* METH_FASTCALL | METH_KEYWORDS */
    PYCMETHOD                       = 7,  /* METH_METHOD | METH_FASTCALL | METH_KEYWORDS */
} _BuiltinCallKinds;

/* Specialization functions */

int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
/* Attempt to specialize the CALL_FUNCTION instruction at `instr`.
 *
 *   stack_pointer   - current top of the value stack; the callable is read
 *                     from stack_pointer[-(original_oparg + 1)].
 *   original_oparg  - argument count of the un-quickened CALL_FUNCTION.
 *   instr           - instruction to rewrite when specialization succeeds.
 *   cache           - the instruction's adaptive cache entry; cache[-1] is
 *                     used as the _PyCallFunctionCache.
 *
 * The interpreter treats a negative return value as an error.
 */
int _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg,
_Py_CODEUNIT *instr, SpecializedCacheEntry *cache);

#define SPECIALIZATION_STATS 0
#define SPECIALIZATION_STATS_DETAILED 0
Expand Down
2 changes: 2 additions & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,6 @@ def jabs_op(name, op):
"LOAD_GLOBAL_ADAPTIVE",
"LOAD_GLOBAL_MODULE",
"LOAD_GLOBAL_BUILTIN",
"CALL_FUNCTION_ADAPTIVE",
"CALL_CFUNCTION",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Specialize ``CALL_FUNCTION`` opcode with ``CALL_CFUNCTION``. This speeds up
calls to ``PyCFunctionObject``. As a result, many builtin functions and
C-extension functions should experience reduced call overhead.
115 changes: 115 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ Py_LOCAL_INLINE(PyObject *) call_function(
static PyObject * do_call_core(
PyThreadState *tstate, PyObject *func,
PyObject *callargs, PyObject *kwdict, int use_tracing);
/* Forward declaration of the helper used by the CALL_CFUNCTION instruction.
 * It calls the C function found `oparg + 1` slots below *pp_stack with the
 * `oparg` positional arguments above it, using the call kind recorded in
 * cache0->index, and pops callable and arguments from *pp_stack.
 */
Py_LOCAL_INLINE(PyObject *) call_cfunction(
PyThreadState *tstate,
_PyAdaptiveEntry *cache0,
_PyCallFunctionCache *cache1,
PyObject ***pp_stack,
Py_ssize_t oparg, int use_tracing);

#ifdef LLTRACE
static int lltrace;
Expand Down Expand Up @@ -4066,7 +4072,48 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
CHECK_EVAL_BREAKER();
DISPATCH();
}
case TARGET(CALL_FUNCTION_ADAPTIVE): {
    /* Adaptive CALL_FUNCTION: while the counter is non-zero, count down
       and run the generic instruction; once it reaches zero, ask the
       specializer to rewrite this instruction and execute it again. */
    SpecializedCacheEntry *cache = GET_CACHE();
    _PyAdaptiveEntry *adaptive = &cache->adaptive;
    if (adaptive->counter != 0) {
        STAT_INC(CALL_FUNCTION, deferred);
        adaptive->counter--;
        oparg = adaptive->original_oparg;
        JUMP_TO_INSTRUCTION(CALL_FUNCTION);
    }
    else {
        /* Step back so that DISPATCH() re-executes the instruction the
           specializer leaves behind. */
        next_instr--;
        int status = _Py_Specialize_CallFunction(
            stack_pointer, adaptive->original_oparg, next_instr, cache);
        if (status < 0) {
            goto error;
        }
        DISPATCH();
    }
}
case TARGET(CALL_CFUNCTION): {
    /* Specialized CALL_FUNCTION for exact PyCFunction callables invoked
       WITHOUT keyword arguments. */
    SpecializedCacheEntry *entries = GET_CACHE();
    _PyAdaptiveEntry *adaptive = &entries[0].adaptive;
    _PyCallFunctionCache *target = &entries[-1].call_function;
    PyObject *callable = PEEK(adaptive->original_oparg + 1);

    /* Guards: still an exact PyCFunction wrapping the cached C function. */
    DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION);
    DEOPT_IF(PyCFunction_GET_FUNCTION(callable) != target->cfunc, CALL_FUNCTION);

    PyObject **stack_top = stack_pointer;
    PyObject *result = call_cfunction(tstate, adaptive, target, &stack_top,
                                      adaptive->original_oparg,
                                      cframe.use_tracing);
    stack_pointer = stack_top;
    PUSH(result);
    if (result != NULL) {
        record_cache_hit(adaptive);
        STAT_INC(CALL_FUNCTION, hit);
        DISPATCH();
    }
    /* A NULL result is not a reason to de-optimize: the specialization
       was still correct, the callee simply raised (e.g.
       getattr(x, 'invalid')). An exception is set, so it is handled
       like any other error. */
    goto error;
}
case TARGET(CALL_FUNCTION_KW): {
PyObject **sp, *res, *names;

Expand Down Expand Up @@ -4297,6 +4344,7 @@ opname ## _miss: \

MISS_WITH_CACHE(LOAD_ATTR)
MISS_WITH_CACHE(LOAD_GLOBAL)
/* NOTE(review): presumably provides the miss target that
   DEOPT_IF(..., CALL_FUNCTION) in the CALL_CFUNCTION case jumps to --
   confirm against the MISS_WITH_CACHE definition. */
MISS_WITH_CACHE(CALL_FUNCTION)

error:
/* Double-check exception status. */
Expand Down Expand Up @@ -5872,6 +5920,73 @@ do_call_core(PyThreadState *tstate,
return PyObject_Call(func, callargs, kwdict);
}

/* Fast alternative for non-keyword calls to C functions.
 *
 * Parameters:
 *   tstate       - current thread state (used by tracing and the error
 *                  consistency assertion).
 *   cache0       - adaptive cache entry; cache0->index holds the
 *                  _BuiltinCallKinds value chosen by the specializer.
 *   cache1       - not read here; kept so the signature matches the
 *                  forward declaration and the call site.
 *   pp_stack     - address of the caller's stack pointer. On entry the
 *                  callable sits `oparg + 1` slots below *pp_stack with the
 *                  `oparg` positional arguments above it. On return the
 *                  callable and all arguments have been popped and DECREF'ed.
 *   oparg        - number of positional arguments on the stack.
 *   use_tracing  - when non-zero the call is routed through C_TRACE.
 *
 * Returns the C function's result, or NULL with an exception set.
 *
 * METH_NOARGS functions receive NULL as their second parameter, as the
 * calling convention requires. If the number of stacked arguments does not
 * match a METH_NOARGS (0) or METH_O (1) function, the call is made through
 * PyObject_Vectorcall() so that the usual TypeError is raised instead of
 * arguments being silently ignored or memory past the stack being read.
 */
Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION
call_cfunction(PyThreadState *tstate,
               _PyAdaptiveEntry *cache0,
               _PyCallFunctionCache *cache1,
               PyObject ***pp_stack,
               Py_ssize_t oparg,
               int use_tracing)
{
/* Evaluate `call` into `x`, going through C_TRACE when tracing is active. */
#define MAYBE_TRACE(call) \
    do { \
        if (use_tracing) { \
            C_TRACE(x, call); \
        } \
        else { \
            x = call; \
        } \
    } while (0)

    (void)cache1;

    PyObject **pfunc = (*pp_stack) - oparg - 1;
    PyObject **stack = (*pp_stack) - oparg;
    PyObject *x = NULL;

    PyObject *func = *pfunc;  /* The callable; also needed by tracing. */
    PyObject *self = PyCFunction_GET_SELF(func);
    PyCFunction cfunc = PyCFunction_GET_FUNCTION(func);

    switch ((_BuiltinCallKinds)cache0->index) {
        case PYCFUNCTION_NOARGS:
            if (oparg != 0) {
                /* Wrong arity: let the generic protocol raise TypeError. */
                MAYBE_TRACE(PyObject_Vectorcall(func, stack, (size_t)oparg, NULL));
                break;
            }
            /* There is no argument on the stack; never dereference it. */
            MAYBE_TRACE(cfunc(self, NULL));
            break;

        case PYCFUNCTION_O:
            if (oparg != 1) {
                /* Wrong arity: let the generic protocol raise TypeError. */
                MAYBE_TRACE(PyObject_Vectorcall(func, stack, (size_t)oparg, NULL));
                break;
            }
            MAYBE_TRACE(cfunc(self, stack[0]));
            break;

        case _PYCFUNCTION_FAST:
            MAYBE_TRACE(((_PyCFunctionFast)(void(*)(void))cfunc)(self, stack, oparg));
            break;

        case _PYCFUNCTION_FAST_WITH_KEYWORDS:
            MAYBE_TRACE(((_PyCFunctionFastWithKeywords)(void(*)(void))cfunc)(
                self, stack, oparg, NULL));
            break;

        case PYCFUNCTION: {
            PyObject *args = _PyTuple_FromArray(stack, oparg);
            if (args == NULL) {
                /* x stays NULL; the allocation failure is the exception. */
                break;
            }
            MAYBE_TRACE(cfunc(self, args));
            Py_DECREF(args);
            break;
        }

        case PYCFUNCTION_WITH_KEYWORDS: {
            PyObject *args = _PyTuple_FromArray(stack, oparg);
            if (args == NULL) {
                /* x stays NULL; the allocation failure is the exception. */
                break;
            }
            MAYBE_TRACE(((PyCFunctionWithKeywords)(void(*)(void))cfunc)(self, args, NULL));
            Py_DECREF(args);
            break;
        }

        /* This flag only applies to PyMethodObject.
           We're only optimizing for PyCFunctionObject.
        */
        case PYCMETHOD:
        default:
            Py_UNREACHABLE();
            break;
    }
    /* Exactly one of "has a result" and "has an exception" must hold. */
    assert((x != NULL) ^ (_PyErr_Occurred(tstate) != NULL));

    /* Clear the stack of the arguments and the function object. */
    while ((*pp_stack) > pfunc) {
        PyObject *popped = EXT_POP(*pp_stack);
        Py_DECREF(popped);
    }

#undef MAYBE_TRACE
    return x;
}
/* Extract a slice index from a PyLong or an object with the
nb_index slot defined, and store in *pi.
Silently reduce values larger than PY_SSIZE_T_MAX to PY_SSIZE_T_MAX,
Expand Down
4 changes: 2 additions & 2 deletions Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 70 additions & 0 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,14 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
/* Lookup table, indexed by base opcode, giving that opcode's *_ADAPTIVE
   counterpart. Opcodes that are not listed are implicitly zero.
   NOTE(review): the consumer of this table is not visible in this hunk;
   presumably the quickening pass -- confirm. */
static uint8_t adaptive_opcodes[256] = {
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
[LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
[CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE,
};

/* The number of cache entries required for a "family" of instructions,
   indexed by the family's base opcode. Opcodes that are not listed are
   implicitly zero. Every family listed here uses one _PyAdaptiveEntry plus
   one family-specific entry. */
static uint8_t cache_requirements[256] = {
[LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyCallFunctionCache */
};

/* Return the oparg for the cache_offset and instruction index.
Expand Down Expand Up @@ -633,3 +635,71 @@ _Py_Specialize_LoadGlobal(
cache0->counter = saturating_start();
return 0;
}

/* Try to specialize a CALL_FUNCTION instruction for the callable it is
 * about to invoke.
 *
 *   stack_pointer   - current top of the value stack; the callable is
 *                     stack_pointer[-(original_oparg + 1)].
 *   original_oparg  - number of positional arguments at this call site.
 *   instr           - the instruction to rewrite on success.
 *   cache           - the adaptive entry; cache[-1] is the
 *                     _PyCallFunctionCache that receives the C function.
 *
 * Only exact PyCFunction objects are specialized. On success `*instr`
 * becomes CALL_CFUNCTION, the call kind is stored in the adaptive entry's
 * `index` and the C function pointer in the second cache entry. On failure
 * the instruction is left untouched and the adaptive counter is backed off.
 *
 * METH_O and METH_NOARGS functions are specialized only when the call site
 * passes exactly one / zero arguments. The fast path hands the stack slot
 * straight to the C function, so any other count must keep going through
 * the generic call, which raises the proper TypeError.
 *
 * Always returns 0 and never leaves an exception set.
 *
 * TODO:
 *  - Specialize calling C types like int() with CALL_CTYPE
 *  - Specialize python function calls.
 */
int
_Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg,
                            _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
{
    PyObject *callable = stack_pointer[-(original_oparg + 1)];
    _PyAdaptiveEntry *cache0 = &cache->adaptive;
    _PyCallFunctionCache *cache1 = &cache[-1].call_function;

    /* Specialize C functions */
    if (PyCFunction_CheckExact(callable)) {
        PyCFunctionObject *meth = (PyCFunctionObject *)callable;
        if (meth->m_ml == NULL) {
            goto fail;
        }
        /* Plain int so that the "was a kind selected" assertion below is
           meaningful whatever signedness the enum has; 0 is not a valid
           _BuiltinCallKinds value. */
        int kind = 0;
        switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL |
                METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) {
            case METH_VARARGS:
                kind = PYCFUNCTION;
                break;
            case METH_VARARGS | METH_KEYWORDS:
                kind = PYCFUNCTION_WITH_KEYWORDS;
                break;
            case METH_FASTCALL:
                kind = _PYCFUNCTION_FAST;
                break;
            case METH_FASTCALL | METH_KEYWORDS:
                kind = _PYCFUNCTION_FAST_WITH_KEYWORDS;
                break;
            case METH_NOARGS:
                if (original_oparg != 0) {
                    SPECIALIZATION_FAIL(CALL_FUNCTION, Py_TYPE(callable), callable,
                                        "wrong number of arguments");
                    goto fail;
                }
                kind = PYCFUNCTION_NOARGS;
                break;
            case METH_O:
                if (original_oparg != 1) {
                    SPECIALIZATION_FAIL(CALL_FUNCTION, Py_TYPE(callable), callable,
                                        "wrong number of arguments");
                    goto fail;
                }
                kind = PYCFUNCTION_O;
                break;
            /* This case should never happen with PyCFunctionObject -- only
               PyMethodObject. See zlib.compressobj()'s methods for an example.
               It is deliberately handled like any other unsupported flag
               combination.
            */
            case METH_METHOD | METH_FASTCALL | METH_KEYWORDS:
                /* fall through */
            default:
                SPECIALIZATION_FAIL(CALL_FUNCTION, Py_TYPE(callable), callable,
                                    "bad call flags");
                goto fail;
        }
        assert(kind > 0);
        PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth);
        assert(cfunc != NULL);
        /* Keep the oparg: only the opcode changes. */
        *instr = _Py_MAKECODEUNIT(CALL_CFUNCTION, _Py_OPARG(*instr));
        cache0->index = (uint16_t)kind;
        cache1->cfunc = cfunc;

        STAT_INC(CALL_FUNCTION, specialization_success);
        assert(!PyErr_Occurred());
        cache0->counter = saturating_start();
        return 0;
    }

fail:
    STAT_INC(CALL_FUNCTION, specialization_failure);
    assert(!PyErr_Occurred());
    /* Wait a while before attempting to specialize this site again. */
    cache_backoff(cache0);
    return 0;
}