Skip to content

bpo-45219: Factor dictkey indexing #28389

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Include/cpython/dictobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,6 @@ PyAPI_FUNC(PyObject *) _PyDictView_Intersect(PyObject* self, PyObject *other);

/* Gets a version number unique to the current state of the keys of dict, if possible.
* Returns the version number, or zero if it was not possible to get a version number. */
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict);
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys);

Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
126 changes: 84 additions & 42 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,73 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
Py_UNREACHABLE();
}

static Py_ssize_t
dictkeys_stringlookup(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{
PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
size_t mask = DK_MASK(dk);
size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
for (;;) {
ix = dictkeys_get_index(dk, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
return ix;
}
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
ix = dictkeys_get_index(dk, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
return ix;
}
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
}

/* Lookup a string in a (all unicode) dict keys.
* Returns DKIX_ERROR if key is not a string,
* or if the dict keys is not all strings.
* If the keys is present then return the index of key.
* If the key is not present then return DKIX_EMPTY.
*/
Py_ssize_t
_PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key)
{
DictKeysKind kind = dk->dk_kind;
if (!PyUnicode_CheckExact(key) || kind == DICT_KEYS_GENERAL) {
return DKIX_ERROR;
}
Py_hash_t hash = ((PyASCIIObject *)key)->hash;
if (hash == -1) {
hash = PyUnicode_Type.tp_hash(key);
if (hash == -1) {
PyErr_Clear();
return DKIX_ERROR;
}
}
return dictkeys_stringlookup(dk, key, hash);
}

/*
The basic lookup function used by all operations.
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
Expand All @@ -756,49 +823,24 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
start:
dk = mp->ma_keys;
DictKeysKind kind = dk->dk_kind;
if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
Py_ssize_t ix = dictkeys_stringlookup(dk, key, hash);
if (ix == DKIX_EMPTY) {
*value_addr = NULL;
}
else if (kind == DICT_KEYS_SPLIT) {
*value_addr = mp->ma_values[ix];
}
else {
*value_addr = DK_ENTRIES(dk)[ix].me_value;
}
return ix;
}
PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
size_t mask = DK_MASK(dk);
size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
/* Strings only */
for (;;) {
ix = dictkeys_get_index(mp->ma_keys, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
goto found;
}
}
else if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
ix = dictkeys_get_index(mp->ma_keys, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
goto found;
}
}
else if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
}
for (;;) {
ix = dictkeys_get_index(dk, i);
if (ix == DKIX_EMPTY) {
Expand Down Expand Up @@ -4997,15 +5039,15 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys)

static uint32_t next_dict_keys_version = 2;

uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict)
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys)
{
if (dict->ma_keys->dk_version != 0) {
return dict->ma_keys->dk_version;
if (dictkeys->dk_version != 0) {
return dictkeys->dk_version;
}
if (next_dict_keys_version == 0) {
return 0;
}
uint32_t v = next_dict_keys_version++;
dict->ma_keys->dk_version = v;
dictkeys->dk_version = v;
return v;
}
39 changes: 19 additions & 20 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ specialize_module_load_attr(
SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_RANGE);
return -1;
}
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict->ma_keys);
if (keys_version == 0) {
SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_VERSIONS);
return -1;
Expand Down Expand Up @@ -601,23 +601,19 @@ specialize_dict_access(
}
// We found an instance with a __dict__.
PyDictObject *dict = (PyDictObject *)*dictptr;
PyDictKeysObject *keys = dict->ma_keys;
if ((type->tp_flags & Py_TPFLAGS_HEAPTYPE)
&& dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys
&& keys == ((PyHeapTypeObject*)type)->ht_cached_keys
) {
// Keys are shared
assert(PyUnicode_CheckExact(name));
Py_hash_t hash = PyObject_Hash(name);
if (hash == -1) {
return -1;
}
PyObject *value;
Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &value);
Py_ssize_t index = _PyDictKeys_StringLookup(keys, name);
assert (index != DKIX_ERROR);
if (index != (uint16_t)index) {
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
return 0;
}
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(keys);
if (keys_version == 0) {
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_VERSIONS);
return 0;
Expand Down Expand Up @@ -966,7 +962,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
goto fail;
}
}
keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict);
keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict->ma_keys);
if (keys_version == 0) {
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
Expand Down Expand Up @@ -1021,17 +1017,17 @@ _Py_Specialize_LoadGlobal(
if (!PyDict_CheckExact(globals)) {
goto fail;
}
if (((PyDictObject *)globals)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
PyDictKeysObject * globals_keys = ((PyDictObject *)globals)->ma_keys;
Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name);
if (index == DKIX_ERROR) {
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT);
goto fail;
}
PyObject *value = NULL;
Py_ssize_t index = _PyDict_GetItemHint((PyDictObject *)globals, name, -1, &value);
assert (index != DKIX_ERROR);
if (index != DKIX_EMPTY) {
if (index != (uint16_t)index) {
goto fail;
}
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(globals_keys);
if (keys_version == 0) {
goto fail;
}
Expand All @@ -1043,20 +1039,23 @@ _Py_Specialize_LoadGlobal(
if (!PyDict_CheckExact(builtins)) {
goto fail;
}
if (((PyDictObject *)builtins)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
PyDictKeysObject * builtin_keys = ((PyDictObject *)builtins)->ma_keys;
index = _PyDictKeys_StringLookup(builtin_keys, name);
if (index == DKIX_ERROR) {
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT);
goto fail;
}
index = _PyDict_GetItemHint((PyDictObject *)builtins, name, -1, &value);
assert (index != DKIX_ERROR);
if (index != (uint16_t)index) {
goto fail;
}
uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState(globals_keys);
if (globals_version == 0) {
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins);
uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState(builtin_keys);
if (builtins_version == 0) {
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
cache1->module_keys_version = globals_version;
Expand Down