Skip to content

Commit 064464f

Browse files
authored
bpo-45219: Factor dictkey indexing (GH-28389)
1 parent cb07838 commit 064464f

File tree

3 files changed

+106
-63
lines changed

3 files changed

+106
-63
lines changed

Include/cpython/dictobject.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,6 @@ PyAPI_FUNC(PyObject *) _PyDictView_Intersect(PyObject* self, PyObject *other);
8585

8686
/* Gets a version number unique to the current state of the given dict keys object, if possible.
8787
* Returns the version number, or zero if it was not possible to get a version number. */
88-
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict);
88+
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys);
89+
90+
Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);

Objects/dictobject.c

Lines changed: 84 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,73 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
734734
Py_UNREACHABLE();
735735
}
736736

737+
/* Search the index table of `dk` for `key`, using the caller-supplied `hash`.
 *
 * Probing starts at slot (hash & mask) and moves to the next slot with
 * i = mask & (i*5 + perturb + 1), shifting `perturb` right by PERTURB_SHIFT
 * before each step.
 *
 * Returns the entry index (>= 0) of the first probed entry whose key is
 * identical to `key`, or has the same hash and compares equal via
 * unicode_eq().  Returns DKIX_EMPTY as soon as an empty slot is probed.
 *
 * Stored keys are only asserted (not checked at runtime) to be non-NULL
 * exact unicode objects, so callers must guarantee that precondition.
 */
static Py_ssize_t
738+
dictkeys_stringlookup(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
739+
{
740+
PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
741+
size_t mask = DK_MASK(dk);
742+
size_t perturb = hash;
743+
size_t i = (size_t)hash & mask;
744+
Py_ssize_t ix;
745+
/* Each pass through the loop body performs two identical probe steps
 * written out back to back. */
for (;;) {
746+
ix = dictkeys_get_index(dk, i);
747+
if (ix >= 0) {
748+
PyDictKeyEntry *ep = &ep0[ix];
749+
assert(ep->me_key != NULL);
750+
assert(PyUnicode_CheckExact(ep->me_key));
751+
/* Identity check first; otherwise require equal hashes before the
 * full string comparison. */
if (ep->me_key == key ||
752+
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
753+
return ix;
754+
}
755+
}
756+
else if (ix == DKIX_EMPTY) {
757+
return DKIX_EMPTY;
758+
}
759+
/* Any other negative index falls through and probing continues
 * (presumably a deleted/dummy slot -- its definition is not shown here). */
perturb >>= PERTURB_SHIFT;
760+
i = mask & (i*5 + perturb + 1);
761+
/* Second probe step: same logic as the first. */
ix = dictkeys_get_index(dk, i);
762+
if (ix >= 0) {
763+
PyDictKeyEntry *ep = &ep0[ix];
764+
assert(ep->me_key != NULL);
765+
assert(PyUnicode_CheckExact(ep->me_key));
766+
if (ep->me_key == key ||
767+
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
768+
return ix;
769+
}
770+
}
771+
else if (ix == DKIX_EMPTY) {
772+
return DKIX_EMPTY;
773+
}
774+
perturb >>= PERTURB_SHIFT;
775+
i = mask & (i*5 + perturb + 1);
776+
}
777+
/* The loop only exits via return. */
Py_UNREACHABLE();
778+
}
779+
780+
/* Look up a string key in an all-unicode dict keys object.
781+
 * Returns DKIX_ERROR if key is not an exact unicode string,
782+
 * or if the dict keys object is of the general (not all strings) kind,
 * or if hashing the key fails (the error is cleared before returning).
783+
 * If the key is present, returns the index of the key.
784+
 * If the key is not present, returns DKIX_EMPTY.
785+
 */
786+
Py_ssize_t
787+
_PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key)
788+
{
789+
DictKeysKind kind = dk->dk_kind;
790+
/* The string-only probe below is valid only for an exact unicode key
 * looked up in non-general keys. */
if (!PyUnicode_CheckExact(key) || kind == DICT_KEYS_GENERAL) {
791+
return DKIX_ERROR;
792+
}
793+
/* Read the hash stored on the string object; -1 is handled as
 * "no hash available yet" and triggers a call to the type's tp_hash. */
Py_hash_t hash = ((PyASCIIObject *)key)->hash;
794+
if (hash == -1) {
795+
hash = PyUnicode_Type.tp_hash(key);
796+
if (hash == -1) {
797+
/* Hashing failed: drop the pending exception so the caller only
 * sees DKIX_ERROR, with no error set. */
PyErr_Clear();
798+
return DKIX_ERROR;
799+
}
800+
}
801+
return dictkeys_stringlookup(dk, key, hash);
802+
}
803+
737804
/*
738805
The basic lookup function used by all operations.
739806
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -756,49 +823,24 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
756823
start:
757824
dk = mp->ma_keys;
758825
DictKeysKind kind = dk->dk_kind;
826+
if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
827+
Py_ssize_t ix = dictkeys_stringlookup(dk, key, hash);
828+
if (ix == DKIX_EMPTY) {
829+
*value_addr = NULL;
830+
}
831+
else if (kind == DICT_KEYS_SPLIT) {
832+
*value_addr = mp->ma_values[ix];
833+
}
834+
else {
835+
*value_addr = DK_ENTRIES(dk)[ix].me_value;
836+
}
837+
return ix;
838+
}
759839
PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
760840
size_t mask = DK_MASK(dk);
761841
size_t perturb = hash;
762842
size_t i = (size_t)hash & mask;
763843
Py_ssize_t ix;
764-
if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
765-
/* Strings only */
766-
for (;;) {
767-
ix = dictkeys_get_index(mp->ma_keys, i);
768-
if (ix >= 0) {
769-
PyDictKeyEntry *ep = &ep0[ix];
770-
assert(ep->me_key != NULL);
771-
assert(PyUnicode_CheckExact(ep->me_key));
772-
if (ep->me_key == key ||
773-
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
774-
goto found;
775-
}
776-
}
777-
else if (ix == DKIX_EMPTY) {
778-
*value_addr = NULL;
779-
return DKIX_EMPTY;
780-
}
781-
perturb >>= PERTURB_SHIFT;
782-
i = mask & (i*5 + perturb + 1);
783-
ix = dictkeys_get_index(mp->ma_keys, i);
784-
if (ix >= 0) {
785-
PyDictKeyEntry *ep = &ep0[ix];
786-
assert(ep->me_key != NULL);
787-
assert(PyUnicode_CheckExact(ep->me_key));
788-
if (ep->me_key == key ||
789-
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
790-
goto found;
791-
}
792-
}
793-
else if (ix == DKIX_EMPTY) {
794-
*value_addr = NULL;
795-
return DKIX_EMPTY;
796-
}
797-
perturb >>= PERTURB_SHIFT;
798-
i = mask & (i*5 + perturb + 1);
799-
}
800-
Py_UNREACHABLE();
801-
}
802844
for (;;) {
803845
ix = dictkeys_get_index(dk, i);
804846
if (ix == DKIX_EMPTY) {
@@ -4997,15 +5039,15 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys)
49975039

49985040
static uint32_t next_dict_keys_version = 2;
49995041

5000-
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict)
5042+
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys)
50015043
{
/* A nonzero dk_version has already been assigned to these keys; reuse it. */
5002-
if (dict->ma_keys->dk_version != 0) {
5003-
return dict->ma_keys->dk_version;
5044+
if (dictkeys->dk_version != 0) {
5045+
return dictkeys->dk_version;
50045046
}
50055047
/* A counter value of 0 means no version can be handed out: return 0
 * (failure) without touching dk_version.  The counter is a uint32_t that
 * is only ever post-incremented below, so it reaches 0 by wrapping around
 * and then stays there. */
if (next_dict_keys_version == 0) {
50065048
return 0;
50075049
}
50085050
/* Take the next version number and record it on the keys object. */
uint32_t v = next_dict_keys_version++;
5009-
dict->ma_keys->dk_version = v;
5050+
dictkeys->dk_version = v;
50105052
return v;
50115053
}

Python/specialize.c

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ specialize_module_load_attr(
489489
SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_RANGE);
490490
return -1;
491491
}
492-
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
492+
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict->ma_keys);
493493
if (keys_version == 0) {
494494
SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_VERSIONS);
495495
return -1;
@@ -601,23 +601,19 @@ specialize_dict_access(
601601
}
602602
// We found an instance with a __dict__.
603603
PyDictObject *dict = (PyDictObject *)*dictptr;
604+
PyDictKeysObject *keys = dict->ma_keys;
604605
if ((type->tp_flags & Py_TPFLAGS_HEAPTYPE)
605-
&& dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys
606+
&& keys == ((PyHeapTypeObject*)type)->ht_cached_keys
606607
) {
607608
// Keys are shared
608609
assert(PyUnicode_CheckExact(name));
609-
Py_hash_t hash = PyObject_Hash(name);
610-
if (hash == -1) {
611-
return -1;
612-
}
613-
PyObject *value;
614-
Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &value);
610+
Py_ssize_t index = _PyDictKeys_StringLookup(keys, name);
615611
assert (index != DKIX_ERROR);
616612
if (index != (uint16_t)index) {
617613
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
618614
return 0;
619615
}
620-
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
616+
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(keys);
621617
if (keys_version == 0) {
622618
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_VERSIONS);
623619
return 0;
@@ -966,7 +962,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
966962
goto fail;
967963
}
968964
}
969-
keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict);
965+
keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict->ma_keys);
970966
if (keys_version == 0) {
971967
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS);
972968
goto fail;
@@ -1020,17 +1016,17 @@ _Py_Specialize_LoadGlobal(
10201016
if (!PyDict_CheckExact(globals)) {
10211017
goto fail;
10221018
}
1023-
if (((PyDictObject *)globals)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
1019+
PyDictKeysObject * globals_keys = ((PyDictObject *)globals)->ma_keys;
1020+
Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name);
1021+
if (index == DKIX_ERROR) {
1022+
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT);
10241023
goto fail;
10251024
}
1026-
PyObject *value = NULL;
1027-
Py_ssize_t index = _PyDict_GetItemHint((PyDictObject *)globals, name, -1, &value);
1028-
assert (index != DKIX_ERROR);
10291025
if (index != DKIX_EMPTY) {
10301026
if (index != (uint16_t)index) {
10311027
goto fail;
10321028
}
1033-
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
1029+
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(globals_keys);
10341030
if (keys_version == 0) {
10351031
goto fail;
10361032
}
@@ -1042,20 +1038,23 @@ _Py_Specialize_LoadGlobal(
10421038
if (!PyDict_CheckExact(builtins)) {
10431039
goto fail;
10441040
}
1045-
if (((PyDictObject *)builtins)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
1041+
PyDictKeysObject * builtin_keys = ((PyDictObject *)builtins)->ma_keys;
1042+
index = _PyDictKeys_StringLookup(builtin_keys, name);
1043+
if (index == DKIX_ERROR) {
1044+
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT);
10461045
goto fail;
10471046
}
1048-
index = _PyDict_GetItemHint((PyDictObject *)builtins, name, -1, &value);
1049-
assert (index != DKIX_ERROR);
10501047
if (index != (uint16_t)index) {
10511048
goto fail;
10521049
}
1053-
uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
1050+
uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState(globals_keys);
10541051
if (globals_version == 0) {
1052+
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS);
10551053
goto fail;
10561054
}
1057-
uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins);
1055+
uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState(builtin_keys);
10581056
if (builtins_version == 0) {
1057+
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS);
10591058
goto fail;
10601059
}
10611060
cache1->module_keys_version = globals_version;

0 commit comments

Comments
 (0)