Skip to content

Commit 468e282

Browse files
committed
New cache key generation algorithm
Instead of building a URL-ish string that could be complex to describe and reproduce, generate a dictionary that is hashed with a simple algorithm.
1 parent 49801cb commit 468e282

File tree

2 files changed

+26
-11
lines changed

2 files changed

+26
-11
lines changed

src/pip/_internal/cache.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,25 @@
2121
from pip._internal.wheel import InvalidWheelFilename, Wheel
2222

2323
if MYPY_CHECK_RUNNING:
24-
from typing import Optional, Set, List, Any
24+
from typing import Optional, Set, List, Any, Dict
2525
from pip._internal.models.format_control import FormatControl
2626
from pip._internal.pep425tags import Pep425Tag
2727

2828
logger = logging.getLogger(__name__)
2929

3030

31+
def _hash_dict(d):
    # type: (Dict[str, str]) -> str
    """Compute a sha224 hex digest of a dictionary of string keys and values.

    Entries are fed to the hash in sorted key order, each one serialized as
    ``key=value`` followed by a NUL byte, so the digest does not depend on
    the order in which the dictionary was populated.
    """
    digest = hashlib.sha224()
    for name, value in sorted(d.items()):
        # One update per entry; the trailing NUL terminates the entry.
        digest.update(name.encode() + b"=" + value.encode() + b"\0")
    return digest.hexdigest()
41+
42+
3143
class Cache(object):
3244
"""An abstract class - provides cache directories for data from links
3345
@@ -57,32 +69,28 @@ def _get_cache_path_parts(self, link):
5769
# We want to generate an url to use as our cache key, we don't want to
5870
# just re-use the URL because it might have other items in the fragment
5971
# and we don't care about those.
60-
key_parts = [link.url_without_fragment]
72+
key_parts = {"url": link.url_without_fragment}
6173
if link.hash_name is not None and link.hash is not None:
62-
key_parts.append("=".join([link.hash_name, link.hash]))
74+
key_parts[link.hash_name] = link.hash
6375
if link.subdirectory_fragment:
64-
key_parts.append(
65-
"=".join(["subdirectory", link.subdirectory_fragment])
66-
)
67-
key_url = "#".join(key_parts)
76+
key_parts["subdirectory"] = link.subdirectory_fragment
6877

6978
# Include interpreter name, major and minor version in cache key
7079
# to cope with ill-behaved sdists that build a different wheel
7180
# depending on the python version their setup.py is being run on,
7281
# and don't encode the difference in compatibility tags.
7382
# https://github.com/pypa/pip/issues/7296
74-
key = "{}-{}.{} {}".format(
83+
key_parts["interpreter"] = "{}-{}.{}".format(
7584
interpreter_name(),
7685
sys.version_info[0],
7786
sys.version_info[1],
78-
key_url,
7987
)
8088

8189
# Encode our key url with sha224, we'll use this because it has similar
8290
# security properties to sha256, but with a shorter total output (and
8391
# thus less secure). However the differences don't make a lot of
8492
# difference for our use case here.
85-
hashed = hashlib.sha224(key.encode()).hexdigest()
93+
hashed = _hash_dict(key_parts)
8694

8795
# We want to nest the directories some to prevent having a ton of top
8896
# level directories where we might run out of sub directories on some

tests/unit/test_cache.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22

3-
from pip._internal.cache import WheelCache
3+
from pip._internal.cache import WheelCache, _hash_dict
44
from pip._internal.models.format_control import FormatControl
55
from pip._internal.models.link import Link
66
from pip._internal.utils.compat import expanduser
@@ -42,3 +42,10 @@ def test_wheel_name_filter(tmpdir):
4242
assert wc.get(link, "package", [("py3", "none", "any")]) is not link
4343
# package2 does not match wheel name
4444
assert wc.get(link, "package2", [("py3", "none", "any")]) is link
45+
46+
47+
def test_cache_hash():
    """Pin the digests that ``_hash_dict`` produces for known key parts."""
    expected_digests = (
        (
            {"url": "https://g.c/o/r"},
            "c7d60d08b1079254d236e983501fa26c016d58d16010725b27ed0af2",
        ),
        (
            {"url": "https://g.c/o/r", "subdirectory": "sd"},
            "9cba35d4ccf04b7cde751b44db347fd0f21fa47d1276e32f9d47864c",
        ),
    )
    for key_parts, expected in expected_digests:
        assert _hash_dict(key_parts) == expected

0 commit comments

Comments
 (0)