Skip to content

Commit ae0acff

Browse files
committed
Hash checking of cached wheels
1 parent c247ddc commit ae0acff

File tree

10 files changed

+100
-31
lines changed

10 files changed

+100
-31
lines changed

news/5037.feature.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Support wheel cache when using ``--require-hashes``.

src/pip/_internal/cache.py

+6
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import json
66
import logging
77
import os
8+
from pathlib import Path
89
from typing import Any, Dict, List, Optional, Set
910

1011
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
1112
from pip._vendor.packaging.utils import canonicalize_name
1213

1314
from pip._internal.exceptions import InvalidWheelFilename
15+
from pip._internal.models.direct_url import DirectUrl
1416
from pip._internal.models.format_control import FormatControl
1517
from pip._internal.models.link import Link
1618
from pip._internal.models.wheel import Wheel
@@ -204,6 +206,10 @@ def __init__(
204206
):
205207
self.link = link
206208
self.persistent = persistent
209+
self.origin: Optional[DirectUrl] = None
210+
origin_direct_url_path = Path(self.link.file_path).parent / "origin.json"
211+
if origin_direct_url_path.exists():
212+
self.origin = DirectUrl.from_json(origin_direct_url_path.read_text())
207213

208214

209215
class WheelCache(Cache):

src/pip/_internal/exceptions.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,20 @@
88
import configparser
99
import re
1010
from itertools import chain, groupby, repeat
11-
from typing import TYPE_CHECKING, Dict, List, Optional, Union
11+
from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Union
1212

1313
from pip._vendor.requests.models import Request, Response
1414
from pip._vendor.rich.console import Console, ConsoleOptions, RenderResult
1515
from pip._vendor.rich.markup import escape
1616
from pip._vendor.rich.text import Text
1717

1818
if TYPE_CHECKING:
19-
from hashlib import _Hash
20-
from typing import Literal
19+
from typing import Literal, Protocol
2120

2221
from pip._internal.metadata import BaseDistribution
2322
from pip._internal.req.req_install import InstallRequirement
23+
else:
24+
Protocol = object
2425

2526

2627
#
@@ -570,6 +571,11 @@ class HashUnpinned(HashError):
570571
)
571572

572573

574+
class SupportsHexDigest(Protocol):
575+
def hexdigest(self) -> str:
576+
...
577+
578+
573579
class HashMismatch(HashError):
574580
"""
575581
Distribution file hash values don't match.
@@ -588,7 +594,9 @@ class HashMismatch(HashError):
588594
"someone may have tampered with them."
589595
)
590596

591-
def __init__(self, allowed: Dict[str, List[str]], gots: Dict[str, "_Hash"]) -> None:
597+
def __init__(
598+
self, allowed: Dict[str, List[str]], gots: Mapping[str, SupportsHexDigest]
599+
) -> None:
592600
"""
593601
:param allowed: A dict of algorithm names pointing to lists of allowed
594602
hex digests

src/pip/_internal/operations/prepare.py

+28-4
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@
3737
from pip._internal.req.req_install import InstallRequirement
3838
from pip._internal.utils.hashes import Hashes, MissingHashes
3939
from pip._internal.utils.logging import indent_log
40-
from pip._internal.utils.misc import display_path, hide_url, is_installable_dir
40+
from pip._internal.utils.misc import (
41+
display_path,
42+
hash_file,
43+
hide_url,
44+
is_installable_dir,
45+
)
4146
from pip._internal.utils.temp_dir import TempDirectory
4247
from pip._internal.utils.unpacking import unpack_file
4348
from pip._internal.vcs import vcs
@@ -98,7 +103,10 @@ def get_http_url(
98103

99104

100105
def get_file_url(
101-
link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None
106+
link: Link,
107+
download_dir: Optional[str] = None,
108+
hashes: Optional[Hashes] = None,
109+
archive_hash: Optional[str] = None,
102110
) -> File:
103111
"""Get file and optionally check its hash."""
104112
# If a download dir is specified, is the file already there and valid?
@@ -117,7 +125,13 @@ def get_file_url(
117125
# hash in `hashes` matching: a URL-based or an option-based
118126
# one; no internet-sourced hash will be in `hashes`.
119127
if hashes:
120-
hashes.check_against_path(from_path)
128+
if archive_hash:
129+
# When we get a wheel from the cache, we don't check the file hash but
130+
# rather compare expected hash against the hash of the original archive
131+
# that was downloaded to build the cached wheel.
132+
hashes.check_against_hash(archive_hash)
133+
else:
134+
hashes.check_against_path(from_path)
121135
return File(from_path, None)
122136

123137

@@ -128,6 +142,7 @@ def unpack_url(
128142
verbosity: int,
129143
download_dir: Optional[str] = None,
130144
hashes: Optional[Hashes] = None,
145+
archive_hash: Optional[str] = None,
131146
) -> Optional[File]:
132147
"""Unpack link into location, downloading if required.
133148
@@ -145,7 +160,9 @@ def unpack_url(
145160

146161
# file urls
147162
if link.is_file:
148-
file = get_file_url(link, download_dir, hashes=hashes)
163+
file = get_file_url(
164+
link, download_dir, hashes=hashes, archive_hash=archive_hash
165+
)
149166

150167
# http urls
151168
else:
@@ -470,6 +487,7 @@ def _prepare_linked_requirement(
470487
self.verbosity,
471488
self.download_dir,
472489
hashes,
490+
req.archive_hash,
473491
)
474492
except NetworkConnectionError as exc:
475493
raise InstallationError(
@@ -486,6 +504,12 @@ def _prepare_linked_requirement(
486504
# preserve the file path on the requirement.
487505
if local_file:
488506
req.local_file_path = local_file.path
507+
# Also compute and preserve the hash of the file we downloaded.
508+
# Note: as an optimization we may use link.hash if it is a sha256,
509+
# as we verify elsewhere that it matches the downloaded content.
510+
# TODO Should we use hashes.FAVORITE_HASH type?
511+
hash = hash_file(local_file.path)[0].hexdigest()
512+
req.archive_hash = f"sha256={hash}"
489513

490514
dist = _get_prepared_distribution(
491515
req,

src/pip/_internal/req/req_install.py

+6
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ def __init__(
108108
# PEP 508 URL requirement
109109
link = Link(req.url)
110110
self.link = self.original_link = link
111+
112+
# The locally computed hash of the (source) archive we downloaded. If no
113+
# download occurred because a corresponding wheel was found in the local wheel
114+
# cache, this is the hash that was recorded in the cache entry.
115+
self.archive_hash: Optional[str] = None
116+
111117
self.original_link_is_in_wheel_cache = False
112118

113119
# Path to any downloaded or already-existing package.

src/pip/_internal/resolution/legacy/resolver.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
)
3434
from pip._internal.index.package_finder import PackageFinder
3535
from pip._internal.metadata import BaseDistribution
36+
from pip._internal.models import direct_url
3637
from pip._internal.models.link import Link
3738
from pip._internal.operations.prepare import RequirementPreparer
3839
from pip._internal.req.req_install import (
@@ -289,17 +290,11 @@ def _populate_link(self, req: InstallRequirement) -> None:
289290
Note that req.link may still be None - if the requirement is already
290291
installed and not needed to be upgraded based on the return value of
291292
_is_upgrade_allowed().
292-
293-
If preparer.require_hashes is True, don't use the wheel cache, because
294-
cached wheels, always built locally, have different hashes than the
295-
files downloaded from the index server and thus throw false hash
296-
mismatches. Furthermore, cached wheels at present have undeterministic
297-
contents due to file modification times.
298293
"""
299294
if req.link is None:
300295
req.link = self._find_requirement_link(req)
301296

302-
if self.wheel_cache is None or self.preparer.require_hashes:
297+
if self.wheel_cache is None:
303298
return
304299
cache_entry = self.wheel_cache.get_cache_entry(
305300
link=req.link,
@@ -310,6 +305,10 @@ def _populate_link(self, req: InstallRequirement) -> None:
310305
logger.debug("Using cached wheel link: %s", cache_entry.link)
311306
if req.link is req.original_link and cache_entry.persistent:
312307
req.original_link_is_in_wheel_cache = True
308+
if cache_entry.origin is not None and isinstance(
309+
cache_entry.origin.info, direct_url.ArchiveInfo
310+
):
311+
req.archive_hash = cache_entry.origin.info.hash
313312
req.link = cache_entry.link
314313

315314
def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution:

src/pip/_internal/resolution/resolvelib/candidates.py

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
MetadataInconsistent,
1212
)
1313
from pip._internal.metadata import BaseDistribution
14+
from pip._internal.models import direct_url
1415
from pip._internal.models.link import Link, links_equivalent
1516
from pip._internal.models.wheel import Wheel
1617
from pip._internal.req.constructors import (
@@ -284,6 +285,10 @@ def __init__(
284285
and template.link is template.original_link
285286
):
286287
ireq.original_link_is_in_wheel_cache = True
288+
if cache_entry.origin is not None and isinstance(
289+
cache_entry.origin.info, direct_url.ArchiveInfo
290+
):
291+
ireq.archive_hash = cache_entry.origin.info.hash
287292

288293
super().__init__(
289294
link=link,

src/pip/_internal/resolution/resolvelib/factory.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -536,15 +536,8 @@ def make_requires_python_requirement(
536536
def get_wheel_cache_entry(
537537
self, link: Link, name: Optional[str]
538538
) -> Optional[CacheEntry]:
539-
"""Look up the link in the wheel cache.
540-
541-
If ``preparer.require_hashes`` is True, don't use the wheel cache,
542-
because cached wheels, always built locally, have different hashes
543-
than the files downloaded from the index server and thus throw false
544-
hash mismatches. Furthermore, cached wheels at present have
545-
nondeterministic contents due to file modification times.
546-
"""
547-
if self._wheel_cache is None or self.preparer.require_hashes:
539+
"""Look up the link in the wheel cache."""
540+
if self._wheel_cache is None:
548541
return None
549542
return self._wheel_cache.get_cache_entry(
550543
link=link,

src/pip/_internal/utils/hashes.py

+20-7
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import hashlib
2-
from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List
3-
4-
from pip._internal.exceptions import HashMismatch, HashMissing, InstallationError
2+
from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Mapping
3+
4+
from pip._internal.exceptions import (
5+
HashMismatch,
6+
HashMissing,
7+
InstallationError,
8+
SupportsHexDigest,
9+
)
510
from pip._internal.utils.misc import read_chunks
611

712
if TYPE_CHECKING:
8-
from hashlib import _Hash
9-
1013
# NoReturn introduced in 3.6.2; imported only for type checking to maintain
1114
# pip compatibility with older patch versions of Python 3.6
1215
from typing import NoReturn
@@ -22,6 +25,11 @@
2225
STRONG_HASHES = ["sha256", "sha384", "sha512"]
2326

2427

28+
class HexDigestStr(str, SupportsHexDigest):
29+
def hexdigest(self) -> str:
30+
return self
31+
32+
2533
class Hashes:
2634
"""A wrapper that builds multiple hashes at once and checks them against
2735
known-good values
@@ -94,7 +102,7 @@ def check_against_chunks(self, chunks: Iterable[bytes]) -> None:
94102
return
95103
self._raise(gots)
96104

97-
def _raise(self, gots: Dict[str, "_Hash"]) -> "NoReturn":
105+
def _raise(self, gots: Mapping[str, SupportsHexDigest]) -> "NoReturn":
98106
raise HashMismatch(self._allowed, gots)
99107

100108
def check_against_file(self, file: BinaryIO) -> None:
@@ -109,6 +117,11 @@ def check_against_path(self, path: str) -> None:
109117
with open(path, "rb") as file:
110118
return self.check_against_file(file)
111119

120+
def check_against_hash(self, hash: str) -> None:
121+
alg, value = hash.split("=", 1)
122+
if value not in self._allowed.get(alg, []):
123+
self._raise({alg: HexDigestStr(value)})
124+
112125
def __bool__(self) -> bool:
113126
"""Return whether I know any known-good hashes."""
114127
return bool(self._allowed)
@@ -144,5 +157,5 @@ def __init__(self) -> None:
144157
# empty list, it will never match, so an error will always raise.
145158
super().__init__(hashes={FAVORITE_HASH: []})
146159

147-
def _raise(self, gots: Dict[str, "_Hash"]) -> "NoReturn":
160+
def _raise(self, gots: Mapping[str, SupportsHexDigest]) -> "NoReturn":
148161
raise HashMissing(gots[FAVORITE_HASH].hexdigest())

src/pip/_internal/wheel_builder.py

+14
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os.path
66
import re
77
import shutil
8+
from pathlib import Path
89
from typing import Any, Callable, Iterable, List, Optional, Tuple
910

1011
from pip._vendor.packaging.utils import canonicalize_name, canonicalize_version
@@ -13,12 +14,14 @@
1314
from pip._internal.cache import WheelCache
1415
from pip._internal.exceptions import InvalidWheelFilename, UnsupportedWheel
1516
from pip._internal.metadata import FilesystemWheel, get_wheel_distribution
17+
from pip._internal.models import direct_url
1618
from pip._internal.models.link import Link
1719
from pip._internal.models.wheel import Wheel
1820
from pip._internal.operations.build.wheel import build_wheel_pep517
1921
from pip._internal.operations.build.wheel_editable import build_wheel_editable
2022
from pip._internal.operations.build.wheel_legacy import build_wheel_legacy
2123
from pip._internal.req.req_install import InstallRequirement
24+
from pip._internal.utils.direct_url_helpers import direct_url_from_link
2225
from pip._internal.utils.logging import indent_log
2326
from pip._internal.utils.misc import ensure_dir, hash_file, is_wheel_installed
2427
from pip._internal.utils.setuptools_build import make_setuptools_clean_args
@@ -344,6 +347,7 @@ def build(
344347
build_successes, build_failures = [], []
345348
for req in requirements:
346349
assert req.name
350+
assert req.link
347351
cache_dir = _get_cache_dir(req, wheel_cache)
348352
wheel_file = _build_one(
349353
req,
@@ -354,6 +358,16 @@ def build(
354358
req.editable and req.permit_editable_wheels,
355359
)
356360
if wheel_file:
361+
# Store the origin URL of this cache entry
362+
# TODO move this to cache.py / refactor
363+
origin_direct_url = direct_url_from_link(req.link, req.source_dir)
364+
if isinstance(origin_direct_url.info, direct_url.ArchiveInfo):
365+
# Record the hash of the file that was downloaded.
366+
assert req.archive_hash
367+
origin_direct_url.info.hash = req.archive_hash
368+
Path(cache_dir).joinpath("origin.json").write_text(
369+
origin_direct_url.to_json()
370+
)
357371
# Update the link for this.
358372
req.link = Link(path_to_url(wheel_file))
359373
req.local_file_path = req.link.file_path

0 commit comments

Comments
 (0)