Skip to content

Better workaround for cache poisoning (see #3025) #7319

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 12, 2019
5 changes: 5 additions & 0 deletions news/7296.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Make sure ``pip wheel`` never outputs pure python wheels with a
python implementation tag. Better fix/workaround for
`#3025 <https://github.com/pypa/pip/issues/3025>`_ by
using a per-implementation wheel cache instead of caching pure python
wheels with an implementation tag in their name.
3 changes: 3 additions & 0 deletions news/7296.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The pip>=20 wheel cache is not retro-compatible with previous versions. Until
pip 21.0, pip will continue to take advantage of existing legacy cache
entries.
84 changes: 70 additions & 14 deletions src/pip/_internal/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False

import errno
import hashlib
import json
import logging
import os

Expand All @@ -14,19 +14,27 @@
from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.pep425tags import interpreter_name, interpreter_version
from pip._internal.utils.compat import expanduser
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
from pip._internal.utils.urls import path_to_url

if MYPY_CHECK_RUNNING:
from typing import Optional, Set, List, Any
from typing import Optional, Set, List, Any, Dict
from pip._internal.models.format_control import FormatControl
from pip._internal.pep425tags import Pep425Tag

logger = logging.getLogger(__name__)


def _hash_dict(d):
# type: (Dict[str, str]) -> str
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
return hashlib.sha224(s.encode("ascii")).hexdigest()


class Cache(object):
"""An abstract class - provides cache directories for data from links

Expand All @@ -48,9 +56,11 @@ def __init__(self, cache_dir, format_control, allowed_formats):
_valid_formats = {"source", "binary"}
assert self.allowed_formats.union(_valid_formats) == _valid_formats

def _get_cache_path_parts(self, link):
def _get_cache_path_parts_legacy(self, link):
# type: (Link) -> List[str]
"""Get parts of part that must be os.path.joined with cache_dir

Legacy cache key (pip < 20) for compatibility with older caches.
"""

# We want to generate an url to use as our cache key, we don't want to
Expand All @@ -59,10 +69,6 @@ def _get_cache_path_parts(self, link):
key_parts = [link.url_without_fragment]
if link.hash_name is not None and link.hash is not None:
key_parts.append("=".join([link.hash_name, link.hash]))
if link.subdirectory_fragment:
key_parts.append(
"=".join(["subdirectory", link.subdirectory_fragment])
)
key_url = "#".join(key_parts)

# Encode our key url with sha224, we'll use this because it has similar
Expand All @@ -78,6 +84,41 @@ def _get_cache_path_parts(self, link):

return parts

def _get_cache_path_parts(self, link):
# type: (Link) -> List[str]
"""Get parts of part that must be os.path.joined with cache_dir
"""

# We want to generate an url to use as our cache key, we don't want to
# just re-use the URL because it might have other items in the fragment
# and we don't care about those.
key_parts = {"url": link.url_without_fragment}
if link.hash_name is not None and link.hash is not None:
key_parts[link.hash_name] = link.hash
if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment

# Include interpreter name, major and minor version in cache key
# to cope with ill-behaved sdists that build a different wheel
# depending on the python version their setup.py is being run on,
# and don't encode the difference in compatibility tags.
# https://github.com/pypa/pip/issues/7296
key_parts["interpreter_name"] = interpreter_name()
key_parts["interpreter_version"] = interpreter_version()

# Encode our key url with sha224, we'll use this because it has similar
# security properties to sha256, but with a shorter total output (and
# thus less secure). However the differences don't make a lot of
# difference for our use case here.
hashed = _hash_dict(key_parts)

# We want to nest the directories some to prevent having a ton of top
# level directories where we might run out of sub directories on some
# FS.
parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]

return parts

def _get_candidates(self, link, canonical_package_name):
# type: (Link, Optional[str]) -> List[Any]
can_not_cache = (
Expand All @@ -94,13 +135,19 @@ def _get_candidates(self, link, canonical_package_name):
if not self.allowed_formats.intersection(formats):
return []

root = self.get_path_for_link(link)
try:
return os.listdir(root)
except OSError as err:
if err.errno in {errno.ENOENT, errno.ENOTDIR}:
return []
raise
candidates = []
path = self.get_path_for_link(link)
if os.path.isdir(path):
candidates.extend(os.listdir(path))
# TODO remove legacy path lookup in pip>=21
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should make an issue to add a deprecation warning around here when this is merged.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of deprecation warning do you have in mind?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering the same as I think pip._internal.utils.deprecation.deprecated would be too noisy for such a thing...
Maybe something like

if gone_in is not None and parse(current_version) >= parse(gone_in):
raise PipDeprecationWarning(message)
to make sure we don't forget about it ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To not forget about things to do in the future, you could maybe use GitHub milestones and assign such TODO issues to future milestones. Now that pip has a regular release cadence it might be easy enough to manage.

legacy_path = self.get_path_for_link_legacy(link)
if os.path.isdir(legacy_path):
candidates.extend(os.listdir(legacy_path))
return candidates

def get_path_for_link_legacy(self, link):
# type: (Link) -> str
raise NotImplementedError()

def get_path_for_link(self, link):
# type: (Link) -> str
Expand Down Expand Up @@ -142,6 +189,11 @@ def __init__(self, cache_dir, format_control):
cache_dir, format_control, {"binary"}
)

def get_path_for_link_legacy(self, link):
# type: (Link) -> str
parts = self._get_cache_path_parts_legacy(link)
return os.path.join(self.cache_dir, "wheels", *parts)

def get_path_for_link(self, link):
# type: (Link) -> str
"""Return a directory to store cached wheels for link
Expand Down Expand Up @@ -234,6 +286,10 @@ def __init__(self, cache_dir, format_control):
self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
self._ephem_cache = EphemWheelCache(format_control)

def get_path_for_link_legacy(self, link):
# type: (Link) -> str
return self._wheel_cache.get_path_for_link_legacy(link)

def get_path_for_link(self, link):
# type: (Link) -> str
return self._wheel_cache.get_path_for_link(link)
Expand Down
17 changes: 6 additions & 11 deletions src/pip/_internal/pep425tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def get_abbr_impl():
return pyimpl


interpreter_name = get_abbr_impl


def version_info_to_nodot(version_info):
# type: (Tuple[int, ...]) -> str
# Only use up to the first two numbers.
Expand All @@ -69,6 +72,9 @@ def get_impl_ver():
return impl_ver


interpreter_version = get_impl_ver


def get_impl_version_info():
# type: () -> Tuple[int, ...]
"""Return sys.version_info-like tuple for use in decrementing the minor
Expand All @@ -83,14 +89,6 @@ def get_impl_version_info():
return sys.version_info[0], sys.version_info[1]


def get_impl_tag():
# type: () -> str
"""
Returns the Tag for this specific implementation.
"""
return "{}{}".format(get_abbr_impl(), get_impl_ver())


def get_flag(var, fallback, expected=True, warn=True):
# type: (str, Callable[..., bool], Union[bool, int], bool) -> bool
"""Use a fallback method for determining SOABI flags if the needed config
Expand Down Expand Up @@ -459,6 +457,3 @@ def get_supported(
supported.append(('py%s' % (version,), 'none', 'any'))

return supported


implementation_tag = get_impl_tag()
3 changes: 0 additions & 3 deletions src/pip/_internal/utils/setuptools_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def make_setuptools_bdist_wheel_args(
global_options, # type: Sequence[str]
build_options, # type: Sequence[str]
destination_dir, # type: str
python_tag, # type: Optional[str]
):
# type: (...) -> List[str]
# NOTE: Eventually, we'd want to also -S to the flags here, when we're
Expand All @@ -66,8 +65,6 @@ def make_setuptools_bdist_wheel_args(
)
args += ["bdist_wheel", "-d", destination_dir]
args += build_options
if python_tag is not None:
args += ["--python-tag", python_tag]
return args


Expand Down
42 changes: 3 additions & 39 deletions src/pip/_internal/wheel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import re
import shutil

from pip._internal import pep425tags
from pip._internal.models.link import Link
from pip._internal.utils.logging import indent_log
from pip._internal.utils.marker_files import has_delete_marker_file
Expand Down Expand Up @@ -47,14 +46,6 @@
logger = logging.getLogger(__name__)


def replace_python_tag(wheelname, new_tag):
# type: (str, str) -> str
"""Replace the Python tag in a wheel file name with a new value."""
parts = wheelname.split('-')
parts[-3] = new_tag
return '-'.join(parts)


def _contains_egg_info(
s, _egg_info_re=re.compile(r'([a-z0-9_.]+)-([a-z0-9_.!+-]+)', re.I)):
# type: (str, Pattern[str]) -> bool
Expand Down Expand Up @@ -197,7 +188,6 @@ def _build_wheel_legacy(
global_options, # type: List[str]
build_options, # type: List[str]
tempd, # type: str
python_tag=None, # type: Optional[str]
):
# type: (...) -> Optional[str]
"""Build one unpacked package using the "legacy" build process.
Expand All @@ -209,7 +199,6 @@ def _build_wheel_legacy(
global_options=global_options,
build_options=build_options,
destination_dir=tempd,
python_tag=python_tag,
)

spin_message = 'Building wheel for %s (setup.py)' % (name,)
Expand Down Expand Up @@ -277,7 +266,6 @@ def _build_one(
self,
req, # type: InstallRequirement
output_dir, # type: str
python_tag=None, # type: Optional[str]
):
# type: (...) -> Optional[str]
"""Build one wheel.
Expand All @@ -286,21 +274,17 @@ def _build_one(
"""
# Install build deps into temporary directory (PEP 518)
with req.build_env:
return self._build_one_inside_env(req, output_dir,
python_tag=python_tag)
return self._build_one_inside_env(req, output_dir)

def _build_one_inside_env(
self,
req, # type: InstallRequirement
output_dir, # type: str
python_tag=None, # type: Optional[str]
):
# type: (...) -> Optional[str]
with TempDirectory(kind="wheel") as temp_dir:
if req.use_pep517:
wheel_path = self._build_one_pep517(
req, temp_dir.path, python_tag=python_tag
)
wheel_path = self._build_one_pep517(req, temp_dir.path)
else:
wheel_path = _build_wheel_legacy(
name=req.name,
Expand All @@ -309,7 +293,6 @@ def _build_one_inside_env(
global_options=self.global_options,
build_options=self.build_options,
tempd=temp_dir.path,
python_tag=python_tag,
)

if wheel_path is not None:
Expand All @@ -334,7 +317,6 @@ def _build_one_pep517(
self,
req, # type: InstallRequirement
tempd, # type: str
python_tag=None, # type: Optional[str]
):
# type: (...) -> Optional[str]
"""Build one InstallRequirement using the PEP 517 build process.
Expand All @@ -359,17 +341,6 @@ def _build_one_pep517(
tempd,
metadata_directory=req.metadata_directory,
)
if python_tag:
# General PEP 517 backends don't necessarily support
# a "--python-tag" option, so we rename the wheel
# file directly.
new_name = replace_python_tag(wheel_name, python_tag)
os.rename(
os.path.join(tempd, wheel_name),
os.path.join(tempd, new_name)
)
# Reassign to simplify the return at the end of function
wheel_name = new_name
except Exception:
logger.error('Failed building wheel for %s', req.name)
return None
Expand Down Expand Up @@ -447,10 +418,6 @@ def build(
', '.join([req.name for (req, _) in buildset]),
)

python_tag = None
if should_unpack:
python_tag = pep425tags.implementation_tag

with indent_log():
build_success, build_failure = [], []
for req, output_dir in buildset:
Expand All @@ -464,10 +431,7 @@ def build(
build_failure.append(req)
continue

wheel_file = self._build_one(
req, output_dir,
python_tag=python_tag,
)
wheel_file = self._build_one(req, output_dir)
if wheel_file:
if should_unpack:
# XXX: This is mildly duplicative with prepare_files,
Expand Down
5 changes: 2 additions & 3 deletions tests/functional/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from pip._vendor.six import PY2

from pip import __version__ as pip_current_version
from pip._internal import pep425tags
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.models.index import PyPI, TestPyPI
from pip._internal.utils.misc import rmtree
Expand Down Expand Up @@ -1297,9 +1296,9 @@ def test_install_builds_wheels(script, data, with_wheel):
assert "Running setup.py install for requir" not in str(res), str(res)
# wheelbroken has to run install
assert "Running setup.py install for wheelb" in str(res), str(res)
# We want to make sure we used the correct implementation tag
# We want to make sure pure python wheels do not have an implementation tag
assert wheels == [
"Upper-2.0-{}-none-any.whl".format(pep425tags.implementation_tag),
"Upper-2.0-py{}-none-any.whl".format(sys.version_info[0]),
]


Expand Down
Loading