Skip to content

Commit 26778e9

Browse files
authored
Merge pull request #10079 from new-resolver-url-equivalent-no-hash
Smarter (and looser) link equivalency logic
2 parents 5672827 + 2da77e9 commit 26778e9

File tree

5 files changed

+219
-16
lines changed

5 files changed

+219
-16
lines changed

news/10002.feature.rst

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
New resolver: Loosen URL comparison logic when checking for direct URL reference
2+
equivalency. The logic includes the following notable characteristics:
3+
4+
* The authentication part of the URL is explicitly ignored.
5+
* Most of the fragment part, including ``egg=``, is explicitly ignored. Only
6+
``subdirectory=`` and hash values (e.g. ``sha256=``) are kept.
7+
* The query part of the URL is parsed to allow ordering differences.

src/pip/_internal/models/link.py

+66-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import functools
2+
import logging
13
import os
24
import posixpath
35
import re
46
import urllib.parse
5-
from typing import TYPE_CHECKING, Optional, Tuple, Union
7+
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
68

79
from pip._internal.utils.filetypes import WHEEL_EXTENSION
810
from pip._internal.utils.hashes import Hashes
@@ -17,6 +19,11 @@
1719
if TYPE_CHECKING:
1820
from pip._internal.index.collector import HTMLPage
1921

22+
logger = logging.getLogger(__name__)
23+
24+
25+
_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")
26+
2027

2128
class Link(KeyBasedCompareMixin):
2229
"""Represents a parsed link from a Package Index's simple URL
@@ -159,7 +166,7 @@ def subdirectory_fragment(self) -> Optional[str]:
159166
return match.group(1)
160167

161168
_hash_re = re.compile(
162-
r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)'
169+
r'({choices})=([a-f0-9]+)'.format(choices="|".join(_SUPPORTED_HASHES))
163170
)
164171

165172
@property
@@ -218,6 +225,61 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
218225
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
219226

220227

221-
# TODO: Relax this comparison logic to ignore, for example, fragments.
228+
class _CleanResult(NamedTuple):
229+
"""Convert link for equivalency check.
230+
231+
This is used in the resolver to check whether two URL-specified requirements
232+
likely point to the same distribution and can be considered equivalent. This
233+
equivalency logic avoids comparing URLs literally, which can be too strict
234+
(e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users.
235+
236+
Currently this does three things:
237+
238+
1. Drop the basic auth part. This is technically wrong since a server can
239+
serve different content based on auth, but if it does that, it is even
240+
impossible to guarantee two URLs without auth are equivalent, since
241+
the user can input different auth information when prompted. So the
242+
practical solution is to assume the auth doesn't affect the response.
243+
2. Parse the query to avoid the ordering issue. Note that ordering under the
244+
same key in the query are NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are
245+
still considered different.
246+
3. Explicitly drop most of the fragment part, except ``subdirectory=`` and
247+
hash values, since it should have no impact the downloaded content. Note
248+
that this drops the "egg=" part historically used to denote the requested
249+
project (and extras), which is wrong in the strictest sense, but too many
250+
people are supplying it inconsistently to cause superfluous resolution
251+
conflicts, so we choose to also ignore them.
252+
"""
253+
254+
parsed: urllib.parse.SplitResult
255+
query: Dict[str, List[str]]
256+
subdirectory: str
257+
hashes: Dict[str, str]
258+
259+
@classmethod
260+
def from_link(cls, link: Link) -> "_CleanResult":
261+
parsed = link._parsed_url
262+
netloc = parsed.netloc.rsplit("@", 1)[-1]
263+
fragment = urllib.parse.parse_qs(parsed.fragment)
264+
if "egg" in fragment:
265+
logger.debug("Ignoring egg= fragment in %s", link)
266+
try:
267+
# If there are multiple subdirectory values, use the first one.
268+
# This matches the behavior of Link.subdirectory_fragment.
269+
subdirectory = fragment["subdirectory"][0]
270+
except (IndexError, KeyError):
271+
subdirectory = ""
272+
# If there are multiple hash values under the same algorithm, use the
273+
# first one. This matches the behavior of Link.hash_value.
274+
hashes = {k: fragment[k][0] for k in _SUPPORTED_HASHES if k in fragment}
275+
return cls(
276+
parsed=parsed._replace(netloc=netloc, query="", fragment=""),
277+
query=urllib.parse.parse_qs(parsed.query),
278+
subdirectory=subdirectory,
279+
hashes=hashes,
280+
)
281+
282+
283+
@functools.lru_cache(maxsize=None)
222284
def links_equivalent(link1: Link, link2: Link) -> bool:
223-
return link1 == link2
285+
return _CleanResult.from_link(link1) == _CleanResult.from_link(link2)

tests/functional/test_install_reqs.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -423,25 +423,16 @@ def test_constraints_constrain_to_local(script, data, resolver_variant):
423423
assert 'Running setup.py install for singlemodule' in result.stdout
424424

425425

426-
def test_constrained_to_url_install_same_url(script, data, resolver_variant):
426+
def test_constrained_to_url_install_same_url(script, data):
427427
to_install = data.src.joinpath("singlemodule")
428428
constraints = path_to_url(to_install) + "#egg=singlemodule"
429429
script.scratch_path.joinpath("constraints.txt").write_text(constraints)
430430
result = script.pip(
431431
'install', '--no-index', '-f', data.find_links, '-c',
432432
script.scratch_path / 'constraints.txt', to_install,
433433
allow_stderr_warning=True,
434-
expect_error=(resolver_variant == "2020-resolver"),
435434
)
436-
if resolver_variant == "2020-resolver":
437-
assert 'Cannot install singlemodule 0.0.1' in result.stderr, str(result)
438-
assert (
439-
'because these package versions have conflicting dependencies.'
440-
in result.stderr
441-
), str(result)
442-
else:
443-
assert ('Running setup.py install for singlemodule'
444-
in result.stdout), str(result)
435+
assert 'Running setup.py install for singlemodule' in result.stdout, str(result)
445436

446437

447438
def test_double_install_spurious_hash_mismatch(

tests/functional/test_new_resolver.py

+90
Original file line numberDiff line numberDiff line change
@@ -1777,6 +1777,96 @@ def test_new_resolver_avoids_incompatible_wheel_tags_in_constraint_url(
17771777
assert_not_installed(script, "dep")
17781778

17791779

1780+
@pytest.mark.parametrize(
1781+
"suffixes_equivalent, depend_suffix, request_suffix",
1782+
[
1783+
pytest.param(
1784+
True,
1785+
"#egg=foo",
1786+
"",
1787+
id="drop-depend-egg",
1788+
),
1789+
pytest.param(
1790+
True,
1791+
"",
1792+
"#egg=foo",
1793+
id="drop-request-egg",
1794+
),
1795+
pytest.param(
1796+
True,
1797+
"#subdirectory=bar&egg=foo",
1798+
"#subdirectory=bar&egg=bar",
1799+
id="drop-egg-only",
1800+
),
1801+
pytest.param(
1802+
True,
1803+
"#subdirectory=bar&egg=foo",
1804+
"#egg=foo&subdirectory=bar",
1805+
id="fragment-ordering",
1806+
),
1807+
pytest.param(
1808+
True,
1809+
"?a=1&b=2",
1810+
"?b=2&a=1",
1811+
id="query-opordering",
1812+
),
1813+
pytest.param(
1814+
False,
1815+
"#sha512=1234567890abcdef",
1816+
"#sha512=abcdef1234567890",
1817+
id="different-keys",
1818+
),
1819+
pytest.param(
1820+
False,
1821+
"#sha512=1234567890abcdef",
1822+
"#md5=1234567890abcdef",
1823+
id="different-values",
1824+
),
1825+
pytest.param(
1826+
False,
1827+
"#subdirectory=bar&egg=foo",
1828+
"#subdirectory=rex",
1829+
id="drop-egg-still-different",
1830+
),
1831+
],
1832+
)
1833+
def test_new_resolver_direct_url_equivalent(
1834+
tmp_path,
1835+
script,
1836+
suffixes_equivalent,
1837+
depend_suffix,
1838+
request_suffix,
1839+
):
1840+
pkga = create_basic_wheel_for_package(script, name="pkga", version="1")
1841+
pkgb = create_basic_wheel_for_package(
1842+
script,
1843+
name="pkgb",
1844+
version="1",
1845+
depends=[f"pkga@{path_to_url(pkga)}{depend_suffix}"],
1846+
)
1847+
1848+
# Make pkgb visible via --find-links, but not pkga.
1849+
find_links = tmp_path.joinpath("find_links")
1850+
find_links.mkdir()
1851+
with open(pkgb, "rb") as f:
1852+
find_links.joinpath(pkgb.name).write_bytes(f.read())
1853+
1854+
# Install pkgb from --find-links, and pkga directly but from a different
1855+
# URL suffix as specified in pkgb. This should work!
1856+
script.pip(
1857+
"install",
1858+
"--no-cache-dir", "--no-index",
1859+
"--find-links", str(find_links),
1860+
f"{path_to_url(pkga)}{request_suffix}", "pkgb",
1861+
expect_error=(not suffixes_equivalent),
1862+
)
1863+
1864+
if suffixes_equivalent:
1865+
assert_installed(script, pkga="1", pkgb="1")
1866+
else:
1867+
assert_not_installed(script, "pkga", "pkgb")
1868+
1869+
17801870
def test_new_resolver_direct_url_with_extras(tmp_path, script):
17811871
pkg1 = create_basic_wheel_for_package(script, name="pkg1", version="1")
17821872
pkg2 = create_basic_wheel_for_package(

tests/unit/test_link.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from pip._internal.models.link import Link
3+
from pip._internal.models.link import Link, links_equivalent
44
from pip._internal.utils.hashes import Hashes
55

66

@@ -138,3 +138,56 @@ def test_is_hash_allowed__none_hashes(self, hashes, expected):
138138
def test_is_vcs(self, url, expected):
139139
link = Link(url)
140140
assert link.is_vcs is expected
141+
142+
143+
@pytest.mark.parametrize(
144+
"url1, url2",
145+
[
146+
pytest.param(
147+
"https://example.com/foo#egg=foo",
148+
"https://example.com/foo",
149+
id="drop-egg",
150+
),
151+
pytest.param(
152+
"https://example.com/foo#subdirectory=bar&egg=foo",
153+
"https://example.com/foo#subdirectory=bar&egg=bar",
154+
id="drop-egg-only",
155+
),
156+
pytest.param(
157+
"https://example.com/foo#subdirectory=bar&egg=foo",
158+
"https://example.com/foo#egg=foo&subdirectory=bar",
159+
id="fragment-ordering",
160+
),
161+
pytest.param(
162+
"https://example.com/foo?a=1&b=2",
163+
"https://example.com/foo?b=2&a=1",
164+
id="query-opordering",
165+
),
166+
],
167+
)
168+
def test_links_equivalent(url1, url2):
169+
assert links_equivalent(Link(url1), Link(url2))
170+
171+
172+
@pytest.mark.parametrize(
173+
"url1, url2",
174+
[
175+
pytest.param(
176+
"https://example.com/foo#sha512=1234567890abcdef",
177+
"https://example.com/foo#sha512=abcdef1234567890",
178+
id="different-keys",
179+
),
180+
pytest.param(
181+
"https://example.com/foo#sha512=1234567890abcdef",
182+
"https://example.com/foo#md5=1234567890abcdef",
183+
id="different-values",
184+
),
185+
pytest.param(
186+
"https://example.com/foo#subdirectory=bar&egg=foo",
187+
"https://example.com/foo#subdirectory=rex",
188+
id="drop-egg-still-different",
189+
),
190+
],
191+
)
192+
def test_links_equivalent_false(url1, url2):
193+
assert not links_equivalent(Link(url1), Link(url2))

0 commit comments

Comments
 (0)