Skip to content

Commit c53ed18

Browse files
committed
Smarter (and looser) link equivalency logic
1 parent 4704da4 commit c53ed18

File tree

5 files changed

+219
-16
lines changed

5 files changed

+219
-16
lines changed

news/10002.feature.rst

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
New resolver: Loosen URL comparison logic when checking for direct URL reference
2+
equivalency. The logic includes the following notable characteristics:
3+
4+
* The authentication part of the URL is explicitly ignored.
5+
* Most of the fragment part, including ``egg=``, is explicitly ignored. Only
6+
``subdirectory=`` and hash values (e.g. ``sha256=``) are kept.
7+
* The query part of the URL is hashed to allow ordering differences.

src/pip/_internal/models/link.py

+66-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import functools
2+
import logging
13
import os
24
import posixpath
35
import re
46
import urllib.parse
5-
from typing import TYPE_CHECKING, Optional, Tuple, Union
7+
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
68

79
from pip._internal.utils.filetypes import WHEEL_EXTENSION
810
from pip._internal.utils.hashes import Hashes
@@ -17,6 +19,11 @@
1719
if TYPE_CHECKING:
1820
from pip._internal.index.collector import HTMLPage
1921

22+
logger = logging.getLogger(__name__)
23+
24+
25+
_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")
26+
2027

2128
class Link(KeyBasedCompareMixin):
2229
"""Represents a parsed link from a Package Index's simple URL
@@ -173,7 +180,7 @@ def subdirectory_fragment(self):
173180
return match.group(1)
174181

175182
_hash_re = re.compile(
176-
r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)'
183+
r'({choices})=([a-f0-9]+)'.format(choices="|".join(_SUPPORTED_HASHES))
177184
)
178185

179186
@property
@@ -242,7 +249,62 @@ def is_hash_allowed(self, hashes):
242249
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
243250

244251

245-
# TODO: Relax this comparison logic to ignore, for example, fragments.
252+
class _CleanResult(NamedTuple):
253+
"""Convert link for equivalency check.
254+
255+
This is used in the resolver to check whether two URL-specified requirements
256+
likely point to the same distribution and can be considered equivalent. This
257+
equivalency logic avoids comparing URLs literally, which can be too strict
258+
(e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users.
259+
260+
Currently this does three things:
261+
262+
1. Drop the basic auth part. This is technically wrong since a server can
263+
serve different content based on auth, but if it does that, it is even
264+
impossible to guarantee two URLs without auth are equivalent, since
265+
the user can input different auth information when prompted. So the
266+
practical solution is to assume the auth doesn't affect the response.
267+
2. Parse the query to avoid the ordering issue. Note that ordering under the
268+
same key in the query are NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are
269+
still considered different.
270+
3. Explicitly drop most of the fragment part, except ``subdirectory=`` and
271+
hash values, since it should have no impact the downloaded content. Note
272+
that this drops the "egg=" part historically used to denote the requested
273+
project (and extras), which is wrong in the strictest sense, but too many
274+
people are supplying it inconsistently to cause superfluous resolution
275+
conflicts, so we choose to also ignore them.
276+
"""
277+
278+
parsed: urllib.parse.SplitResult
279+
query: Dict[str, List[str]]
280+
subdirectory: str
281+
hashes: Dict[str, str]
282+
283+
@classmethod
284+
def from_link(cls, link: Link) -> "_CleanResult":
285+
parsed = link._parsed_url
286+
netloc = parsed.netloc.rsplit("@", 1)[-1]
287+
fragment = urllib.parse.parse_qs(parsed.fragment)
288+
if "egg" in fragment:
289+
logger.debug("Ignoring egg= fragment in %s", link)
290+
try:
291+
# If there are multiple subdirectory values, use the first one.
292+
# This matches the behavior of Link.subdirectory_fragment.
293+
subdirectory = fragment["subdirectory"][0]
294+
except (IndexError, KeyError):
295+
subdirectory = ""
296+
# If there are multiple hash values under the same algorithm, use the
297+
# first one. This matches the behavior of Link.hash_value.
298+
hashes = {k: fragment[k][0] for k in _SUPPORTED_HASHES if k in fragment}
299+
return cls(
300+
parsed=parsed._replace(netloc=netloc, query="", fragment=""),
301+
query=urllib.parse.parse_qs(parsed.query),
302+
subdirectory=subdirectory,
303+
hashes=hashes,
304+
)
305+
306+
307+
@functools.lru_cache(maxsize=None)
246308
def links_equivalent(link1, link2):
247309
# type: (Link, Link) -> bool
248-
return link1 == link2
310+
return _CleanResult.from_link(link1) == _CleanResult.from_link(link2)

tests/functional/test_install_reqs.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -423,25 +423,16 @@ def test_constraints_constrain_to_local(script, data, resolver_variant):
423423
assert 'Running setup.py install for singlemodule' in result.stdout
424424

425425

426-
def test_constrained_to_url_install_same_url(script, data, resolver_variant):
426+
def test_constrained_to_url_install_same_url(script, data):
427427
to_install = data.src.joinpath("singlemodule")
428428
constraints = path_to_url(to_install) + "#egg=singlemodule"
429429
script.scratch_path.joinpath("constraints.txt").write_text(constraints)
430430
result = script.pip(
431431
'install', '--no-index', '-f', data.find_links, '-c',
432432
script.scratch_path / 'constraints.txt', to_install,
433433
allow_stderr_warning=True,
434-
expect_error=(resolver_variant == "2020-resolver"),
435434
)
436-
if resolver_variant == "2020-resolver":
437-
assert 'Cannot install singlemodule 0.0.1' in result.stderr, str(result)
438-
assert (
439-
'because these package versions have conflicting dependencies.'
440-
in result.stderr
441-
), str(result)
442-
else:
443-
assert ('Running setup.py install for singlemodule'
444-
in result.stdout), str(result)
435+
assert 'Running setup.py install for singlemodule' in result.stdout, str(result)
445436

446437

447438
def test_double_install_spurious_hash_mismatch(

tests/functional/test_new_resolver.py

+90
Original file line numberDiff line numberDiff line change
@@ -1791,6 +1791,96 @@ def test_new_resolver_avoids_incompatible_wheel_tags_in_constraint_url(
17911791
assert_not_installed(script, "dep")
17921792

17931793

1794+
@pytest.mark.parametrize(
1795+
"suffixes_equivalent, depend_suffix, request_suffix",
1796+
[
1797+
pytest.param(
1798+
True,
1799+
"#egg=foo",
1800+
"",
1801+
id="drop-depend-egg",
1802+
),
1803+
pytest.param(
1804+
True,
1805+
"",
1806+
"#egg=foo",
1807+
id="drop-request-egg",
1808+
),
1809+
pytest.param(
1810+
True,
1811+
"#subdirectory=bar&egg=foo",
1812+
"#subdirectory=bar&egg=bar",
1813+
id="drop-egg-only",
1814+
),
1815+
pytest.param(
1816+
True,
1817+
"#subdirectory=bar&egg=foo",
1818+
"#egg=foo&subdirectory=bar",
1819+
id="fragment-ordering",
1820+
),
1821+
pytest.param(
1822+
True,
1823+
"?a=1&b=2",
1824+
"?b=2&a=1",
1825+
id="query-opordering",
1826+
),
1827+
pytest.param(
1828+
False,
1829+
"#sha512=1234567890abcdef",
1830+
"#sha512=abcdef1234567890",
1831+
id="different-keys",
1832+
),
1833+
pytest.param(
1834+
False,
1835+
"#sha512=1234567890abcdef",
1836+
"#md5=1234567890abcdef",
1837+
id="different-values",
1838+
),
1839+
pytest.param(
1840+
False,
1841+
"#subdirectory=bar&egg=foo",
1842+
"#subdirectory=rex",
1843+
id="drop-egg-still-different",
1844+
),
1845+
],
1846+
)
1847+
def test_new_resolver_direct_url_equivalent(
1848+
tmp_path,
1849+
script,
1850+
suffixes_equivalent,
1851+
depend_suffix,
1852+
request_suffix,
1853+
):
1854+
pkga = create_basic_wheel_for_package(script, name="pkga", version="1")
1855+
pkgb = create_basic_wheel_for_package(
1856+
script,
1857+
name="pkgb",
1858+
version="1",
1859+
depends=[f"pkga@{path_to_url(pkga)}{depend_suffix}"],
1860+
)
1861+
1862+
# Make pkgb visible via --find-links, but not pkga.
1863+
find_links = tmp_path.joinpath("find_links")
1864+
find_links.mkdir()
1865+
with open(pkgb, "rb") as f:
1866+
find_links.joinpath(pkgb.name).write_bytes(f.read())
1867+
1868+
# Install pkgb from --find-links, and pkga directly but from a different
1869+
# URL suffix as specified in pkgb. This should work!
1870+
script.pip(
1871+
"install",
1872+
"--no-cache-dir", "--no-index",
1873+
"--find-links", str(find_links),
1874+
f"{path_to_url(pkga)}{request_suffix}", "pkgb",
1875+
expect_error=(not suffixes_equivalent),
1876+
)
1877+
1878+
if suffixes_equivalent:
1879+
assert_installed(script, pkga="1", pkgb="1")
1880+
else:
1881+
assert_not_installed(script, "pkga", "pkgb")
1882+
1883+
17941884
def test_new_resolver_direct_url_with_extras(tmp_path, script):
17951885
pkg1 = create_basic_wheel_for_package(script, name="pkg1", version="1")
17961886
pkg2 = create_basic_wheel_for_package(

tests/unit/test_link.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from pip._internal.models.link import Link
3+
from pip._internal.models.link import Link, links_equivalent
44
from pip._internal.utils.hashes import Hashes
55

66

@@ -138,3 +138,56 @@ def test_is_hash_allowed__none_hashes(self, hashes, expected):
138138
def test_is_vcs(self, url, expected):
139139
link = Link(url)
140140
assert link.is_vcs is expected
141+
142+
143+
@pytest.mark.parametrize(
144+
"url1, url2",
145+
[
146+
pytest.param(
147+
"https://example.com/foo#egg=foo",
148+
"https://example.com/foo",
149+
id="drop-egg",
150+
),
151+
pytest.param(
152+
"https://example.com/foo#subdirectory=bar&egg=foo",
153+
"https://example.com/foo#subdirectory=bar&egg=bar",
154+
id="drop-egg-only",
155+
),
156+
pytest.param(
157+
"https://example.com/foo#subdirectory=bar&egg=foo",
158+
"https://example.com/foo#egg=foo&subdirectory=bar",
159+
id="fragment-ordering",
160+
),
161+
pytest.param(
162+
"https://example.com/foo?a=1&b=2",
163+
"https://example.com/foo?b=2&a=1",
164+
id="query-opordering",
165+
),
166+
],
167+
)
168+
def test_links_equivalent(url1, url2):
169+
assert links_equivalent(Link(url1), Link(url2))
170+
171+
172+
@pytest.mark.parametrize(
173+
"url1, url2",
174+
[
175+
pytest.param(
176+
"https://example.com/foo#sha512=1234567890abcdef",
177+
"https://example.com/foo#sha512=abcdef1234567890",
178+
id="different-keys",
179+
),
180+
pytest.param(
181+
"https://example.com/foo#sha512=1234567890abcdef",
182+
"https://example.com/foo#md5=1234567890abcdef",
183+
id="different-values",
184+
),
185+
pytest.param(
186+
"https://example.com/foo#subdirectory=bar&egg=foo",
187+
"https://example.com/foo#subdirectory=rex",
188+
id="drop-egg-still-different",
189+
),
190+
],
191+
)
192+
def test_links_equivalent_false(url1, url2):
193+
assert not links_equivalent(Link(url1), Link(url2))

0 commit comments

Comments
 (0)