Skip to content

Commit a323183

Browse files
committed
Smarter (and looser) link equivalency logic
1 parent 4704da4 commit a323183

File tree

5 files changed

+201
-15
lines changed

5 files changed

+201
-15
lines changed

news/10002.feature.rst

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
New resolver: Loosen URL comparison logic when checking for direct URL reference
2+
equivalency. The logic includes the following notable characteristics:
3+
4+
* The authentication part of the URL is explicitly ignored.
5+
* The ``egg=`` fragment is explicitly ignored.
6+
* Query and fragment parts of the URL are hashed to allow ordering differences.

src/pip/_internal/models/link.py

+49-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import functools
2+
import logging
13
import os
24
import posixpath
35
import re
46
import urllib.parse
5-
from typing import TYPE_CHECKING, Optional, Tuple, Union
7+
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
68

79
from pip._internal.utils.filetypes import WHEEL_EXTENSION
810
from pip._internal.utils.hashes import Hashes
@@ -17,6 +19,8 @@
1719
if TYPE_CHECKING:
1820
from pip._internal.index.collector import HTMLPage
1921

22+
logger = logging.getLogger(__name__)
23+
2024

2125
class Link(KeyBasedCompareMixin):
2226
"""Represents a parsed link from a Package Index's simple URL
@@ -242,7 +246,49 @@ def is_hash_allowed(self, hashes):
242246
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
243247

244248

245-
# TODO: Relax this comparison logic to ignore, for example, fragments.
249+
class _CleanResult(NamedTuple):
250+
"""Convert link for equivalency check.
251+
252+
This is used in the resolver to check whether two URL-specified requirements
253+
likely point to the same distribution and can be considered equivalent. This
254+
equivalency logic avoids comparing URLs literally, which can be too strict
255+
(e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users.
256+
257+
Currently this does three things:
258+
259+
1. Drop the basic auth part. This is technically wrong since a server can
260+
serve different content based on auth, but if it does that, it is even
261+
impossible to guarantee two URLs without auth are equivalent, since
262+
the user can input different auth information when prompted. So the
263+
practical solution is to assume the auth doesn't affect the response.
264+
2. Parse the query to avoid the ordering issue.
265+
3. Parse the fragment, and explicitly drop the "egg=" part since it is
266+
commonly provided as the project name for compatibility. This is wrong in
267+
the strictest sense, but too many people are doing it.
268+
269+
Note that query value ordering under the same key in query and fragment are
270+
NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are still considered different.
271+
"""
272+
273+
parsed: urllib.parse.SplitResult
274+
query: Dict[str, List[str]]
275+
fragment: Dict[str, List[str]]
276+
277+
@classmethod
278+
def from_link(cls, link: Link) -> "_CleanResult":
279+
parsed = link._parsed_url
280+
netloc = parsed.netloc.rsplit("@", 1)[-1]
281+
fragment = urllib.parse.parse_qs(parsed.fragment)
282+
if fragment.pop("egg", None):
283+
logger.debug("Ignoring egg= fragment in %s", link)
284+
return _CleanResult(
285+
parsed=parsed._replace(netloc=netloc, query="", fragment=""),
286+
query=urllib.parse.parse_qs(parsed.query),
287+
fragment=fragment,
288+
)
289+
290+
291+
@functools.lru_cache(maxsize=None)
246292
def links_equivalent(link1, link2):
247293
# type: (Link, Link) -> bool
248-
return link1 == link2
294+
return _CleanResult.from_link(link1) == _CleanResult.from_link(link2)

tests/functional/test_install_reqs.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -423,25 +423,16 @@ def test_constraints_constrain_to_local(script, data, resolver_variant):
423423
assert 'Running setup.py install for singlemodule' in result.stdout
424424

425425

426-
def test_constrained_to_url_install_same_url(script, data, resolver_variant):
426+
def test_constrained_to_url_install_same_url(script, data):
427427
to_install = data.src.joinpath("singlemodule")
428428
constraints = path_to_url(to_install) + "#egg=singlemodule"
429429
script.scratch_path.joinpath("constraints.txt").write_text(constraints)
430430
result = script.pip(
431431
'install', '--no-index', '-f', data.find_links, '-c',
432432
script.scratch_path / 'constraints.txt', to_install,
433433
allow_stderr_warning=True,
434-
expect_error=(resolver_variant == "2020-resolver"),
435434
)
436-
if resolver_variant == "2020-resolver":
437-
assert 'Cannot install singlemodule 0.0.1' in result.stderr, str(result)
438-
assert (
439-
'because these package versions have conflicting dependencies.'
440-
in result.stderr
441-
), str(result)
442-
else:
443-
assert ('Running setup.py install for singlemodule'
444-
in result.stdout), str(result)
435+
assert 'Running setup.py install for singlemodule' in result.stdout, str(result)
445436

446437

447438
def test_double_install_spurious_hash_mismatch(

tests/functional/test_new_resolver.py

+90
Original file line numberDiff line numberDiff line change
@@ -1791,6 +1791,96 @@ def test_new_resolver_avoids_incompatible_wheel_tags_in_constraint_url(
17911791
assert_not_installed(script, "dep")
17921792

17931793

1794+
@pytest.mark.parametrize(
1795+
"suffixes_equivalent, depend_suffix, request_suffix",
1796+
[
1797+
pytest.param(
1798+
True,
1799+
"#egg=foo",
1800+
"",
1801+
id="drop-depend-egg",
1802+
),
1803+
pytest.param(
1804+
True,
1805+
"",
1806+
"#egg=foo",
1807+
id="drop-request-egg",
1808+
),
1809+
pytest.param(
1810+
True,
1811+
"#subdirectory=bar&egg=foo",
1812+
"#subdirectory=bar&egg=bar",
1813+
id="drop-egg-only",
1814+
),
1815+
pytest.param(
1816+
True,
1817+
"#subdirectory=bar&egg=foo",
1818+
"#egg=foo&subdirectory=bar",
1819+
id="fragment-ordering",
1820+
),
1821+
pytest.param(
1822+
True,
1823+
"?a=1&b=2",
1824+
"?b=2&a=1",
1825+
id="query-opordering",
1826+
),
1827+
pytest.param(
1828+
False,
1829+
"#sha512=1234567890abcdef",
1830+
"#sha512=abcdef1234567890",
1831+
id="different-keys",
1832+
),
1833+
pytest.param(
1834+
False,
1835+
"#sha512=1234567890abcdef",
1836+
"#md5=1234567890abcdef",
1837+
id="different-values",
1838+
),
1839+
pytest.param(
1840+
False,
1841+
"#subdirectory=bar&egg=foo",
1842+
"#subdirectory=rex",
1843+
id="drop-egg-still-different",
1844+
),
1845+
],
1846+
)
1847+
def test_new_resolver_direct_url_equivalent(
1848+
tmp_path,
1849+
script,
1850+
suffixes_equivalent,
1851+
depend_suffix,
1852+
request_suffix,
1853+
):
1854+
pkga = create_basic_wheel_for_package(script, name="pkga", version="1")
1855+
pkgb = create_basic_wheel_for_package(
1856+
script,
1857+
name="pkgb",
1858+
version="1",
1859+
depends=[f"pkga@{path_to_url(pkga)}{depend_suffix}"],
1860+
)
1861+
1862+
# Make pkgb visible via --find-links, but not pkga.
1863+
find_links = tmp_path.joinpath("find_links")
1864+
find_links.mkdir()
1865+
with open(pkgb, "rb") as f:
1866+
find_links.joinpath(pkgb.name).write_bytes(f.read())
1867+
1868+
# Install pkgb from --find-links, and pkga directly but from a different
1869+
# URL suffix as specified in pkgb. This should work!
1870+
script.pip(
1871+
"install",
1872+
"--no-cache-dir", "--no-index",
1873+
"--find-links", str(find_links),
1874+
f"{path_to_url(pkga)}{request_suffix}", "pkgb",
1875+
expect_error=(not suffixes_equivalent),
1876+
)
1877+
1878+
if suffixes_equivalent:
1879+
assert_installed(script, pkga="1", pkgb="1")
1880+
else:
1881+
assert_not_installed(script, "pkga", "pkgb")
1882+
1883+
17941884
def test_new_resolver_direct_url_with_extras(tmp_path, script):
17951885
pkg1 = create_basic_wheel_for_package(script, name="pkg1", version="1")
17961886
pkg2 = create_basic_wheel_for_package(

tests/unit/test_link.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from pip._internal.models.link import Link
3+
from pip._internal.models.link import Link, links_equivalent
44
from pip._internal.utils.hashes import Hashes
55

66

@@ -138,3 +138,56 @@ def test_is_hash_allowed__none_hashes(self, hashes, expected):
138138
def test_is_vcs(self, url, expected):
139139
link = Link(url)
140140
assert link.is_vcs is expected
141+
142+
143+
@pytest.mark.parametrize(
144+
"url1, url2",
145+
[
146+
pytest.param(
147+
"https://example.com/foo#egg=foo",
148+
"https://example.com/foo",
149+
id="drop-egg",
150+
),
151+
pytest.param(
152+
"https://example.com/foo#subdirectory=bar&egg=foo",
153+
"https://example.com/foo#subdirectory=bar&egg=bar",
154+
id="drop-egg-only",
155+
),
156+
pytest.param(
157+
"https://example.com/foo#subdirectory=bar&egg=foo",
158+
"https://example.com/foo#egg=foo&subdirectory=bar",
159+
id="fragment-ordering",
160+
),
161+
pytest.param(
162+
"https://example.com/foo?a=1&b=2",
163+
"https://example.com/foo?b=2&a=1",
164+
id="query-opordering",
165+
),
166+
],
167+
)
168+
def test_links_equivalent(url1, url2):
169+
assert links_equivalent(Link(url1), Link(url2))
170+
171+
172+
@pytest.mark.parametrize(
173+
"url1, url2",
174+
[
175+
pytest.param(
176+
"https://example.com/foo#sha512=1234567890abcdef",
177+
"https://example.com/foo#sha512=abcdef1234567890",
178+
id="different-keys",
179+
),
180+
pytest.param(
181+
"https://example.com/foo#sha512=1234567890abcdef",
182+
"https://example.com/foo#md5=1234567890abcdef",
183+
id="different-values",
184+
),
185+
pytest.param(
186+
"https://example.com/foo#subdirectory=bar&egg=foo",
187+
"https://example.com/foo#subdirectory=rex",
188+
id="drop-egg-still-different",
189+
),
190+
],
191+
)
192+
def test_links_equivalent_false(url1, url2):
193+
assert not links_equivalent(Link(url1), Link(url2))

0 commit comments

Comments
 (0)