Skip to content

Commit c55d17c

Browse files
committed
Smarter (and looser) link equivalency logic
1 parent 7c3abcc commit c55d17c

File tree

1 file changed

+46
-3
lines changed

1 file changed

+46
-3
lines changed

src/pip/_internal/models/link.py

+46-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import posixpath
33
import re
44
import urllib.parse
5-
from typing import TYPE_CHECKING, Optional, Tuple, Union
5+
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
66

77
from pip._internal.utils.filetypes import WHEEL_EXTENSION
88
from pip._internal.utils.hashes import Hashes
@@ -242,7 +242,50 @@ def is_hash_allowed(self, hashes):
242242
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
243243

244244

245-
# TODO: Relax this comparison logic to ignore, for example, fragments.
245+
class _CleanResult(NamedTuple):
    """Normalized form of a link, used for equivalency checks.

    This is used in the resolver to check whether two URL-specified
    requirements likely point to the same distribution and can be considered
    equivalent. Comparing URLs literally is too strict (e.g. "a=1&b=2" vs
    "b=2&a=1") and produces conflicts that are unexpected to users.

    Currently this does four things:

    1. Drop the basic auth part. This is technically wrong since a server can
       serve different content based on auth, but if it does that, it is even
       impossible to guarantee two URLs without auth are equivalent, since
       the user can input different auth information when prompted. So the
       practical solution is to assume the auth doesn't affect the response.
    2. Treat an empty host in a "file:" URL as "localhost", since RFC 8089
       defines both spellings as referring to the local machine.
    3. Parse the query to avoid the ordering issue.
    4. Parse the fragment, and explicitly drop the "egg=" part since it is
       commonly provided as the project name for compatibility. This is wrong
       in the strictest sense, but too many people are doing it.

    Note that the ordering of values under the same key in the query and the
    fragment is NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are still
    considered different.
    """

    # The split URL with credentials removed and query/fragment blanked out.
    parsed: urllib.parse.SplitResult
    # The query string parsed into key -> list of values.
    query: Dict[str, List[str]]
    # The fragment parsed the same way, minus any "egg" key.
    fragment: Dict[str, List[str]]

    @classmethod
    def from_link(cls, link: "Link") -> "_CleanResult":
        """Build the normalized form of *link* from its ``_parsed_url``."""
        parsed = link._parsed_url
        # Everything up to the last "@" is user info; keep only the host part.
        netloc = parsed.netloc.rsplit("@", 1)[-1]
        # According to RFC 8089, an empty host in file: means localhost.
        if parsed.scheme == "file" and not netloc:
            netloc = "localhost"
        # The fragment does not necessarily use the query string format
        # (it's a pip-specific syntax), so we set keep_blank_values to keep
        # a fragment that's not a key-value pair (e.g. "#title_1").
        frag_qs = urllib.parse.parse_qs(parsed.fragment, keep_blank_values=True)
        frag_qs.pop("egg", None)
        return cls(
            parsed=parsed._replace(netloc=netloc, query="", fragment=""),
            query=urllib.parse.parse_qs(parsed.query),
            fragment=frag_qs,
        )
287+
288+
246289
def links_equivalent(link1, link2):
    # type: (Link, Link) -> bool
    """Return True when both links reduce to the same ``_CleanResult``.

    The links are not compared directly; each one is first converted with
    ``_CleanResult.from_link`` and the two converted values are compared.
    """
    cleaned_first = _CleanResult.from_link(link1)
    cleaned_second = _CleanResult.from_link(link2)
    return cleaned_first == cleaned_second

0 commit comments

Comments
 (0)