|
2 | 2 | import posixpath
|
3 | 3 | import re
|
4 | 4 | import urllib.parse
|
5 |
| -from typing import TYPE_CHECKING, Optional, Tuple, Union |
| 5 | +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union |
6 | 6 |
|
7 | 7 | from pip._internal.utils.filetypes import WHEEL_EXTENSION
|
8 | 8 | from pip._internal.utils.hashes import Hashes
|
@@ -242,7 +242,50 @@ def is_hash_allowed(self, hashes):
|
242 | 242 | return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
|
243 | 243 |
|
244 | 244 |
|
245 |
| -# TODO: Relax this comparison logic to ignore, for example, fragments. |
class _CleanResult(NamedTuple):
    """Normalized form of a link, used for equivalency checks.

    The resolver uses this to decide whether two URL-specified requirements
    likely point to the same distribution and can be treated as equivalent.
    Comparing URLs literally is too strict (e.g. "a=1&b=2" vs "b=2&a=1")
    and produces conflicts that are unexpected to users.

    Three normalizations are applied:

    1. The basic auth part of the netloc is dropped. This is technically
       wrong, since a server can serve different content depending on auth.
       But if a server does that, two URLs *without* auth cannot be
       guaranteed equivalent either, because the user may enter different
       credentials when prompted. The practical choice is to assume auth
       does not affect the response.
    2. The query string is parsed into a mapping, so the order of distinct
       keys does not matter.
    3. The fragment is parsed the same way and its "egg" key is explicitly
       discarded, since it is commonly supplied as the project name for
       compatibility. This is wrong in the strictest sense, but too many
       people are doing it.

    The order of values under the *same* key (in either the query or the
    fragment) is NOT normalized; "a=1&a=2" and "a=2&a=1" still compare as
    different.
    """

    # The split URL with auth removed and query/fragment blanked out.
    parsed: urllib.parse.SplitResult
    # Query string parsed into key -> list of values.
    query: Dict[str, List[str]]
    # Fragment parsed into key -> list of values, with "egg" removed.
    fragment: Dict[str, List[str]]

    @classmethod
    def from_link(cls, link: Link) -> "_CleanResult":
        """Build the normalized comparison value for ``link``."""
        url = link._parsed_url

        # Everything up to the last "@" in the netloc is auth; keep the rest.
        host = url.netloc.rsplit("@", 1)[-1]

        query_map = urllib.parse.parse_qs(url.query)

        # The fragment does not necessarily use the query string format
        # (it's a pip-specific syntax), so keep_blank_values is set to
        # retain a fragment that's not a key-value pair (e.g. "#title_1").
        raw_fragment = urllib.parse.parse_qs(url.fragment, keep_blank_values=True)
        fragment_map = {
            key: values for key, values in raw_fragment.items() if key != "egg"
        }

        stripped = url._replace(netloc=host, query="", fragment="")
        return _CleanResult(parsed=stripped, query=query_map, fragment=fragment_map)
| 287 | + |
| 288 | + |
def links_equivalent(link1, link2):
    # type: (Link, Link) -> bool
    """Return whether two links compare equal after normalization.

    Each link is converted with ``_CleanResult.from_link`` and the two
    results are compared, rather than comparing the links directly.
    """
    cleaned_first = _CleanResult.from_link(link1)
    cleaned_second = _CleanResult.from_link(link2)
    return cleaned_first == cleaned_second
0 commit comments