|
| 1 | +import functools |
| 2 | +import logging |
1 | 3 | import os
|
2 | 4 | import posixpath
|
3 | 5 | import re
|
4 | 6 | import urllib.parse
|
5 |
| -from typing import TYPE_CHECKING, Optional, Tuple, Union |
| 7 | +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union |
6 | 8 |
|
7 | 9 | from pip._internal.utils.filetypes import WHEEL_EXTENSION
|
8 | 10 | from pip._internal.utils.hashes import Hashes
|
|
17 | 19 | if TYPE_CHECKING:
|
18 | 20 | from pip._internal.index.collector import HTMLPage
|
19 | 21 |
|
| 22 | +logger = logging.getLogger(__name__) |
| 23 | + |
20 | 24 |
|
21 | 25 | class Link(KeyBasedCompareMixin):
|
22 | 26 | """Represents a parsed link from a Package Index's simple URL
|
@@ -242,7 +246,49 @@ def is_hash_allowed(self, hashes):
|
242 | 246 | return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
|
243 | 247 |
|
244 | 248 |
|
245 |
| -# TODO: Relax this comparison logic to ignore, for example, fragments. |
| 249 | +class _CleanResult(NamedTuple): |
| 250 | + """Convert link for equivalency check. |
| 251 | +
|
| 252 | + This is used in the resolver to check whether two URL-specified requirements |
| 253 | + likely point to the same distribution and can be considered equivalent. This |
| 254 | + equivalency logic avoids comparing URLs literally, which can be too strict |
| 255 | + (e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users. |
| 256 | +
|
| 257 | + Currently this does three things: |
| 258 | +
|
| 259 | + 1. Drop the basic auth part. This is technically wrong since a server can |
| 260 | + serve different content based on auth, but if it does that, it is even |
| 261 | + impossible to guarantee two URLs without auth are equivalent, since |
| 262 | + the user can input different auth information when prompted. So the |
| 263 | + practical solution is to assume the auth doesn't affect the response. |
| 264 | + 2. Parse the query to avoid the ordering issue. |
| 265 | + 3. Parse the fragment, and explicitly drop the "egg=" part since it is |
| 266 | + commonly provided as the project name for compatibility. This is wrong in |
| 267 | + the strictest sense, but too many people are doing it. |
| 268 | +
|
| 269 | + Note that query value ordering under the same key in query and fragment are |
| 270 | + NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are still considered different. |
| 271 | + """ |
| 272 | + |
| 273 | + parsed: urllib.parse.SplitResult |
| 274 | + query: Dict[str, List[str]] |
| 275 | + fragment: Dict[str, List[str]] |
| 276 | + |
| 277 | + @classmethod |
| 278 | + def from_link(cls, link: Link) -> "_CleanResult": |
| 279 | + parsed = link._parsed_url |
| 280 | + netloc = parsed.netloc.rsplit("@", 1)[-1] |
| 281 | + fragment = urllib.parse.parse_qs(parsed.fragment) |
| 282 | + if fragment.pop("egg", None): |
| 283 | + logger.debug("Ignoring egg= fragment in %s", link) |
| 284 | + return _CleanResult( |
| 285 | + parsed=parsed._replace(netloc=netloc, query="", fragment=""), |
| 286 | + query=urllib.parse.parse_qs(parsed.query), |
| 287 | + fragment=fragment, |
| 288 | + ) |
| 289 | + |
| 290 | + |
@functools.lru_cache(maxsize=None)
def links_equivalent(link1, link2):
    # type: (Link, Link) -> bool
    """Return whether two links compare equal after normalization.

    Each link is reduced with ``_CleanResult.from_link`` and the two reduced
    forms are compared, instead of comparing the links literally. Results are
    memoized without a size limit by ``functools.lru_cache``, so both
    arguments must be hashable.
    """
    cleaned_first = _CleanResult.from_link(link1)
    cleaned_second = _CleanResult.from_link(link2)
    return cleaned_first == cleaned_second
0 commit comments