|
| 1 | +import functools |
| 2 | +import logging |
1 | 3 | import os
|
2 | 4 | import posixpath
|
3 | 5 | import re
|
4 | 6 | import urllib.parse
|
5 |
| -from typing import TYPE_CHECKING, Optional, Tuple, Union |
| 7 | +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union |
6 | 8 |
|
7 | 9 | from pip._internal.utils.filetypes import WHEEL_EXTENSION
|
8 | 10 | from pip._internal.utils.hashes import Hashes
|
|
17 | 19 | if TYPE_CHECKING:
|
18 | 20 | from pip._internal.index.collector import HTMLPage
|
19 | 21 |
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Hash algorithm names recognized in a link's URL fragment. Kept in one place
# so the fragment-matching regex and the equivalency check share one list.
_SUPPORTED_HASHES = (
    "sha1",
    "sha224",
    "sha384",
    "sha256",
    "sha512",
    "md5",
)
| 26 | + |
20 | 27 |
|
21 | 28 | class Link(KeyBasedCompareMixin):
|
22 | 29 | """Represents a parsed link from a Package Index's simple URL
|
@@ -173,7 +180,7 @@ def subdirectory_fragment(self):
|
173 | 180 | return match.group(1)
|
174 | 181 |
|
175 | 182 | _hash_re = re.compile(
|
176 |
| - r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)' |
| 183 | + r'({choices})=([a-f0-9]+)'.format(choices="|".join(_SUPPORTED_HASHES)) |
177 | 184 | )
|
178 | 185 |
|
179 | 186 | @property
|
@@ -242,7 +249,62 @@ def is_hash_allowed(self, hashes):
|
242 | 249 | return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
|
243 | 250 |
|
244 | 251 |
|
245 |
| -# TODO: Relax this comparison logic to ignore, for example, fragments. |
class _CleanResult(NamedTuple):
    """Normalized view of a link, used to test two links for equivalency.

    The resolver uses this to decide whether two URL-specified requirements
    likely point to the same distribution and can be treated as equivalent.
    Comparing the URLs literally is too strict (e.g. "a=1&b=2" vs "b=2&a=1")
    and produces conflicts that users do not expect.

    Three normalizations are currently applied:

    1. The basic auth part is dropped. This is technically wrong, since a
       server can serve different content based on auth. But if it does, it
       is impossible to guarantee that even two URLs without auth are
       equivalent, because the user can enter different credentials when
       prompted. The practical solution is to assume that auth does not
       affect the response.
    2. The query is parsed so that the ordering of keys does not matter. The
       ordering of values under the same key is NOT normalized; i.e.
       "a=1&a=2" and "a=2&a=1" are still considered different.
    3. Most of the fragment is dropped, except ``subdirectory=`` and hash
       values, since the rest should have no impact on the downloaded
       content. This also drops the "egg=" part historically used to denote
       the requested project (and extras). That is wrong in the strictest
       sense, but too many people supply it inconsistently, which causes
       superfluous resolution conflicts, so it is ignored as well.
    """

    parsed: urllib.parse.SplitResult
    query: Dict[str, List[str]]
    subdirectory: str
    hashes: Dict[str, str]

    @classmethod
    def from_link(cls, link: Link) -> "_CleanResult":
        """Build the normalized representation of ``link``.

        Reads ``link._parsed_url`` and returns a ``_CleanResult`` holding the
        URL with auth, query and fragment removed, plus the parsed query, the
        subdirectory and the hashes extracted from the fragment.
        """
        url = link._parsed_url

        # Everything up to the last "@" in the netloc is auth information.
        host = url.netloc.rpartition("@")[2]

        fragment_params = urllib.parse.parse_qs(url.fragment)
        if "egg" in fragment_params:
            logger.debug("Ignoring egg= fragment in %s", link)

        # If there are multiple subdirectory values, use the first one.
        # This matches the behavior of Link.subdirectory_fragment.
        subdirectory_values = fragment_params.get("subdirectory")
        subdirectory = subdirectory_values[0] if subdirectory_values else ""

        # If there are multiple hash values under the same algorithm, use the
        # first one. This matches the behavior of Link.hash_value.
        hashes = {}
        for algorithm in _SUPPORTED_HASHES:
            if algorithm in fragment_params:
                hashes[algorithm] = fragment_params[algorithm][0]

        # Query and fragment are compared through the dedicated fields, so
        # they are blanked out of the URL itself.
        stripped_url = url._replace(netloc=host, query="", fragment="")
        return cls(
            parsed=stripped_url,
            query=urllib.parse.parse_qs(url.query),
            subdirectory=subdirectory,
            hashes=hashes,
        )
| 305 | + |
| 306 | + |
@functools.lru_cache(maxsize=None)
def links_equivalent(link1, link2):
    # type: (Link, Link) -> bool
    """Return whether two links are equivalent after normalization.

    Each link is converted with ``_CleanResult.from_link`` and the two
    results are compared for equality. Results are memoized per
    ``(link1, link2)`` pair by ``functools.lru_cache``.
    """
    cleaned_first = _CleanResult.from_link(link1)
    cleaned_second = _CleanResult.from_link(link2)
    return cleaned_first == cleaned_second
0 commit comments