Skip to content

Commit 913757c

Browse files
committed
Do not clean base_url
When the `base_url` is a `[]` protected IPv6 address, the `_clean_link()` function converts `[` to `%5B` and `]` to `%5D`, which renders the `base_url` invalid. For example: ``` Starting new HTTP connection (1): fd00:0:0:236::100:8181 http://fd00:0:0:236::100:8181 "GET /os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt HTTP/1.1" 200 None Setting setuptools==40.6.3 (from -c http://[fd00:0:0:236::100]:8181/os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt (line 204)) extras to: () Looking in indexes: http://[fd00:0:0:236::100]:8181/simple Collecting setuptools==40.6.3 (from -c http://[fd00:0:0:236::100]:8181/os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt (line 204)) 1 location(s) to search for versions of setuptools: * http://[fd00:0:0:236::100]:8181/simple/setuptools/ Getting page http://[fd00:0:0:236::100]:8181/simple/setuptools/ http://fd00:0:0:236::100:8181 "GET /simple/setuptools/ HTTP/1.1" 200 376 Analyzing links from page http://[fd00:0:0:236::100]:8181/simple/setuptools/ _package_versions: link = http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00:0:0: 236::100]:8181/simple/setuptools/) _link_package_versions: link = http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00 :0:0:236::100]:8181/simple/setuptools/) Found link http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00:0:0:236::100]:8181/ simple/setuptools/), version: 40.6.3 Using version 40.6.3 (newest of versions: 40.6.3) Could not install packages due to an EnvironmentError. InvalidURL: Failed to parse: %5bfd00:0:0:236::100%5d:8181 ``` This change uses the vendored `urllib` library to split the host part off of the url before URL quoting only the path part. Fixes: #6285 Signed-off-by: Nicolas Bock <[email protected]>
1 parent 78744e8 commit 913757c

File tree

3 files changed

+81
-5
lines changed

3 files changed

+81
-5
lines changed

news/6285.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix incorrect URL quoting of IPv6 addresses.

src/pip/_internal/index.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -939,15 +939,28 @@ def _get_encoding_from_headers(headers):
939939
return None
940940

941941

942-
_CLEAN_LINK_RE = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
943-
944-
945942
def _clean_link(url):
946943
# type: (str) -> str
947944
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
948945
the link, it will be rewritten to %20 (while not over-quoting
949946
% or other characters)."""
950-
return _CLEAN_LINK_RE.sub(lambda match: '%%%2x' % ord(match.group(0)), url)
947+
# Split the URL into parts according to the general structure
948+
# `scheme://netloc/path;parameters?query#fragment`. Note that the
949+
# `netloc` can be empty and the URI will then refer to a local
950+
# filesystem path.
951+
result = urllib_parse.urlparse(url)
952+
# In both cases below we unquote prior to quoting to make sure
953+
# nothing is double quoted.
954+
if result.netloc == "":
955+
# On Windows the path part might contain a drive letter which
956+
# should not be quoted. On Linux where drive letters do not
957+
# exist, the colon should be quoted. We rely on urllib.request
958+
# to do the right thing here.
959+
path = urllib_request.pathname2url(
960+
urllib_request.url2pathname(result.path))
961+
else:
962+
path = urllib_parse.quote(urllib_parse.unquote(result.path))
963+
return urllib_parse.urlunparse(result._replace(path=path))
951964

952965

953966
class HTMLPage(object):

tests/unit/test_index.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pip._internal.download import PipSession
99
from pip._internal.index import (
10-
Link, PackageFinder, _determine_base_url, _egg_info_matches,
10+
Link, PackageFinder, _clean_link, _determine_base_url, _egg_info_matches,
1111
_find_name_version_sep, _get_html_page,
1212
)
1313

@@ -280,3 +280,65 @@ def test_request_retries(caplog):
280280
'Could not fetch URL http://localhost: Retry error - skipping'
281281
in caplog.text
282282
)
283+
284+
285+
@pytest.mark.parametrize(
286+
("url", "clean_url"),
287+
[
288+
# URL with hostname and port. Port separator should not be quoted.
289+
("https://localhost.localdomain:8181/path/with space/",
290+
"https://localhost.localdomain:8181/path/with%20space/"),
291+
# URL that is already properly quoted. The quoting `%`
292+
# characters should not be quoted again.
293+
("https://localhost.localdomain:8181/path/with%20quoted%20space/",
294+
"https://localhost.localdomain:8181/path/with%20quoted%20space/"),
295+
# URL with IPv4 address and port.
296+
("https://127.0.0.1:8181/path/with space/",
297+
"https://127.0.0.1:8181/path/with%20space/"),
298+
# URL with IPv6 address and port. The `[]` brackets around the
299+
# IPv6 address should not be quoted.
300+
("https://[fd00:0:0:236::100]:8181/path/with space/",
301+
"https://[fd00:0:0:236::100]:8181/path/with%20space/"),
302+
# URL with query. The leading `?` should not be quoted.
303+
("https://localhost.localdomain:8181/path/with/query?request=test",
304+
"https://localhost.localdomain:8181/path/with/query?request=test"),
305+
# URL with colon in the path portion.
306+
("https://localhost.localdomain:8181/path:/with:/colon",
307+
"https://localhost.localdomain:8181/path%3A/with%3A/colon"),
308+
# URL with something that looks like a drive letter, but is
309+
# not. The `:` should be quoted.
310+
("https://localhost.localdomain/T:/path/",
311+
"https://localhost.localdomain/T%3A/path/")
312+
]
313+
)
314+
def test_clean_link(url, clean_url):
315+
assert(_clean_link(url) == clean_url)
316+
317+
318+
@pytest.mark.parametrize(
319+
("url", "clean_url"),
320+
[
321+
# URL with Windows drive letter. The `:` after the drive
322+
# letter should not be quoted. The trailing `/` should be
323+
# removed.
324+
("file:///T:/path/with spaces/",
325+
"file:///T:/path/with%20spaces")
326+
]
327+
)
328+
@pytest.mark.skipif("sys.platform != 'win32'")
329+
def test_clean_link_windows(url, clean_url):
330+
assert(_clean_link(url) == clean_url)
331+
332+
333+
@pytest.mark.parametrize(
334+
("url", "clean_url"),
335+
[
336+
# URL with Windows drive letter, running on non-windows
337+
# platform. The `:` after the drive should be quoted.
338+
("file:///T:/path/with spaces/",
339+
"file:///T%3A/path/with%20spaces/")
340+
]
341+
)
342+
@pytest.mark.skipif("sys.platform == 'win32'")
343+
def test_clean_link_non_windows(url, clean_url):
344+
assert(_clean_link(url) == clean_url)

0 commit comments

Comments
 (0)