Skip to content

Commit 50231c5

Browse files
committed
pythonGH-126838: url2pathname(): handle non-empty authority section on POSIX
Adjust `urllib.request.url2pathname()` to parse the URL authority and path with `urlsplit()` on POSIX. If the authority is empty or resolves to the current host, it is ignored and the URL path is used as the pathname. Otherwise (the authority names a non-local host), `URLError` is raised.
1 parent cae9d9d commit 50231c5

File tree

3 files changed

+25
-34
lines changed

3 files changed

+25
-34
lines changed

Lib/test/test_urllib.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from test.support import socket_helper
1414
from test.support import warnings_helper
1515
import os
16+
import socket
1617
try:
1718
import ssl
1819
except ImportError:
@@ -713,7 +714,7 @@ def constructLocalFileUrl(self, filePath):
713714
filePath.encode("utf-8")
714715
except UnicodeEncodeError:
715716
raise unittest.SkipTest("filePath is not encodable to utf8")
716-
return "file://%s" % urllib.request.pathname2url(filePath)
717+
return "file:%s" % urllib.request.pathname2url(filePath)
717718

718719
def createNewTempFile(self, data=b""):
719720
"""Creates a new temporary file containing the specified data,
@@ -1607,10 +1608,12 @@ def test_url2pathname_win(self):
16071608
def test_url2pathname_posix(self):
16081609
fn = urllib.request.url2pathname
16091610
self.assertEqual(fn('/foo/bar'), '/foo/bar')
1610-
self.assertEqual(fn('//foo/bar'), '//foo/bar')
1611+
self.assertRaises(urllib.error.URLError, fn, '//foo/bar')
16111612
self.assertEqual(fn('///foo/bar'), '/foo/bar')
16121613
self.assertEqual(fn('////foo/bar'), '//foo/bar')
1613-
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
1614+
self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar')
1615+
self.assertEqual(fn('//127.0.0.1/foo/bar'), '/foo/bar')
1616+
self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), '/foo/bar')
16141617

16151618
class Utility_Tests(unittest.TestCase):
16161619
"""Testcase to test the various utility functions in the urllib."""

Lib/urllib/request.py

Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,16 +1448,6 @@ def parse_http_list(s):
14481448
return [part.strip() for part in res]
14491449

14501450
class FileHandler(BaseHandler):
1451-
# Use local file or FTP depending on form of URL
1452-
def file_open(self, req):
1453-
url = req.selector
1454-
if url[:2] == '//' and url[2:3] != '/' and (req.host and
1455-
req.host != 'localhost'):
1456-
if not req.host in self.get_names():
1457-
raise URLError("file:// scheme is supported only on localhost")
1458-
else:
1459-
return self.open_local_file(req)
1460-
14611451
# names for the localhost
14621452
names = None
14631453
def get_names(self):
@@ -1474,9 +1464,8 @@ def get_names(self):
14741464
def open_local_file(self, req):
14751465
import email.utils
14761466
import mimetypes
1477-
host = req.host
1478-
filename = req.selector
1479-
localfile = url2pathname(filename)
1467+
filename = req.full_url
1468+
localfile = url2pathname(filename.removeprefix('file:'))
14801469
try:
14811470
stats = os.stat(localfile)
14821471
size = stats.st_size
@@ -1485,24 +1474,20 @@ def open_local_file(self, req):
14851474
headers = email.message_from_string(
14861475
'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
14871476
(mtype or 'text/plain', size, modified))
1488-
if host:
1489-
host, port = _splitport(host)
1490-
if not host or \
1491-
(not port and _safe_gethostbyname(host) in self.get_names()):
1492-
if host:
1493-
origurl = 'file://' + host + filename
1494-
else:
1495-
origurl = 'file://' + filename
1496-
return addinfourl(open(localfile, 'rb'), headers, origurl)
1477+
return addinfourl(open(localfile, 'rb'), headers, filename)
14971478
except OSError as exp:
14981479
raise URLError(exp)
1499-
raise URLError('file not on local host')
15001480

1501-
def _safe_gethostbyname(host):
1481+
file_open = open_local_file
1482+
1483+
def _is_local_host(host):
1484+
if not host or host == 'localhost':
1485+
return True
15021486
try:
1503-
return socket.gethostbyname(host)
1487+
name = socket.gethostbyname(host)
15041488
except socket.gaierror:
1505-
return None
1489+
return False
1490+
return name in FileHandler().get_names()
15061491

15071492
class FTPHandler(BaseHandler):
15081493
def ftp_open(self, req):
@@ -1653,13 +1638,12 @@ def data_open(self, req):
16531638
if os.name == 'nt':
16541639
from nturl2path import url2pathname, pathname2url
16551640
else:
1656-
def url2pathname(pathname):
1641+
def url2pathname(url):
16571642
"""OS-specific conversion from a relative URL of the 'file' scheme
16581643
to a file system path; not recommended for general use."""
1659-
if pathname[:3] == '///':
1660-
# URL has an empty authority section, so the path begins on the
1661-
# third character.
1662-
pathname = pathname[2:]
1644+
authority, pathname = urlsplit(f'file:{url}')[1:3]
1645+
if not _is_local_host(authority):
1646+
raise URLError(f'URL {url!r} uses non-local authority {authority!r}')
16631647
return unquote(pathname)
16641648

16651649
def pathname2url(pathname):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix issue where :func:`urllib.request.url2pathname` included any URL
2+
authority in the resulting path, except on Windows. It now discards a local
3+
authority, and raises :exc:`urllib.error.URLError` for a non-local
4+
authority.

0 commit comments

Comments
 (0)