Skip to content

Commit 7735f58

Browse files
[3.12] pythonGH-85168: Use filesystem encoding when converting to/from file URIs (pythonGH-126852) (python#127040)
pythonGH-85168: Use filesystem encoding when converting to/from `file` URIs (pythonGH-126852)

Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the filesystem encoding when quoting and unquoting file URIs, rather than forcing use of UTF-8. No changes are needed in the `nturl2path` module because Windows always uses UTF-8, per PEP 529.

(cherry picked from commit c9b399f)

Co-authored-by: Barney Gale <[email protected]>
1 parent 153221a commit 7735f58

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

Lib/test/test_urllib.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -718,10 +718,6 @@ def tearDown(self):
718718

719719
def constructLocalFileUrl(self, filePath):
720720
filePath = os.path.abspath(filePath)
721-
try:
722-
filePath.encode("utf-8")
723-
except UnicodeEncodeError:
724-
raise unittest.SkipTest("filePath is not encodable to utf8")
725721
return "file://%s" % urllib.request.pathname2url(filePath)
726722

727723
def createNewTempFile(self, data=b""):
@@ -1571,6 +1567,13 @@ def test_pathname2url_posix(self):
15711567
self.assertEqual(fn('/a/b.c'), '/a/b.c')
15721568
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
15731569

1570+
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
1571+
def test_pathname2url_nonascii(self):
1572+
encoding = sys.getfilesystemencoding()
1573+
errors = sys.getfilesystemencodeerrors()
1574+
url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
1575+
self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
1576+
15741577
@unittest.skipUnless(sys.platform == 'win32',
15751578
'test specific to Windows pathnames.')
15761579
def test_url2pathname_win(self):
@@ -1621,6 +1624,15 @@ def test_url2pathname_posix(self):
16211624
self.assertEqual(fn('////foo/bar'), '//foo/bar')
16221625
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
16231626

1627+
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
1628+
def test_url2pathname_nonascii(self):
1629+
encoding = sys.getfilesystemencoding()
1630+
errors = sys.getfilesystemencodeerrors()
1631+
url = os_helper.FS_NONASCII
1632+
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
1633+
url = urllib.parse.quote(url, encoding=encoding, errors=errors)
1634+
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
1635+
16241636
class Utility_Tests(unittest.TestCase):
16251637
"""Testcase to test the various utility functions in the urllib."""
16261638

Lib/test/test_urllib2.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -716,10 +716,6 @@ def test_processors(self):
716716

717717

718718
def sanepathname2url(path):
719-
try:
720-
path.encode("utf-8")
721-
except UnicodeEncodeError:
722-
raise unittest.SkipTest("path is not encodable to utf8")
723719
urlpath = urllib.request.pathname2url(path)
724720
if os.name == "nt" and urlpath.startswith("///"):
725721
urlpath = urlpath[2:]

Lib/urllib/request.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1685,12 +1685,16 @@ def url2pathname(pathname):
16851685
# URL has an empty authority section, so the path begins on the
16861686
# third character.
16871687
pathname = pathname[2:]
1688-
return unquote(pathname)
1688+
encoding = sys.getfilesystemencoding()
1689+
errors = sys.getfilesystemencodeerrors()
1690+
return unquote(pathname, encoding=encoding, errors=errors)
16891691

16901692
def pathname2url(pathname):
16911693
"""OS-specific conversion from a file system path to a relative URL
16921694
of the 'file' scheme; not recommended for general use."""
1693-
return quote(pathname)
1695+
encoding = sys.getfilesystemencoding()
1696+
errors = sys.getfilesystemencodeerrors()
1697+
return quote(pathname, encoding=encoding, errors=errors)
16941698

16951699

16961700
ftpcache = {}
Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
Fix issue where :func:`urllib.request.url2pathname` and
2+
:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
3+
unquoting file URIs. They now use the :term:`filesystem encoding and error
4+
handler`.

0 commit comments

Comments
 (0)