Skip to content

Commit 5927013

Browse files
authored
pythongh-101144: Allow open and read_text encoding to be positional. (python#101145)
The zipfile.Path open() and read_text() encoding parameter can once again be supplied as a positional argument without causing a TypeError. 3.10.0b1 included a regression that made it keyword-only. A documentation update is included because users writing code that must be compatible with a wide range of versions will need to consider this for some time.
1 parent 9e025d3 commit 5927013

File tree

4 files changed

+95
-10
lines changed

4 files changed

+95
-10
lines changed

Doc/library/zipfile.rst

+12
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
551551
Added support for text and binary modes for open. Default
552552
mode is now text.
553553

554+
.. versionchanged:: 3.11.2
555+
The ``encoding`` parameter can be supplied as a positional argument
556+
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
557+
be compatible with unpatched 3.10 and 3.11 versions must pass all
558+
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
559+
554560
.. method:: Path.iterdir()
555561

556562
Enumerate the children of the current directory.
@@ -596,6 +602,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
596602
:class:`io.TextIOWrapper` (except ``buffer``, which is
597603
implied by the context).
598604

605+
.. versionchanged:: 3.11.2
606+
The ``encoding`` parameter can be supplied as a positional argument
607+
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
608+
be compatible with unpatched 3.10 and 3.11 versions must pass all
609+
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
610+
599611
.. method:: Path.read_bytes()
600612

601613
Read the current file as bytes.

Lib/test/test_zipfile/test_path.py

+69-5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import io
2-
import zipfile
2+
import itertools
33
import contextlib
44
import pathlib
5-
import unittest
6-
import string
75
import pickle
8-
import itertools
6+
import string
7+
from test.support.script_helper import assert_python_ok
8+
import unittest
9+
import zipfile
910

1011
from ._test_params import parameterize, Invoked
1112
from ._functools import compose
@@ -145,7 +146,69 @@ def test_open(self, alpharep):
145146
a, b, g = root.iterdir()
146147
with a.open(encoding="utf-8") as strm:
147148
data = strm.read()
148-
assert data == "content of a"
149+
self.assertEqual(data, "content of a")
150+
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
151+
data = strm.read()
152+
self.assertEqual(data, "content of a")
153+
154+
def test_open_encoding_utf16(self):
155+
in_memory_file = io.BytesIO()
156+
zf = zipfile.ZipFile(in_memory_file, "w")
157+
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
158+
zf.filename = "test_open_utf16.zip"
159+
root = zipfile.Path(zf)
160+
(path,) = root.iterdir()
161+
u16 = path.joinpath("16.txt")
162+
with u16.open('r', "utf-16") as strm:
163+
data = strm.read()
164+
self.assertEqual(data, "This was utf-16")
165+
with u16.open(encoding="utf-16") as strm:
166+
data = strm.read()
167+
self.assertEqual(data, "This was utf-16")
168+
169+
def test_open_encoding_errors(self):
170+
in_memory_file = io.BytesIO()
171+
zf = zipfile.ZipFile(in_memory_file, "w")
172+
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
173+
zf.filename = "test_read_text_encoding_errors.zip"
174+
root = zipfile.Path(zf)
175+
(path,) = root.iterdir()
176+
u16 = path.joinpath("bad-utf8.bin")
177+
178+
# encoding= as a positional argument for gh-101144.
179+
data = u16.read_text("utf-8", errors="ignore")
180+
self.assertEqual(data, "invalid utf-8: .")
181+
with u16.open("r", "utf-8", errors="surrogateescape") as f:
182+
self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")
183+
184+
# encoding= both positional and keyword is an error; gh-101144.
185+
with self.assertRaisesRegex(TypeError, "encoding"):
186+
data = u16.read_text("utf-8", encoding="utf-8")
187+
188+
# both keyword arguments work.
189+
with u16.open("r", encoding="utf-8", errors="strict") as f:
190+
# error during decoding with wrong codec.
191+
with self.assertRaises(UnicodeDecodeError):
192+
f.read()
193+
194+
def test_encoding_warnings(self):
195+
"""EncodingWarning must blame the read_text and open calls."""
196+
code = '''\
197+
import io, zipfile
198+
with zipfile.ZipFile(io.BytesIO(), "w") as zf:
199+
zf.filename = '<test_encoding_warnings in memory zip file>'
200+
zf.writestr("path/file.txt", b"Spanish Inquisition")
201+
root = zipfile.Path(zf)
202+
(path,) = root.iterdir()
203+
file_path = path.joinpath("file.txt")
204+
unused = file_path.read_text() # should warn
205+
file_path.open("r").close() # should warn
206+
'''
207+
proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
208+
warnings = proc.err.splitlines()
209+
self.assertEqual(len(warnings), 2, proc.err)
210+
self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
211+
self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")
149212

150213
def test_open_write(self):
151214
"""
@@ -187,6 +250,7 @@ def test_read(self, alpharep):
187250
root = zipfile.Path(alpharep)
188251
a, b, g = root.iterdir()
189252
assert a.read_text(encoding="utf-8") == "content of a"
253+
a.read_text("utf-8") # No positional arg TypeError per gh-101144.
190254
assert a.read_bytes() == b"content of a"
191255

192256
@pass_alpharep

Lib/zipfile/_path.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ def _name_set(self):
148148
return self.__lookup
149149

150150

151+
def _extract_text_encoding(encoding=None, *args, **kwargs):
152+
# stacklevel=3 so that the caller of the caller sees any warning.
153+
return io.text_encoding(encoding, 3), args, kwargs
154+
155+
151156
class Path:
152157
"""
153158
A pathlib-compatible interface for zip files.
@@ -257,9 +262,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
257262
if args or kwargs:
258263
raise ValueError("encoding args invalid for binary operation")
259264
return stream
260-
else:
261-
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
262-
return io.TextIOWrapper(stream, *args, **kwargs)
265+
# Text mode:
266+
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
267+
return io.TextIOWrapper(stream, encoding, *args, **kwargs)
263268

264269
@property
265270
def name(self):
@@ -282,8 +287,8 @@ def filename(self):
282287
return pathlib.Path(self.root.filename).joinpath(self.at)
283288

284289
def read_text(self, *args, **kwargs):
285-
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
286-
with self.open('r', *args, **kwargs) as strm:
290+
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
291+
with self.open('r', encoding, *args, **kwargs) as strm:
287292
return strm.read()
288293

289294
def read_bytes(self):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept
2+
``encoding`` as a positional argument. This was the behavior in Python 3.9 and
3+
earlier. 3.10 introduced a regression where supplying it as a positional
4+
argument would lead to a :exc:`TypeError`.

0 commit comments

Comments
 (0)