From 547b06f6afdad773c82d609f41c937c4d9a80f80 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Nov 2019 16:09:03 +0900 Subject: [PATCH 1/7] bpo-26730: Fix SpooledTemporaryFile data corruption SpooledTemporaryFile.rollback() might cause data corruption when it is in text mode. --- Lib/tempfile.py | 15 +++++++++------ Lib/test/test_tempfile.py | 21 ++++++++++++--------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 45709cb06cf4e6..48c0a041c08aad 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -633,10 +633,9 @@ def __init__(self, max_size=0, mode='w+b', buffering=-1, if 'b' in mode: self._file = _io.BytesIO() else: - # Setting newline="\n" avoids newline translation; - # this is important because otherwise on Windows we'd - # get double newline translation upon rollover(). - self._file = _io.StringIO(newline="\n") + self._file = _io.TextIOWrapper(_io.BytesIO(), + encoding=encoding, newline=newline, + write_through=True) self._max_size = max_size self._rolled = False self._TemporaryFileArgs = {'mode': mode, 'buffering': buffering, @@ -656,8 +655,12 @@ def rollover(self): newfile = self._file = TemporaryFile(**self._TemporaryFileArgs) del self._TemporaryFileArgs - newfile.write(file.getvalue()) - newfile.seek(file.tell(), 0) + pos = file.tell() + if hasattr(newfile, 'buffer'): + newfile.buffer.write(file.detach().getvalue()) + else: + newfile.write(file.getvalue()) + newfile.seek(pos, 0) self._rolled = True diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index f995f6c9bfaf00..87792b98cae857 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -1114,7 +1114,8 @@ def test_properties(self): def test_text_mode(self): # Creating a SpooledTemporaryFile with a text mode should produce # a file object reading and writing (Unicode) text strings. - f = tempfile.SpooledTemporaryFile(mode='w+', max_size=10) + f = tempfile.SpooledTemporaryFile(mode='w+', max_size=10, + encoding="utf-8") f.write("abc\n") f.seek(0) self.assertEqual(f.read(), "abc\n") @@ -1124,9 +1125,9 @@ def test_text_mode(self): self.assertFalse(f._rolled) self.assertEqual(f.mode, 'w+') self.assertIsNone(f.name) - self.assertIsNone(f.newlines) - self.assertIsNone(f.encoding) - self.assertIsNone(f.errors) + self.assertEqual(f.newlines, os.linesep) + self.assertIsNotNone(f.encoding) + self.assertIsNotNone(f.errors) f.write("xyzzy\n") f.seek(0) @@ -1152,13 +1153,15 @@ def test_text_newline_and_encoding(self): self.assertFalse(f._rolled) self.assertEqual(f.mode, 'w+') self.assertIsNone(f.name) - self.assertIsNone(f.newlines) - self.assertIsNone(f.encoding) - self.assertIsNone(f.errors) + self.assertIsNotNone(f.newlines) + self.assertIsNotNone(f.encoding) + self.assertIsNotNone(f.errors) - f.write("\u039B" * 20 + "\r\n") + f.write("\u039C" * 10 + "\r\n") + f.write("\u039D" * 20) f.seek(0) - self.assertEqual(f.read(), "\u039B\r\n" + ("\u039B" * 20) + "\r\n") + self.assertEqual(f.read(), + "\u039B\r\n" + ("\u039C" * 10) + "\r\n" + ("\u039D" * 20)) self.assertTrue(f._rolled) self.assertEqual(f.mode, 'w+') self.assertIsNotNone(f.name) From ecb8858eafec445371545485e4d9f8c02d1cc64f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Nov 2019 16:27:07 +0900 Subject: [PATCH 2/7] update doc --- Doc/library/tempfile.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/tempfile.rst b/Doc/library/tempfile.rst index fff7a7a03eb81d..364f4990d4db9d 100644 --- a/Doc/library/tempfile.rst +++ b/Doc/library/tempfile.rst @@ -93,7 +93,7 @@ The module defines the following user-callable items: Added *errors* parameter. -.. function:: SpooledTemporaryFile(max_size=0, mode='w+b', buffering=None, encoding=None, newline=None, suffix=None, prefix=None, dir=None, *, errors=None) +.. class:: SpooledTemporaryFile(max_size=0, mode='w+b', buffering=None, encoding=None, newline=None, suffix=None, prefix=None, dir=None, *, errors=None) This function operates exactly as :func:`TemporaryFile` does, except that data is spooled in memory until the file size exceeds *max_size*, or @@ -105,8 +105,8 @@ The module defines the following user-callable items: causes the file to roll over to an on-disk file regardless of its size. The returned object is a file-like object whose :attr:`_file` attribute - is either an :class:`io.BytesIO` or :class:`io.StringIO` object (depending on - whether binary or text *mode* was specified) or a true file + is either an :class:`io.BytesIO` or :class:`io.TextIOWrapper` object + (depending on whether binary or text *mode* was specified) or a true file object, depending on whether :func:`rollover` has been called. This file-like object can be used in a :keyword:`with` statement, just like a normal file. From 30bb6949c846d0be428357b62eeea02f7944a2e6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Nov 2019 16:30:11 +0900 Subject: [PATCH 3/7] Add NEWS --- .../next/Library/2019-11-27-16-30-02.bpo-26730.56cdBn.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2019-11-27-16-30-02.bpo-26730.56cdBn.rst diff --git a/Misc/NEWS.d/next/Library/2019-11-27-16-30-02.bpo-26730.56cdBn.rst b/Misc/NEWS.d/next/Library/2019-11-27-16-30-02.bpo-26730.56cdBn.rst new file mode 100644 index 00000000000000..a92b90a4956053 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-11-27-16-30-02.bpo-26730.56cdBn.rst @@ -0,0 +1,2 @@ +Fix ``SpooledTemporaryFile.rollover()`` might corrupt the file when it is in +text mode. Patch by Serhiy Storchaka. From adce72bdd2452b9ac908de6a84abe3dc7a9e6a12 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Nov 2019 19:38:50 +0900 Subject: [PATCH 4/7] revert function -> class --- Doc/library/tempfile.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/tempfile.rst b/Doc/library/tempfile.rst index 364f4990d4db9d..a59817c1039210 100644 --- a/Doc/library/tempfile.rst +++ b/Doc/library/tempfile.rst @@ -93,7 +93,7 @@ The module defines the following user-callable items: Added *errors* parameter. -.. class:: SpooledTemporaryFile(max_size=0, mode='w+b', buffering=None, encoding=None, newline=None, suffix=None, prefix=None, dir=None, *, errors=None) +.. function:: SpooledTemporaryFile(max_size=0, mode='w+b', buffering=None, encoding=None, newline=None, suffix=None, prefix=None, dir=None, *, errors=None) This function operates exactly as :func:`TemporaryFile` does, except that data is spooled in memory until the file size exceeds *max_size*, or From 70aeefa87b18956d09ca848131a59dcf142ba44e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Nov 2019 20:04:44 +0900 Subject: [PATCH 5/7] Pass errors to TextIOWrapper --- Lib/tempfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 48c0a041c08aad..e9a2185a84edef 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -634,8 +634,8 @@ def __init__(self, max_size=0, mode='w+b', buffering=-1, self._file = _io.BytesIO() else: self._file = _io.TextIOWrapper(_io.BytesIO(), - encoding=encoding, newline=newline, - write_through=True) + encoding=encoding, errors=errors, + newline=newline, write_through=True) self._max_size = max_size self._rolled = False self._TemporaryFileArgs = {'mode': mode, 'buffering': buffering, From 4b3d8576bdf8a000f96a77cc1312eceb6f81382f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Nov 2019 20:33:58 +0900 Subject: [PATCH 6/7] write_through is not required --- Lib/tempfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/tempfile.py b/Lib/tempfile.py index e9a2185a84edef..62875540f8b920 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -635,7 +635,7 @@ def __init__(self, max_size=0, mode='w+b', buffering=-1, else: self._file = _io.TextIOWrapper(_io.BytesIO(), encoding=encoding, errors=errors, - newline=newline, write_through=True) + newline=newline) self._max_size = max_size self._rolled = False self._TemporaryFileArgs = {'mode': mode, 'buffering': buffering, From c5fac2ab8f76fc3565a5ea182cc0c86000bda59b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Nov 2019 22:00:04 +0900 Subject: [PATCH 7/7] update test --- Lib/test/test_tempfile.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index 87792b98cae857..232c5dae10fdf4 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -1126,8 +1126,8 @@ def test_text_mode(self): self.assertEqual(f.mode, 'w+') self.assertIsNone(f.name) self.assertEqual(f.newlines, os.linesep) - self.assertIsNotNone(f.encoding) - self.assertIsNotNone(f.errors) + self.assertEqual(f.encoding, "utf-8") + self.assertEqual(f.errors, "strict") f.write("xyzzy\n") f.seek(0) @@ -1140,8 +1140,8 @@ def test_text_mode(self): self.assertEqual(f.mode, 'w+') self.assertIsNotNone(f.name) self.assertEqual(f.newlines, os.linesep) - self.assertIsNotNone(f.encoding) - self.assertIsNotNone(f.errors) + self.assertEqual(f.encoding, "utf-8") + self.assertEqual(f.errors, "strict") def test_text_newline_and_encoding(self): f = tempfile.SpooledTemporaryFile(mode='w+', max_size=10, @@ -1154,8 +1154,8 @@ def test_text_newline_and_encoding(self): self.assertEqual(f.mode, 'w+') self.assertIsNone(f.name) self.assertIsNotNone(f.newlines) - self.assertIsNotNone(f.encoding) - self.assertIsNotNone(f.errors) + self.assertEqual(f.encoding, "utf-8") + self.assertEqual(f.errors, "ignore") f.write("\u039C" * 10 + "\r\n") f.write("\u039D" * 20)