From d50a7da5e911a06562132ef87a7ade0004688631 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Tue, 3 Dec 2024 14:51:22 +1100 Subject: [PATCH 1/6] gh-98188: Fix EmailMessage.get_payload to decode data Fix `email.message.EmailMessage.get_payload` failing to decode data when there is trailing whitespace following the Content-Transfer-Encoding mechanism. For backward compatibility, `str(cte_header)` still returns the original value; `get_payload` uses `cte_header.cte` to retrieve the parsed CTE. --- Lib/email/message.py | 6 +++- Lib/test/test_email/test_headerregistry.py | 5 +++ Lib/test/test_email/test_message.py | 32 +++++++++++++++++++ Misc/ACKS | 1 + ...4-12-03-14-45-16.gh-issue-98188.GX9i2b.rst | 2 ++ 5 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst diff --git a/Lib/email/message.py b/Lib/email/message.py index a58afc5fe5f68e..c4c4dab971f154 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -14,6 +14,7 @@ # Intrapackage imports from email import utils from email import errors +from email import headerregistry from email._policybase import compat32 from email import charset as _charset from email._encoded_words import decode_b @@ -286,8 +287,11 @@ def get_payload(self, i=None, decode=False): if i is not None and not isinstance(self._payload, list): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload + cte = self.get('content-transfer-encoding', '') + if isinstance(cte, headerregistry.ContentTransferEncodingHeader): + cte = cte.cte # cte might be a Header, so for now stringify it. - cte = str(self.get('content-transfer-encoding', '')).lower() + cte = str(cte).strip().lower() # payload may be bytes here. 
if not decode: if isinstance(payload, str) and utils._has_surrogates(payload): diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 4c0523f410332f..ff7a6da644d572 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -837,6 +837,11 @@ def cte_as_value(self, '7bit', [errors.InvalidHeaderDefect]), + 'extra_space_after_cte': ( + 'base64 ', + 'base64', + []), + } diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 96979db27f3a21..6896b12ffb78aa 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -1,3 +1,4 @@ +from email.errors import InvalidHeaderDefect import textwrap import unittest from email import message_from_bytes, message_from_string, policy @@ -1054,6 +1055,37 @@ def test_string_payload_with_multipart_content_type(self): attachments = msg.iter_attachments() self.assertEqual(list(attachments), []) + def test_string_payload_with_base64_cte(self): + msg = message_from_string(textwrap.dedent("""\ + Content-Transfer-Encoding: base64 + + SGVsbG8uIFRlc3Rpbmc= + """), policy=policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) + + def test_string_payload_with_extra_space_after_cte(self): + # https://github.com/python/cpython/issues/98188 + cte = "base64 " + msg = message_from_string(textwrap.dedent(f"""\ + Content-Transfer-Encoding: {cte} + + SGVsbG8uIFRlc3Rpbmc= + """), policy=policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. 
Testing") + self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) + + def test_string_payload_with_extra_text_after_cte(self): + msg = message_from_string(textwrap.dedent("""\ + Content-Transfer-Encoding: base64 some text + + SGVsbG8uIFRlc3Rpbmc= + """), policy=policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + cte = msg['content-transfer-encoding'] + self.assertDefectsEqual(cte.defects, [InvalidHeaderDefect]) + + if __name__ == '__main__': unittest.main() diff --git a/Misc/ACKS b/Misc/ACKS index 08cd293eac3835..984b6273ebb594 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1127,6 +1127,7 @@ Gregor Lingl Everett Lipman Mirko Liss Alexander Liu +Hui Liu Yuan Liu Nick Lockwood Stephanie Lockwood diff --git a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst new file mode 100644 index 00000000000000..acc414e36d9ec7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst @@ -0,0 +1,2 @@ +Fix an issue in :meth:`email.message.EmailMessage.get_payload` where data +cannot be decoded if there is a trailing whitespace following the mechanism. 
From e655493caf82bf5bcda0403fce7047a2976d293a Mon Sep 17 00:00:00 2001 From: RanKKI Date: Tue, 3 Dec 2024 15:42:31 +1100 Subject: [PATCH 2/6] docs: update NEWS.d --- .../next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst index acc414e36d9ec7..9b93bdcc573679 100644 --- a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst +++ b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst @@ -1,2 +1,2 @@ Fix an issue in :meth:`email.message.EmailMessage.get_payload` where data -cannot be decoded if there is a trailing whitespace following the mechanism. +cannot be decoded if there is a trailing whitespace and/or texts following the mechanism. From 6bf441b67c2cf5cc4238c8d7dac69c53018e999c Mon Sep 17 00:00:00 2001 From: RanKKI Date: Tue, 3 Dec 2024 16:33:50 +1100 Subject: [PATCH 3/6] docs: update NEWS.d to fix linked method --- .../next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst index 9b93bdcc573679..d739f9aea8becd 100644 --- a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst +++ b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst @@ -1,2 +1,2 @@ -Fix an issue in :meth:`email.message.EmailMessage.get_payload` where data +Fix an issue in :meth:`email.message.Message.get_payload` where data cannot be decoded if there is a trailing whitespace and/or texts following the mechanism. 
From 53755c1b036b5e7e562ff0d4f2c734691c5e6cd5 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Thu, 19 Dec 2024 18:04:54 +1100 Subject: [PATCH 4/6] Update Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- .../next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst index d739f9aea8becd..30ab8cfc3f0bc6 100644 --- a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst +++ b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst @@ -1,2 +1,3 @@ Fix an issue in :meth:`email.message.Message.get_payload` where data -cannot be decoded if there is a trailing whitespace and/or texts following the mechanism. +cannot be decoded if the Content Transfer Encoding mechanism contains +trailing whitespaces or additional junk text. Patch by Hui Liu. 
From a81ad68c589d1e61fc23cf2cd6a577f923a050a6 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Thu, 19 Dec 2024 18:52:08 +1100 Subject: [PATCH 5/6] refactor: move test cases --- Lib/email/message.py | 8 +++--- Lib/test/test_email/test_email.py | 44 +++++++++++++++++++++++++++++ Lib/test/test_email/test_message.py | 31 -------------------- 3 files changed, 48 insertions(+), 35 deletions(-) diff --git a/Lib/email/message.py b/Lib/email/message.py index c4c4dab971f154..87fcab68868d46 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -14,7 +14,6 @@ # Intrapackage imports from email import utils from email import errors -from email import headerregistry from email._policybase import compat32 from email import charset as _charset from email._encoded_words import decode_b @@ -288,10 +287,11 @@ def get_payload(self, i=None, decode=False): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload cte = self.get('content-transfer-encoding', '') - if isinstance(cte, headerregistry.ContentTransferEncodingHeader): + if hasattr(cte, 'cte'): cte = cte.cte - # cte might be a Header, so for now stringify it. - cte = str(cte).strip().lower() + else: + # cte might be a Header, so for now stringify it. + cte = str(cte).strip().lower() # payload may be bytes here. if not decode: if isinstance(payload, str) and utils._has_surrogates(payload): diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index abe9ef2e94409f..2deb35721576b8 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -810,6 +810,16 @@ def test_unicode_body_defaults_to_utf8_encoding(self): w4kgdGVzdGFiYwo= """)) + def test_string_payload_with_base64_cte(self): + msg = email.message_from_string(textwrap.dedent("""\ + Content-Transfer-Encoding: base64 + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. 
Testing") + self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) + + # Test the email.encoders module class TestEncoders(unittest.TestCase): @@ -2352,6 +2362,40 @@ def test_missing_header_body_separator(self): self.assertDefectsEqual(msg.defects, [errors.MissingHeaderBodySeparatorDefect]) + def test_string_payload_with_extra_space_after_cte(self): + # https://github.com/python/cpython/issues/98188 + cte = "base64 " + msg = email.message_from_string(textwrap.dedent(f"""\ + Content-Transfer-Encoding: {cte} + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) + + def test_string_payload_with_extra_text_after_cte(self): + msg = email.message_from_string(textwrap.dedent("""\ + Content-Transfer-Encoding: base64 some text + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + cte = msg['content-transfer-encoding'] + self.assertDefectsEqual(cte.defects, [email.errors.InvalidHeaderDefect]) + + def test_string_payload_with_extra_space_after_cte_compat32(self): + cte = "base64 " + msg = email.message_from_string(textwrap.dedent(f"""\ + Content-Transfer-Encoding: {cte} + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.compat32) + pasted_cte = msg['content-transfer-encoding'] + self.assertEqual(pasted_cte, cte) + self.assertEqual(msg.get_payload(decode=True), b"Hello. 
Testing") + self.assertDefectsEqual(msg.defects, []) + + # Test RFC 2047 header encoding and decoding class TestRFC2047(TestEmailBase): diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 6896b12ffb78aa..e276cdb85c94fc 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -1055,37 +1055,6 @@ def test_string_payload_with_multipart_content_type(self): attachments = msg.iter_attachments() self.assertEqual(list(attachments), []) - def test_string_payload_with_base64_cte(self): - msg = message_from_string(textwrap.dedent("""\ - Content-Transfer-Encoding: base64 - - SGVsbG8uIFRlc3Rpbmc= - """), policy=policy.default) - self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") - self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) - - def test_string_payload_with_extra_space_after_cte(self): - # https://github.com/python/cpython/issues/98188 - cte = "base64 " - msg = message_from_string(textwrap.dedent(f"""\ - Content-Transfer-Encoding: {cte} - - SGVsbG8uIFRlc3Rpbmc= - """), policy=policy.default) - self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") - self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) - - def test_string_payload_with_extra_text_after_cte(self): - msg = message_from_string(textwrap.dedent("""\ - Content-Transfer-Encoding: base64 some text - - SGVsbG8uIFRlc3Rpbmc= - """), policy=policy.default) - self.assertEqual(msg.get_payload(decode=True), b"Hello. 
Testing") - cte = msg['content-transfer-encoding'] - self.assertDefectsEqual(cte.defects, [InvalidHeaderDefect]) - - if __name__ == '__main__': unittest.main() From 6631ef60a6dc938dc2642918078567eeeb9baa30 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Sun, 22 Dec 2024 23:10:52 +1100 Subject: [PATCH 6/6] chore: remove unused import --- Lib/test/test_email/test_message.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index e276cdb85c94fc..96979db27f3a21 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -1,4 +1,3 @@ -from email.errors import InvalidHeaderDefect import textwrap import unittest from email import message_from_bytes, message_from_string, policy