Skip to content

Commit dae5b16

Browse files
miss-islington, RanKKI, and picnixz
authored
[3.12] gh-98188: Fix EmailMessage.get_payload to decode data when CTE value has extra text (GH-127547) (#128529)
gh-98188: Fix EmailMessage.get_payload to decode data when CTE value has extra text (GH-127547)

Up to this point message handling has been very strict with regards to content encoding values: mixed case was accepted, but trailing blanks or other text would cause decoding failure, even if the first token was a valid encoding. By Postel's Rule we should go ahead and decode as long as we can recognize that first token.

We have not thought of any security or backward compatibility concerns with this fix.

This fix does introduce a new technique/pattern to the Message code: we look to see if the header has a 'cte' attribute, and if so we use that. This effectively promotes the header API exposed by HeaderRegistry to an API that any header parser "should" support. This seems like a reasonable thing to do. It is not, however, a requirement, as the string value of the header is still used if there is no cte attribute.

The full fix (ignore any trailing blanks or blank-separated trailing text) applies only to the non-compat32 API. compat32 is only fixed to the extent that it now ignores trailing spaces.

Note that the HeaderRegistry parsing still records a HeaderDefect if there is extra text.

(cherry picked from commit a62ba52)

Co-authored-by: RanKKI <[email protected]>
Co-authored-by: Bénédikt Tran <[email protected]>
1 parent e758eb3 commit dae5b16

File tree

5 files changed

+59
-2
lines changed

5 files changed

+59
-2
lines changed

Lib/email/message.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,12 @@ def get_payload(self, i=None, decode=False):
286286
if i is not None and not isinstance(self._payload, list):
287287
raise TypeError('Expected list, got %s' % type(self._payload))
288288
payload = self._payload
289-
# cte might be a Header, so for now stringify it.
290-
cte = str(self.get('content-transfer-encoding', '')).lower()
289+
cte = self.get('content-transfer-encoding', '')
290+
if hasattr(cte, 'cte'):
291+
cte = cte.cte
292+
else:
293+
# cte might be a Header, so for now stringify it.
294+
cte = str(cte).strip().lower()
291295
# payload may be bytes here.
292296
if not decode:
293297
if isinstance(payload, str) and utils._has_surrogates(payload):

Lib/test/test_email/test_email.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,16 @@ def test_unicode_body_defaults_to_utf8_encoding(self):
810810
w4kgdGVzdGFiYwo=
811811
"""))
812812

813+
def test_string_payload_with_base64_cte(self):
814+
msg = email.message_from_string(textwrap.dedent("""\
815+
Content-Transfer-Encoding: base64
816+
817+
SGVsbG8uIFRlc3Rpbmc=
818+
"""), policy=email.policy.default)
819+
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
820+
self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])
821+
822+
813823

814824
# Test the email.encoders module
815825
class TestEncoders(unittest.TestCase):
@@ -2352,6 +2362,40 @@ def test_missing_header_body_separator(self):
23522362
self.assertDefectsEqual(msg.defects,
23532363
[errors.MissingHeaderBodySeparatorDefect])
23542364

2365+
def test_string_payload_with_extra_space_after_cte(self):
2366+
# https://github.com/python/cpython/issues/98188
2367+
cte = "base64 "
2368+
msg = email.message_from_string(textwrap.dedent(f"""\
2369+
Content-Transfer-Encoding: {cte}
2370+
2371+
SGVsbG8uIFRlc3Rpbmc=
2372+
"""), policy=email.policy.default)
2373+
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
2374+
self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])
2375+
2376+
def test_string_payload_with_extra_text_after_cte(self):
2377+
msg = email.message_from_string(textwrap.dedent("""\
2378+
Content-Transfer-Encoding: base64 some text
2379+
2380+
SGVsbG8uIFRlc3Rpbmc=
2381+
"""), policy=email.policy.default)
2382+
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
2383+
cte = msg['content-transfer-encoding']
2384+
self.assertDefectsEqual(cte.defects, [email.errors.InvalidHeaderDefect])
2385+
2386+
def test_string_payload_with_extra_space_after_cte_compat32(self):
2387+
cte = "base64 "
2388+
msg = email.message_from_string(textwrap.dedent(f"""\
2389+
Content-Transfer-Encoding: {cte}
2390+
2391+
SGVsbG8uIFRlc3Rpbmc=
2392+
"""), policy=email.policy.compat32)
2393+
pasted_cte = msg['content-transfer-encoding']
2394+
self.assertEqual(pasted_cte, cte)
2395+
self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
2396+
self.assertDefectsEqual(msg.defects, [])
2397+
2398+
23552399

23562400
# Test RFC 2047 header encoding and decoding
23572401
class TestRFC2047(TestEmailBase):

Lib/test/test_email/test_headerregistry.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,11 @@ def cte_as_value(self,
838838
'7bit',
839839
[errors.InvalidHeaderDefect]),
840840

841+
'extra_space_after_cte': (
842+
'base64 ',
843+
'base64',
844+
[]),
845+
841846
}
842847

843848

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,7 @@ Gregor Lingl
11121112
Everett Lipman
11131113
Mirko Liss
11141114
Alexander Liu
1115+
Hui Liu
11151116
Yuan Liu
11161117
Nick Lockwood
11171118
Stephanie Lockwood
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix an issue in :meth:`email.message.Message.get_payload` where data
2+
cannot be decoded if the Content Transfer Encoding mechanism contains
3+
trailing whitespaces or additional junk text. Patch by Hui Liu.

0 commit comments

Comments
 (0)