diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
index 4b63b97217a835..95e79b8938bb4c 100644
--- a/Lib/email/_policybase.py
+++ b/Lib/email/_policybase.py
@@ -4,6 +4,7 @@
 """
 
 import abc
+import re
 from email import header
 from email import charset as _charset
 from email.utils import _has_surrogates
@@ -14,6 +15,16 @@
     'compat32',
     ]
 
+# Validation regex from RFC 5322, section 2.2; equivalent to the pattern
+# re.compile(r"[!-9;-~]+\Z").  The end is anchored with \Z rather than $
+# because $ also matches just before a trailing newline ("Subject\n").
+valid_header_name_re = re.compile(r"[\041-\071\073-\176]+\Z")
+
+def validate_header_name(name):
+    """Raise ValueError if *name* is not a valid RFC 5322 field name."""
+    if not valid_header_name_re.match(name):
+        raise ValueError(
+            f"Header field name contains invalid characters: {name!r}")
 
 class _PolicyBase:
 
@@ -314,6 +325,7 @@ def header_store_parse(self, name, value):
         """+
         The name and value are returned unmodified.
         """
+        validate_header_name(name)
         return (name, value)
 
     def header_fetch_parse(self, name, value):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 6e109b65011a44..4169150101a29d 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -4,7 +4,13 @@
 
 import re
 import sys
-from email._policybase import Policy, Compat32, compat32, _extend_docstrings
+from email._policybase import (
+    Compat32,
+    Policy,
+    _extend_docstrings,
+    compat32,
+    validate_header_name,
+)
 from email.utils import _has_surrogates
 from email.headerregistry import HeaderRegistry as HeaderRegistry
 from email.contentmanager import raw_data_manager
@@ -138,6 +144,7 @@ def header_store_parse(self, name, value):
         CR or LF characters.
 
         """
+        validate_header_name(name)
         if hasattr(value, 'name') and value.name.lower() == name.lower():
             return (name, value)
         if isinstance(value, str) and len(value.splitlines())>1:
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 2deb35721576b8..724af3b787d38b 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -728,6 +728,32 @@ def test_nonascii_add_header_with_tspecial(self):
             "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
             msg['Content-Disposition'])
 
+    def test_invalid_header_names(self):
+        invalid_headers = [
+            ('Invalid Header', 'contains space'),
+            ('Tab\tHeader', 'contains tab'),
+            ('Colon:Header', 'contains colon'),
+            ('', 'Empty name'),
+            (' LeadingSpace', 'starts with space'),
+            ('TrailingSpace ', 'ends with space'),
+            ('Header\n', 'ends with newline'),
+            ('Header\x7F', 'contains DEL control character'),
+            ('Header\x80', 'Non-ASCII character'),
+        ]
+        for policy in (email.policy.default, email.policy.compat32):
+            for setter in (Message.__setitem__, Message.add_header):
+                for name, value in invalid_headers:
+                    self.do_test_invalid_header_names(
+                        policy, setter, name, value)
+
+    def do_test_invalid_header_names(self, policy, setter, name, value):
+        with self.subTest(policy=policy, setter=setter, name=name, value=value):
+            message = Message(policy=policy)
+            pattern = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
+            with self.assertRaisesRegex(ValueError, pattern) as cm:
+                 setter(message, name, value)
+            self.assertIn(f"{name!r}", str(cm.exception))
+
     def test_binary_quopri_payload(self):
         for charset in ('latin-1', 'ascii'):
             msg = Message()
diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py
index 96979db27f3a21..23c39775a8b2e5 100644
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -1004,6 +1004,31 @@ def test_folding_with_long_nospace_http_policy_1(self):
         parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
         self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
 
+    def test_invalid_header_names(self):
+        invalid_headers = [
+            ('Invalid Header', 'contains space'),
+            ('Tab\tHeader', 'contains tab'),
+            ('Colon:Header', 'contains colon'),
+            ('', 'Empty name'),
+            (' LeadingSpace', 'starts with space'),
+            ('TrailingSpace ', 'ends with space'),
+            ('Header\n', 'ends with newline'),
+            ('Header\x7F', 'contains DEL control character'),
+            ('Header\x80', 'Non-ASCII character'),
+        ]
+        for email_policy in (policy.default, policy.compat32):
+            for setter in (EmailMessage.__setitem__, EmailMessage.add_header):
+                for name, value in invalid_headers:
+                    self.do_test_invalid_header_names(email_policy, setter, name, value)
+
+    def do_test_invalid_header_names(self, policy, setter, name, value):
+        with self.subTest(policy=policy, setter=setter, name=name, value=value):
+            message = EmailMessage(policy=policy)
+            pattern = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
+            with self.assertRaisesRegex(ValueError, pattern) as cm:
+                 setter(message, name, value)
+            self.assertIn(f"{name!r}", str(cm.exception))
+
     def test_get_body_malformed(self):
         """test for bpo-42892"""
         msg = textwrap.dedent("""\
diff --git a/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst b/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst
new file mode 100644
index 00000000000000..b6e39d66d04221
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst
@@ -0,0 +1,4 @@
+When headers are added to :class:`email.message.Message` objects, either through
+:meth:`email.message.Message.__setitem__` or :meth:`email.message.Message.add_header`,
+the field name is now validated according to :rfc:`RFC 5322, Section 2.2 <5322#section-2.2>`
+and a :exc:`ValueError` is raised if the field name contains any invalid characters.