Skip to content

Commit c5d9c5d

Browse files
vstinner authored and encukou committed
[3.12] [CVE-2023-27043] pythongh-102988: Reject malformed addresses in email.parseaddr() (pythonGH-111116)
Detect email address parsing errors and return empty tuple to indicate the parsing error (old API). Add an optional 'strict' parameter to getaddresses() and parseaddr() functions. Patch by Thomas Dwyer. Co-Authored-By: Thomas Dwyer <[email protected]> (cherry picked from commit 4a153a1)
1 parent 747abc0 commit c5d9c5d

File tree

5 files changed

+361
-21
lines changed

5 files changed

+361
-21
lines changed

Doc/library/email.utils.rst

+15-4
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,18 @@ of the new API.
5858
begins with angle brackets, they are stripped off.
5959

6060

61-
.. function:: parseaddr(address)
61+
.. function:: parseaddr(address, *, strict=True)
6262

6363
Parse address -- which should be the value of some address-containing field such
6464
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
6565
*email address* parts. Returns a tuple of that information, unless the parse
6666
fails, in which case a 2-tuple of ``('', '')`` is returned.
6767

68+
If *strict* is true, use a strict parser which rejects malformed inputs.
69+
70+
.. versionchanged:: 3.12.6
71+
Add *strict* optional parameter and reject malformed inputs by default.
72+
6873

6974
.. function:: formataddr(pair, charset='utf-8')
7075

@@ -82,12 +87,15 @@ of the new API.
8287
Added the *charset* option.
8388

8489

85-
.. function:: getaddresses(fieldvalues)
90+
.. function:: getaddresses(fieldvalues, *, strict=True)
8691

8792
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
8893
*fieldvalues* is a sequence of header field values as might be returned by
89-
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
90-
example that gets all the recipients of a message::
94+
:meth:`Message.get_all <email.message.Message.get_all>`.
95+
96+
If *strict* is true, use a strict parser which rejects malformed inputs.
97+
98+
Here's a simple example that gets all the recipients of a message::
9199

92100
from email.utils import getaddresses
93101

@@ -97,6 +105,9 @@ of the new API.
97105
resent_ccs = msg.get_all('resent-cc', [])
98106
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
99107

108+
.. versionchanged:: 3.12.6
109+
Add *strict* optional parameter and reject malformed inputs by default.
110+
100111

101112
.. function:: parsedate(date)
102113

Doc/whatsnew/3.12.rst

+17
Original file line numberDiff line numberDiff line change
@@ -2277,3 +2277,20 @@ email
22772277
If you need to turn this safety feature off,
22782278
set :attr:`~email.policy.Policy.verify_generated_headers`.
22792279
(Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.)
2280+
2281+
2282+
Notable changes in 3.12.6
2283+
=========================
2284+
2285+
email
2286+
-----
2287+
2288+
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
2289+
``('', '')`` 2-tuples in more situations where invalid email addresses are
2290+
encountered, instead of potentially inaccurate values.
2291+
An optional *strict* parameter was added to these two functions:
2292+
use ``strict=False`` to get the old behavior, accepting malformed inputs.
2293+
``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to
2294+
check if the *strict* parameter is available.
2295+
(Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
2296+
the CVE-2023-27043 fix.)

Lib/email/utils.py

+142-9
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
specialsre = re.compile(r'[][\\()<>@,:;".]')
4949
escapesre = re.compile(r'[\\"]')
5050

51+
5152
def _has_surrogates(s):
5253
"""Return True if s may contain surrogate-escaped binary data."""
5354
# This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
106107
return address
107108

108109

110+
def _iter_escaped_chars(addr):
    """Yield ``(index, token)`` pairs for *addr*, honouring backslash escapes.

    A backslash followed by a character is yielded as one two-character
    token (backslash + character) whose index is that of the escaped
    character.  Any other character is yielded on its own.  A trailing,
    unpaired backslash is yielded alone with the index of the last character.
    """
    index = 0
    pending_backslash = False
    for index, char in enumerate(addr):
        if pending_backslash:
            # Previous character was a backslash: emit the escaped pair.
            yield (index, '\\' + char)
            pending_backslash = False
            continue
        if char == '\\':
            # Hold the backslash until we see what it escapes.
            pending_backslash = True
            continue
        yield (index, char)
    if pending_backslash:
        # The input ended on a dangling backslash.
        yield (index, '\\')
123+
124+
125+
def _strip_quoted_realnames(addr):
126+
"""Strip real names between quotes."""
127+
if '"' not in addr:
128+
# Fast path
129+
return addr
130+
131+
start = 0
132+
open_pos = None
133+
result = []
134+
for pos, ch in _iter_escaped_chars(addr):
135+
if ch == '"':
136+
if open_pos is None:
137+
open_pos = pos
138+
else:
139+
if start != open_pos:
140+
result.append(addr[start:open_pos])
141+
start = pos + 1
142+
open_pos = None
143+
144+
if start < len(addr):
145+
result.append(addr[start:])
146+
147+
return ''.join(result)
109148

110-
def getaddresses(fieldvalues):
111-
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
112-
all = COMMASPACE.join(str(v) for v in fieldvalues)
113-
a = _AddressList(all)
114-
return a.addresslist
149+
150+
# Feature flag: callers can test
# ``getattr(email.utils, 'supports_strict_parsing', False)`` to find out
# whether getaddresses() and parseaddr() accept the *strict* parameter.
supports_strict_parsing = True
151+
152+
def getaddresses(fieldvalues, *, strict=True):
153+
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
154+
155+
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
156+
its place.
157+
158+
If strict is true, use a strict parser which rejects malformed inputs.
159+
"""
160+
161+
# If strict is true, if the resulting list of parsed addresses is greater
162+
# than the number of fieldvalues in the input list, a parsing error has
163+
# occurred and consequently a list containing a single empty 2-tuple [('',
164+
# '')] is returned in its place. This is done to avoid invalid output.
165+
#
166+
# Malformed input: getaddresses(['[email protected] <[email protected]>'])
167+
# Invalid output: [('', '[email protected]'), ('', '[email protected]')]
168+
# Safe output: [('', '')]
169+
170+
if not strict:
171+
all = COMMASPACE.join(str(v) for v in fieldvalues)
172+
a = _AddressList(all)
173+
return a.addresslist
174+
175+
fieldvalues = [str(v) for v in fieldvalues]
176+
fieldvalues = _pre_parse_validation(fieldvalues)
177+
addr = COMMASPACE.join(fieldvalues)
178+
a = _AddressList(addr)
179+
result = _post_parse_validation(a.addresslist)
180+
181+
# Treat output as invalid if the number of addresses is not equal to the
182+
# expected number of addresses.
183+
n = 0
184+
for v in fieldvalues:
185+
# When a comma is used in the Real Name part it is not a deliminator.
186+
# So strip those out before counting the commas.
187+
v = _strip_quoted_realnames(v)
188+
# Expected number of addresses: 1 + number of commas
189+
n += 1 + v.count(',')
190+
if len(result) != n:
191+
return [('', '')]
192+
193+
return result
194+
195+
196+
def _check_parenthesis(addr):
197+
# Ignore parenthesis in quoted real names.
198+
addr = _strip_quoted_realnames(addr)
199+
200+
opens = 0
201+
for pos, ch in _iter_escaped_chars(addr):
202+
if ch == '(':
203+
opens += 1
204+
elif ch == ')':
205+
opens -= 1
206+
if opens < 0:
207+
return False
208+
return (opens == 0)
209+
210+
211+
def _pre_parse_validation(email_header_fields):
212+
accepted_values = []
213+
for v in email_header_fields:
214+
if not _check_parenthesis(v):
215+
v = "('', '')"
216+
accepted_values.append(v)
217+
218+
return accepted_values
219+
220+
221+
def _post_parse_validation(parsed_email_header_tuples):
    """Return the parsed tuples with failed parses replaced by ('', '').

    The parser would have consumed a correctly formatted domain-literal, so
    a '[' left in the address part (index 1) indicates a parsing failure.
    """
    return [
        ('', '') if '[' in pair[1] else pair
        for pair in parsed_email_header_tuples
    ]
115231

116232

117233
def _format_timetuple_and_zone(timetuple, zone):
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
205321
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
206322

207323

208-
def parseaddr(addr):
324+
def parseaddr(addr, *, strict=True):
209325
"""
210326
Parse addr into its constituent realname and email address parts.
211327
212328
Return a tuple of realname and email address, unless the parse fails, in
213329
which case return a 2-tuple of ('', '').
330+
331+
If strict is True, use a strict parser which rejects malformed inputs.
214332
"""
215-
addrs = _AddressList(addr).addresslist
216-
if not addrs:
217-
return '', ''
333+
if not strict:
334+
addrs = _AddressList(addr).addresslist
335+
if not addrs:
336+
return ('', '')
337+
return addrs[0]
338+
339+
if isinstance(addr, list):
340+
addr = addr[0]
341+
342+
if not isinstance(addr, str):
343+
return ('', '')
344+
345+
addr = _pre_parse_validation([addr])[0]
346+
addrs = _post_parse_validation(_AddressList(addr).addresslist)
347+
348+
if not addrs or len(addrs) > 1:
349+
return ('', '')
350+
218351
return addrs[0]
219352

220353

0 commit comments

Comments
 (0)