Skip to content

Commit e67d55a

Browse files
authored
Merge branch '3.11' into backport-56b00f4-3.11
2 parents af2d968 + 8b275e7 commit e67d55a

29 files changed

+621
-95
lines changed

Doc/library/email.utils.rst

+15-4
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,18 @@ of the new API.
6060
begins with angle brackets, they are stripped off.
6161

6262

63-
.. function:: parseaddr(address)
63+
.. function:: parseaddr(address, *, strict=True)
6464

6565
Parse address -- which should be the value of some address-containing field such
6666
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
6767
*email address* parts. Returns a tuple of that information, unless the parse
6868
fails, in which case a 2-tuple of ``('', '')`` is returned.
6969

70+
If *strict* is true, use a strict parser which rejects malformed inputs.
71+
72+
.. versionchanged:: 3.11.10
73+
Add *strict* optional parameter and reject malformed inputs by default.
74+
7075

7176
.. function:: formataddr(pair, charset='utf-8')
7277

@@ -84,12 +89,15 @@ of the new API.
8489
Added the *charset* option.
8590

8691

87-
.. function:: getaddresses(fieldvalues)
92+
.. function:: getaddresses(fieldvalues, *, strict=True)
8893

8994
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
9095
*fieldvalues* is a sequence of header field values as might be returned by
91-
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
92-
example that gets all the recipients of a message::
96+
:meth:`Message.get_all <email.message.Message.get_all>`.
97+
98+
If *strict* is true, use a strict parser which rejects malformed inputs.
99+
100+
Here's a simple example that gets all the recipients of a message::
93101

94102
from email.utils import getaddresses
95103

@@ -99,6 +107,9 @@ of the new API.
99107
resent_ccs = msg.get_all('resent-cc', [])
100108
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
101109

110+
.. versionchanged:: 3.11.10
111+
Add *strict* optional parameter and reject malformed inputs by default.
112+
102113

103114
.. function:: parsedate(date)
104115

Doc/whatsnew/3.11.rst

+10
Original file line numberDiff line numberDiff line change
@@ -2776,3 +2776,13 @@ email
27762776
If you need to turn this safety feature off,
27772777
set :attr:`~email.policy.Policy.verify_generated_headers`.
27782778
(Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.)
2779+
2780+
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
2781+
``('', '')`` 2-tuples in more situations where invalid email addresses are
2782+
encountered, instead of potentially inaccurate values.
2783+
An optional *strict* parameter was added to these two functions:
2784+
use ``strict=False`` to get the old behavior, accepting malformed inputs.
2785+
``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to
2786+
check if the *strict* parameter is available.
2787+
(Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
2788+
the CVE-2023-27043 fix.)

Include/patchlevel.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818
/*--start constants--*/
1919
#define PY_MAJOR_VERSION 3
2020
#define PY_MINOR_VERSION 11
21-
#define PY_MICRO_VERSION 9
21+
#define PY_MICRO_VERSION 10
2222
#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL
2323
#define PY_RELEASE_SERIAL 0
2424

2525
/* Version as a string */
26-
#define PY_VERSION "3.11.9+"
26+
#define PY_VERSION "3.11.10+"
2727
/*--end constants--*/
2828

2929
/* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2.

Lib/email/utils.py

+142-9
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
specialsre = re.compile(r'[][\\()<>@,:;".]')
4949
escapesre = re.compile(r'[\\"]')
5050

51+
5152
def _has_surrogates(s):
5253
"""Return True if s may contain surrogate-escaped binary data."""
5354
# This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
106107
return address
107108

108109

110+
def _iter_escaped_chars(addr):
111+
pos = 0
112+
escape = False
113+
for pos, ch in enumerate(addr):
114+
if escape:
115+
yield (pos, '\\' + ch)
116+
escape = False
117+
elif ch == '\\':
118+
escape = True
119+
else:
120+
yield (pos, ch)
121+
if escape:
122+
yield (pos, '\\')
123+
124+
125+
def _strip_quoted_realnames(addr):
126+
"""Strip real names between quotes."""
127+
if '"' not in addr:
128+
# Fast path
129+
return addr
130+
131+
start = 0
132+
open_pos = None
133+
result = []
134+
for pos, ch in _iter_escaped_chars(addr):
135+
if ch == '"':
136+
if open_pos is None:
137+
open_pos = pos
138+
else:
139+
if start != open_pos:
140+
result.append(addr[start:open_pos])
141+
start = pos + 1
142+
open_pos = None
143+
144+
if start < len(addr):
145+
result.append(addr[start:])
146+
147+
return ''.join(result)
109148

110-
def getaddresses(fieldvalues):
111-
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
112-
all = COMMASPACE.join(str(v) for v in fieldvalues)
113-
a = _AddressList(all)
114-
return a.addresslist
149+
150+
supports_strict_parsing = True
151+
152+
def getaddresses(fieldvalues, *, strict=True):
153+
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
154+
155+
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
156+
its place.
157+
158+
If strict is true, use a strict parser which rejects malformed inputs.
159+
"""
160+
161+
# If strict is true and the number of parsed addresses differs from the
162+
# number expected from the input field values, a parsing error has
163+
# occurred and consequently a list containing a single empty 2-tuple [('',
164+
# '')] is returned in its place. This is done to avoid invalid output.
165+
#
166+
# Malformed input: getaddresses(['[email protected] <[email protected]>'])
167+
# Invalid output: [('', '[email protected]'), ('', '[email protected]')]
168+
# Safe output: [('', '')]
169+
170+
if not strict:
171+
all = COMMASPACE.join(str(v) for v in fieldvalues)
172+
a = _AddressList(all)
173+
return a.addresslist
174+
175+
fieldvalues = [str(v) for v in fieldvalues]
176+
fieldvalues = _pre_parse_validation(fieldvalues)
177+
addr = COMMASPACE.join(fieldvalues)
178+
a = _AddressList(addr)
179+
result = _post_parse_validation(a.addresslist)
180+
181+
# Treat output as invalid if the number of addresses is not equal to the
182+
# expected number of addresses.
183+
n = 0
184+
for v in fieldvalues:
185+
# When a comma is used in the Real Name part it is not a delimiter.
186+
# So strip those out before counting the commas.
187+
v = _strip_quoted_realnames(v)
188+
# Expected number of addresses: 1 + number of commas
189+
n += 1 + v.count(',')
190+
if len(result) != n:
191+
return [('', '')]
192+
193+
return result
194+
195+
196+
def _check_parenthesis(addr):
197+
# Ignore parentheses in quoted real names.
198+
addr = _strip_quoted_realnames(addr)
199+
200+
opens = 0
201+
for pos, ch in _iter_escaped_chars(addr):
202+
if ch == '(':
203+
opens += 1
204+
elif ch == ')':
205+
opens -= 1
206+
if opens < 0:
207+
return False
208+
return (opens == 0)
209+
210+
211+
def _pre_parse_validation(email_header_fields):
212+
accepted_values = []
213+
for v in email_header_fields:
214+
if not _check_parenthesis(v):
215+
v = "('', '')"
216+
accepted_values.append(v)
217+
218+
return accepted_values
219+
220+
221+
def _post_parse_validation(parsed_email_header_tuples):
222+
accepted_values = []
223+
# The parser would have parsed a correctly formatted domain-literal
224+
# The existence of an [ after parsing indicates a parsing failure
225+
for v in parsed_email_header_tuples:
226+
if '[' in v[1]:
227+
v = ('', '')
228+
accepted_values.append(v)
229+
230+
return accepted_values
115231

116232

117233
def _format_timetuple_and_zone(timetuple, zone):
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
205321
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
206322

207323

208-
def parseaddr(addr):
324+
def parseaddr(addr, *, strict=True):
209325
"""
210326
Parse addr into its constituent realname and email address parts.
211327
212328
Return a tuple of realname and email address, unless the parse fails, in
213329
which case return a 2-tuple of ('', '').
330+
331+
If strict is True, use a strict parser which rejects malformed inputs.
214332
"""
215-
addrs = _AddressList(addr).addresslist
216-
if not addrs:
217-
return '', ''
333+
if not strict:
334+
addrs = _AddressList(addr).addresslist
335+
if not addrs:
336+
return ('', '')
337+
return addrs[0]
338+
339+
if isinstance(addr, list):
340+
addr = addr[0]
341+
342+
if not isinstance(addr, str):
343+
return ('', '')
344+
345+
addr = _pre_parse_validation([addr])[0]
346+
addrs = _post_parse_validation(_AddressList(addr).addresslist)
347+
348+
if not addrs or len(addrs) > 1:
349+
return ('', '')
350+
218351
return addrs[0]
219352

220353

Lib/pydoc_data/topics.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
# Autogenerated by Sphinx on Tue Apr 2 09:24:48 2024
2+
# Autogenerated by Sphinx on Sat Sep 7 02:03:11 2024
33
# as part of the release process.
44
topics = {'assert': 'The "assert" statement\n'
55
'**********************\n'
@@ -13958,10 +13958,10 @@
1395813958
' The iterator returns "tuple"s containing the "(start_line,\n'
1395913959
' end_line, start_column, end_column)". The *i-th* tuple '
1396013960
'corresponds\n'
13961-
' to the position of the source code that compiled to the *i-th*\n'
13962-
' instruction. Column information is 0-indexed utf-8 byte offsets '
13963-
'on\n'
13964-
' the given source line.\n'
13961+
' to the position of the source code that compiled to the *i-th* '
13962+
'code\n'
13963+
' unit. Column information is 0-indexed utf-8 byte offsets on the\n'
13964+
' given source line.\n'
1396513965
'\n'
1396613966
' This positional information can be missing. A non-exhaustive '
1396713967
'lists\n'

0 commit comments

Comments
 (0)