Skip to content

Commit f47015f

Browse files
committed
[CVE-2023-27043] detect email address parsing errors
Return a 2-tuple of empty strings ('', '') to indicate a parsing error (old API). Code is from gh#python/cpython@4a153a1, it was released upstream in 3.13.0a3. Fixes: bsc#1210638 Patch: CVE-2023-27043-email-parsing-errors.patch
1 parent 5afa628 commit f47015f

File tree

4 files changed

+339
-30
lines changed

4 files changed

+339
-30
lines changed

Doc/library/email.utils.rst

+15-4
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,18 @@ of the new API.
6060
begins with angle brackets, they are stripped off.
6161

6262

63-
.. function:: parseaddr(address)
63+
.. function:: parseaddr(address, *, strict=True)
6464

6565
Parse address -- which should be the value of some address-containing field such
6666
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
6767
*email address* parts. Returns a tuple of that information, unless the parse
6868
fails, in which case a 2-tuple of ``('', '')`` is returned.
6969

70+
If *strict* is true, use a strict parser which rejects malformed inputs.
71+
72+
.. versionchanged:: 3.13
73+
Add *strict* optional parameter and reject malformed inputs by default.
74+
7075

7176
.. function:: formataddr(pair, charset='utf-8')
7277

@@ -84,12 +89,15 @@ of the new API.
8489
Added the *charset* option.
8590

8691

87-
.. function:: getaddresses(fieldvalues)
92+
.. function:: getaddresses(fieldvalues, *, strict=True)
8893

8994
This function returns a list of 2-tuples of the form returned by ``parseaddr()``.
9095
*fieldvalues* is a sequence of header field values as might be returned by
91-
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
92-
example that gets all the recipients of a message::
96+
:meth:`Message.get_all <email.message.Message.get_all>`.
97+
98+
If *strict* is true, use a strict parser which rejects malformed inputs.
99+
100+
Here's a simple example that gets all the recipients of a message::
93101

94102
from email.utils import getaddresses
95103

@@ -99,6 +107,9 @@ of the new API.
99107
resent_ccs = msg.get_all('resent-cc', [])
100108
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
101109

110+
.. versionchanged:: 3.13
111+
Add *strict* optional parameter and reject malformed inputs by default.
112+
102113

103114
.. function:: parsedate(date)
104115

Lib/email/utils.py

+137-18
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
# Matches any single one of these characters: ] [ \ ( ) < > @ , : ; " .
specialsre = re.compile(r'[][\\()<>@,:;".]')
4949
# Matches a single backslash or double-quote character.
escapesre = re.compile(r'[\\"]')
5050

51+
5152
def _has_surrogates(s):
5253
"""Return True if s contains surrogate-escaped binary data."""
5354
# This check is based on the fact that unless there are surrogates, utf8
@@ -105,26 +106,127 @@ def formataddr(pair, charset='utf-8'):
105106
return '%s%s%s <%s>' % (quotes, name, quotes, address)
106107
return address
107108

109+
def _iter_escaped_chars(addr):
110+
pos = 0
111+
escape = False
112+
for pos, ch in enumerate(addr):
113+
if escape:
114+
yield (pos, '\\' + ch)
115+
escape = False
116+
elif ch == '\\':
117+
escape = True
118+
else:
119+
yield (pos, ch)
120+
if escape:
121+
yield (pos, '\\')
122+
123+
124+
def _strip_quoted_realnames(addr):
125+
"""Strip real names between quotes."""
126+
if '"' not in addr:
127+
# Fast path
128+
return addr
129+
130+
start = 0
131+
open_pos = None
132+
result = []
133+
for pos, ch in _iter_escaped_chars(addr):
134+
if ch == '"':
135+
if open_pos is None:
136+
open_pos = pos
137+
else:
138+
if start != open_pos:
139+
result.append(addr[start:open_pos])
140+
start = pos + 1
141+
open_pos = None
142+
143+
if start < len(addr):
144+
result.append(addr[start:])
145+
146+
return ''.join(result)
108147

109148

110-
def getaddresses(fieldvalues):
111-
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
112-
all = COMMASPACE.join(fieldvalues)
113-
a = _AddressList(all)
114-
return a.addresslist
149+
# Module-level flag, always True in this version.
# NOTE(review): presumably lets callers feature-detect the ``strict`` keyword
# of getaddresses()/parseaddr() -- confirm against the upstream change.
supports_strict_parsing = True
115150

151+
def getaddresses(fieldvalues, *, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    fieldvalues is an iterable of header field values; each one is converted
    with str() before parsing.

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    If strict is false, the joined field values are handed to the parser
    without any validation of the input or the result.
    """

    # If strict is true and the number of parsed addresses differs from the
    # number of addresses expected from the input, a parsing error has
    # occurred and consequently a list containing a single empty 2-tuple
    # [('', '')] is returned in its place. This is done to avoid invalid
    # output.
    #
    # Malformed input: getaddresses(['alice@example.org <bob@example.com>'])
    # Invalid output: [('', 'alice@example.org'), ('', 'bob@example.com')]
    # Safe output: [('', '')]

    if not strict:
        joined = COMMASPACE.join(str(v) for v in fieldvalues)
        return _AddressList(joined).addresslist

    fieldvalues = [str(v) for v in fieldvalues]
    fieldvalues = _pre_parse_validation(fieldvalues)
    addr = COMMASPACE.join(fieldvalues)
    result = _post_parse_validation(_AddressList(addr).addresslist)

    # Treat output as invalid if the number of addresses is not equal to the
    # expected number of addresses.  Each field value is expected to hold
    # 1 + (number of commas) addresses.  A comma inside a quoted real name is
    # not a delimiter, so quoted real names are stripped before counting.
    expected = sum(
        1 + _strip_quoted_realnames(v).count(',') for v in fieldvalues
    )
    if len(result) != expected:
        return [('', '')]

    return result
193+
194+
195+
def _check_parenthesis(addr):
    """Return True if the parentheses in addr are balanced.

    Parentheses inside quoted real names and backslash-escaped parentheses
    are ignored.  A closing parenthesis with no matching opener makes the
    check fail immediately.
    """
    # Ignore parenthesis in quoted real names.
    unquoted = _strip_quoted_realnames(addr)

    depth = 0
    for _, token in _iter_escaped_chars(unquoted):
        if token == '(':
            depth += 1
            continue
        if token == ')':
            depth -= 1
            if depth < 0:
                return False
    return depth == 0
208+
209+
210+
def _pre_parse_validation(email_header_fields):
211+
accepted_values = []
212+
for v in email_header_fields:
213+
if not _check_parenthesis(v):
214+
v = "('', '')"
215+
accepted_values.append(v)
216+
217+
return accepted_values
218+
219+
220+
def _post_parse_validation(parsed_email_header_tuples):
221+
accepted_values = []
222+
# The parser would have parsed a correctly formatted domain-literal
223+
# The existence of an [ after parsing indicates a parsing failure
224+
for v in parsed_email_header_tuples:
225+
if '[' in v[1]:
226+
v = ('', '')
227+
accepted_values.append(v)
228+
229+
return accepted_values
128230

129231
def _format_timetuple_and_zone(timetuple, zone):
130232
return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
@@ -214,16 +316,33 @@ def parsedate_to_datetime(data):
214316
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
215317

216318

217-
def parseaddr(addr):
319+
def parseaddr(addr, *, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is True, use a strict parser which rejects malformed inputs.
    In strict mode a list argument is represented by its first element (an
    empty list counts as a parse failure), any other non-string argument is
    a parse failure, and input that parses to more than one address is
    rejected.
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        if not addrs:
            return ('', '')
        return addrs[0]

    if isinstance(addr, list):
        if not addr:
            # An empty list has no first element; report a parse failure
            # instead of raising IndexError, matching the non-strict path.
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # Exactly one address is expected: none means the parse failed, several
    # means the input was malformed.
    if len(addrs) != 1:
        return ('', '')

    return addrs[0]
228347

229348

0 commit comments

Comments
 (0)