|
48 | 48 | specialsre = re.compile(r'[][\\()<>@,:;".]')
|
49 | 49 | escapesre = re.compile(r'[\\"]')
|
50 | 50 |
|
| 51 | + |
51 | 52 | def _has_surrogates(s):
|
52 | 53 | """Return True if s contains surrogate-escaped binary data."""
|
53 | 54 | # This check is based on the fact that unless there are surrogates, utf8
|
@@ -105,26 +106,127 @@ def formataddr(pair, charset='utf-8'):
|
105 | 106 | return '%s%s%s <%s>' % (quotes, name, quotes, address)
|
106 | 107 | return address
|
107 | 108 |
|
| 109 | +def _iter_escaped_chars(addr): |
| 110 | + pos = 0 |
| 111 | + escape = False |
| 112 | + for pos, ch in enumerate(addr): |
| 113 | + if escape: |
| 114 | + yield (pos, '\\' + ch) |
| 115 | + escape = False |
| 116 | + elif ch == '\\': |
| 117 | + escape = True |
| 118 | + else: |
| 119 | + yield (pos, ch) |
| 120 | + if escape: |
| 121 | + yield (pos, '\\') |
| 122 | + |
| 123 | + |
| 124 | +def _strip_quoted_realnames(addr): |
| 125 | + """Strip real names between quotes.""" |
| 126 | + if '"' not in addr: |
| 127 | + # Fast path |
| 128 | + return addr |
| 129 | + |
| 130 | + start = 0 |
| 131 | + open_pos = None |
| 132 | + result = [] |
| 133 | + for pos, ch in _iter_escaped_chars(addr): |
| 134 | + if ch == '"': |
| 135 | + if open_pos is None: |
| 136 | + open_pos = pos |
| 137 | + else: |
| 138 | + if start != open_pos: |
| 139 | + result.append(addr[start:open_pos]) |
| 140 | + start = pos + 1 |
| 141 | + open_pos = None |
| 142 | + |
| 143 | + if start < len(addr): |
| 144 | + result.append(addr[start:]) |
| 145 | + |
| 146 | + return ''.join(result) |
108 | 147 |
|
109 | 148 |
|
110 |
| -def getaddresses(fieldvalues): |
111 |
| - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" |
112 |
| - all = COMMASPACE.join(fieldvalues) |
113 |
| - a = _AddressList(all) |
114 |
| - return a.addresslist |
| 149 | +supports_strict_parsing = True |
115 | 150 |
|
def getaddresses(fieldvalues, *, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    """
    values = [str(value) for value in fieldvalues]

    if not strict:
        return _AddressList(COMMASPACE.join(values)).addresslist

    # In strict mode, if the number of parsed addresses differs from the
    # number of addresses the input is expected to contain, a parsing error
    # has occurred and a list holding a single empty 2-tuple [('', '')] is
    # returned instead.  This is done to avoid invalid output.
    #
    # Malformed input: getaddresses(['[email protected] <[email protected]>'])
    # Invalid output: [('', '[email protected]'), ('', '[email protected]')]
    # Safe output: [('', '')]
    values = _pre_parse_validation(values)
    parsed = _AddressList(COMMASPACE.join(values)).addresslist
    result = _post_parse_validation(parsed)

    # Expected number of addresses per value: 1 + number of commas.  A comma
    # inside a quoted real name is not a delimiter, so quoted sections are
    # stripped out before the commas are counted.
    expected = sum(
        1 + _strip_quoted_realnames(value).count(',') for value in values
    )

    return result if len(result) == expected else [('', '')]
| 193 | + |
| 194 | + |
def _check_parenthesis(addr):
    """Return True if the parentheses in addr are balanced.

    Parentheses inside double-quoted sections and parentheses escaped with a
    backslash are not counted.  A closing parenthesis with no matching
    opening one makes the check fail immediately.
    """
    # Ignore parenthesis in quoted real names.
    unquoted = _strip_quoted_realnames(addr)

    depth = 0
    for _, token in _iter_escaped_chars(unquoted):
        if token == '(':
            depth += 1
        elif token == ')':
            if depth == 0:
                return False
            depth -= 1
    return depth == 0
| 208 | + |
| 209 | + |
| 210 | +def _pre_parse_validation(email_header_fields): |
| 211 | + accepted_values = [] |
| 212 | + for v in email_header_fields: |
| 213 | + if not _check_parenthesis(v): |
| 214 | + v = "('', '')" |
| 215 | + accepted_values.append(v) |
| 216 | + |
| 217 | + return accepted_values |
| 218 | + |
| 219 | + |
| 220 | +def _post_parse_validation(parsed_email_header_tuples): |
| 221 | + accepted_values = [] |
| 222 | + # The parser would have parsed a correctly formatted domain-literal |
| 223 | + # The existence of an [ after parsing indicates a parsing failure |
| 224 | + for v in parsed_email_header_tuples: |
| 225 | + if '[' in v[1]: |
| 226 | + v = ('', '') |
| 227 | + accepted_values.append(v) |
| 228 | + |
| 229 | + return accepted_values |
128 | 230 |
|
129 | 231 | def _format_timetuple_and_zone(timetuple, zone):
|
130 | 232 | return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
|
@@ -214,16 +316,33 @@ def parsedate_to_datetime(data):
|
214 | 316 | tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|
215 | 317 |
|
216 | 318 |
|
217 |
| -def parseaddr(addr): |
def parseaddr(addr, *, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is True, use a strict parser which rejects malformed inputs.
    In strict mode a list argument is reduced to its first element, and an
    empty list or any non-string value yields ('', '').
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        return addrs[0] if addrs else ('', '')

    if isinstance(addr, list):
        if not addr:
            # An empty list has no first element to parse; report a failed
            # parse instead of letting addr[0] raise IndexError.
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # Strict mode accepts exactly one parsed address; zero or several means
    # the input was malformed.
    if len(addrs) != 1:
        return ('', '')

    return addrs[0]
|
228 | 347 |
|
229 | 348 |
|
|
0 commit comments