|
# Matches any one of the characters ] [ \ ( ) < > @ , : ; " and '.'.
# NOTE(review): this looks like the RFC 5322 "specials" set -- confirm.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Matches a backslash or a double quote; presumably the characters that must
# be backslash-escaped inside a quoted string (verify against the callers).
escapesre = re.compile(r'[\\"]')
|
50 | 50 |
|
| 51 | + |
51 | 52 | def _has_surrogates(s):
|
52 | 53 | """Return True if s may contain surrogate-escaped binary data."""
|
53 | 54 | # This check is based on the fact that unless there are surrogates, utf8
|
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
|
106 | 107 | return address
|
107 | 108 |
|
108 | 109 |
|
| 110 | +def _iter_escaped_chars(addr): |
| 111 | + pos = 0 |
| 112 | + escape = False |
| 113 | + for pos, ch in enumerate(addr): |
| 114 | + if escape: |
| 115 | + yield (pos, '\\' + ch) |
| 116 | + escape = False |
| 117 | + elif ch == '\\': |
| 118 | + escape = True |
| 119 | + else: |
| 120 | + yield (pos, ch) |
| 121 | + if escape: |
| 122 | + yield (pos, '\\') |
| 123 | + |
| 124 | + |
| 125 | +def _strip_quoted_realnames(addr): |
| 126 | + """Strip real names between quotes.""" |
| 127 | + if '"' not in addr: |
| 128 | + # Fast path |
| 129 | + return addr |
| 130 | + |
| 131 | + start = 0 |
| 132 | + open_pos = None |
| 133 | + result = [] |
| 134 | + for pos, ch in _iter_escaped_chars(addr): |
| 135 | + if ch == '"': |
| 136 | + if open_pos is None: |
| 137 | + open_pos = pos |
| 138 | + else: |
| 139 | + if start != open_pos: |
| 140 | + result.append(addr[start:open_pos]) |
| 141 | + start = pos + 1 |
| 142 | + open_pos = None |
| 143 | + |
| 144 | + if start < len(addr): |
| 145 | + result.append(addr[start:]) |
| 146 | + |
| 147 | + return ''.join(result) |
109 | 148 |
|
110 |
# NOTE(review): presumably a feature flag that lets callers detect that
# getaddresses()/parseaddr() accept the 'strict' keyword -- confirm.
supports_strict_parsing = True


def getaddresses(fieldvalues, *, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    """

    # If strict is true and the number of parsed addresses differs from the
    # number expected from the input, a parsing error has occurred and
    # consequently a list containing a single empty 2-tuple [('', '')] is
    # returned in its place.  This is done to avoid invalid output.
    #
    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
    # Safe output: [('', '')]

    if not strict:
        joined = COMMASPACE.join(str(v) for v in fieldvalues)
        return _AddressList(joined).addresslist

    fieldvalues = [str(v) for v in fieldvalues]
    fieldvalues = _pre_parse_validation(fieldvalues)
    addr = COMMASPACE.join(fieldvalues)
    a = _AddressList(addr)
    result = _post_parse_validation(a.addresslist)

    # Treat output as invalid if the number of addresses is not equal to the
    # expected number of addresses.
    n = 0
    for v in fieldvalues:
        # When a comma is used in the Real Name part it is not a delimiter.
        # So strip those out before counting the commas.
        v = _strip_quoted_realnames(v)
        # Expected number of addresses: 1 + number of commas
        n += 1 + v.count(',')
    if len(result) != n:
        return [('', '')]

    return result
| 194 | + |
| 195 | + |
def _check_parenthesis(addr):
    """Return True if the unescaped parentheses in addr are balanced.

    Quoted spans are removed first, and backslash-escaped parentheses are
    not counted.  A ')' with no matching '(' before it makes the check fail
    immediately.
    """
    # Ignore parenthesis in quoted real names.
    addr = _strip_quoted_realnames(addr)

    opens = 0
    for pos, ch in _iter_escaped_chars(addr):
        if ch == '(':
            opens += 1
        elif ch == ')':
            opens -= 1
            if opens < 0:
                # Closing parenthesis without a matching opening one.
                return False
    return opens == 0
| 209 | + |
| 210 | + |
| 211 | +def _pre_parse_validation(email_header_fields): |
| 212 | + accepted_values = [] |
| 213 | + for v in email_header_fields: |
| 214 | + if not _check_parenthesis(v): |
| 215 | + v = "('', '')" |
| 216 | + accepted_values.append(v) |
| 217 | + |
| 218 | + return accepted_values |
| 219 | + |
| 220 | + |
| 221 | +def _post_parse_validation(parsed_email_header_tuples): |
| 222 | + accepted_values = [] |
| 223 | + # The parser would have parsed a correctly formatted domain-literal |
| 224 | + # The existence of an [ after parsing indicates a parsing failure |
| 225 | + for v in parsed_email_header_tuples: |
| 226 | + if '[' in v[1]: |
| 227 | + v = ('', '') |
| 228 | + accepted_values.append(v) |
| 229 | + |
| 230 | + return accepted_values |
115 | 231 |
|
116 | 232 |
|
117 | 233 | def _format_timetuple_and_zone(timetuple, zone):
|
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
|
205 | 321 | tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|
206 | 322 |
|
207 | 323 |
|
208 |
def parseaddr(addr, *, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is True, use a strict parser which rejects malformed inputs.
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        if not addrs:
            return ('', '')
        return addrs[0]

    if isinstance(addr, list):
        if not addr:
            # An empty list has no first element; report a failed parse
            # instead of raising IndexError.
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # Strict mode accepts exactly one parsed address.
    if not addrs or len(addrs) > 1:
        return ('', '')

    return addrs[0]
|
219 | 352 |
|
220 | 353 |
|
|
0 commit comments