Skip to content

Commit 7799902

Browse files
committed
More Structured Errors?
Signed-off-by: Elsie Hupp <[email protected]>
1 parent 1682bda commit 7799902

File tree

4 files changed

+235
-51
lines changed

4 files changed

+235
-51
lines changed

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
include email_validator.py
2+
include error_classes.py
23
include LICENSE README.md

email_validator/__init__.py

+35-48
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import dns.exception
88
import idna # implements IDNA 2008; Python's codec is only IDNA 2003
99

10+
from email_validator.error_classes import *
1011

1112
# Based on RFC 2822 section 3.2.4 / RFC 5322 section 3.2.3, these
1213
# characters are permitted in email addresses (not taking into
@@ -49,22 +50,6 @@
4950

5051
DEFAULT_TIMEOUT = 15 # secs
5152

52-
53-
class EmailNotValidError(ValueError):
54-
"""Parent class of all exceptions raised by this module."""
55-
pass
56-
57-
58-
class EmailSyntaxError(EmailNotValidError):
59-
"""Exception raised when an email address fails validation because of its form."""
60-
pass
61-
62-
63-
class EmailUndeliverableError(EmailNotValidError):
64-
"""Exception raised when an email address fails validation because its domain name does not appear deliverable."""
65-
pass
66-
67-
6853
class ValidatedEmail(object):
6954
"""The validate_email function returns objects of this type holding the normalized form of the email address
7055
and other information."""
@@ -174,10 +159,10 @@ def as_dict(self):
174159

175160
def __get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH):
176161
diff = len(addr) - limit
177-
reason = "({}{} character{} too many)"
162+
reason_string = "({}{} character{} too many)"
178163
prefix = "at least " if utf8 else ""
179164
suffix = "s" if diff > 1 else ""
180-
return reason.format(prefix, diff, suffix)
165+
return (reason_string.format(prefix, diff, suffix), diff)
181166

182167

183168
def caching_resolver(timeout=DEFAULT_TIMEOUT, cache=None):
@@ -208,12 +193,14 @@ def validate_email(
208193
try:
209194
email = email.decode("ascii")
210195
except ValueError:
211-
raise EmailSyntaxError("The email address is not valid ASCII.")
196+
raise EmailInvalidAsciiError("The email address is not valid ASCII.")
212197

213198
# At-sign.
214199
parts = email.split('@')
215-
if len(parts) != 2:
216-
raise EmailSyntaxError("The email address is not valid. It must have exactly one @-sign.")
200+
if len(parts) < 2:
201+
raise EmailNoAtSignError("The email address is not valid. It must have exactly one @-sign.")
202+
if len(parts) > 2:
203+
raise EmailMultipleAtSignsError("The email address is not valid. It must have exactly one @-sign.")
217204

218205
# Collect return values in this instance.
219206
ret = ValidatedEmail()
@@ -261,22 +248,22 @@ def validate_email(
261248
# See the length checks on the local part and the domain.
262249
if ret.ascii_email and len(ret.ascii_email) > EMAIL_MAX_LENGTH:
263250
if ret.ascii_email == ret.email:
264-
reason = __get_length_reason(ret.ascii_email)
251+
reason_tuple = __get_length_reason(ret.ascii_email)
265252
elif len(ret.email) > EMAIL_MAX_LENGTH:
266253
# If there are more than 254 characters, then the ASCII
267254
# form is definitely going to be too long.
268-
reason = __get_length_reason(ret.email, utf8=True)
255+
reason_tuple = __get_length_reason(ret.email, utf8=True)
269256
else:
270-
reason = "(when converted to IDNA ASCII)"
271-
raise EmailSyntaxError("The email address is too long {}.".format(reason))
257+
reason_tuple = "(when converted to IDNA ASCII)"
258+
raise EmailTooLongAsciiError("The email address is too long {}.".format(reason_tuple[0]), reason_tuple[1])
272259
if len(ret.email.encode("utf8")) > EMAIL_MAX_LENGTH:
273260
if len(ret.email) > EMAIL_MAX_LENGTH:
274261
# If there are more than 254 characters, then the UTF-8
275262
# encoding is definitely going to be too long.
276-
reason = __get_length_reason(ret.email, utf8=True)
263+
reason_tuple = __get_length_reason(ret.email, utf8=True)
277264
else:
278-
reason = "(when encoded in bytes)"
279-
raise EmailSyntaxError("The email address is too long {}.".format(reason))
265+
reason_tuple = "(when encoded in bytes)"
266+
raise EmailTooLongUtf8Error("The email address is too long {}.".format(reason_tuple[0]), reason_tuple[1])
280267

281268
if check_deliverability:
282269
# Validate the email address's deliverability and update the
@@ -296,7 +283,7 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
296283

297284
if len(local) == 0:
298285
if not allow_empty_local:
299-
raise EmailSyntaxError("There must be something before the @-sign.")
286+
raise EmailDomainPartEmptyError("There must be something before the @-sign.")
300287
else:
301288
# The caller allows an empty local part. Useful for validating certain
302289
# Postfix aliases.
@@ -313,8 +300,8 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
313300
# that may not be relevant. We will check the total address length
314301
# instead.
315302
if len(local) > LOCAL_PART_MAX_LENGTH:
316-
reason = __get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH)
317-
raise EmailSyntaxError("The email address is too long before the @-sign {}.".format(reason))
303+
reason_tuple = __get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH)
304+
raise EmailLocalPartTooLongError("The email address is too long before the @-sign {}.".format(reason_tuple[0]))
318305

319306
# Check the local part against the regular expression for the older ASCII requirements.
320307
m = re.match(DOT_ATOM_TEXT + "\\Z", local)
@@ -334,11 +321,11 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
334321
bad_chars = ', '.join(sorted(set(
335322
c for c in local if not re.match(u"[" + (ATEXT if not allow_smtputf8 else ATEXT_UTF8) + u"]", c)
336323
)))
337-
raise EmailSyntaxError("The email address contains invalid characters before the @-sign: %s." % bad_chars)
324+
raise EmailLocalPartInvalidCharactersError("The email address contains invalid characters before the @-sign: %s." % bad_chars)
338325

339326
# It would be valid if internationalized characters were allowed by the caller.
340327
if not allow_smtputf8:
341-
raise EmailSyntaxError("Internationalized characters before the @-sign are not supported.")
328+
raise EmailLocalPartInternationalizedCharactersError("Internationalized characters before the @-sign are not supported.")
342329

343330
# It's valid.
344331

@@ -357,7 +344,7 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
357344
def validate_email_domain_part(domain):
358345
# Empty?
359346
if len(domain) == 0:
360-
raise EmailSyntaxError("There must be something after the @-sign.")
347+
raise EmailDomainPartEmptyError("There must be something after the @-sign.")
361348

362349
# Perform UTS-46 normalization, which includes casefolding, NFC normalization,
363350
# and converting all label separators (the period/full stop, fullwidth full stop,
@@ -367,18 +354,18 @@ def validate_email_domain_part(domain):
367354
try:
368355
domain = idna.uts46_remap(domain, std3_rules=False, transitional=False)
369356
except idna.IDNAError as e:
370-
raise EmailSyntaxError("The domain name %s contains invalid characters (%s)." % (domain, str(e)))
357+
raise EmailDomainInvalidIdnaError("The domain name %s contains invalid characters (%s)." % (domain, str(e)), e)
371358

372359
# Now we can perform basic checks on the use of periods (since equivalent
373360
# symbols have been mapped to periods). These checks are needed because the
374361
# IDNA library doesn't handle well domains that have empty labels (i.e. initial
375362
# dot, trailing dot, or two dots in a row).
376363
if domain.endswith("."):
377-
raise EmailSyntaxError("An email address cannot end with a period.")
364+
raise EmailDomainEndsWithPeriodError("An email address cannot end with a period.")
378365
if domain.startswith("."):
379-
raise EmailSyntaxError("An email address cannot have a period immediately after the @-sign.")
366+
raise EmailDomainStartsWithPeriodError("An email address cannot have a period immediately after the @-sign.")
380367
if ".." in domain:
381-
raise EmailSyntaxError("An email address cannot have two periods in a row.")
368+
raise EmailDomainMultiplePeriodsInARowError("An email address cannot have two periods in a row.")
382369

383370
# Regardless of whether international characters are actually used,
384371
# first convert to IDNA ASCII. For ASCII-only domains, the transformation
@@ -398,8 +385,8 @@ def validate_email_domain_part(domain):
398385
# the length check is applied to a string that is different from the
399386
# one the user supplied. Also I'm not sure if the length check applies
400387
# to the internationalized form, the IDNA ASCII form, or even both!
401-
raise EmailSyntaxError("The email address is too long after the @-sign.")
402-
raise EmailSyntaxError("The domain name %s contains invalid characters (%s)." % (domain, str(e)))
388+
raise EmailDomainTooLongError("The email address is too long after the @-sign.")
389+
raise EmailDomainInvalidIdnaError("The domain name %s contains invalid characters (%s)." % (domain, str(e)), e)
403390

404391
# We may have been given an IDNA ASCII domain to begin with. Check
405392
# that the domain actually conforms to IDNA. It could look like IDNA
@@ -411,7 +398,7 @@ def validate_email_domain_part(domain):
411398
try:
412399
domain_i18n = idna.decode(ascii_domain.encode('ascii'))
413400
except idna.IDNAError as e:
414-
raise EmailSyntaxError("The domain name %s is not valid IDNA (%s)." % (ascii_domain, str(e)))
401+
raise EmailDomainInvalidIdnaError("The domain name %s is not valid IDNA (%s)." % (ascii_domain, str(e)), e)
415402

416403
# RFC 5321 4.5.3.1.2
417404
# We're checking the number of bytes (octets) here, which can be much
@@ -420,7 +407,7 @@ def validate_email_domain_part(domain):
420407
# as IDNA ASCII. This is also checked by idna.encode, so this exception
421408
# is never reached.
422409
if len(ascii_domain) > DOMAIN_MAX_LENGTH:
423-
raise EmailSyntaxError("The email address is too long after the @-sign.")
410+
raise EmailDomainTooLongError("The email address is too long after the @-sign.")
424411

425412
# A "dot atom text", per RFC 2822 3.2.4, but using the restricted
426413
# characters allowed in a hostname (see ATEXT_HOSTNAME above).
@@ -430,14 +417,14 @@ def validate_email_domain_part(domain):
430417
# with idna.decode, which also checks this format.
431418
m = re.match(DOT_ATOM_TEXT + "\\Z", ascii_domain)
432419
if not m:
433-
raise EmailSyntaxError("The email address contains invalid characters after the @-sign.")
420+
raise EmailDomainInvalidCharactersError("The email address contains invalid characters after the @-sign.")
434421

435422
# All publicly deliverable addresses have domain named with at least
436423
# one period. We also know that all TLDs end with a letter.
437424
if "." not in ascii_domain:
438-
raise EmailSyntaxError("The domain name %s is not valid. It should have a period." % domain_i18n)
425+
raise EmailDomainNoPeriodError("The domain name %s is not valid. It should have a period." % domain_i18n)
439426
if not re.search(r"[A-Za-z]\Z", ascii_domain):
440-
raise EmailSyntaxError(
427+
raise EmailDomainNoValidTldError(
441428
"The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n
442429
)
443430

@@ -509,7 +496,7 @@ def dns_resolver_resolve_shim(domain, record):
509496

510497
# If there was no MX, A, or AAAA record, then mail to
511498
# this domain is not deliverable.
512-
raise EmailUndeliverableError("The domain name %s does not exist." % domain_i18n)
499+
raise EmailDomainNameDoesNotExistError("The domain name %s does not exist." % domain_i18n)
513500

514501
except dns.exception.Timeout:
515502
# A timeout could occur for various reasons, so don't treat it as a failure.
@@ -523,8 +510,8 @@ def dns_resolver_resolve_shim(domain, record):
523510

524511
except Exception as e:
525512
# Unhandled conditions should not propagate.
526-
raise EmailUndeliverableError(
527-
"There was an error while checking if the domain name in the email address is deliverable: " + str(e)
513+
raise EmailDomainUnhandledDnsExceptionError(
514+
"There was an error while checking if the domain name in the email address is deliverable: " + str(e), e
528515
)
529516

530517
return {
+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# -*- coding: utf-8 -*-
2+
3+
class EmailNotValidError(ValueError):
    """Parent class of all exceptions raised by this module.

    The first positional argument is the human-readable message. Some
    subclasses are raised with a second positional argument carrying
    structured detail (for example an integer or an underlying exception);
    it stays available as ``args[1]``.
    """

    def __str__(self):
        """Return only the human-readable message (the first argument).

        The inherited implementation renders the whole ``args`` tuple when
        more than one argument is given, which would turn ``str(e)`` into
        something like ``"('message', 3)"``. Callers display ``str(e)`` to
        users, so the extra structured argument must not leak into it.
        """
        return str(self.args[0]) if self.args else ""


class EmailSyntaxError(EmailNotValidError):
    """Parent class of exceptions raised when an email address fails validation because of its form."""
    pass


# Syntax errors pertaining to the email address as a whole
class EmailInvalidAsciiError(EmailSyntaxError):
    """Exception raised when an email address fails validation because it is not valid ASCII."""
    pass


class EmailNoAtSignError(EmailSyntaxError):
    """Exception raised when an email address fails validation because it does not contain an @-sign."""
    pass


class EmailMultipleAtSignsError(EmailSyntaxError):
    """Exception raised when an email address fails validation because it contains more than one @-sign."""
    pass


# Syntax errors pertaining to the email address being too long
class EmailTooLongError(EmailSyntaxError):
    """Parent class of exceptions raised when an email address fails validation because it is too long."""
    pass


class EmailTooLongAsciiError(EmailTooLongError):
    """Exception raised when an email address fails validation because it is too long when converted to IDNA ASCII.

    May contain a second argument with the integer number of characters
    by which the email address exceeds the allowed length.
    """
    pass


class EmailTooLongUtf8Error(EmailTooLongError):
    """Exception raised when an email address fails validation because it is too long when encoded in bytes.

    May contain a second argument with the integer number of characters
    by which the email address exceeds the allowed length.
    """
    pass


# Syntax errors pertaining to the local part of the email (i.e. before the @-sign)
class EmailLocalPartError(EmailSyntaxError):
    """Parent class of exceptions raised when an email address fails validation because of its local part."""
    pass


class EmailLocalPartEmptyError(EmailLocalPartError):
    """Exception raised when an email address fails validation because it contains no characters before the @-sign."""
    pass


class EmailLocalPartTooLongError(EmailLocalPartError):
    """Exception raised when an email address fails validation because the part before the @-sign is too long.

    May contain a second argument with the integer number of characters
    by which the local part exceeds the allowed length.
    """
    pass


class EmailLocalPartInvalidCharactersError(EmailLocalPartError):
    """Exception raised when an email address fails validation because it contains invalid characters before the @-sign."""
    pass


class EmailLocalPartInternationalizedCharactersError(EmailLocalPartError):
    """Exception raised when an email address fails validation because it contains internationalized characters before the @-sign."""
    pass


# Syntax errors pertaining to the domain part of the email (i.e. after the @-sign)
class EmailDomainPartError(EmailSyntaxError):
    """Parent class of exceptions raised when an email address fails validation because of its domain part."""
    pass


class EmailDomainPartEmptyError(EmailDomainPartError):
    """Exception raised when an email address fails validation because it contains no characters after the @-sign."""
    pass


class EmailDomainInvalidCharactersError(EmailDomainPartError):
    """Exception raised when an email address fails validation because it contains invalid characters after the @-sign."""
    pass


class EmailDomainInvalidIdnaError(EmailDomainInvalidCharactersError):
    """Exception raised when an email address fails validation because it contains invalid characters after the @-sign.

    Contains the original IDNA error as a second argument.
    """
    pass


class EmailDomainEndsWithPeriodError(EmailDomainPartError):
    """Exception raised when an email address fails validation because it ends with a period."""
    pass


class EmailDomainStartsWithPeriodError(EmailDomainPartError):
    """Exception raised when an email address fails validation because it has a period immediately after the @-sign."""
    pass


class EmailDomainMultiplePeriodsInARowError(EmailDomainPartError):
    """Exception raised when an email address fails validation because it contains two or more periods in a row after the @-sign."""
    pass


class EmailDomainTooLongError(EmailDomainPartError):
    """Exception raised when an email address fails validation because the part after the @-sign is too long."""
    pass


class EmailDomainNoPeriodError(EmailDomainPartError):
    """Exception raised when an email address fails validation because it does not contain a period after the @-sign."""
    pass


class EmailDomainNoValidTldError(EmailDomainPartError):
    """Exception raised when an email address fails validation because it does not contain a valid top-level domain (TLD) after the @-sign."""
    pass


# Errors determined heuristically from DNS queries
# The parent class name is retained for backwards-compatibility
class EmailUndeliverableError(EmailNotValidError):
    """Parent class of exceptions raised when an email address fails validation because its domain name does not appear deliverable."""
    pass


class EmailDomainNameDoesNotExistError(EmailUndeliverableError):
    """Exception raised when an email address fails validation because its domain name does not exist."""
    pass


class EmailDomainUnhandledDnsExceptionError(EmailUndeliverableError):
    """Exception raised when an email address fails validation because the DNS query of its domain name has raised an exception.

    Contains the DNS exception (from the Python dns module) as the second argument.
    """
    pass
128+

0 commit comments

Comments
 (0)