Skip to content

Commit ab293fd

Browse files
committed
Reorganize the library into smaller modules
There are no logic changes in this commit, just moving code.
1 parent c6722e1 commit ab293fd

File tree

9 files changed

+730
-674
lines changed

9 files changed

+730
-674
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
In Development
2+
--------------
3+
4+
* The library has been reorganized internally into smaller modules.
5+
16
Version 1.3.1 (January 21, 2023)
27
--------------------------------
38

email_validator/__init__.py

Lines changed: 7 additions & 666 deletions
Large diffs are not rendered by default.

email_validator/__main__.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# A command-line tool for testing.
2+
#
3+
# Usage:
4+
#
5+
# python -m email_validator
6+
#
7+
# Provide email addresses to validate either as a command-line argument
8+
# or in STDIN separated by newlines. No output will be given for valid
9+
# email addresses. Validation errors will be printed for invalid email
10+
# addresses.
11+
12+
import json
13+
import sys
14+
15+
from .validate_email import validate_email
16+
from .deliverability import caching_resolver
17+
from .exceptions_types import EmailNotValidError
18+
19+
20+
def __utf8_input_shim(input_str):
    """Return *input_str* as text.

    On Python 2 the value is decoded from UTF-8; on Python 3 it is
    returned unchanged.
    """
    running_python2 = sys.version_info < (3,)
    return input_str.decode("utf-8") if running_python2 else input_str
24+
25+
26+
def __utf8_output_shim(output_str):
    """Return *output_str* in a form suitable for ``print``.

    On Python 3 the value is returned unchanged. On Python 2 it is
    converted to text and encoded as UTF-8 bytes.
    """
    if sys.version_info < (3,):
        # type(u"") is the text type (``unicode`` on Python 2). This module
        # does not define or import a ``unicode_class`` alias, so derive the
        # type locally instead of referencing an unbound name.
        text_type = type(u"")
        return text_type(output_str).encode("utf-8")
    return output_str
30+
31+
32+
def main():
    """Command-line entry point for ``python -m email_validator``.

    With exactly one entry in ``sys.argv`` (no arguments), addresses are read
    from STDIN one per line; nothing is printed for valid addresses and
    "<address> <error>" is printed for invalid ones. Otherwise the first
    command-line argument is validated and either the result (as indented
    JSON) or the validation error is printed.
    """
    if len(sys.argv) != 1:
        # Validate the email address passed on the command line.
        address = __utf8_input_shim(sys.argv[1])
        try:
            validated = validate_email(address)
            print(json.dumps(validated.as_dict(), indent=2, sort_keys=True, ensure_ascii=False))
        except EmailNotValidError as err:
            print(__utf8_output_shim(err))
        return

    # Validate the email addresses passed line-by-line on STDIN, sharing one
    # caching resolver across all of them.
    dns_resolver = caching_resolver()
    for raw_line in sys.stdin:
        address = __utf8_input_shim(raw_line.strip())
        try:
            validate_email(address, dns_resolver=dns_resolver)
        except EmailNotValidError as err:
            print(__utf8_output_shim("{} {}".format(address, err)))
50+
51+
52+
if __name__ == "__main__":
53+
main()

email_validator/deliverability.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
from .exceptions_types import EmailUndeliverableError
2+
3+
import dns.resolver
4+
import dns.exception
5+
6+
7+
def caching_resolver(*, timeout=None, cache=None):
    """Create a dnspython resolver that caches its answers.

    timeout: resolver lifetime in seconds; the package's DEFAULT_TIMEOUT is
        used when None.
    cache: cache object to attach; a new dns.resolver.LRUCache is created
        when the argument is falsy.

    Returns the configured dns.resolver.Resolver.
    """
    if timeout is None:
        # Imported at call time rather than at module import time.
        from . import DEFAULT_TIMEOUT as timeout

    caching = dns.resolver.Resolver()
    caching.cache = cache if cache else dns.resolver.LRUCache()
    caching.lifetime = timeout  # timeout, in seconds
    return caching
15+
16+
17+
def validate_email_deliverability(domain, domain_i18n, timeout=None, dns_resolver=None):
    """Check via DNS whether *domain* appears able to receive email.

    domain: the domain name that is queried in DNS.
    domain_i18n: the form of the domain name used in error messages.
    timeout: lifetime applied to dnspython's default resolver; only used
        when no dns_resolver is given (DEFAULT_TIMEOUT when None).
    dns_resolver: resolver to query with; dnspython's default resolver is
        used (and its lifetime overwritten) when None.

    Returns a dict with "mx" and "mx_fallback_type" keys, plus "spf" when an
    SPF record was seen during the no-MX fallback. If a DNS timeout occurs,
    returns {"unknown-deliverability": "timeout"} instead.

    Raises EmailUndeliverableError when the domain has only a null MX record,
    has no MX, A, or AAAA record, publishes "v=spf1 -all" while having no MX
    record, or when any other unexpected error occurs during the check.
    """
    # Check that the domain resolves to an MX record. If there is no MX record,
    # try an A or AAAA record which is a deprecated fallback for deliverability.
    # Raises an EmailUndeliverableError on failure. On success, returns a dict
    # with deliverability information.

    # If no dns.resolver.Resolver was given, get dnspython's default resolver.
    # Override the default resolver's timeout. This may affect other uses of
    # dnspython in this process.
    if dns_resolver is None:
        from . import DEFAULT_TIMEOUT
        if timeout is None:
            timeout = DEFAULT_TIMEOUT
        dns_resolver = dns.resolver.get_default_resolver()
        dns_resolver.lifetime = timeout

    deliverability_info = {}

    def dns_resolver_resolve_shim(domain, record):
        # Query *record* for *domain* using whichever resolver method the
        # installed dnspython provides.
        try:
            # dns.resolver.Resolver.resolve is new to dnspython 2.x.
            # https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.resolve
            return dns_resolver.resolve(domain, record)
        except AttributeError:
            # dnspython 2.x is only available in Python 3.6 and later. For earlier versions
            # of Python, we maintain compatibility with dnspython 1.x which has a
            # dnspython.resolver.Resolver.query method instead. The only difference is that
            # query may treat the domain as relative and use the system's search domains,
            # which we prevent by adding a "." to the domain name to make it absolute.
            # dns.resolver.Resolver.query is deprecated in dnspython version 2.x.
            # https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.query
            return dns_resolver.query(domain + ".", record)

    try:
        # We need a way to check how timeouts are handled in the tests. So we
        # have a secret variable that if set makes this method always test the
        # handling of a timeout.
        if getattr(validate_email_deliverability, 'TEST_CHECK_TIMEOUT', False):
            raise dns.exception.Timeout()

        try:
            # Try resolving for MX records.
            response = dns_resolver_resolve_shim(domain, "MX")

            # For reporting, put them in priority order and remove the trailing dot in the qnames.
            mtas = sorted([(r.preference, str(r.exchange).rstrip('.')) for r in response])

            # Remove "null MX" records from the list (their value is (0, ".") but we've stripped
            # trailing dots, so the 'exchange' is just ""). If there was only a null MX record,
            # email is not deliverable.
            mtas = [(preference, exchange) for preference, exchange in mtas
                    if exchange != ""]
            if len(mtas) == 0:
                raise EmailUndeliverableError("The domain name %s does not accept email." % domain_i18n)

            deliverability_info["mx"] = mtas
            deliverability_info["mx_fallback_type"] = None

        except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):

            # If there was no MX record, fall back to an A record, as SMTP servers do.
            try:
                response = dns_resolver_resolve_shim(domain, "A")
                deliverability_info["mx"] = [(0, str(r)) for r in response]
                deliverability_info["mx_fallback_type"] = "A"
            except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):

                # If there was no A record, fall back to an AAAA record.
                try:
                    response = dns_resolver_resolve_shim(domain, "AAAA")
                    deliverability_info["mx"] = [(0, str(r)) for r in response]
                    deliverability_info["mx_fallback_type"] = "AAAA"
                except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):

                    # If there was no MX, A, or AAAA record, then mail to
                    # this domain is not deliverable.
                    raise EmailUndeliverableError("The domain name %s does not exist." % domain_i18n)

            # Check for a SPF reject-all record ("v=spf1 -all") which indicates
            # no emails are sent from this domain (similar to a NULL MX record
            # but for sending rather than receiving). In combination with the
            # absence of an MX record, this is probably a good sign that the
            # domain is not used for email.
            try:
                response = dns_resolver_resolve_shim(domain, "TXT")
                for rec in response:
                    # A TXT record's value may be split into several strings; rejoin them.
                    value = b"".join(rec.strings)
                    if value.startswith(b"v=spf1 "):
                        deliverability_info["spf"] = value.decode("ascii", errors='replace')
                        if value == b"v=spf1 -all":
                            raise EmailUndeliverableError("The domain name %s does not send email." % domain_i18n)
            except dns.resolver.NoAnswer:
                # No TXT records means there is no SPF policy, so we cannot take any action.
                pass
            except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN):
                # Failure to resolve at this step will be ignored.
                pass

    except dns.exception.Timeout:
        # A timeout could occur for various reasons, so don't treat it as a failure.
        return {
            "unknown-deliverability": "timeout",
        }

    except EmailUndeliverableError:
        # Don't let these get clobbered by the wider except block below.
        raise

    except Exception as e:
        # Unhandled conditions should not propagate.
        raise EmailUndeliverableError(
            "There was an error while checking if the domain name in the email address is deliverable: " + str(e)
        )

    return deliverability_info

email_validator/exceptions_types.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
class EmailNotValidError(ValueError):
    """Base class of every exception this module raises for an invalid email address."""
4+
5+
6+
class EmailSyntaxError(EmailNotValidError):
    """Raised when an email address is rejected because of its form (syntax)."""
9+
10+
11+
class EmailUndeliverableError(EmailNotValidError):
    """Raised when an email address is rejected because its domain name does not appear deliverable."""
14+
15+
16+
class ValidatedEmail(object):
    """The validate_email function returns objects of this type holding the normalized form of the email address
    and other information."""

    # The email address that was passed to validate_email. (If passed as bytes, this will be a string.)
    original_email = None

    # The normalized email address, which should always be used in preference to the original address.
    # The normalized address converts an IDNA ASCII domain name to Unicode, if possible, and performs
    # Unicode normalization on the local part and on the domain (if originally Unicode). It is the
    # concatenation of the local_part and domain attributes, separated by an @-sign.
    email = None

    # The local part of the email address after Unicode normalization.
    local_part = None

    # The domain part of the email address after Unicode normalization or conversion to
    # Unicode from IDNA ascii.
    domain = None

    # If not None, a form of the email address that uses 7-bit ASCII characters only.
    ascii_email = None

    # If not None, the local part of the email address using 7-bit ASCII characters only.
    ascii_local_part = None

    # If not None, a form of the domain name that uses 7-bit ASCII characters only.
    ascii_domain = None

    # If True, the SMTPUTF8 feature of your mail relay will be required to transmit messages
    # to this address. This flag is True just when ascii_local_part is missing. Otherwise it
    # is False.
    smtputf8 = None

    # If a deliverability check is performed and if it succeeds, a list of (priority, domain)
    # tuples of MX records specified in the DNS for the domain.
    mx = None

    # If no MX records are actually specified in DNS and instead are inferred, through an obsolete
    # mechanism, from A or AAAA records, the value is the type of DNS record used instead (`A` or `AAAA`).
    mx_fallback_type = None

    def __init__(self, **kwargs):
        """Set every keyword argument as an attribute on the instance. Tests use this constructor."""
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __str__(self):
        """As a convenience, str(...) on instances of this class returns the normalized address."""
        if self.email is None:
            # __str__ must return a string; with no normalized address set,
            # fall back to the repr form.
            return repr(self)
        return self.email

    def __repr__(self):
        """Return a short debugging representation that includes the normalized address."""
        return "<ValidatedEmail {}>".format(self.email)

    def __getitem__(self, key):
        """For backwards compatibility, some fields are also exposed through a dict-like interface.

        Note that some of the names changed when they became attributes:
        "domain" maps to ascii_domain and "domain_i18n" maps to domain.
        Raises KeyError (carrying the requested key) for any other key.
        """
        if key == "email":
            return self.email
        if key == "email_ascii":
            return self.ascii_email
        if key == "local":
            return self.local_part
        if key == "domain":
            return self.ascii_domain
        if key == "domain_i18n":
            return self.domain
        if key == "smtputf8":
            return self.smtputf8
        if key == "mx":
            return self.mx
        if key == "mx-fallback":
            return self.mx_fallback_type
        raise KeyError(key)

    def __eq__(self, other):
        """Compare field by field against another ValidatedEmail. Tests use this.

        original_email is not part of the comparison, and MX lists are
        compared without regard to their order.
        """
        if not isinstance(other, ValidatedEmail):
            return False
        return (
            self.email == other.email
            and self.local_part == other.local_part
            and self.domain == other.domain
            and self.ascii_email == other.ascii_email
            and self.ascii_local_part == other.ascii_local_part
            and self.ascii_domain == other.ascii_domain
            and self.smtputf8 == other.smtputf8
            and repr(sorted(self.mx) if self.mx else self.mx)
            == repr(sorted(other.mx) if other.mx else other.mx)
            and self.mx_fallback_type == other.mx_fallback_type
        )

    def as_constructor(self):
        """Return source text for a ValidatedEmail(...) call reproducing the main fields.

        This helps producing the README.
        """
        return "ValidatedEmail(" \
            + ",".join("\n {}={}".format(
                       key,
                       repr(getattr(self, key)))
                       for key in ('email', 'local_part', 'domain',
                                   'ascii_email', 'ascii_local_part', 'ascii_domain',
                                   'smtputf8', 'mx', 'mx_fallback_type')
                       ) \
            + ")"

    def as_dict(self):
        """Convenience method for accessing ValidatedEmail as a dict.

        Returns the instance's own attribute dictionary (not a copy), so only
        attributes that were set on the instance are included.
        """
        return self.__dict__

email_validator/rfc_constants.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import sys
2+
3+
# Constants taken from the email specifications. The character-set
# constants are regular-expression fragments.

# Character-class body for the characters permitted in email addresses,
# based on RFC 2822 section 3.2.4 / RFC 5322 section 3.2.3 (not taking
# into account internationalization).
ATEXT = r'a-zA-Z0-9_!#\$%&\'\*\+\-/=\?\^`\{\|\}~'

# A "dot atom text", per RFC 2822 3.2.4: runs of ATEXT characters
# separated by single dots.
DOT_ATOM_TEXT = '[{0}]+(?:\\.[{0}]+)*'.format(ATEXT)

# For internationalized addresses, RFC 6531 section 3.3 also allows three
# specific ranges of UTF8 defined in RFC3629 section 4, which appear to be
# the Unicode code points from U+0080 to U+10FFFF.
ATEXT_INTL = u"{0}\u0080-\U0010FFFF".format(ATEXT)
DOT_ATOM_TEXT_INTL = '[{0}]+(?:\\.[{0}]+)*'.format(ATEXT_INTL)

# After IDNA (ASCII) encoding, the domain part of the email address must
# also satisfy RFC 952/RFC 1123, which restrict the allowed characters of
# hostnames further. The hyphen cannot be at the beginning or end of a
# *dot-atom component* of a hostname either.
ATEXT_HOSTNAME = r'(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])'

# Length constants.
# RFC 3696 + errata 1003 + errata 1690 (https://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690)
# explains the maximum length of an email address is 254 octets.
EMAIL_MAX_LENGTH = 254
LOCAL_PART_MAX_LENGTH = 64
DOMAIN_MAX_LENGTH = 255


# On Python 2.x the ASCII-only regexes above are bytes strings; convert
# them to unicode regexes. If Python 3.x had a "ur" string literal prefix
# we'd use that instead.
if sys.version_info[0] < 3:
    ATEXT = ATEXT.decode("ascii")
    DOT_ATOM_TEXT = DOT_ATOM_TEXT.decode("ascii")
    ATEXT_HOSTNAME = ATEXT_HOSTNAME.decode("ascii")

0 commit comments

Comments
 (0)