Skip to content

Commit 7428eeb

Browse files
authored
Dedupe length reason logic and declare magic numbers as constants (#50)
1 parent 48840d1 commit 7428eeb

File tree

1 file changed

+30
-29
lines changed

1 file changed

+30
-29
lines changed

email_validator/__init__.py

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@
2929
# the beginning or end of a *dot-atom component* of a hostname either.
3030
ATEXT_HOSTNAME = r'(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])'
3131

32+
# Length constants
33+
# RFC 3696 + errata 1003 + errata 1690 (https://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690)
34+
# explain that the maximum length of an email address is 254 octets.
35+
EMAIL_MAX_LENGTH = 254
36+
LOCAL_PART_MAX_LENGTH = 64
37+
DOMAIN_MAX_LENGTH = 255
38+
3239
# ease compatibility in type checking
3340
if sys.version_info >= (3,):
3441
unicode_class = str
@@ -161,6 +168,14 @@ def as_dict(self):
161168
return self.__dict__
162169

163170

171+
def __get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH):
    """Build the parenthesized "N characters too many" note for a length error.

    ``addr`` is the string that was found to be too long and ``limit`` is
    the maximum length it was checked against; the note reports
    ``len(addr) - limit``.

    When ``utf8`` is true the count is prefixed with "at least ". Callers
    pass it when they counted characters of the Unicode form of the
    address, whose encoded form may be longer than its character count.

    Returns the note including its surrounding parentheses, without any
    leading space.
    """
    excess = len(addr) - limit

    qualifier = ""
    if utf8:
        qualifier = "at least "

    # "character" is pluralized only when more than one character is over.
    plural = "" if excess <= 1 else "s"

    return "({}{} character{} too many)".format(qualifier, excess, plural)
177+
178+
164179
def validate_email(
165180
email,
166181
allow_smtputf8=True,
@@ -212,9 +227,6 @@ def validate_email(
212227
if not ret.smtputf8:
213228
ret.ascii_email = ret.ascii_local_part + "@" + ret.ascii_domain
214229

215-
# RFC 3696 + errata 1003 + errata 1690 (https://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690)
216-
# explains the maximum length of an email address is 254 octets.
217-
#
218230
# If the email address has an ASCII representation, then we assume it may be
219231
# transmitted in ASCII (we can't assume SMTPUTF8 will be used on all hops to
220232
# the destination) and the length limit applies to ASCII characters (which is
@@ -235,33 +247,24 @@ def validate_email(
235247
# longer than the number of characters.
236248
#
237249
# See the length checks on the local part and the domain.
238-
if ret.ascii_email and len(ret.ascii_email) > 254:
250+
if ret.ascii_email and len(ret.ascii_email) > EMAIL_MAX_LENGTH:
239251
if ret.ascii_email == ret.email:
240-
reason = " ({} character{} too many)".format(
241-
len(ret.ascii_email) - 254,
242-
"s" if (len(ret.ascii_email) - 254 != 1) else ""
243-
)
244-
elif len(ret.email) > 254:
252+
reason = __get_length_reason(ret.ascii_email)
253+
elif len(ret.email) > EMAIL_MAX_LENGTH:
245254
# If there are more than 254 characters, then the ASCII
246255
# form is definitely going to be too long.
247-
reason = " (at least {} character{} too many)".format(
248-
len(ret.email) - 254,
249-
"s" if (len(ret.email) - 254 != 1) else ""
250-
)
256+
reason = __get_length_reason(ret.email, utf8=True)
251257
else:
252-
reason = " (when converted to IDNA ASCII)"
253-
raise EmailSyntaxError("The email address is too long{}.".format(reason))
254-
if len(ret.email.encode("utf8")) > 254:
255-
if len(ret.email) > 254:
258+
reason = "(when converted to IDNA ASCII)"
259+
raise EmailSyntaxError("The email address is too long {}.".format(reason))
260+
if len(ret.email.encode("utf8")) > EMAIL_MAX_LENGTH:
261+
if len(ret.email) > EMAIL_MAX_LENGTH:
256262
# If there are more than 254 characters, then the UTF-8
257263
# encoding is definitely going to be too long.
258-
reason = " (at least {} character{} too many)".format(
259-
len(ret.email) - 254,
260-
"s" if (len(ret.email) - 254 != 1) else ""
261-
)
264+
reason = __get_length_reason(ret.email, utf8=True)
262265
else:
263-
reason = " (when encoded in bytes)"
264-
raise EmailSyntaxError("The email address is too long{}.".format(reason))
266+
reason = "(when encoded in bytes)"
267+
raise EmailSyntaxError("The email address is too long {}.".format(reason))
265268

266269
if check_deliverability:
267270
# Validate the email address's deliverability and update the
@@ -295,11 +298,9 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
295298
# internationalized, then the UTF-8 encoding may be longer, but
296299
# that may not be relevant. We will check the total address length
297300
# instead.
298-
if len(local) > 64:
299-
raise EmailSyntaxError("The email address is too long before the @-sign ({} character{} too many).".format(
300-
len(local) - 64,
301-
"s" if (len(local) - 64 != 1) else ""
302-
))
301+
if len(local) > LOCAL_PART_MAX_LENGTH:
302+
reason = __get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH)
303+
raise EmailSyntaxError("The email address is too long before the @-sign {}.".format(reason))
303304

304305
# Check the local part against the regular expression for the older ASCII requirements.
305306
m = re.match(DOT_ATOM_TEXT + "\\Z", local)
@@ -404,7 +405,7 @@ def validate_email_domain_part(domain):
404405
# on the assumption that the domain may be transmitted without SMTPUTF8
405406
# as IDNA ASCII. This is also checked by idna.encode, so this exception
406407
# is never reached.
407-
if len(ascii_domain) > 255:
408+
if len(ascii_domain) > DOMAIN_MAX_LENGTH:
408409
raise EmailSyntaxError("The email address is too long after the @-sign.")
409410

410411
# A "dot atom text", per RFC 2822 3.2.4, but using the restricted

0 commit comments

Comments
 (0)