Skip to content

Commit c23c0d6

Browse files
committed
Improve the error message for IDNA domains being too long by handling the length check ourselves rather than in idna.encode
1 parent 452e0ca commit c23c0d6

File tree

3 files changed

+22
-21
lines changed

3 files changed

+22
-21
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ In Development
33

44
* Email addresses with internationalized local parts could, with rare Unicode characters, be returned as valid but actually be invalid in their normalized form (returned in the `normalized` field). Local parts are now re-validated after Unicode NFC normalization to ensure that invalid characters cannot be injected into the normalized address and that characters with length-increasing NFC normalizations cannot cause a local part to exceed the maximum length after normalization.
55
* The length check for email addresses with internationalized local parts is now also applied to the original address string prior to Unicode NFC normalization, which may be longer and could exceed the maximum email address length, to protect callers who do not use the returned normalized address.
6+
* Improved error message for IDNA domains that are too long.
67
* A new option to parse `My Name <address@domain>` strings, i.e. a display name plus an email address in angle brackets, is now available. It is off by default.
78

89
2.1.2 (June 16, 2024)

email_validator/syntax.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ def validate_email_domain_name(domain: str, test_environment: bool = False, glob
469469
# such as "⒈" which is invalid because it would expand to include a dot.
470470
# Since several characters are normalized to a dot, this has to come before
471471
# checks related to dots, like check_dot_atom which comes next.
472+
original_domain = domain
472473
try:
473474
domain = idna.uts46_remap(domain, std3_rules=False, transitional=False)
474475
except idna.IDNAError as e:
@@ -498,29 +499,22 @@ def validate_email_domain_name(domain: str, test_environment: bool = False, glob
498499
# the MTA must either support SMTPUTF8 or the mail client must convert the
499500
# domain name to IDNA before submission.
500501
#
501-
# Unfortunately this step incorrectly 'fixes' domain names with leading
502-
# periods by removing them, so we have to check for this above. It also gives
503-
# a funky error message ("No input") when there are two periods in a
504-
# row, also checked separately above.
505-
#
506502
# For ASCII-only domains, the transformation does nothing and is safe to
507503
# apply. However, to ensure we don't rely on the idna library for basic
508504
# syntax checks, we don't use it if it's not needed.
509505
#
510-
# uts46 is off here because it is handled above.
506+
# idna.encode also checks the domain name length after encoding but it
507+
# doesn't give a nice error, so we call the underlying idna.alabel method
508+
# directly. idna.alabel checks label length and doesn't give great messages,
509+
# but we can't easily go to lower level methods.
511510
try:
512-
ascii_domain = idna.encode(domain, uts46=False).decode("ascii")
511+
ascii_domain = ".".join(
512+
idna.alabel(label).decode("ascii")
513+
for label in domain.split(".")
514+
)
513515
except idna.IDNAError as e:
514-
if "Domain too long" in str(e):
515-
# We can't really be more specific because UTS-46 normalization means
516-
# the length check is applied to a string that is different from the
517-
# one the user supplied. Also I'm not sure if the length check applies
518-
# to the internationalized form, the IDNA ASCII form, or even both!
519-
raise EmailSyntaxError("The email address is too long after the @-sign.") from e
520-
521-
# Other errors seem to not be possible because the call to idna.uts46_remap
522-
# would have already raised them.
523-
raise EmailSyntaxError(f"The part after the @-sign contains invalid characters ({e}).") from e
516+
# Some errors would have already been raised by idna.uts46_remap.
517+
raise EmailSyntaxError(f"The part after the @-sign is invalid ({e}).") from e
524518

525519
# Check the syntax of the IDNA ASCII string produced by the encoding step above.
526520
# It should never fail.
@@ -535,8 +529,13 @@ def validate_email_domain_name(domain: str, test_environment: bool = False, glob
535529
# as IDNA ASCII. (The per-label IDNA encoding above does not check the
536530
# overall domain length, so this check also covers internationalized domains.)
537531
if len(ascii_domain) > DOMAIN_MAX_LENGTH:
538-
reason = get_length_reason(ascii_domain, limit=DOMAIN_MAX_LENGTH)
539-
raise EmailSyntaxError(f"The email address is too long after the @-sign {reason}.")
532+
if ascii_domain == original_domain:
533+
reason = get_length_reason(ascii_domain, limit=DOMAIN_MAX_LENGTH)
534+
raise EmailSyntaxError(f"The email address is too long after the @-sign {reason}.")
535+
else:
536+
diff = len(ascii_domain) - DOMAIN_MAX_LENGTH
537+
s = "" if diff == 1 else "s"
538+
raise EmailSyntaxError(f"The email address is too long after the @-sign ({diff} byte{s} too many after IDNA encoding).")
540539

541540
# Also check the label length limit.
542541
# (RFC 1035 2.3.1)

tests/test_syntax.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,10 +406,11 @@ def test_domain_literal() -> None:
406406
('111111111122222222223333333333444444444455555555556666666666777777@example.com', 'The email address is too long before the @-sign (2 characters too many).'),
407407
('\uFB2C111111122222222223333333333444444444455555555556666666666777777@example.com', 'After Unicode normalization: The email address is too long before the @-sign (2 characters too many).'),
408408
('me@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333333344444444445555555555.com', 'The email address is too long after the @-sign (1 character too many).'),
409-
('me@中1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444.com', 'The email address is too long after the @-sign.'),
409+
('me@中1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444.com', 'The email address is too long after the @-sign (1 byte too many after IDNA encoding).'),
410+
('me@\uFB2C1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444.com', 'The email address is too long after the @-sign (5 bytes too many after IDNA encoding).'),
410411
('me@1111111111222222222233333333334444444444555555555666666666677777.com', 'After the @-sign, periods cannot be separated by so many characters (1 character too many).'),
411412
('me@11111111112222222222333333333344444444445555555556666666666777777.com', 'After the @-sign, periods cannot be separated by so many characters (2 characters too many).'),
412-
('me@中111111111222222222233333333334444444444555555555666666.com', 'The part after the @-sign contains invalid characters (Label too long).'),
413+
('me@中111111111222222222233333333334444444444555555555666666.com', 'The part after the @-sign is invalid (Label too long).'),
413414
('meme@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.com', 'The email address is too long (4 characters too many).'),
414415
('my.long.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333333344444.info', 'The email address is too long (2 characters too many).'),
415416
('my.long.address@λ111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444.info', 'The email address is too long (1-2 characters too many).'),

0 commit comments

Comments
 (0)