Skip to content

Commit 3426885

Browse files
committed
Several fixes for parsing display names
* Fix error message text for input addresses without @-signs. The incorrect message was "There must be something after the @-sign.". This was broken by the changes to parse display names. Prior to that, the message was "The email address is not valid. It must have exactly one @-sign.".
* Move the allow_display_name check to the end of the syntax checks. The optional checks should be the last to occur so that fatal syntax errors are raised first.
* Check that display name email addresses have a closing angle bracket and nothing after.
* Don't treat < + U+0338 (Combining Long Solidus Overlay) as the start of a bracketed email address. This would already be rejected because the combining character would be reported as an unsafe character at the start of the address, but it may be confusing since the caller won't see the address that way. When splitting the address into parts, skip the other special characters (@, quote, backslash) that have meaningful combining characters after them (i.e. they change under NFC normalization), although I don't think there are any such cases.
1 parent 0b22c13 commit 3426885

File tree

3 files changed

+34
-5
lines changed

3 files changed

+34
-5
lines changed

email_validator/syntax.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,22 @@ def split_email(email: str) -> Tuple[Optional[str], str, str, bool]:
4848

4949
def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tuple[str, str]:
5050
# Split the string at the first character in specials (an @-sign
51-
# or left angle bracket) that does not occur within quotes.
51+
# or left angle bracket) that does not occur within quotes and
52+
# is not followed by a Unicode combining character.
53+
# If no special character is found, raise an error.
5254
inside_quote = False
5355
escaped = False
5456
left_part = ""
55-
for c in text:
56-
if inside_quote:
57+
for i, c in enumerate(text):
58+
# < plus U+0338 (Combining Long Solidus Overlay) normalizes to
59+
# ≮ U+226E (Not Less-Than), and it would be confusing to treat
60+
# the < as the start of "<email>" syntax in that case. Likewise,
61+
# if anything combines with an @ or ", we should probably not
62+
# treat it as a special character.
63+
if unicodedata.normalize("NFC", text[i:])[0] != c:
64+
left_part += c
65+
66+
elif inside_quote:
5767
left_part += c
5868
if c == '\\' and not escaped:
5969
escaped = True
@@ -72,6 +82,9 @@ def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tu
7282
else:
7383
left_part += c
7484

85+
if len(left_part) == len(text):
86+
raise EmailSyntaxError("An email address must have an @-sign.")
87+
7588
# The right part is whatever is left.
7689
right_part = text[len(left_part):]
7790

@@ -134,6 +147,14 @@ def unquote_quoted_string(text: str) -> Tuple[str, bool]:
134147
# Check for other unsafe characters.
135148
check_unsafe_chars(display_name, allow_space=True)
136149

150+
# Check that the right part ends with an angle bracket
151+
# but tolerate trailing spaces after it.
152+
if ">" not in right_part:
153+
raise EmailSyntaxError("An open angle bracket at the start of the email address has to be followed by a close angle bracket at the end.")
154+
right_part = right_part.rstrip(" ")
155+
if right_part[-1] != ">":
156+
raise EmailSyntaxError("There can't be anything after the email address.")
157+
137158
# Remove the initial and trailing angle brackets.
138159
addr_spec = right_part[1:].rstrip(">")
139160

email_validator/validate_email.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,6 @@ def validate_email(
6868
# part if the local part is quoted.
6969
display_name, local_part, domain_part, is_quoted_local_part \
7070
= split_email(email)
71-
if display_name is not None and not allow_display_name:
72-
raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")
7371

7472
# Collect return values in this instance.
7573
ret = ValidatedEmail()
@@ -139,6 +137,11 @@ def validate_email(
139137
# Check the length of the address.
140138
validate_email_length(ret)
141139

140+
# Check that a display name is permitted. It's the last syntax check
141+
# because we always check against optional parsing features last.
142+
if display_name is not None and not allow_display_name:
143+
raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")
144+
142145
if check_deliverability and not test_environment:
143146
# Validate the email address's deliverability using DNS
144147
# and update the returned ValidatedEmail object with metadata.

tests/test_syntax.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ def test_domain_literal() -> None:
352352
@pytest.mark.parametrize(
353353
'email_input,error_msg',
354354
[
355+
('hello.world', 'An email address must have an @-sign.'),
355356
('my@localhost', 'The part after the @-sign is not valid. It should have a period.'),
356357
('my@.leadingdot.com', 'An email address cannot have a period immediately after the @-sign.'),
357358
('my@.leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'),
@@ -413,6 +414,10 @@ def test_domain_literal() -> None:
413414
('me@[untaggedtext]', 'The part after the @-sign in brackets is not an IPv4 address and has no address literal tag.'),
414415
('me@[tag:invalid space]', 'The part after the @-sign contains invalid characters in brackets: SPACE.'),
415416
('<me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),
417+
('<me@example.com', 'An open angle bracket at the start of the email address has to be followed by a close angle bracket at the end.'),
418+
('<me@example.com> !', 'There can\'t be anything after the email address.'),
419+
('<\u0338me@example.com', 'The email address contains invalid characters before the @-sign: \'<\'.'),
420+
('DisplayName <me@-example.com>', 'An email address cannot have a hyphen immediately after the @-sign.'),
416421
('DisplayName <me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),
417422
('Display Name <me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),
418423
('\"Display Name\" <me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),

0 commit comments

Comments
 (0)