Skip to content

Commit 8043de4

Browse files
committed
NFC-normalize display names per UTS #39
1 parent bc08faa commit 8043de4

File tree

3 files changed

+23
-5
lines changed

3 files changed

+23
-5
lines changed

email_validator/syntax.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,8 @@ def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_emp
302302
valid = "dot-atom"
303303
requires_smtputf8 = True
304304

305-
# There are no syntactic restrictions on quoted local parts, so if
306-
# it was originally quoted, it is probably valid. More characters
305+
# There are no dot-atom syntax restrictions on quoted local parts, so
306+
# if it was originally quoted, it is probably valid. More characters
307307
# are allowed, like @-signs, spaces, and quotes, and there are no
308308
# restrictions on the placement of dots, as in dot-atom local parts.
309309
elif quoted_local_part:

email_validator/validate_email.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,14 @@ def validate_email(
7373
display_name, local_part, domain_part, is_quoted_local_part \
7474
= split_email(email)
7575

76+
if display_name:
77+
# UTS #39 3.3 Email Security Profiles for Identifiers requires
78+
# display names (incorrectly called "quoted-string-part" there)
79+
# to be NFC normalized. Since these are not a part of what we
80+
# are really validating, we won't check that the input was NFC
81+
# normalized, but we'll normalize in output.
82+
display_name = unicodedata.normalize("NFC", display_name)
83+
7684
# Collect return values in this instance.
7785
ret = ValidatedEmail()
7886
ret.original = ((local_part if not is_quoted_local_part
@@ -95,6 +103,15 @@ def validate_email(
95103
# RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
96104
# so we'll return the NFC-normalized local part. Since the caller may use that
97105
# string in place of the original string, ensure it is also valid.
106+
#
107+
# UTS #39 3.3 Email Security Profiles for Identifiers requires local parts
108+
# to be NFKC normalized, which loses some information in characters that can
109+
# be decomposed. We might want to consider applying NFKC normalization, but
110+
# we can't make the change easily because it would break database lookups
111+
# for any caller that stored a normalized address from a previous version of
112+
# this library. (UTS #39 seems to require that the *input* be NFKC normalized
113+
# and has other requirements that are hard to check without additional Unicode
114+
# data, and I don't know whether the rules really apply in the wild.)
98115
normalized_local_part = unicodedata.normalize("NFC", ret.local_part)
99116
if normalized_local_part != ret.local_part:
100117
try:

tests/test_syntax.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -295,13 +295,14 @@ def test_email_valid(email_input: str, output: ValidatedEmail) -> None:
295295
),
296296
),
297297
(
298-
's\u0323\u0307@nfc.tld',
298+
'\"s\u0323\u0307\" <s\u0323\u0307@nfc.tld>',
299299
MakeValidatedEmail(
300300
local_part='\u1E69',
301301
smtputf8=True,
302302
ascii_domain='nfc.tld',
303303
domain='nfc.tld',
304304
normalized='\u1E69@nfc.tld',
305+
display_name='\u1E69'
305306
),
306307
),
307308
(
@@ -318,11 +319,11 @@ def test_email_valid(email_input: str, output: ValidatedEmail) -> None:
318319
)
319320
def test_email_valid_intl_local_part(email_input: str, output: ValidatedEmail) -> None:
320321
# Check that it passes when allow_smtputf8 is True.
321-
assert validate_email(email_input, check_deliverability=False) == output
322+
assert validate_email(email_input, check_deliverability=False, allow_display_name=True) == output
322323

323324
# Check that it fails when allow_smtputf8 is False.
324325
with pytest.raises(EmailSyntaxError) as exc_info:
325-
validate_email(email_input, allow_smtputf8=False, check_deliverability=False)
326+
validate_email(email_input, allow_smtputf8=False, check_deliverability=False, allow_display_name=True)
326327
assert "Internationalized characters before the @-sign are not supported: " in str(exc_info.value)
327328

328329

0 commit comments

Comments
 (0)