@@ -73,6 +73,14 @@ def validate_email(
73
73
display_name , local_part , domain_part , is_quoted_local_part \
74
74
= split_email (email )
75
75
76
+ if display_name :
77
+ # UTS #39 3.3 Email Security Profiles for Identifiers requires
78
+ # display names (incorrectly called "quoted-string-part" there)
79
+ # to be NFC normalized. Since these are not a part of what we
80
+ # are really validating, we won't check that the input was NFC
81
+ # normalized, but we'll normalize in output.
82
+ display_name = unicodedata .normalize ("NFC" , display_name )
83
+
76
84
# Collect return values in this instance.
77
85
ret = ValidatedEmail ()
78
86
ret .original = ((local_part if not is_quoted_local_part
@@ -95,6 +103,15 @@ def validate_email(
95
103
# RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
96
104
# so we'll return the NFC-normalized local part. Since the caller may use that
97
105
# string in place of the original string, ensure it is also valid.
106
+ #
107
+ # UTS #39 3.3 Email Security Profiles for Identifiers requires local parts
108
+ # to be NFKC normalized, which loses some information in characters that can
109
+ # be decomposed. We might want to consider applying NFKC normalization, but
110
+ # we can't make the change easily because it would break database lookups
111
+ # for any caller that stored a normalized address from a previous version of
112
+ # this library. (UTS #39 seems to require that the *input* be NFKC normalized
113
+ # and has other requirements that are hard to check without additional Unicode
114
+ # data, and I don't know whether the rules really apply in the wild.)
98
115
normalized_local_part = unicodedata .normalize ("NFC" , ret .local_part )
99
116
if normalized_local_part != ret .local_part :
100
117
try :
0 commit comments