Skip to content

Commit 3804cd7

Browse files
committed
Explicitly check for non-ASCII characters with better error messages when SMTPUTF8 is off
1 parent 00902f8 commit 3804cd7

File tree

2 files changed

+66
-19
lines changed

2 files changed

+66
-19
lines changed

email_validator/syntax.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,25 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
7575
# for internationalized addresses. It's the same pattern but with additional
7676
# characters permitted.
7777
m = DOT_ATOM_TEXT_INTL.match(local)
78-
if m and allow_smtputf8:
78+
if m:
7979
# It's valid.
8080

81+
# But international characters in the local part may not be permitted.
82+
if not allow_smtputf8:
83+
# Check for invalid characters against the non-internationalized
84+
# permitted character set.
85+
# (RFC 2822 Section 3.2.4 / RFC 5322 Section 3.2.3)
86+
bad_chars = set(
87+
safe_character_display(c)
88+
for c in local
89+
if not ATEXT_RE.match(c)
90+
)
91+
if bad_chars:
92+
raise EmailSyntaxError("Internationalized characters before the @-sign are not supported: " + ", ".join(sorted(bad_chars)) + ".")
93+
94+
# Although the check above should always find something, fall back to this just in case.
95+
raise EmailSyntaxError("Internationalized characters before the @-sign are not supported.")
96+
8197
# RFC 6532 section 3.1 also says that Unicode NFC normalization should be applied,
8298
# so we'll return the normalized local part in the return value.
8399
local = unicodedata.normalize("NFC", local)
@@ -111,7 +127,7 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
111127
bad_chars = set(
112128
safe_character_display(c)
113129
for c in local
114-
if not (ATEXT_INTL_RE if allow_smtputf8 else ATEXT_RE).match(c)
130+
if not ATEXT_INTL_RE.match(c)
115131
)
116132
if bad_chars:
117133
raise EmailSyntaxError("The email address contains invalid characters before the @-sign: " + ", ".join(sorted(bad_chars)) + ".")

tests/test_syntax.py

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,31 @@
5656
ascii_email="!#$%&'*+-/=?^_`.{|}~@example.tld",
5757
),
5858
),
59+
(
60+
'jeff@臺網中心.tw',
61+
ValidatedEmail(
62+
local_part='jeff',
63+
ascii_local_part='jeff',
64+
smtputf8=False,
65+
ascii_domain='xn--fiqq24b10vi0d.tw',
66+
domain='臺網中心.tw',
67+
email='jeff@臺網中心.tw',
68+
ascii_email='jeff@xn--fiqq24b10vi0d.tw',
69+
),
70+
),
71+
],
72+
)
73+
def test_email_valid(email_input, output):
74+
# These addresses do not require SMTPUTF8. See test_email_valid_intl_local_part
75+
# for addresses that are valid but require SMTPUTF8. Check that it passes with
76+
# allow_smtputf8 both on and off.
77+
assert validate_email(email_input, check_deliverability=False, allow_smtputf8=False) == output
78+
assert validate_email(email_input, check_deliverability=False, allow_smtputf8=True) == output
79+
80+
81+
@pytest.mark.parametrize(
82+
'email_input,output',
83+
[
5984
(
6085
'伊昭傑@郵件.商務',
6186
ValidatedEmail(
@@ -106,18 +131,6 @@
106131
email='葉士豪@臺網中心.tw',
107132
),
108133
),
109-
(
110-
'jeff@臺網中心.tw',
111-
ValidatedEmail(
112-
local_part='jeff',
113-
ascii_local_part='jeff',
114-
smtputf8=False,
115-
ascii_domain='xn--fiqq24b10vi0d.tw',
116-
domain='臺網中心.tw',
117-
email='jeff@臺網中心.tw',
118-
ascii_email='jeff@xn--fiqq24b10vi0d.tw',
119-
),
120-
),
121134
(
122135
'葉士豪@臺網中心.台灣',
123136
ValidatedEmail(
@@ -200,10 +213,15 @@
200213
),
201214
],
202215
)
203-
def test_email_valid(email_input, output):
204-
# print(f'({email_input!r}, {validate_email(email_input, check_deliverability=False)!r}),')
216+
def test_email_valid_intl_local_part(email_input, output):
217+
# Check that it passes when allow_smtputf8 is True.
205218
assert validate_email(email_input, check_deliverability=False) == output
206219

220+
# Check that it fails when allow_smtputf8 is False.
221+
with pytest.raises(EmailSyntaxError) as exc_info:
222+
validate_email(email_input, allow_smtputf8=False, check_deliverability=False)
223+
assert "Internationalized characters before the @-sign are not supported: " in str(exc_info.value)
224+
207225

208226
@pytest.mark.parametrize(
209227
'email_input,error_msg',
@@ -263,7 +281,6 @@ def test_email_invalid_syntax(email_input, error_msg):
263281
# checks do not arise.
264282
with pytest.raises(EmailSyntaxError) as exc_info:
265283
validate_email(email_input)
266-
# print(f'({email_input!r}, {str(exc_info.value)!r}),')
267284
assert str(exc_info.value) == error_msg
268285

269286

@@ -283,7 +300,6 @@ def test_email_invalid_reserved_domain(email_input):
283300
# DNS deliverability checks do not arise.
284301
with pytest.raises(EmailSyntaxError) as exc_info:
285302
validate_email(email_input)
286-
# print(f'({email_input!r}, {str(exc_info.value)!r}),')
287303
assert "is a special-use or reserved name" in str(exc_info.value)
288304

289305

@@ -317,16 +333,31 @@ def test_email_unsafe_character(s, expected_error):
317333
('email_input', 'expected_error'),
318334
[
319335
('white space@test', 'The email address contains invalid characters before the @-sign: SPACE.'),
336+
('test@white space', 'The part after the @-sign contains invalid characters: SPACE.'),
320337
('\n@test', 'The email address contains invalid characters before the @-sign: U+000A.'),
338+
('test@\n', 'The part after the @-sign contains invalid characters: U+000A.'),
321339
],
322340
)
323341
def test_email_invalid_character(email_input, expected_error):
324-
# Check for various unsafe characters:
342+
# Check for various unsafe characters:
325343
with pytest.raises(EmailSyntaxError) as exc_info:
326344
validate_email(email_input, test_environment=True)
327345
assert str(exc_info.value) == expected_error
328346

329347

348+
@pytest.mark.parametrize(
349+
('email_input', 'expected_error'),
350+
[
351+
('λambdaツ@test', 'Internationalized characters before the @-sign are not supported: \'λ\', \'ツ\'.'),
352+
],
353+
)
354+
def test_email_invalid_character_smtputf8(email_input, expected_error):
355+
# Check for various unsafe characters:
356+
with pytest.raises(EmailSyntaxError) as exc_info:
357+
validate_email(email_input, allow_smtputf8=False, test_environment=True)
358+
assert str(exc_info.value) == expected_error
359+
360+
330361
def test_email_test_domain_name_in_test_environment():
331362
validate_email("anything@test", test_environment=True)
332363
validate_email("anything@mycompany.test", test_environment=True)

0 commit comments

Comments
 (0)