Skip to content

Commit 4f69b42

Browse files
committed
Don't rely on the IDNA library for checking valid hyphens, use our own checks
This includes rejecting invalid RFC 5890 R-LDH labels (e.g. '??--' other than 'xn--'); see #92. The IDNA library already checks for this, but its error messages are not friendly, and for future-proofing it is better not to assume that it performs any general syntax checks.
1 parent 52f2ab7 commit 4f69b42

File tree

2 files changed

+32
-15
lines changed

2 files changed

+32
-15
lines changed

email_validator/syntax.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,16 +130,27 @@ def validate_email_domain_part(domain, test_environment=False, globally_delivera
130130
except idna.IDNAError as e:
131131
raise EmailSyntaxError("The part after the @-sign contains invalid characters ({}).".format(str(e)))
132132

133-
# Now we can perform basic checks on the use of periods (since equivalent
134-
# symbols have been mapped to periods). These checks are needed because the
135-
# IDNA library doesn't handle well domains that have empty labels (i.e. initial
136-
# dot, trailing dot, or two dots in a row).
133+
# The domain part is made up of period-separated "labels." Each label must
134+
# have at least one character and cannot start or end with dashes, which
135+
# means there are some surprising restrictions on periods and dashes.
136+
# Check that before we do IDNA encoding because the IDNA library gives
137+
# unfriendly errors for these cases, but after UTS-46 normalization because
138+
# it can insert periods and hyphens (from fullwidth characters).
137139
if domain.endswith("."):
138140
raise EmailSyntaxError("An email address cannot end with a period.")
139141
if domain.startswith("."):
140142
raise EmailSyntaxError("An email address cannot have a period immediately after the @-sign.")
141143
if ".." in domain:
142144
raise EmailSyntaxError("An email address cannot have two periods in a row.")
145+
if domain.endswith("-"):
146+
raise EmailSyntaxError("An email address cannot end with a hyphen.")
147+
if domain.startswith("-"):
148+
raise EmailSyntaxError("An email address cannot have a hyphen immediately after the @-sign.")
149+
if ".-" in domain or "-." in domain:
150+
raise EmailSyntaxError("An email address cannot have a period and a hyphen next to each other.")
151+
for label in domain.split("."):
152+
if re.match(r"(?!xn)..--", label, re.I): # RFC 5890 invalid R-LDH labels
153+
raise EmailSyntaxError("An email address cannot have two letters followed by two dashes immediately after the @-sign or after a period, except Punycode.")
143154

144155
if DOT_ATOM_TEXT_HOSTNAME.match(domain):
145156
ascii_domain = domain

tests/test_syntax.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -210,15 +210,21 @@ def test_email_valid(email_input, output):
210210
[
211211
('my@localhost', 'The part after the @-sign is not valid. It should have a period.'),
212212
('my@.leadingdot.com', 'An email address cannot have a period immediately after the @-sign.'),
213-
('my@..leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'),
214-
('my@..twodots.com', 'An email address cannot have a period immediately after the @-sign.'),
213+
('my@.leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'),
215214
('my@twodots..com', 'An email address cannot have two periods in a row.'),
216-
('my@baddash.-.com',
217-
'The part after the @-sign contains invalid characters (Label must not start or end with a hyphen).'),
218-
('my@baddash.-a.com',
219-
'The part after the @-sign contains invalid characters (Label must not start or end with a hyphen).'),
220-
('my@baddash.b-.com',
221-
'The part after the @-sign contains invalid characters (Label must not start or end with a hyphen).'),
215+
('my@twofwdots...com', 'An email address cannot have two periods in a row.'),
216+
('my@trailingdot.com.', 'An email address cannot end with a period.'),
217+
('my@trailingfwdot.com.', 'An email address cannot end with a period.'),
218+
('me@-leadingdash', 'An email address cannot have a hyphen immediately after the @-sign.'),
219+
('me@-leadingdashfw', 'An email address cannot have a hyphen immediately after the @-sign.'),
220+
('me@trailingdash-', 'An email address cannot end with a hyphen.'),
221+
('me@trailingdashfw-', 'An email address cannot end with a hyphen.'),
222+
('my@baddash.-.com', 'An email address cannot have a period and a hyphen next to each other.'),
223+
('my@baddash.-a.com', 'An email address cannot have a period and a hyphen next to each other.'),
224+
('my@baddash.b-.com', 'An email address cannot have a period and a hyphen next to each other.'),
225+
('my@baddashfw.-.com', 'An email address cannot have a period and a hyphen next to each other.'),
226+
('my@baddashfw.-a.com', 'An email address cannot have a period and a hyphen next to each other.'),
227+
('my@baddashfw.b-.com', 'An email address cannot have a period and a hyphen next to each other.'),
222228
('my@example.com\n',
223229
'The part after the @-sign contains invalid characters (Codepoint U+000A at position 4 of '
224230
'\'com\\n\' not allowed).'),
@@ -247,10 +253,10 @@ def test_email_valid(email_input, output):
247253
('my.λong.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444.info', 'The email address is too long (at least 1 character too many).'),
248254
('me@bad-tld-1', 'The part after the @-sign is not valid. It should have a period.'),
249255
('me@bad.tld-2', 'The part after the @-sign is not valid. It is not within a valid top-level domain.'),
250-
('me@-', 'The part after the @-sign contains invalid characters (Label must not start or end with a hyphen).'),
251256
('me@x!', 'The part after the @-sign contains invalid characters (Codepoint U+0021 at position 2 of \'x!\' not allowed).'),
252-
('me@xn--', 'The part after the @-sign contains invalid characters (Malformed A-label, no Punycode eligible content found).'),
253-
('me@yy--', 'The part after the @-sign contains invalid characters (Label has disallowed hyphens in 3rd and 4th position).'),
257+
('me@xn--0.tld', 'The part after the @-sign is not valid IDNA (Invalid A-label).'),
258+
('me@yy--0.tld', 'An email address cannot have two letters followed by two dashes immediately after the @-sign or after a period, except Punycode.'),
259+
('me@yy--0.tld', 'An email address cannot have two letters followed by two dashes immediately after the @-sign or after a period, except Punycode.'),
254260
],
255261
)
256262
def test_email_invalid_syntax(email_input, error_msg):

0 commit comments

Comments
 (0)