|
1 | 1 | from .exceptions_types import EmailSyntaxError
|
2 | 2 | from .rfc_constants import EMAIL_MAX_LENGTH, LOCAL_PART_MAX_LENGTH, DOMAIN_MAX_LENGTH, \
|
3 |
| - DOT_ATOM_TEXT, DOT_ATOM_TEXT_INTL, ATEXT, ATEXT_INTL, ATEXT_HOSTNAME_INTL, DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX |
| 3 | + DOT_ATOM_TEXT, DOT_ATOM_TEXT_INTL, ATEXT_RE, ATEXT_INTL_RE, ATEXT_HOSTNAME_INTL, DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX |
4 | 4 |
|
5 | 5 | import re
|
6 | 6 | import unicodedata
|
@@ -57,44 +57,25 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
|
57 | 57 | reason = get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH)
|
58 | 58 | raise EmailSyntaxError("The email address is too long before the @-sign {}.".format(reason))
|
59 | 59 |
|
60 |
| - # Check for invalid characters. |
61 |
| - # (RFC 2822 Section 3.2.4 / RFC 5322 Section 3.2.3, plus RFC 6531 section 3.3 |
62 |
| - # if internationalized local parts are allowed) |
63 |
| - atext_re = re.compile('[.' + (ATEXT if not allow_smtputf8 else ATEXT_INTL) + ']') |
64 |
| - bad_chars = set( |
65 |
| - safe_character_display(c) |
66 |
| - for c in local |
67 |
| - if not atext_re.match(c) |
68 |
| - ) |
69 |
| - if bad_chars: |
70 |
| - raise EmailSyntaxError("The email address contains invalid characters before the @-sign: " + ", ".join(sorted(bad_chars)) + ".") |
71 |
| - |
72 |
| - # Check for dot errors imposed by the dot-atom rule. |
73 |
| - # (RFC 2822 3.2.4) |
74 |
| - check_dot_atom(local, 'An email address cannot start with a {}.', 'An email address cannot have a {} immediately before the @-sign.', is_hostname=False) |
75 |
| - |
76 | 60 | # Check the local part against the non-internationalized regular expression.
|
| 61 | + # Most email addresses match this regex so it's probably fastest to check this first. |
77 | 62 | # (RFC 2822 3.2.4)
|
78 | 63 | m = DOT_ATOM_TEXT.match(local)
|
79 | 64 | if m:
|
| 65 | + # It's valid. |
| 66 | + |
80 | 67 | # Return the local part unchanged and flag that SMTPUTF8 is not needed.
|
81 | 68 | return {
|
82 | 69 | "local_part": local,
|
83 | 70 | "ascii_local_part": local,
|
84 | 71 | "smtputf8": False,
|
85 | 72 | }
|
86 | 73 |
|
87 |
| - else: |
88 |
| - # The local part failed the ASCII check. Now try the extended internationalized requirements. |
89 |
| - # This should already be handled by the bad_chars and check_dot_atom tests above. |
90 |
| - # It's the same pattern but with additional characters permitted. |
91 |
| - m = DOT_ATOM_TEXT_INTL.match(local) |
92 |
| - if not m: |
93 |
| - raise EmailSyntaxError("The email address contains invalid characters before the @-sign.") |
94 |
| - # It would be valid if internationalized characters were allowed by the caller. |
95 |
| - if not allow_smtputf8: |
96 |
| - raise EmailSyntaxError("Internationalized characters before the @-sign are not supported.") |
97 |
| - |
| 74 | + # The local part failed the ASCII check. Try the extended character set |
| 75 | + # for internationalized addresses. It's the same pattern but with additional |
| 76 | + # characters permitted. |
| 77 | + m = DOT_ATOM_TEXT_INTL.match(local) |
| 78 | + if m and allow_smtputf8: |
98 | 79 | # It's valid.
|
99 | 80 |
|
100 | 81 | # RFC 6532 section 3.1 also says that Unicode NFC normalization should be applied,
|
@@ -122,6 +103,27 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals
|
122 | 103 | "smtputf8": True,
|
123 | 104 | }
|
124 | 105 |
|
| 106 | + # It's not a valid local part either non-internationalized or internationalized. |
| 107 | + # Let's find out why. |
| 108 | + |
| 109 | + # Check for invalid characters. |
| 110 | + # (RFC 2822 Section 3.2.4 / RFC 5322 Section 3.2.3, plus RFC 6531 section 3.3) |
| 111 | + bad_chars = set( |
| 112 | + safe_character_display(c) |
| 113 | + for c in local |
| 114 | + if not (ATEXT_INTL_RE if allow_smtputf8 else ATEXT_RE).match(c) |
| 115 | + ) |
| 116 | + if bad_chars: |
| 117 | + raise EmailSyntaxError("The email address contains invalid characters before the @-sign: " + ", ".join(sorted(bad_chars)) + ".") |
| 118 | + |
| 119 | + # Check for dot errors imposed by the dot-atom rule. |
| 120 | + # (RFC 2822 3.2.4) |
| 121 | + check_dot_atom(local, 'An email address cannot start with a {}.', 'An email address cannot have a {} immediately before the @-sign.', is_hostname=False) |
| 122 | + |
| 123 | + # All of the reasons should already have been checked, but just in case |
| 124 | + # we have a fallback message. |
| 125 | + raise EmailSyntaxError("The email address contains invalid characters before the @-sign.") |
| 126 | + |
125 | 127 |
|
126 | 128 | def check_unsafe_chars(s):
|
127 | 129 | # Check for unsafe characters or characters that would make the string
|
|
0 commit comments