Skip to content

Commit b6b9e1c

Browse files
committed
Manually merged PR95 to enable py2+3 unit testing of safeinput and clarify which tests should pass and which should fail. Adjust email-validator requirements for various python versions with the note that python-2.7 does NOT really support email-validator-1.3.0 despite pypi metadata claiming it. Added comments about current limitations and pending work for the validation to be more robust and RFC 6532 compliant.
git-svn-id: svn+ssh://svn.code.sf.net/p/migrid/code/trunk@6108 b75ad72c-e7d7-11dd-a971-7dbc132099af
1 parent 64fb094 commit b6b9e1c

File tree

3 files changed

+126
-39
lines changed

3 files changed

+126
-39
lines changed

mig/shared/safeinput.py

Lines changed: 104 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@
117117
# Unicode letter categories as defined on
118118
# http://www.unicode.org/reports/tr44/#GC_Values_Table
119119
# TODO: should we go all in and allow even these very exotic modifiers?
120-
#_ACCENT_CATS = frozenset(('Lu', 'Ll', 'Lt', 'Lm', 'Lo', ))
120+
# _ACCENT_CATS = frozenset(('Lu', 'Ll', 'Lt', 'Lm', 'Lo', ))
121121
_ACCENT_CATS = frozenset(('Lu', 'Ll', 'Lt', ))
122122

123123
# Use utf8 byte string representation here ("something" and not u"something")
@@ -879,13 +879,29 @@ def valid_email_address(addr, allow_real_name=True):
879879
John Doe <john@doe.org>
880880
"""
881881

882+
# TODO: rework this as formataddr does NOT have RFC6532 support on py3
883+
# and assuming parse+format matches orig also introduces false
884+
# positives regarding e.g. extra white space in display name.
885+
# https://github.com/python/cpython/issues/70143
882886
(real_name, plain_addr) = name_address_pair = parseaddr(addr)
883887
if real_name and not allow_real_name:
884888
raise InputException("Only plain addresses without name allowed")
885889
if not silent_email_validator(plain_addr):
890+
# NOTE: email-validator validate_email function ONLY accepts non-ascii
891+
# chars when passed unicode on py2, so we end up here for valid
892+
# RFC 6532 addresses when passed as native byte strings.
886893
raise InputException("No actual email address included")
887-
merged = formataddr(name_address_pair)
888-
if merged != addr:
894+
try:
895+
merged = formataddr(name_address_pair)
896+
if merged != addr:
897+
raise ValueError("parsed vs raw email mismatch")
898+
except (ValueError, UnicodeEncodeError) as exc:
899+
# TODO: eliminate this UnicodeEncodeError handling here once reworked
900+
# NOTE: formataddr does NOT support non-ascii chars on py3 at all and
901+
# explicitly encodes to ascii so we end up here for valid RFC
902+
# 6532 addresses. See note and link above for details.
903+
# print("unicode err in %s" %
904+
# [addr, real_name, plain_addr, name_address_pair])
889905
raise InputException("Invalid email format")
890906

891907

@@ -2281,24 +2297,47 @@ def __str__(self):
22812297
return force_utf8(force_unicode(self.value))
22822298

22832299

2284-
def main(_print=print):
2285-
print = _print # workaround print as reserved word on PY2
2300+
def main(_exit=sys.exit, _print=print):
2301+
print = _print # workaround print as reserved word on PY2
22862302

22872303
for test_org in ('UCPH', 'Some University, Some Dept.', 'Green Shoes Ltd.',
2288-
u'Unicode Org', "Invalid R+D", "Invalid R/D",
2289-
"Invalid R@D", 'Test HTML Invalid <code/>'):
2304+
u'Unicode Org'):
2305+
try:
2306+
print('Testing valid_organization: %r' % test_org)
2307+
print('Filtered organization: %s' % filter_organization(test_org))
2308+
valid_organization(test_org)
2309+
print('Accepted raw organization!')
2310+
except InputException as exc:
2311+
print('Rejected raw organization %r: %s' % (test_org, exc))
2312+
_exit(1)
2313+
2314+
for test_org in ("Invalid R+D", "Invalid R/D", "Invalid R@D",
2315+
'Test HTML Invalid <code/>'):
22902316
try:
22912317
print('Testing valid_organization: %r' % test_org)
22922318
print('Filtered organization: %s' % filter_organization(test_org))
22932319
valid_organization(test_org)
22942320
print('Accepted raw organization!')
2295-
except Exception as exc:
2321+
_exit(1)
2322+
except InputException as exc:
22962323
print('Rejected raw organization %r: %s' % (test_org, exc))
22972324

22982325
for test_path in ('test.txt', 'Test Æøå', 'Test Überh4x0r',
22992326
'Test valid Jean-Luc Géraud', 'Test valid Źacãŕ',
23002327
'Test valid special%&()!$¶â€', 'Test look-alike-å å',
2301-
'Test north Atlantic Barður Ðýþ', 'Test exotic لرحيم',
2328+
'Test north Atlantic Barður Ðýþ'):
2329+
try:
2330+
print('Testing valid_path: %s' % test_path)
2331+
print('Filtered path: %s' % filter_path(test_path))
2332+
# print 'DEBUG %s only in %s' % ([test_path],
2333+
# [VALID_PATH_CHARACTERS])
2334+
valid_path(test_path)
2335+
print('Accepted raw path!')
2336+
except InputException as exc:
2337+
print('Rejected raw path %r: %s' % (test_path, exc))
2338+
_exit(1)
2339+
2340+
for test_path in ('Test exotic لرحيم',
23022341
'Test Invalid ?', 'Test Invalid `',
23032342
'Test invalid <', 'Test Invalid >',
23042343
'Test Invalid *', 'Test Invalid "'):
@@ -2309,19 +2348,40 @@ def main(_print=print):
23092348
# [VALID_PATH_CHARACTERS])
23102349
valid_path(test_path)
23112350
print('Accepted raw path!')
2312-
except Exception as exc:
2351+
_exit(1)
2352+
except InputException as exc:
23132353
print('Rejected raw path %r: %s' % (test_path, exc))
23142354

2315-
for test_addr in ('', 'invalid', 'abc@dk', 'abc@def.org', 'abc@def.gh.org',
2316-
'aBc@Def.org', '<invalid@def.org>',
2317-
'Test <abc@def.org>', 'Test Æøå <æøå@abc.org>',
2355+
# Simple valid addresses to accept
2356+
for test_addr in ('abc@def.org', 'abc@def.gh.org', 'Test <abc@def.org>',
23182357
'Test <abc.def@ghi.org>', 'Test <abc+def@ghi.org>',
2319-
'A valid-name <abc@ghi.org>',
2358+
'A valid-name <abc@ghi.org>'):
2359+
try:
2360+
print('Testing valid_email_address: %s' % test_addr)
2361+
valid_email_address(test_addr)
2362+
print('Accepted raw address! %s' % [parseaddr(test_addr)])
2363+
except InputException as exc:
2364+
print('Rejected raw address %r: %s' % (test_addr, exc))
2365+
_exit(1)
2366+
2367+
# TODO: fix validation and toggle exit back on below
2368+
# Corner-case valid addresses that we currently don't accept
2369+
for test_addr in ('Test Æøå <æøå@abc.org>',
2370+
'aBc@Def.org', '<valid@def.org>',
23202371
' false-positive@ghi.org',
23212372
'False Positive <abc@ghi.org>',
2322-
' Another False Positive <abc@ghi.org>',
2323-
'invalid <abc@ghi,org>', 'invalid <abc@',
2324-
'invalid abc@def.org',
2373+
' Another False Positive <abc@ghi.org>'):
2374+
try:
2375+
print('Testing valid_email_address: %s' % test_addr)
2376+
valid_email_address(test_addr)
2377+
print('Accepted raw address! %s' % [parseaddr(test_addr)])
2378+
except InputException as exc:
2379+
print('Rejected raw address %r: %s' % (test_addr, exc))
2380+
# _exit(1)
2381+
2382+
# Simple invalid addresses to fail on
2383+
for test_addr in ('', 'invalid', 'abc@dk', 'invalid <abc@ghi,org>',
2384+
'invalid <abc@', 'invalid abc@def.org',
23252385
'Test <abc@ghi.org>, twice <def@def.org>',
23262386
'A !#¤%/) positive <abc@ghi.org>',
23272387
'a@b.c<script>alert("XSS vulnerable");</script>',
@@ -2331,7 +2391,8 @@ def main(_print=print):
23312391
print('Testing valid_email_address: %s' % test_addr)
23322392
valid_email_address(test_addr)
23332393
print('Accepted raw address! %s' % [parseaddr(test_addr)])
2334-
except Exception as exc:
2394+
_exit(1)
2395+
except InputException as exc:
23352396
print('Rejected raw address %r: %s' % (test_addr, exc))
23362397

23372398
# OpenID 2.0 version
@@ -2378,19 +2439,24 @@ def main(_print=print):
23782439
print("Accepted:")
23792440
for (key, val) in accepted.items():
23802441
print("\t%s: %s" % (key, val))
2381-
print("Rejected:")
2382-
for (key, val) in rejected.items():
2383-
print("\t%s: %s" % (key, val))
2442+
if rejected:
2443+
print("Rejected:")
2444+
for (key, val) in rejected.items():
2445+
print("\t%s: %s" % (key, val))
2446+
_exit(1)
2447+
23842448
user_arguments_dict['openid.sreg.fullname'] = [
23852449
force_unicode('Jonas Æøå Bardino')]
23862450
(accepted, rejected) = validated_input(
23872451
user_arguments_dict, autocreate_defaults)
23882452
print("Accepted:")
23892453
for (key, val) in accepted.items():
23902454
print("\t%s: %s" % (key, val))
2391-
print("Rejected:")
2392-
for (key, val) in rejected.items():
2393-
print("\t%s: %s" % (key, val))
2455+
if rejected:
2456+
print("Rejected:")
2457+
for (key, val) in rejected.items():
2458+
print("\t%s: %s" % (key, val))
2459+
_exit(1)
23942460

23952461
# OpenID Connect version
23962462
autocreate_defaults = {
@@ -2437,9 +2503,12 @@ def main(_print=print):
24372503
print("Accepted:")
24382504
for (key, val) in accepted.items():
24392505
print("\t%s: %s" % (key, val))
2440-
print("Rejected:")
2441-
for (key, val) in rejected.items():
2442-
print("\t%s: %s" % (key, val))
2506+
if rejected:
2507+
print("Rejected:")
2508+
for (key, val) in rejected.items():
2509+
print("\t%s: %s" % (key, val))
2510+
_exit(1)
2511+
24432512
user_arguments_dict = {'oidc.claim.aud': ['http://somedomain.org'],
24442513
'oidc.claim.country': ['DK'],
24452514
'oidc.claim.email': ['bardino@nbi.ku.dk,bardino@science.ku.dk'],
@@ -2464,9 +2533,14 @@ def main(_print=print):
24642533
print("Accepted:")
24652534
for (key, val) in accepted.items():
24662535
print("\t%s: %s" % (key, val))
2467-
print("Rejected:")
2468-
for (key, val) in rejected.items():
2469-
print("\t%s: %s" % (key, val))
2536+
if rejected:
2537+
print("Rejected:")
2538+
for (key, val) in rejected.items():
2539+
print("\t%s: %s" % (key, val))
2540+
_exit(1)
2541+
2542+
_exit(0)
2543+
24702544

24712545
if __name__ == '__main__':
24722546
main()

requirements.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ future
44
pyotp;python_version >= "3"
55
pyotp<2.4;python_version < "3"
66
pyyaml
7-
email-validator
7+
dnspython;python_version >= "3"
8+
dnspython<2;python_version < "3"
9+
email-validator<2.1;python_version >= "3.7"
10+
email-validator<2.0;python_version >= "3" and python_version < "3.7"
11+
email-validator<1.3;python_version < "3"
812

913
# NOTE: additional optional dependencies depending on site conf are listed
1014
# in recommended.txt and can be installed in the same manner by pointing

tests/test_mig_shared_safeinput.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), ".")))
3737

3838
from support import MigTestCase, testmain
39-
from mig.shared.safeinput import \
39+
from mig.shared.safeinput import main as safeinput_main, InputException, \
4040
filter_commonname, valid_commonname
4141

4242
PY2 = sys.version_info[0] == 2
@@ -56,12 +56,21 @@ def is_string_of_unicode(value):
5656

5757
class MigSharedSafeinput(MigTestCase):
5858

59-
def test_basic_import(self):
60-
safeimport = importlib.import_module("mig.shared.safeinput")
61-
6259
def test_existing_main(self):
63-
safeimport = importlib.import_module("mig.shared.safeinput")
64-
safeimport.main(_print=lambda _: None)
60+
def raise_on_error_exit(exit_code):
61+
if exit_code != 0:
62+
if raise_on_error_exit.last_print is not None:
63+
identifying_message = raise_on_error_exit.last_print
64+
else:
65+
identifying_message = 'unknown'
66+
raise AssertionError(
67+
'failure in unittest/testcore: %s' % (identifying_message,))
68+
raise_on_error_exit.last_print = None
69+
70+
def record_last_print(value):
71+
raise_on_error_exit.last_print = value
72+
73+
safeinput_main(_exit=raise_on_error_exit, _print=record_last_print)
6574

6675
COMMONNAME_PERMITTED = (
6776
'Firstname Lastname',
@@ -81,15 +90,15 @@ def test_commonname_valid(self):
8190
saw_raise = False
8291
try:
8392
valid_commonname(test_cn)
84-
except Exception:
93+
except InputException:
8594
saw_raise = True
8695
self.assertFalse(saw_raise)
8796

8897
for test_cn in self.COMMONNAME_PROHIBITED:
8998
saw_raise = False
9099
try:
91100
valid_commonname(test_cn)
92-
except Exception:
101+
except InputException:
93102
saw_raise = True
94103
self.assertTrue(saw_raise)
95104

0 commit comments

Comments
 (0)