Skip to content

Commit a40a095

Browse files
committed
Make the force recursive functions work on Python 3.
* support tuples * string conversion * define the walking internals only once
1 parent 0520550 commit a40a095

File tree

3 files changed

+159
-22
lines changed

3 files changed

+159
-22
lines changed

mig/shared/base.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@
3232
from past.builtins import basestring
3333

3434
import base64
35+
import codecs
3536
import os
3637
import re
3738

3839
# IMPORTANT: do not import any other MiG modules here - to avoid import loops
40+
from mig.shared.compat import PY2
3941
from mig.shared.defaults import default_str_coding, default_fs_coding, \
4042
keyword_all, keyword_auto, sandbox_names, _user_invisible_files, \
4143
_user_invisible_dirs, _vgrid_xgi_scripts, cert_field_order, csrf_field, \
@@ -488,12 +490,28 @@ def verify_local_url(configuration, req_url):
488490
return False
489491

490492

493+
def is_bytes_type(thetype):
494+
"""Return boolean indicating if thetype is the bytes type. We avoid
495+
`isinstance(val, unicode)`
496+
and the like since it breaks when combined with python-future and futurize.
497+
"""
498+
return (thetype == bytes)
499+
500+
491501
def is_unicode(val):
492502
"""Return boolean indicating if val is a unicode string. We avoid
493503
`isinstance(val, unicode)`
494504
and the like since it breaks when combined with python-future and futurize.
495505
"""
496-
return (type(u"") == type(val))
506+
return is_unicode_type(type(val))
507+
508+
509+
def is_unicode_type(thetype):
510+
"""Return boolean indicating if thetype is the unicode string type. We avoid
511+
`isinstance(val, unicode)`
512+
and the like since it breaks when combined with python-future and futurize.
513+
"""
514+
return (thetype == type(u""))
497515

498516

499517
def force_utf8(val, highlight=''):
@@ -505,23 +523,32 @@ def force_utf8(val, highlight=''):
505523
val = "%s" % val
506524
if not is_unicode(val):
507525
return val
508-
return "%s%s%s" % (highlight, val.encode("utf8"), highlight)
526+
return codecs.encode("%s%s%s" % (highlight, val, highlight), 'utf8')
527+
528+
529+
def _walk_and_convert_recursive(input_obj, highlight='', _is_primitive=None, _force_primitive=None, _force_recursive=None):
530+
assert _is_primitive is not None
531+
assert _force_primitive is not None
532+
assert _force_recursive is not None
533+
534+
thetype = type(input_obj)
535+
if issubclass(thetype, dict):
536+
return {_force_recursive(i, highlight): _force_recursive(j, highlight) for (i, j) in
537+
input_obj.items()}
538+
elif issubclass(thetype, (list, tuple)):
539+
return thetype((_force_recursive(i, highlight) for i in input_obj))
540+
elif not _is_primitive(thetype):
541+
return _force_primitive(input_obj, highlight)
542+
else:
543+
return input_obj
509544

510545

511546
def force_utf8_rec(input_obj, highlight=''):
512547
"""Recursive object conversion from unicode to utf8: useful to convert e.g.
513548
dictionaries with nested unicode strings to a pure utf8 version. Actual
514549
changes are marked out with the highlight string if given.
515550
"""
516-
if isinstance(input_obj, dict):
517-
return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in
518-
input_obj.items()}
519-
elif isinstance(input_obj, list):
520-
return [force_utf8_rec(i, highlight) for i in input_obj]
521-
elif is_unicode(input_obj):
522-
return force_utf8(input_obj, highlight)
523-
else:
524-
return input_obj
551+
return _walk_and_convert_recursive(input_obj, highlight, _is_primitive=is_bytes_type, _force_primitive=force_utf8, _force_recursive=force_utf8_rec)
525552

526553

527554
def force_unicode(val, highlight=''):
@@ -541,15 +568,7 @@ def force_unicode_rec(input_obj, highlight=''):
541568
dictionaries with nested utf8 strings to a pure unicode version. Actual
542569
changes are marked out with the highlight string if given.
543570
"""
544-
if isinstance(input_obj, dict):
545-
return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in
546-
input_obj.items()}
547-
elif isinstance(input_obj, list):
548-
return [force_unicode_rec(i, highlight) for i in input_obj]
549-
elif not is_unicode(input_obj):
550-
return force_unicode(input_obj, highlight)
551-
else:
552-
return input_obj
571+
return _walk_and_convert_recursive(input_obj, highlight, _is_primitive=is_unicode_type, _force_primitive=force_unicode, _force_recursive=force_unicode_rec)
553572

554573

555574
def _force_default_coding(input_obj, kind, highlight=''):

mig/shared/safeinput.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@
5858
from html import escape as escape_html
5959
assert escape_html is not None
6060

61-
from mig.shared.base import force_unicode, force_utf8
61+
from mig.shared.base import force_unicode
62+
from mig.shared.compat import ensure_native_string
6263
from mig.shared.defaults import src_dst_sep, username_charset, \
6364
username_max_length, session_id_charset, session_id_length, \
6465
subject_id_charset, subject_id_min_length, subject_id_max_length, \
@@ -2294,7 +2295,7 @@ def __init__(self, value):
22942295
def __str__(self):
22952296
"""Return string representation"""
22962297

2297-
return force_utf8(force_unicode(self.value))
2298+
return ensure_native_string(self.value)
22982299

22992300

23002301
def main(_exit=sys.exit, _print=print):

tests/test_mig_shared_base.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# --- BEGIN_HEADER ---
4+
#
5+
# test_mig_shared_base - unit test of the corresponding mig shared module
6+
# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH
7+
#
8+
# This file is part of MiG.
9+
#
10+
# MiG is free software: you can redistribute it and/or modify
11+
# it under the terms of the GNU General Public License as published by
12+
# the Free Software Foundation; either version 2 of the License, or
13+
# (at your option) any later version.
14+
#
15+
# MiG is distributed in the hope that it will be useful,
16+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
# GNU General Public License for more details.
19+
#
20+
# You should have received a copy of the GNU General Public License
21+
# along with this program; if not, write to the Free Software
22+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
23+
# USA.
24+
#
25+
# --- END_HEADER ---
26+
#
27+
28+
"""Unit tests for the migrid module pointed to in the filename"""
29+
30+
import binascii
31+
import os
32+
import sys
33+
34+
from tests.support import PY2, MigTestCase, testmain
35+
36+
from mig.shared.base import force_default_fs_coding_rec, \
37+
force_default_fs_coding, force_default_str_coding_rec, \
38+
force_default_str_coding, force_utf8, force_unicode
39+
40+
DUMMY_BYTECHARS = b'DEADBEEF'
41+
DUMMY_BYTESRAW = binascii.unhexlify('DEADBEEF') # 4 bytes
42+
DUMMY_UNICODE = u'UniCode123½¾µßðþđŋħĸþł@ª€£$¥©®'
43+
44+
45+
class MigSharedBase__force_default_fs_coding_rec(MigTestCase):
46+
"""Unit tests of mig.shared.base force_default_fs_coding_rec()"""
47+
48+
def test_encode_a_string(self):
49+
output = force_default_fs_coding_rec('foobar')
50+
51+
self.assertEqual(output, b'foobar')
52+
53+
def test_encode_within_a_dict(self):
54+
output = force_default_fs_coding_rec({ 'key': 'value' })
55+
56+
self.assertEqual(output, { b'key': b'value' })
57+
58+
def test_encode_within_a_list(self):
59+
output = force_default_fs_coding_rec(['foo', 'bar', 'baz'])
60+
61+
self.assertEqual(output, [b'foo', b'bar', b'baz'])
62+
63+
def test_encode_within_a_tuple_string(self):
64+
output = force_default_fs_coding_rec(('foo', 'bar', 'baz'))
65+
66+
self.assertEqual(output, (b'foo', b'bar', b'baz'))
67+
68+
def test_encode_within_a_tuple_bytes(self):
69+
output = force_default_fs_coding_rec((b'foo', b'bar', b'baz'))
70+
71+
self.assertEqual(output, (b'foo', b'bar', b'baz'))
72+
73+
def test_encode_within_a_tuple_unicode(self):
74+
output = force_default_fs_coding_rec((u'foo', u'bar', u'baz'))
75+
76+
self.assertEqual(output, (b'foo', b'bar', b'baz'))
77+
78+
79+
class MigSharedBase__force_utf8(MigTestCase):
80+
"""Unit tests of mig.shared.base force_utf8()"""
81+
82+
def test_encode_string(self):
83+
output = force_utf8('foobar')
84+
85+
self.assertEqual(output, b'foobar')
86+
87+
def test_encode_bytes(self):
88+
output = force_utf8(b'foobar')
89+
90+
self.assertEqual(output, b'foobar')
91+
92+
def test_encode_unicode(self):
93+
output = force_utf8(u'foobar')
94+
95+
self.assertEqual(output, b'foobar')
96+
97+
98+
class MigSharedBase__force_unicode(MigTestCase):
99+
"""Unit tests of mig.shared.base force_unicode()"""
100+
101+
def test_encode_string(self):
102+
output = force_unicode('foobar')
103+
104+
self.assertEqual(output, u'foobar')
105+
106+
def test_encode_bytes(self):
107+
output = force_unicode(b'foobar')
108+
109+
self.assertEqual(output, u'foobar')
110+
111+
def test_encode_unicode(self):
112+
output = force_unicode(u'foobar')
113+
114+
self.assertEqual(output, u'foobar')
115+
116+
if __name__ == '__main__':
117+
testmain(failfast=True)

0 commit comments

Comments
 (0)