Skip to content

Commit ca47e01

Browse files
committed
Make the force recursive functions work on Python 3.
* support tuples
* string conversion
* define the walking internals only once
1 parent f5c4340 commit ca47e01

File tree

3 files changed

+113
-21
lines changed

3 files changed

+113
-21
lines changed

mig/shared/base.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@
3232
from past.builtins import basestring
3333

3434
import base64
35+
import codecs
3536
import os
3637
import re
3738

3839
# IMPORTANT: do not import any other MiG modules here - to avoid import loops
40+
from mig.shared.compat import PY2
3941
from mig.shared.defaults import default_str_coding, default_fs_coding, \
4042
keyword_all, keyword_auto, sandbox_names, _user_invisible_files, \
4143
_user_invisible_dirs, _vgrid_xgi_scripts, cert_field_order, csrf_field, \
@@ -505,23 +507,28 @@ def force_utf8(val, highlight=''):
505507
val = "%s" % val
506508
if not is_unicode(val):
507509
return val
508-
return "%s%s%s" % (highlight, val.encode("utf8"), highlight)
510+
return codecs.encode("%s%s%s" % (highlight, val, highlight), 'utf8')
511+
512+
513+
def _walk_and_covert_recursive(input_obj, highlight='', _as_bytes=False, _force_primitive=None, _force_recursive=None):
514+
thetype = type(input_obj)
515+
if issubclass(thetype, dict):
516+
return {_force_recursive(i, highlight): _force_recursive(j, highlight) for (i, j) in
517+
input_obj.items()}
518+
elif issubclass(thetype, (list, tuple)):
519+
return thetype((_force_recursive(i, highlight) for i in input_obj))
520+
elif not is_unicode(input_obj):
521+
return _force_primitive(input_obj, highlight)
522+
else:
523+
return input_obj
509524

510525

511526
def force_utf8_rec(input_obj, highlight=''):
512527
"""Recursive object conversion from unicode to utf8: useful to convert e.g.
513528
dictionaries with nested unicode strings to a pure utf8 version. Actual
514529
changes are marked out with the highlight string if given.
515530
"""
516-
if isinstance(input_obj, dict):
517-
return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in
518-
input_obj.items()}
519-
elif isinstance(input_obj, list):
520-
return [force_utf8_rec(i, highlight) for i in input_obj]
521-
elif is_unicode(input_obj):
522-
return force_utf8(input_obj, highlight)
523-
else:
524-
return input_obj
531+
return _walk_and_covert_recursive(input_obj, highlight, _force_primitive=force_utf8, _force_recursive=force_utf8_rec)
525532

526533

527534
def force_unicode(val, highlight=''):
@@ -541,15 +548,7 @@ def force_unicode_rec(input_obj, highlight=''):
541548
dictionaries with nested utf8 strings to a pure unicode version. Actual
542549
changes are marked out with the highlight string if given.
543550
"""
544-
if isinstance(input_obj, dict):
545-
return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in
546-
input_obj.items()}
547-
elif isinstance(input_obj, list):
548-
return [force_unicode_rec(i, highlight) for i in input_obj]
549-
elif not is_unicode(input_obj):
550-
return force_unicode(input_obj, highlight)
551-
else:
552-
return input_obj
551+
return _walk_and_covert_recursive(input_obj, highlight, _force_primitive=force_unicode, _force_recursive=force_unicode_rec)
553552

554553

555554
def _force_default_coding(input_obj, kind, highlight=''):

mig/shared/safeinput.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@
5858
from html import escape as escape_html
5959
assert escape_html is not None
6060

61-
from mig.shared.base import force_unicode, force_utf8
61+
from mig.shared.base import force_unicode
62+
from mig.shared.compat import ensure_native_string
6263
from mig.shared.defaults import src_dst_sep, username_charset, \
6364
username_max_length, session_id_charset, session_id_length, \
6465
subject_id_charset, subject_id_min_length, subject_id_max_length, \
@@ -2294,7 +2295,7 @@ def __init__(self, value):
22942295
def __str__(self):
22952296
"""Return string representation"""
22962297

2297-
return force_utf8(force_unicode(self.value))
2298+
return ensure_native_string(self.value)
22982299

22992300

23002301
def main(_exit=sys.exit, _print=print):

tests/test_mig_shared_base.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# --- BEGIN_HEADER ---
4+
#
5+
# test_mig_shared_base - unit test of the corresponding mig shared module
6+
# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH
7+
#
8+
# This file is part of MiG.
9+
#
10+
# MiG is free software: you can redistribute it and/or modify
11+
# it under the terms of the GNU General Public License as published by
12+
# the Free Software Foundation; either version 2 of the License, or
13+
# (at your option) any later version.
14+
#
15+
# MiG is distributed in the hope that it will be useful,
16+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
# GNU General Public License for more details.
19+
#
20+
# You should have received a copy of the GNU General Public License
21+
# along with this program; if not, write to the Free Software
22+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
23+
# USA.
24+
#
25+
# --- END_HEADER ---
26+
#
27+
28+
"""Unit tests for the migrid module pointed to in the filename"""
29+
30+
import binascii
31+
import os
32+
import sys
33+
34+
from tests.support import PY2, MigTestCase, testmain
35+
36+
from mig.shared.base import force_default_fs_coding_rec, force_default_str_coding_rec
37+
38+
DUMMY_BYTECHARS = b'DEADBEEF'
39+
DUMMY_BYTESRAW = binascii.unhexlify('DEADBEEF') # 4 bytes
40+
DUMMY_UNICODE = u'UniCode123½¾µßðþđŋħĸþł@ª€£$¥©®'
41+
42+
43+
class MigSharedBase__force_default_fs_coding_rec(MigTestCase):
44+
"""Unit tests of mig.shared.base force_default_fs_coding_rec()"""
45+
46+
def test_encode_a_string(self):
47+
output = force_default_fs_coding_rec('foobar')
48+
49+
self.assertEqual(output, 'foobar')
50+
51+
def test_encode_within_a_dict(self):
52+
output = force_default_fs_coding_rec({ 'key': 'value' })
53+
54+
self.assertEqual(output, { 'key': 'value' })
55+
56+
def test_encode_within_a_list(self):
57+
output = force_default_fs_coding_rec(['foo', 'bar', 'baz'])
58+
59+
self.assertEqual(output, ['foo', 'bar', 'baz'])
60+
61+
def test_encode_within_a_tuple(self):
62+
output = force_default_fs_coding_rec(('foo', 'bar', 'baz'))
63+
64+
self.assertEqual(output, ('foo', 'bar', 'baz'))
65+
66+
67+
class MigSharedBase__force_default_str_coding_rec(MigTestCase):
68+
"""Unit tests of mig.shared.base force_default_str_coding_rec()"""
69+
70+
def test_encode_a_string(self):
71+
output = force_default_str_coding_rec('foobar')
72+
73+
self.assertEqual(output, 'foobar')
74+
75+
def test_encode_within_a_dict(self):
76+
output = force_default_str_coding_rec({ 'key': 'value' })
77+
78+
self.assertEqual(output, { 'key': 'value' })
79+
80+
def test_encode_within_a_list(self):
81+
output = force_default_str_coding_rec(['foo', 'bar', 'baz'])
82+
83+
self.assertEqual(output, ['foo', 'bar', 'baz'])
84+
85+
def test_encode_within_a_tuple(self):
86+
output = force_default_str_coding_rec(('foo', 'bar', 'baz'))
87+
88+
self.assertEqual(output, ('foo', 'bar', 'baz'))
89+
90+
91+
if __name__ == '__main__':
92+
testmain()

0 commit comments

Comments
 (0)