ucphhpc · albu-diku · Jul 27, 2024 · jonasbardino · Aug 21, 2024 · jonasbardino
diff --git a/mig/shared/base.py b/mig/shared/base.py
@@ -32,10 +32,12 @@
 from past.builtins import basestring
 
 import base64
+import codecs
 import os
 import re
 
 # IMPORTANT: do not import any other MiG modules here - to avoid import loops
+from mig.shared.compat import PY2
 from mig.shared.defaults import default_str_coding, default_fs_coding, \
     keyword_all, keyword_auto, sandbox_names, _user_invisible_files, \
     _user_invisible_dirs, _vgrid_xgi_scripts, cert_field_order, csrf_field, \
@@ -488,12 +490,28 @@ def verify_local_url(configuration, req_url):
     return False
 
 
+def is_bytes_type(thetype):
+    """Return boolean indicating if thetype is the bytes type. We avoid
+    `isinstance` checks against the string types
+    and the like since it breaks when combined with python-future and futurize.
+    """
+    return (thetype == bytes)
+
+
 def is_unicode(val):
     """Return boolean indicating if val is a unicode string. We avoid
     `isinstance(val, unicode)`
     and the like since it breaks when combined with python-future and futurize.
     """
-    return (type(u"") == type(val))
+    return is_unicode_type(type(val))
+
+
+def is_unicode_type(thetype):
+    """Return boolean indicating if thetype is the unicode string type. We
+    avoid `isinstance(val, unicode)`
+    and the like since it breaks when combined with python-future and futurize.
+    """
+    return (thetype == type(u""))
 
 
 def force_utf8(val, highlight=''):
@@ -505,23 +523,32 @@ def force_utf8(val, highlight=''):
         val = "%s" % val
     if not is_unicode(val):
         return val
-    return "%s%s%s" % (highlight, val.encode("utf8"), highlight)
+    return codecs.encode("%s%s%s" % (highlight, val, highlight), 'utf8')
+
+
+def _walk_and_convert_recursive(input_obj, highlight='', _is_primitive=None, _force_primitive=None, _force_recursive=None):
+    assert _is_primitive is not None
+    assert _force_primitive is not None
+    assert _force_recursive is not None
+
+    thetype = type(input_obj)
+    if issubclass(thetype, dict):
+        return {_force_recursive(i, highlight): _force_recursive(j, highlight) for (i, j) in
+                input_obj.items()}
+    elif issubclass(thetype, (list, tuple)):
+        return thetype((_force_recursive(i, highlight) for i in input_obj))
+    elif not _is_primitive(thetype):
+        return _force_primitive(input_obj, highlight)
+    else:
+        return input_obj
 
 
 def force_utf8_rec(input_obj, highlight=''):
     """Recursive object conversion from unicode to utf8: useful to convert e.g.
     dictionaries with nested unicode strings to a pure utf8 version. Actual
     changes are marked out with the highlight string if given.
     """
-    if isinstance(input_obj, dict):
-        return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in
-                input_obj.items()}
-    elif isinstance(input_obj, list):
-        return [force_utf8_rec(i, highlight) for i in input_obj]
-    elif is_unicode(input_obj):
-        return force_utf8(input_obj, highlight)
-    else:
-        return input_obj
+    return _walk_and_convert_recursive(input_obj, highlight, _is_primitive=is_bytes_type, _force_primitive=force_utf8, _force_recursive=force_utf8_rec)
 
 
 def force_unicode(val, highlight=''):
@@ -541,15 +568,7 @@ def force_unicode_rec(input_obj, highlight=''):
     dictionaries with nested utf8 strings to a pure unicode version. Actual
     changes are marked out with the highlight string if given.
     """
-    if isinstance(input_obj, dict):
-        return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in
-                input_obj.items()}
-    elif isinstance(input_obj, list):
-        return [force_unicode_rec(i, highlight) for i in input_obj]
-    elif not is_unicode(input_obj):
-        return force_unicode(input_obj, highlight)
-    else:
-        return input_obj
+    return _walk_and_convert_recursive(input_obj, highlight, _is_primitive=is_unicode_type, _force_primitive=force_unicode, _force_recursive=force_unicode_rec)
 
 
 def _force_default_coding(input_obj, kind, highlight=''):

diff --git a/mig/shared/safeinput.py b/mig/shared/safeinput.py
@@ -58,7 +58,8 @@
     from html import escape as escape_html
 assert escape_html is not None
 
-from mig.shared.base import force_unicode, force_utf8
+from mig.shared.base import force_unicode
+from mig.shared.compat import ensure_native_string
 from mig.shared.defaults import src_dst_sep, username_charset, \
     username_max_length, session_id_charset, session_id_length, \
     subject_id_charset, subject_id_min_length, subject_id_max_length, \
@@ -2294,7 +2295,7 @@ def __init__(self, value):
     def __str__(self):
         """Return string representation"""
 
-        return force_utf8(force_unicode(self.value))
+        return ensure_native_string(self.value)
 
 
 def main(_exit=sys.exit, _print=print):

diff --git a/tests/test_mig_shared_base.py b/tests/test_mig_shared_base.py
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+#
+# --- BEGIN_HEADER ---
+#
+# test_mig_shared_base - unit test of the corresponding mig shared module
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+# USA.
+#
+# --- END_HEADER ---
+#
+
+"""Unit tests for the migrid module pointed to in the filename"""
+
+import binascii
+import os
+import sys
+
+from tests.support import PY2, MigTestCase, testmain
+
+from mig.shared.base import force_default_fs_coding_rec, \
+    force_default_fs_coding, force_default_str_coding_rec, \
+    force_default_str_coding, force_utf8, force_unicode
+
+DUMMY_BYTECHARS = b'DEADBEEF'
+DUMMY_BYTESRAW = binascii.unhexlify('DEADBEEF') # 4 bytes
+DUMMY_UNICODE = u'UniCode123½¾µßðþđŋħĸþł@ª€£$¥©®'
+
+
+class MigSharedBase__force_default_fs_coding_rec(MigTestCase):
+    """Unit tests of mig.shared.base force_default_fs_coding_rec()"""
+
+    def test_encode_a_string(self):
+        output = force_default_fs_coding_rec('foobar')
+
+        self.assertEqual(output, b'foobar')
+
+    def test_encode_within_a_dict(self):
+        output = force_default_fs_coding_rec({ 'key': 'value' })
+
+        self.assertEqual(output, { b'key': b'value' })
+
+    def test_encode_within_a_list(self):
+        output = force_default_fs_coding_rec(['foo', 'bar', 'baz'])
+
+        self.assertEqual(output, [b'foo', b'bar', b'baz'])
+
+    def test_encode_within_a_tuple_string(self):
+        output = force_default_fs_coding_rec(('foo', 'bar', 'baz'))
+
+        self.assertEqual(output, (b'foo', b'bar', b'baz'))
+
+    def test_encode_within_a_tuple_bytes(self):
+        output = force_default_fs_coding_rec((b'foo', b'bar', b'baz'))
+
+        self.assertEqual(output, (b'foo', b'bar', b'baz'))
+
+    def test_encode_within_a_tuple_unicode(self):
+        output = force_default_fs_coding_rec((u'foo', u'bar', u'baz'))
+
+        self.assertEqual(output, (b'foo', b'bar', b'baz'))
+
+
+class MigSharedBase__force_utf8(MigTestCase):
+    """Unit tests of mig.shared.base force_utf8()"""
+
+    def test_encode_string(self):
+        output = force_utf8('foobar')
+
+        self.assertEqual(output, b'foobar')
+
+    def test_encode_bytes(self):
+        output = force_utf8(b'foobar')
+
+        self.assertEqual(output, b'foobar')
+
+    def test_encode_unicode(self):
+        output = force_utf8(u'foobar')
+
+        self.assertEqual(output, b'foobar')
+
+
+class MigSharedBase__force_unicode(MigTestCase):
+    """Unit tests of mig.shared.base force_unicode()"""
+
+    def test_encode_string(self):
+        output = force_unicode('foobar')
+
+        self.assertEqual(output, u'foobar')
+
+    def test_encode_bytes(self):
+        output = force_unicode(b'foobar')
+
+        self.assertEqual(output, u'foobar')
+
+    def test_encode_unicode(self):
+        output = force_unicode(u'foobar')
+
+        self.assertEqual(output, u'foobar')
+
+if __name__ == '__main__':
+    testmain(failfast=True)