Make responding with binary data work under PY3.

albu-diku · albu-diku · commit e9444d40a0ac · 2024-10-02T18:50:39.000+02:00
The code as previously written would unconditionally encode parts as
though they contained text - this went unnoticed on PY2 because strings
and bytes are one and the same thing but blew up on PY3 where a string
is explicitly of type unicode while a binary file would be raw bytes.

Explicitly check the output_format and if instructed to serve a file do
so without touching the chunks of file content bytes being yielded.
diff --git a/mig/wsgi-bin/migwsgi.py b/mig/wsgi-bin/migwsgi.py
@@ -216,7 +216,7 @@ def _set_os_environ(value):
     return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)
 
 
-def _application(configuration, environ, start_response, _set_environ, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
+def _application(configuration, environ, start_response, _set_environ, _fieldstorage_to_dict=fieldstorage_to_dict, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
 
     # NOTE: pass app environ including apache and query args on to sub handlers
     #       through the usual 'os.environ' channel expected in functionality
@@ -329,7 +329,7 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
         #              (backend, script_name))
         fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
                                         environ=environ)
-        user_arguments_dict = fieldstorage_to_dict(fieldstorage)
+        user_arguments_dict = _fieldstorage_to_dict(fieldstorage)
         if 'output_format' in user_arguments_dict:
             output_format = user_arguments_dict['output_format'][0]
 
@@ -447,7 +447,10 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
             #              (backend, i+1, chunk_parts))
             # end index may be after end of content - but no problem
             part = output[i*download_block_size:(i+1)*download_block_size]
-            yield _ensure_encoded_string(part)
+            if output_format == 'file':
+                yield part
+            else:
+                yield _ensure_encoded_string(part)
         if chunk_parts > 1:
             _logger.info("WSGI %s finished yielding all %d output parts" %
                          (backend, chunk_parts))
diff --git a/tests/data/loading.gif b/tests/data/loading.gif
@@ -0,0 +1 @@
+../../mig/images/loading.gif
diff --git a/tests/support/__init__.py b/tests/support/__init__.py
@@ -42,7 +42,7 @@
 
 from tests.support.configsupp import FakeConfiguration
 from tests.support.suppconst import MIG_BASE, TEST_BASE, TEST_FIXTURE_DIR, \
-    TEST_OUTPUT_DIR
+    TEST_OUTPUT_DIR, TEST_DATA_DIR
 
 PY2 = (sys.version_info[0] == 2)
 
diff --git a/tests/support/suppconst.py b/tests/support/suppconst.py
@@ -31,6 +31,7 @@
 # Use abspath for __file__ on Py2
 _SUPPORT_DIR = os.path.dirname(os.path.abspath(__file__))
 TEST_BASE = os.path.normpath(os.path.join(_SUPPORT_DIR, ".."))
+TEST_DATA_DIR = os.path.join(TEST_BASE, "data")
 TEST_FIXTURE_DIR = os.path.join(TEST_BASE, "fixture")
 TEST_OUTPUT_DIR = os.path.join(TEST_BASE, "output")
 MIG_BASE = os.path.realpath(os.path.join(TEST_BASE, ".."))
diff --git a/tests/test_mig_wsgi-bin_migwsgi.py b/tests/test_mig_wsgi-bin_migwsgi.py
@@ -32,8 +32,9 @@
 import os
 import stat
 import sys
+import unittest
 
-from tests.support import MIG_BASE, MigTestCase, testmain
+from tests.support import MIG_BASE, TEST_BASE, TEST_DATA_DIR, MigTestCase, testmain
 from mig.shared.output import format_output
 import mig.shared.returnvalues as returnvalues
 
@@ -66,14 +67,35 @@ def _is_return_value(return_value):
     return return_value in defined_return_values
 
 
-def _trigger_and_unpack_result(application_result):
+def _trigger_and_unpack_result(application_result, result_kind='textual'):
+    assert result_kind in ('textual', 'binary')
+
     chunks = list(application_result)
     assert len(chunks) > 0, "invocation returned no output"
     complete_value = b''.join(chunks)
-    decoded_value = codecs.decode(complete_value, 'utf8')
+    if result_kind == 'binary':
+        decoded_value = complete_value
+    else:
+        decoded_value = codecs.decode(complete_value, 'utf8')
     return decoded_value
 
 
+def create_instrumented_fieldstorage_to_dict():
+    def _instrumented_fieldstorage_to_dict(fieldstorage):
+        return _instrumented_fieldstorage_to_dict._result
+
+    _instrumented_fieldstorage_to_dict._result = {
+        'output_format': ('html',)
+    }
+
+    def set_result(result):
+        _instrumented_fieldstorage_to_dict._result = result
+
+    _instrumented_fieldstorage_to_dict.set_result = set_result
+
+    return _instrumented_fieldstorage_to_dict
+
+
 def create_instrumented_format_output():
     def _instrumented_format_output(
         configuration,
@@ -88,6 +110,16 @@ def _instrumented_format_output(
         call_args = (configuration, backend, ret_val, ret_msg, call_args_out_obj, outputformat,)
         _instrumented_format_output.calls.append({ 'args': call_args })
 
+        if _instrumented_format_output._file:
+            return format_output(
+                configuration,
+                backend,
+                ret_val,
+                ret_msg,
+                out_obj,
+                outputformat,
+            )
+
         # FIXME: the following is a workaround for a bug that exists between the WSGI wrapper
         #        and the output formatter - specifically, the latter adds default header and
         #        title if start does not exist, but the former ensures that start always exists
@@ -122,11 +154,18 @@ def _instrumented_format_output(
             outputformat,
         )
     _instrumented_format_output.calls = []
+    _instrumented_format_output._file = False
     _instrumented_format_output.values = dict(
         title_text='',
         header_text='',
     )
 
+
+    def _set_file(is_enabled):
+        _instrumented_format_output._file = is_enabled
+
+    setattr(_instrumented_format_output, 'set_file', _set_file)
+
     def _program_values(**kwargs):
         _instrumented_format_output.values.update(kwargs)
 
@@ -185,13 +224,15 @@ def before_each(self):
         self.fake_start_response = create_wsgi_start_response()
 
         # MiG WSGI wrapper specific setup
+        self.instrumented_fieldstorage_to_dict = create_instrumented_fieldstorage_to_dict()
         self.instrumented_format_output = create_instrumented_format_output()
         self.instrumented_retrieve_handler = create_instrumented_retrieve_handler()
 
         self.application_args = (self.configuration, self.fake_wsgi_environ, self.fake_start_response,)
         self.application_kwargs = dict(
             _wrap_wsgi_errors=noop,
             _format_output=self.instrumented_format_output,
+            _fieldstorage_to_dict=self.instrumented_fieldstorage_to_dict,
             _retrieve_handler=self.instrumented_retrieve_handler,
             _set_environ=noop,
         )
@@ -236,6 +277,29 @@ def test_return_value_ok_returns_expected_title(self):
         self.assertInstrumentation()
         self.assertHtmlElementTextContent(output, 'title', 'TEST', trim_newlines=True)
 
+    def test_return_value_ok_serving_a_binary_file(self):
+        test_binary_file = os.path.join(TEST_DATA_DIR, 'loading.gif')
+        with open(test_binary_file, 'rb') as f:
+            test_binary_data = f.read()
+
+        self.instrumented_fieldstorage_to_dict.set_result({
+            'output_format': ('file',)
+        })
+        self.instrumented_format_output.set_file(True)
+
+        file_obj = { 'object_type': 'binary', 'data': test_binary_data }
+        self.instrumented_retrieve_handler.program([file_obj], returnvalues.OK)
+
+        application_result = migwsgi._application(
+            *self.application_args,
+            **self.application_kwargs
+        )
+
+        output = _trigger_and_unpack_result(application_result, 'binary')
+
+        self.assertInstrumentation()
+        self.assertEqual(output, test_binary_data)
+
 
 if __name__ == '__main__':
     testmain()