Make responding with binary data work under PY3.

albu-diku · albu-diku · commit 5089682ebd91 · 2025-01-11T13:13:29.000+01:00
The code as previously written would unconditionally encode parts as
though they contained text - this went unnoticed on PY2 because strings
and bytes are one and the same thing but blew up on PY3 where a string
is explicitly of type unicode while a binary file would be raw bytes.

Explicitly check the output_format and if instructed to serve a file do
so without touching the chunks of file content bytes being yielded.
diff --git a/mig/wsgi-bin/migwsgi.py b/mig/wsgi-bin/migwsgi.py
@@ -206,7 +206,7 @@ def _set_os_environ(value):
     return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)
 
 
-def _application(configuration, environ, start_response, _set_environ, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
+def _application(configuration, environ, start_response, _set_environ, _fieldstorage_to_dict=fieldstorage_to_dict, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
 
     # NOTE: pass app environ including apache and query args on to sub handlers
     #       through the usual 'os.environ' channel expected in functionality
@@ -319,7 +319,7 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
         #              (backend, script_name))
         fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
                                         environ=environ)
-        user_arguments_dict = fieldstorage_to_dict(fieldstorage)
+        user_arguments_dict = _fieldstorage_to_dict(fieldstorage)
         if 'output_format' in user_arguments_dict:
             output_format = user_arguments_dict['output_format'][0]
 
@@ -437,7 +437,10 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
             #              (backend, i+1, chunk_parts))
             # end index may be after end of content - but no problem
             part = output[i*download_block_size:(i+1)*download_block_size]
-            yield force_utf8(part)
+            if output_format == 'file':
+                yield part
+            else:
+                yield force_utf8(part)
         if chunk_parts > 1:
             _logger.info("WSGI %s finished yielding all %d output parts" %
                          (backend, chunk_parts))
diff --git a/tests/test_mig_wsgi-bin_migwsgi.py b/tests/test_mig_wsgi-bin_migwsgi.py
@@ -32,8 +32,9 @@
 import os
 import stat
 import sys
+import unittest
 
-from tests.support import MIG_BASE, MigTestCase, testmain
+from tests.support import MIG_BASE, TEST_BASE, TEST_DATA_DIR, MigTestCase, testmain
 from mig.shared.output import format_output
 import mig.shared.returnvalues as returnvalues
 
@@ -65,14 +66,35 @@ def _is_return_value(return_value):
     return return_value in defined_return_values
 
 
-def _trigger_and_unpack_result(application_result):
+def _trigger_and_unpack_result(application_result, result_kind='textual'):
+    assert result_kind in ('textual', 'binary')
+
     chunks = list(application_result)
     assert len(chunks) > 0, "invocation returned no output"
     complete_value = b''.join(chunks)
-    decoded_value = codecs.decode(complete_value, 'utf8')
+    if result_kind == 'binary':
+        decoded_value = complete_value
+    else:
+        decoded_value = codecs.decode(complete_value, 'utf8')
     return decoded_value
 
 
+def create_instrumented_fieldstorage_to_dict():
+    def _instrumented_fieldstorage_to_dict(fieldstorage):
+        return _instrumented_fieldstorage_to_dict._result
+
+    _instrumented_fieldstorage_to_dict._result = {
+        'output_format': ('html',)
+    }
+
+    def set_result(result):
+        _instrumented_fieldstorage_to_dict._result = result
+
+    _instrumented_fieldstorage_to_dict.set_result = set_result
+
+    return _instrumented_fieldstorage_to_dict
+
+
 def create_instrumented_format_output():
     def _instrumented_format_output(
         configuration,
@@ -87,6 +109,16 @@ def _instrumented_format_output(
         call_args = (configuration, backend, ret_val, ret_msg, call_args_out_obj, outputformat,)
         _instrumented_format_output.calls.append({ 'args': call_args })
 
+        if _instrumented_format_output._file:
+            return format_output(
+                configuration,
+                backend,
+                ret_val,
+                ret_msg,
+                out_obj,
+                outputformat,
+            )
+
         # FIXME: the following is a workaround for a bug that exists between the WSGI wrapper
         #        and the output formatter - specifically, the latter adds default header and
         #        title if start does not exist, but the former ensures that start always exists
@@ -121,11 +153,18 @@ def _instrumented_format_output(
             outputformat,
         )
     _instrumented_format_output.calls = []
+    _instrumented_format_output._file = False
     _instrumented_format_output.values = dict(
         title_text='',
         header_text='',
     )
 
+
+    def _set_file(is_enabled):
+        _instrumented_format_output._file = is_enabled
+
+    setattr(_instrumented_format_output, 'set_file', _set_file)
+
     def _program_values(**kwargs):
         _instrumented_format_output.values.update(kwargs)
 
@@ -179,6 +218,7 @@ def before_each(self):
         self.fake_wsgi = prepare_wsgi(self.configuration, 'http://localhost/')
 
         # MiG WSGI wrapper specific setup
+        self.instrumented_fieldstorage_to_dict = create_instrumented_fieldstorage_to_dict()
         self.instrumented_format_output = create_instrumented_format_output()
         self.instrumented_retrieve_handler = create_instrumented_retrieve_handler()
 
@@ -190,6 +230,7 @@ def before_each(self):
         self.application_kwargs = dict(
             _wrap_wsgi_errors=noop,
             _format_output=self.instrumented_format_output,
+            _fieldstorage_to_dict=self.instrumented_fieldstorage_to_dict,
             _retrieve_handler=self.instrumented_retrieve_handler,
             _set_environ=noop,
         )
@@ -230,6 +271,29 @@ def test_return_value_ok_returns_expected_title(self):
         self.assertHtmlElementTextContent(output, 'title', 'TEST', trim_newlines=True)
         self.assertInstrumentation()
 
+    def test_return_value_ok_serving_a_binary_file(self):
+        test_binary_file = os.path.join(TEST_DATA_DIR, 'loading.gif')
+        with open(test_binary_file, 'rb') as f:
+            test_binary_data = f.read()
+
+        self.instrumented_fieldstorage_to_dict.set_result({
+            'output_format': ('file',)
+        })
+        self.instrumented_format_output.set_file(True)
+
+        file_obj = { 'object_type': 'binary', 'data': test_binary_data }
+        self.instrumented_retrieve_handler.program([file_obj], returnvalues.OK)
+
+        application_result = migwsgi._application(
+            *self.application_args,
+            **self.application_kwargs
+        )
+
+        output = _trigger_and_unpack_result(application_result, 'binary')
+
+        self.assertInstrumentation()
+        self.assertEqual(output, test_binary_data)
+
 
 if __name__ == '__main__':
     testmain()