Skip to content

Fix handling of binary output with output format override #275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ dist/
downloads/
eggs/
.eggs/
lib/
lib64/
/lib/
usr/lib/
/lib64/
usr/lib64/
parts/
sdist/
var/
Expand Down
10 changes: 10 additions & 0 deletions mig/lib/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
= Modernization and Clean Up =
We will gradually move code here as part of the ongoing modernization and
cleanup efforts.
That also means that any code placed here MUST comply with the project style
guides, be lint clean, documented and have decent unit test coverage.

You may want to use autopep8, pylint, ruff or any available make lint targets
to help verify.
The black code formatter and isort may also come in handy. You can see usage
hints in `.github/workflows/python-stylecheck.yml`.
69 changes: 69 additions & 0 deletions mig/lib/xgicore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# --- BEGIN_HEADER ---
#
# xgicore - Xgi wrapper functions for functionality backends
# Copyright (C) 2003-2025 The MiG Project by the Science HPC Center at UCPH
#
# This file is part of MiG.
#
# MiG is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# MiG is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

Check warning on line 23 in mig/lib/xgicore.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (81 > 80 characters)
#
# -- END_HEADER ---
#

"""Shared helpers for CGI+WSGI interface to functionality backends."""


def get_output_format(configuration, user_args, default_format="html"):
    """Get output_format from user_args.

    Looks up the ``output_format`` entry in ``user_args``, which is expected
    to map argument names to lists of values, and returns the first value.
    Falls back to ``default_format`` when the entry is missing or holds no
    values at all. The ``configuration`` argument is not used here.
    """
    requested = user_args.get("output_format", None)
    # NOTE: guard against an empty value list, which would otherwise raise
    #       IndexError on the first-element lookup below.
    if not requested:
        return default_format
    return requested[0]


def override_output_format(configuration, user_args, out_objs, out_format):
    """Return the output format to use after honoring any override request.

    When out_objs holds an entry with object_type "start" and a true
    override_format value, the format is looked up again from user_args with
    get_output_format. Otherwise the provided out_format is returned as-is.
    """
    override_requested = any(
        entry.get("object_type", None) == "start"
        and entry.get("override_format", False)
        for entry in out_objs
    )
    if override_requested:
        return get_output_format(configuration, user_args)
    return out_format


def fill_start_headers(configuration, out_objs, out_format):
    """Make sure out_objs has start entry with basic content headers.

    Finds the last entry in out_objs with object_type "start". If there is
    none, a new start entry is inserted in-place at the front of out_objs.
    If the start entry has no (or empty) headers, they are set to a single
    Content-Type header derived from out_format: "json" and "file" get
    application/json and application/octet-stream, "html" gets text/html and
    anything else gets text/plain. Existing non-empty headers are left
    untouched. The configuration argument is not used here.

    Returns the start entry, which is also a member of out_objs.
    """
    start_entry = None
    for entry in out_objs:
        # NOTE: use get() like override_output_format does, so that entries
        #       without an object_type are skipped rather than raising
        #       KeyError.
        if entry.get("object_type", None) == "start":
            start_entry = entry
    if not start_entry:
        start_entry = {"object_type": "start", "headers": []}
        out_objs.insert(0, start_entry)
    elif not start_entry.get("headers", False):
        start_entry["headers"] = []
    # Now fill headers to match output format
    default_content = "text/html"
    if "json" == out_format:
        default_content = "application/json"
    elif "file" == out_format:
        default_content = "application/octet-stream"
    elif "html" != out_format:
        default_content = "text/plain"
    # Only add the default when the backend did not provide any headers
    if not start_entry["headers"]:
        start_entry["headers"].append(("Content-Type", default_content))
    return start_entry
34 changes: 12 additions & 22 deletions mig/shared/cgiscriptstub.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# --- BEGIN_HEADER ---
#
# cgiscriptstub - cgi wrapper functions for functionality backends
# Copyright (C) 2003-2024 The MiG Project lead by Brian Vinter
# Copyright (C) 2003-2025 The MiG Project by the Science HPC Center at UCPH
#
# This file is part of MiG.
#
Expand All @@ -20,7 +20,7 @@
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

Check warning on line 23 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (81 > 80 characters)
#
# -- END_HEADER ---
#
Expand All @@ -39,9 +39,11 @@
# DUMMY try/except to avoid autopep8 from mangling import order
try:
cgitb.enable()
except:

Check warning on line 42 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

do not use bare 'except'
pass

from mig.lib.xgicore import fill_start_headers, get_output_format, \
override_output_format
from mig.shared.bailout import crash_helper
from mig.shared.base import requested_backend, allow_script, \
is_default_str_coding, force_default_str_coding_rec
Expand Down Expand Up @@ -79,23 +81,7 @@
"""Shared finalization"""

logger = configuration.logger
default_content = 'text/html'
if 'json' == output_format:
default_content = 'application/json'
elif 'file' == output_format:
default_content = 'application/octet-stream'
elif 'html' != output_format:
default_content = 'text/plain'
default_headers = [('Content-Type', default_content)]
start_entry = None
for entry in output_objs:
if entry['object_type'] == 'start':
start_entry = entry
if not start_entry:
start_entry = {'object_type': 'start', 'headers': default_headers}
output_objs = [start_entry] + output_objs
elif not start_entry.get('headers', []):
start_entry['headers'] = default_headers
start_entry = fill_start_headers(configuration, output_objs, output_format)
headers = start_entry['headers']

output = format_output(configuration, backend, ret_code, ret_msg,
Expand All @@ -110,7 +96,7 @@

if output_format != 'file' and not is_default_str_coding(output):
logger.error(
"Formatted output is NOT on default str coding: %s" % [output[:100]])

Check warning on line 99 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (81 > 80 characters)
err_mark = '__****__'
output = format_output(configuration, backend, ret_code, ret_msg,
force_default_str_coding_rec(
Expand All @@ -125,12 +111,12 @@
# https://stackoverflow.com/questions/40450791/python-cgi-print-image-to-html

try:
#logger.debug("write headers: %s" % header_out)

Check warning on line 114 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

block comment should start with '# '
sys.stdout.write(header_out)
sys.stdout.write("\n\n")
#logger.debug("flush stdout")

Check warning on line 117 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

block comment should start with '# '
sys.stdout.flush()
#logger.debug("write content: %s" % [output[:64], '..', output[-64:]])

Check warning on line 119 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

block comment should start with '# '
# NOTE: always output native strings to stdout but use raw buffer
# for byte output on py3 as explained above.
if sys.version_info[0] < 3 or is_default_str_coding(output):
Expand Down Expand Up @@ -166,9 +152,7 @@
logger.debug("handling cgi request with python %s from %s (%s)" %
(sys.version_info, client_id, environ))

# default to html output

output_format = user_arguments_dict.get('output_format', ['html'])[-1]
output_format = get_output_format(configuration, user_arguments_dict)

# TODO: add environ arg support to all main backends and use here

Expand All @@ -183,7 +167,7 @@
# Override main function with reject helper
main = reject_main
(out_obj, (ret_code, ret_msg)) = main(client_id, user_arguments_dict)
except:

Check warning on line 170 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

do not use bare 'except'
import traceback
logger.error("script crashed:\n%s" % traceback.format_exc())
crash_helper(configuration, backend, out_obj)
Expand All @@ -191,14 +175,20 @@
after_time = time.time()
out_obj.append({'object_type': 'timing_info', 'text':
"done in %.3fs" % (after_time - before_time)})

# TODO: drop delay_format and rely on shared override_format marker instead
if delay_format:
output_format = user_arguments_dict.get('output_format', ['html'])[-1]
output_format = get_output_format(configuration, user_arguments_dict)

# NOTE: optional output_format override if backend requests it in start
output_format = override_output_format(configuration, user_arguments_dict,
out_obj, output_format)

finish_cgi_script(configuration, backend, output_format,
ret_code, ret_msg, out_obj)


def run_cgi_script(main, delayed_input=None, delay_format=False):

Check failure on line 191 in mig/shared/cgiscriptstub.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused function 'run_cgi_script' (60% confidence)
"""Just a wrapper for run_cgi_script_possibly_with_cert now since we always
verify client_id in backend anyway and have easier access to outputting a
sane help page there.
Expand Down
3 changes: 3 additions & 0 deletions mig/shared/functionality/showvgridprivatefile.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
Expand All @@ -20,7 +20,7 @@
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

Check warning on line 23 in mig/shared/functionality/showvgridprivatefile.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (81 > 80 characters)
#
# -- END_HEADER ---
#
Expand Down Expand Up @@ -54,7 +54,7 @@
def main(client_id, user_arguments_dict):
"""Main function used by front end"""

(configuration, logger, output_objects, op_name) = \

Check failure on line 57 in mig/shared/functionality/showvgridprivatefile.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused variable 'op_name' (60% confidence)
initialize_main_variables(client_id, op_title=False, op_header=False,
op_menu=False)
defaults = signature()[1]
Expand Down Expand Up @@ -116,6 +116,9 @@
if force_file:
content = read_file(abs_path, logger, mode=src_mode)
lines = [content]
# Force delivery of binary as file download
user_arguments_dict['output_format'] = ['file']
start_entry['override_format'] = True
else:
content = lines = read_file_lines(abs_path, logger,
mode=src_mode)
Expand Down
35 changes: 9 additions & 26 deletions mig/wsgi-bin/migwsgi.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/env python-version-from-mod-wsgi
# -*- coding: utf-8 -*-
#
# --- BEGIN_HEADER ---
#
# migwsgi.py - Provides the entire WSGI interface
# Copyright (C) 2003-2024 The MiG Project lead by Brian Vinter
# Copyright (C) 2003-2025 The MiG Project by the Science HPC Center at UCPH
#
# This file is part of MiG.
#
Expand All @@ -20,13 +20,13 @@
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

Check warning on line 23 in mig/wsgi-bin/migwsgi.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

line too long (81 > 80 characters)
#
# -- END_HEADER ---
#

from builtins import range
from past.builtins import basestring

Check failure on line 29 in mig/wsgi-bin/migwsgi.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused import 'basestring' (90% confidence)

import cgi
import importlib
Expand All @@ -34,9 +34,11 @@
import sys
import time

from mig.lib.xgicore import fill_start_headers, get_output_format, \
override_output_format
from mig.shared import returnvalues
from mig.shared.bailout import bailout_helper, crash_helper, compact_string
from mig.shared.base import requested_backend, allow_script, \

Check failure on line 41 in mig/wsgi-bin/migwsgi.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused import 'force_utf8_rec' (90% confidence)
is_default_str_coding, force_native_str_rec, force_utf8, force_utf8_rec
from mig.shared.defaults import download_block_size, default_fs_coding
from mig.shared.conf import get_configuration_object
Expand All @@ -46,7 +48,7 @@
from mig.shared.scriptinput import fieldstorage_to_dict


def object_type_info(object_type):

Check failure on line 51 in mig/wsgi-bin/migwsgi.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused function 'object_type_info' (60% confidence)
"""Lookup object type"""

return get_object_type_info(object_type)
Expand Down Expand Up @@ -174,7 +176,7 @@

try:
environ['wsgi.errors'].write = _write_wsgi_errors
environ['wsgi.errors'].writelines = _writelines_wsgi_errors

Check failure on line 179 in mig/wsgi-bin/migwsgi.py

View workflow job for this annotation

GitHub Actions / Style check python and annotate

unused attribute 'writelines' (60% confidence)
environ['wsgi.errors'].flush = _flush_wsgi_errors
except:
# NOTE: python2 wsgi throws Log object attribute 'write' is read-only
Expand Down Expand Up @@ -271,11 +273,7 @@
from mig.shared.httpsclient import extract_client_id
client_id = extract_client_id(configuration, environ)

# Default to html output

default_content = 'text/html'
output_format = 'html'

output_format = "UNSET"
backend = "UNKNOWN"
output_objs = []
fieldstorage = None
Expand Down Expand Up @@ -316,8 +314,7 @@
fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
environ=environ)
user_arguments_dict = fieldstorage_to_dict(fieldstorage)
if 'output_format' in user_arguments_dict:
output_format = user_arguments_dict['output_format'][0]
output_format = get_output_format(configuration, user_arguments_dict)

module_path = 'mig.shared.functionality.%s' % backend
(allow, msg) = allow_script(configuration, script_name, client_id)
Expand Down Expand Up @@ -346,24 +343,10 @@

(ret_code, ret_msg) = ret_val

if 'json' == output_format:
default_content = 'application/json'
elif 'file' == output_format:
default_content = 'application/octet-stream'
elif 'html' != output_format:
default_content = 'text/plain'
default_headers = [('Content-Type', default_content)]
start_entry = None
for entry in output_objs:
if entry['object_type'] == 'start':
start_entry = entry
if not start_entry:
# _logger.debug("WSGI adding explicit headers: %s" % default_headers)
start_entry = {'object_type': 'start', 'headers': default_headers}
output_objs = [start_entry] + output_objs
elif not start_entry.get('headers', []):
# _logger.debug("WSGI adding missing headers: %s" % default_headers)
start_entry['headers'] = default_headers
# NOTE: optional output_format override if backend requests it in start
output_format = override_output_format(configuration, user_arguments_dict,
output_objs, output_format)
start_entry = fill_start_headers(configuration, output_objs, output_format)
response_headers = start_entry['headers']

# Pass wsgi info and helpers for optional use in output delivery
Expand Down
139 changes: 139 additions & 0 deletions tests/test_mig_lib_xgicore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
#
# --- BEGIN_HEADER ---
#
# test_mig_lib_xgicore - unit test of the corresponding mig lib module
# Copyright (C) 2003-2025 The MiG Project by the Science HPC Center at UCPH
#
# This file is part of MiG.
#
# MiG is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# MiG is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
#
# --- END_HEADER ---
#

"""Unit test xgicore functions"""

import os
import sys

from tests.support import MigTestCase, FakeConfiguration, testmain

from mig.lib.xgicore import *


class MigLibXgicore__get_output_format(MigTestCase):
    """Unit tests covering the get_output_format helper"""

    def test_default_when_missing(self):
        """Check that the default format is used if none was requested."""
        wanted = "html"
        result = get_output_format(
            FakeConfiguration(), {}, default_format=wanted
        )
        self.assertEqual(
            result, wanted, "mismatch in default output_format"
        )

    def test_get_single_requested_format(self):
        """Check that a single requested format is handed back."""
        wanted = "file"
        request_args = {'output_format': [wanted]}
        result = get_output_format(
            FakeConfiguration(), request_args, default_format='BOGUS'
        )
        self.assertEqual(
            result, wanted, "mismatch in extracted output_format"
        )

    def test_get_first_requested_format(self):
        """Check that the first of several requested formats wins."""
        wanted = "file"
        request_args = {'output_format': [wanted, 'BOGUS']}
        result = get_output_format(
            FakeConfiguration(), request_args, default_format='BOGUS'
        )
        self.assertEqual(
            result, wanted, "mismatch in extracted output_format"
        )


class MigLibXgicore__override_output_format(MigTestCase):
    """Unit test override_output_format"""

    def test_unchanged_without_override(self):
        """Test that existing output_format is returned when not overridden."""
        expected = "html"
        user_args = {}
        out_objs = []
        actual = override_output_format(FakeConfiguration(), user_args,
                                        out_objs, expected)
        self.assertEqual(actual, expected,
                         "mismatch in unchanged output_format")

    def test_get_single_requested_format(self):
        """Test that the requested output_format is returned if overridden."""
        expected = "file"
        user_args = {'output_format': [expected]}
        out_objs = [{'object_type': 'start', 'override_format': True}]
        actual = override_output_format(FakeConfiguration(), user_args,
                                        out_objs, 'OVERRIDE')
        self.assertEqual(actual, expected,
                         "mismatch in overriden output_format")

    def test_get_first_requested_format(self):
        """Test that first requested output_format is returned if overridden."""
        expected = "file"
        user_args = {'output_format': [expected, 'BOGUS']}
        # NOTE: exercise the override path itself rather than calling
        #       get_output_format directly, so this test actually covers
        #       override_output_format with multiple requested values.
        out_objs = [{'object_type': 'start', 'override_format': True}]
        actual = override_output_format(FakeConfiguration(), user_args,
                                        out_objs, 'OVERRIDE')
        self.assertEqual(actual, expected,
                         "mismatch in extracted output_format")


class MigLibXgicore__fill_start_headers(MigTestCase):
    """Unit tests covering the fill_start_headers helper"""

    def test_unchanged_when_set(self):
        """Check that a start entry with headers comes back untouched."""
        wanted = {
            'object_type': 'start',
            'headers': [('Content-Type', 'application/octet-stream'),
                        ('Content-Size', 42)],
        }
        payload = {'object_type': 'binary', 'data': 42*b'0'}
        result = fill_start_headers(
            FakeConfiguration(), [wanted, payload], "file"
        )
        self.assertEqual(
            result, wanted, "mismatch in unchanged start entry"
        )

    def test_headers_added_when_missing(self):
        """Check that a start entry without headers gets default ones."""
        wanted = {
            'object_type': 'start',
            'headers': [('Content-Type', 'application/octet-stream')],
        }
        entries = [
            {'object_type': 'start'},
            {'object_type': 'binary', 'data': 42*b'0'},
        ]
        result = fill_start_headers(FakeConfiguration(), entries, "file")
        self.assertEqual(
            result, wanted, "mismatch in auto initialized start entry"
        )

    def test_start_added_when_missing(self):
        """Check that a start entry is created when there is none."""
        wanted = {
            'object_type': 'start',
            'headers': [('Content-Type', 'application/octet-stream')],
        }
        entries = [{'object_type': 'binary', 'data': 42*b'0'}]
        result = fill_start_headers(FakeConfiguration(), entries, "file")
        self.assertEqual(
            result, wanted, "mismatch in auto initialized start entry"
        )


if __name__ == '__main__':
testmain()