From 5e59ee7244e7f514aac7ba75ca18922ce0317030 Mon Sep 17 00:00:00 2001
From: Alex Burke <albu@di.ku.dk>
Date: Tue, 8 Oct 2024 10:12:55 +0200
Subject: [PATCH 1/5] Rework python version handling and associated testconfig
 path handling.

A previous revision fixed an immediate problem with test path handling
but knowingly left some things on the table - in particular a split
between container based handling of Python 2 and the local execution
of Python 3 (meaning a potentially inconsistent version relative to
what the project considers officially supported).

Address this entirely: rework the default behaviour of `make test` to
container based execution and use a consistent baseline Python 3.

In order to continue to support rapid local iteration, provide a
separate `make unittest` target which will execute the test suite
locally and is also used as a mechanism to run other supporting tools.

The clean use of containers necessitated various changes that make path
arguments within the testconfig non-overlapping and better isolated.
Adjust all paths generated within the test suite to be always from the
base root instead of relative to the output directory. Opt to patch the
makeconfig generator rather than fiddle with generateconfs again.

While here also add support for an environment variable override that
allows execution of the test suite against arbitrary python 3 versions.
---
 .github/workflows/ci.yml                      | 11 +--
 Makefile                                      | 54 +++++++----
 .../{Dockerfile.python2 => Dockerfile.py2}    |  0
 envhelp/docker/Dockerfile.py3                 |  8 ++
 envhelp/docker/Dockerfile.pyver               |  9 ++
 envhelp/dpython                               | 88 +++++++++++++++++
 envhelp/lpython                               | 46 +++++++++
 envhelp/makeconfig.py                         | 29 +++---
 envhelp/python2                               | 43 +--------
 envhelp/python3                               | 20 +---
 mig/unittest/testcore.py                      | 27 ++----
 tests/__init__.py                             | 10 ++
 tests/support/__init__.py                     | 95 ++++++++++++++-----
 tests/support/_env.py                         | 11 +++
 tests/support/suppconst.py                    | 30 +++++-
 tests/test_booleans.py                        |  9 +-
 tests/test_mig_shared_functionality_cat.py    |  7 +-
 tests/test_mig_unittest_testcore.py           |  5 +-
 18 files changed, 352 insertions(+), 150 deletions(-)
 rename envhelp/docker/{Dockerfile.python2 => Dockerfile.py2} (100%)
 create mode 100644 envhelp/docker/Dockerfile.py3
 create mode 100644 envhelp/docker/Dockerfile.pyver
 create mode 100755 envhelp/dpython
 create mode 100755 envhelp/lpython
 create mode 100644 tests/support/_env.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f86bfba88..95c6e4b56 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -35,7 +35,7 @@ jobs:
           make dependencies
       - name: Run tests
         run: |
-          make test
+          make unittest
 
   python3-rocky9ish:
     runs-on: ubuntu-22.04
@@ -51,7 +51,7 @@ jobs:
           make dependencies
       - name: Run tests
         run: |
-          make test
+          make unittest
 
   python3-rocky8ish:
     runs-on: ubuntu-20.04
@@ -67,7 +67,7 @@ jobs:
           make dependencies
       - name: Run tests
         run: |
-          make test
+          make unittest
 
   python2-latest:
     runs-on: ubuntu-latest
@@ -80,8 +80,7 @@ jobs:
         uses: actions/checkout@v4
       - name: Setup environment
         run: |
-          pip install --no-cache-dir -r requirements.txt -r local-requirements.txt
+          make PYTHON_BIN=python PY=2 dependencies
       - name: Run tests
         run: |
-          PYTHON_BIN=python ./envhelp/makeconfig test --python2
-          MIG_ENV='local' python -m unittest discover -s tests/
+          make PYTHON_BIN=python PY=2 unittest
diff --git a/Makefile b/Makefile
index 7e39f2149..1863a9ffc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,21 @@
 ifndef MIG_ENV
 	MIG_ENV = 'local'
 endif
-ifeq ($(PY),2)
+
+ifndef PY
+	PY = 3
+endif
+
+LOCAL_PYTHON_BIN = './envhelp/lpython'
+
+ifdef PYTHON_BIN
+	LOCAL_PYTHON_BIN = $(PYTHON_BIN)
+else ifeq ($(PY),2)
 	PYTHON_BIN = './envhelp/python2'
 else
 	PYTHON_BIN = './envhelp/python3'
 endif
+
 ifeq ($(ALLDEPS),1)
 	REQS_PATH = ./recommended.txt
 else
@@ -17,8 +27,9 @@ info:
 	@echo
 	@echo "The following should help you get started:"
 	@echo
-	@echo "'make test'      - run the test suite"
-	@echo "'make PY=2 test' - run the test suite (python 2)"
+	@echo "'make test'      - run the test suite (default python 3)"
+	@echo "'make PY=2 test' - run the test suite (default python 2)"
+	@echo "'make unittest'  - execute tests locally for development"
 
 .PHONY: fmt
 fmt:
@@ -26,12 +37,13 @@ ifneq ($(MIG_ENV),'local')
 	@echo "unavailable outside local development environment"
 	@exit 1
 endif
-	$(PYTHON_BIN) -m autopep8 --ignore E402 -i
+	$(LOCAL_PYTHON_BIN) -m autopep8 --ignore E402 -i
 
 .PHONY: clean
 clean:
 	@rm -f ./envhelp/py2.imageid
-	@rm -f ./envhelp/py3.depends
+	@rm -f ./envhelp/py3.imageid
+	@rm -f ./envhelp/local.depends
 
 .PHONY: distclean
 distclean: clean
@@ -44,37 +56,41 @@ distclean: clean
 test: dependencies testconfig
 	@$(PYTHON_BIN) -m unittest discover -s tests/
 
+.PHONY: unittest
+unittest: dependencies testconfig
+	@$(LOCAL_PYTHON_BIN) -m unittest discover -s tests/
+
 .PHONY: dependencies
-dependencies: ./envhelp/venv/pyvenv.cfg ./envhelp/py3.depends
+ifeq ($(PY),2)
+dependencies: ./envhelp/local.depends
+else
+dependencies: ./envhelp/venv/pyvenv.cfg ./envhelp/local.depends
+endif
 
 .PHONY: testconfig
 testconfig: ./envhelp/output/testconfs
 
 ./envhelp/output/testconfs:
-	@./envhelp/makeconfig test --python2
+	@./envhelp/makeconfig test --docker
 	@./envhelp/makeconfig test
-	@mkdir -p ./envhelp/output/certs
-	@mkdir -p ./envhelp/output/state
-	@mkdir -p ./envhelp/output/state/log
 
 ifeq ($(MIG_ENV),'local')
-./envhelp/py3.depends: $(REQS_PATH) local-requirements.txt
+./envhelp/local.depends: $(REQS_PATH) local-requirements.txt
 else
-./envhelp/py3.depends: $(REQS_PATH)
+./envhelp/local.depends: $(REQS_PATH)
 endif
-	@rm -f ./envhelp/py3.depends
-	@echo "upgrading venv pip as required for some dependencies"
-	@./envhelp/venv/bin/pip3 install --upgrade pip
 	@echo "installing dependencies from $(REQS_PATH)"
-	@./envhelp/venv/bin/pip3 install -r $(REQS_PATH)
+	@$(LOCAL_PYTHON_BIN) -m pip install -r $(REQS_PATH)
 ifeq ($(MIG_ENV),'local')
 	@echo ""
 	@echo "installing development dependencies"
-	@./envhelp/venv/bin/pip3 install -r local-requirements.txt
+	@$(LOCAL_PYTHON_BIN) -m pip install -r local-requirements.txt
 endif
-	@touch ./envhelp/py3.depends
+	@touch ./envhelp/local.depends
 
 ./envhelp/venv/pyvenv.cfg:
 	@echo "provisioning environment"
 	@/usr/bin/env python3 -m venv ./envhelp/venv
-	@rm -f ./envhelp/py3.depends
+	@rm -f ./envhelp/local.depends
+	@echo "upgrading venv pip as required for some dependencies"
+	@./envhelp/venv/bin/pip3 install --upgrade pip
diff --git a/envhelp/docker/Dockerfile.python2 b/envhelp/docker/Dockerfile.py2
similarity index 100%
rename from envhelp/docker/Dockerfile.python2
rename to envhelp/docker/Dockerfile.py2
diff --git a/envhelp/docker/Dockerfile.py3 b/envhelp/docker/Dockerfile.py3
new file mode 100644
index 000000000..c0aeeb9d6
--- /dev/null
+++ b/envhelp/docker/Dockerfile.py3
@@ -0,0 +1,8 @@
+FROM python:3.9
+
+WORKDIR /usr/src/app
+
+COPY requirements.txt local-requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt -r local-requirements.txt
+
+CMD [ "python", "--version" ]
diff --git a/envhelp/docker/Dockerfile.pyver b/envhelp/docker/Dockerfile.pyver
new file mode 100644
index 000000000..e90a17ef8
--- /dev/null
+++ b/envhelp/docker/Dockerfile.pyver
@@ -0,0 +1,9 @@
+ARG pyver
+FROM python:${pyver}
+
+WORKDIR /usr/src/app
+
+COPY requirements.txt local-requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt -r local-requirements.txt
+
+CMD [ "python", "--version" ]
diff --git a/envhelp/dpython b/envhelp/dpython
new file mode 100755
index 000000000..cedd5f778
--- /dev/null
+++ b/envhelp/dpython
@@ -0,0 +1,88 @@
+#!/bin/sh
+#
+# --- BEGIN_HEADER ---
+#
+# dpython - wrapper to invoke a containerised python
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+# USA.
+#
+# --- END_HEADER ---
+#
+
+set -e
+
+SCRIPT_PATH=$(realpath "$0")
+SCRIPT_BASE=$(dirname -- "$SCRIPT_PATH")
+MIG_BASE=$(realpath "$SCRIPT_BASE/..")
+
+if [ -n "${PY}" ]; then
+    PYVER="$PY"
+    PYTHON_SUFFIX="py$PY"
+    DOCKER_FILE_SUFFIX="$PYTHON_SUFFIX"
+elif [ -n "${PYVER}" ]; then
+    PY=3
+    PYTHON_SUFFIX="pyver-$PYVER"
+    DOCKER_FILE_SUFFIX="pyver"
+else
+    echo "No python version specified - please supply a PY env var"
+    exit 1
+fi
+
+DOCKER_FILE="$SCRIPT_BASE/docker/Dockerfile.$DOCKER_FILE_SUFFIX"
+DOCKER_IMAGEID_FILE="$SCRIPT_BASE/$PYTHON_SUFFIX.imageid"
+
+# NOTE: portable dynamic lookup with docker as default and fallback to podman
+DOCKER_BIN=$(command -v docker || command -v podman || echo "")
+if [ -z "${DOCKER_BIN}" ]; then
+    echo "No docker binary found - cannot use for python $PY tests"
+    exit 1
+fi
+
+# default PYTHONPATH such that directly executing files in the repo "just works"
+# NOTE: this is hard-coded to the mount point used within the container
+PYTHONPATH='/usr/src/app'
+
+# default any variables for container development
+MIG_ENV=${MIG_ENV:-'docker'}
+
+# determine if the image has changed (printf as 'echo -n' is not portable sh)
+printf '%s' "validating python $PY container.. "
+
+# load a previously written docker image id if present
+IMAGEID_STORED=$(cat "$DOCKER_IMAGEID_FILE" 2>/dev/null || echo "")
+
+IMAGEID=$("$DOCKER_BIN" build -q -f "$DOCKER_FILE" --build-arg "pyver=$PYVER" "$MIG_BASE")
+if [ "$IMAGEID" != "$IMAGEID_STORED" ]; then
+    echo "rebuilt for changes"
+
+    # reset the image id so the next call finds no changes
+    echo "$IMAGEID" > "$DOCKER_IMAGEID_FILE"
+else
+    echo "no changes needed"
+fi
+
+echo "using image id $IMAGEID"
+
+# execute python within the image passing the supplied arguments unmodified
+
+"$DOCKER_BIN" run -it --rm \
+    --mount "type=bind,source=$MIG_BASE,target=/usr/src/app" \
+    --env "PYTHONPATH=$PYTHONPATH" \
+    --env "MIG_ENV=$MIG_ENV" \
+    "$IMAGEID" "python$PY" "$@"
diff --git a/envhelp/lpython b/envhelp/lpython
new file mode 100755
index 000000000..9d42a83d3
--- /dev/null
+++ b/envhelp/lpython
@@ -0,0 +1,46 @@
+#!/bin/sh
+#
+# --- BEGIN_HEADER ---
+#
+# lpython - wrapper to invoke a local python (default: the python3 venv)
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+# USA.
+#
+# --- END_HEADER ---
+#
+
+set -e
+
+SCRIPT_PATH=$(realpath "$0")
+SCRIPT_BASE=$(dirname -- "$SCRIPT_PATH")
+MIG_BASE=$(realpath "$SCRIPT_BASE/..")
+
+PYTHON_BIN=${PYTHON_BIN:-"$SCRIPT_BASE/venv/bin/python3"}
+if [ ! -f "${PYTHON_BIN}" ]; then
+    echo "No python binary found - perhaps the virtual env was not created"
+    exit 1
+fi
+
+# default PYTHONPATH such that directly executing files in the repo "just works"
+PYTHONPATH=${PYTHONPATH:-"$MIG_BASE"}
+
+# default any variables for local development
+MIG_ENV=${MIG_ENV:-'local'}
+
+PYTHONPATH="$PYTHONPATH" MIG_ENV="$MIG_ENV" "$PYTHON_BIN" "$@"
diff --git a/envhelp/makeconfig.py b/envhelp/makeconfig.py
index 703b62e07..0d1941b83 100644
--- a/envhelp/makeconfig.py
+++ b/envhelp/makeconfig.py
@@ -38,8 +38,9 @@
 
 from mig.shared.install import MIG_BASE, generate_confs
 
-_LOCAL_ENVHELP_OUTPUT_DIR = os.path.realpath(
-    os.path.join(os.path.dirname(__file__), "output"))
+_LOCAL_MIG_BASE = os.path.normpath(
+    os.path.join(os.path.dirname(__file__), ".."))
+_LOCAL_ENVHELP_OUTPUT_DIR = os.path.join(_LOCAL_MIG_BASE, "envhelp/output")
 _MAKECONFIG_ALLOWED = ["local", "test"]
 
 
@@ -51,21 +52,27 @@ def _at(sequence, index=-1, default=None):
         return default
 
 
-def write_testconfig(env_name, is_py2=False):
-    confs_name = 'confs' if env_name == 'local' else '%sconfs' % (env_name,)
-    confs_suffix = 'py2' if is_py2 else 'py3'
+def write_testconfig(env_name, is_docker=False):
+    is_predefined = env_name == 'test'
+    confs_name = '%sconfs' % (env_name,)
+    if is_predefined:
+        confs_suffix = 'docker' if is_docker else 'local'
+    else:
+        confs_suffix = 'py3'
 
     overrides = {
         'destination': os.path.join(_LOCAL_ENVHELP_OUTPUT_DIR, confs_name),
         'destination_suffix': "-%s" % (confs_suffix,),
     }
 
-    # determine the paths by which we will access the various configured dirs
-    if is_py2:
+    # determine the paths by which we will access the various configured dirs
+    # (rooted in the tests output dir, or its container mount under docker)
+
+    if is_predefined and is_docker:
         env_mig_base = '/usr/src/app'
     else:
-        env_mig_base = MIG_BASE
-    conf_dir_path = os.path.join(env_mig_base, "envhelp/output")
+        env_mig_base = _LOCAL_MIG_BASE
+    conf_dir_path = os.path.join(env_mig_base, "tests/output")
 
     overrides.update(**{
         'mig_code': os.path.join(conf_dir_path, 'mig'),
@@ -85,7 +92,7 @@ def write_testconfig(env_name, is_py2=False):
 
 def main_(argv):
     env_name = _at(argv, index=1, default='')
-    arg_is_py2 = '--python2' in argv
+    arg_is_docker = '--docker' in argv
 
     if env_name == '':
         raise RuntimeError(
@@ -94,7 +101,7 @@ def main_(argv):
         raise RuntimeError('environment must be one of %s' %
                            (_MAKECONFIG_ALLOWED,))
 
-    write_testconfig(env_name, is_py2=arg_is_py2)
+    write_testconfig(env_name, is_docker=arg_is_docker)
 
 
 def main(argv=sys.argv):
diff --git a/envhelp/python2 b/envhelp/python2
index c0c0351a5..d0b0d04a8 100755
--- a/envhelp/python2
+++ b/envhelp/python2
@@ -29,46 +29,5 @@ set -e
 
 SCRIPT_PATH=$(realpath "$0")
 SCRIPT_BASE=$(dirname -- "$SCRIPT_PATH")
-DOCKER_BASE="$SCRIPT_BASE/docker"
-DOCKER_IMAGEID_FILE="$SCRIPT_BASE/py2.imageid"
-# NOTE: portable dynamic lookup with docker as default and fallback to podman
-DOCKER_BIN=$(command -v docker || command -v podman || echo "")
 
-if [ -z "${DOCKER_BIN}" ]; then
-    echo "No docker binary found - cannot use for python2 tests"
-    exit 1
-fi
-
-# default PYTHONPATH such that directly executing files in the repo "just works"
-# NOTE: this is hard-coded to the mount point used within the container
-PYTHONPATH='/usr/app/src'
-
-# default any variables for local development
-MIG_ENV=${MIG_ENV:-'local'}
-
-# determine if the image has changed
-echo -n "validating container.. "
-
-# load a previously written docker image id if present
-IMAGEID_STORED=$(cat "$DOCKER_IMAGEID_FILE" 2>/dev/null || echo "")
-
-IMAGEID=$(${DOCKER_BIN} build -f "$DOCKER_BASE/Dockerfile.python2" . -q)
-if [ "$IMAGEID" != "$IMAGEID_STORED" ]; then
-    echo "rebuilt for changes"
-
-    # reset the image id so the next call finds no changes
-    echo "$IMAGEID" > "$DOCKER_IMAGEID_FILE"
-else
-    echo "no changes needed"
-fi
-
-echo "running with MIG_ENV='$MIG_ENV' under python 2"
-echo
-
-# execute python2 within the image passing the supplied arguments
-
-${DOCKER_BIN} run -it --rm \
-    --mount type=bind,source=.,target=/usr/src/app \
-    --env "PYTHONPATH=$PYTHON_PATH" \
-    --env "MIG_ENV=$MIG_ENV" \
-    "$IMAGEID" python2 "$@"
+PY=2 $SCRIPT_BASE/dpython "$@"
diff --git a/envhelp/python3 b/envhelp/python3
index 309356a03..9584a6dc5 100755
--- a/envhelp/python3
+++ b/envhelp/python3
@@ -2,7 +2,7 @@
 #
 # --- BEGIN_HEADER ---
 #
-# python3 - wrap python3 virtual environment for testing
+# python3 - wrap python3 docker container for testing
 # Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
 #
 # This file is part of MiG.
@@ -29,21 +29,5 @@ set -e
 
 SCRIPT_PATH=$(realpath "$0")
 SCRIPT_BASE=$(dirname -- "$SCRIPT_PATH")
-MIG_BASE=$(realpath "$SCRIPT_BASE/..")
-PYTHON3_BIN="$SCRIPT_BASE/venv/bin/python3"
 
-if [ ! -f "${PYTHON3_BIN}" ]; then
-    echo "No python3 binary found - perhaps the virtual env was not created"
-    exit 1
-fi
-
-# default PYTHONPATH such that directly executing files in the repo "just works"
-PYTHONPATH=${PYTHONPATH:-"$MIG_BASE"}
-
-# default any variables for local development
-MIG_ENV=${MIG_ENV:-'local'}
-
-echo "running with MIG_ENV='$MIG_ENV' under python 3"
-echo
-
-PYTHONPATH="$PYTHONPATH" MIG_ENV="$MIG_ENV" "$PYTHON3_BIN" "$@"
+PY=3 $SCRIPT_BASE/dpython "$@"
diff --git a/mig/unittest/testcore.py b/mig/unittest/testcore.py
index 34998b8db..8944fd45b 100644
--- a/mig/unittest/testcore.py
+++ b/mig/unittest/testcore.py
@@ -42,18 +42,17 @@
     invisible_path, allow_script, brief_list
 
 
-_LOCAL_MIG_BASE = '/usr/src/app' if PY2 else MIG_BASE # account for execution in container
-_PYTHON_MAJOR = '2' if PY2 else '3'
-_TEST_CONF_DIR = os.path.join(MIG_BASE, "envhelp/output/testconfs-py%s" % (_PYTHON_MAJOR,))
-_TEST_CONF_FILE = os.path.join(_TEST_CONF_DIR, "MiGserver.conf")
+_TEST_CONF_FILE = os.environ['MIG_CONF']
+_TEST_CONF_DIR = os.path.dirname(_TEST_CONF_FILE)
 _TEST_CONF_SYMLINK = os.path.join(MIG_BASE, "envhelp/output/testconfs")
 
 
 def _assert_local_config():
     try:
-        link_stat = os.lstat(_TEST_CONF_SYMLINK)
-        assert stat.S_ISLNK(link_stat.st_mode)
-        configdir_stat = os.stat(_TEST_CONF_DIR)
+        #link_stat = os.lstat(_TEST_CONF_SYMLINK)
+        #assert stat.S_ISLNK(link_stat.st_mode)
+        _test_conf_dir = os.path.dirname(_TEST_CONF_DIR)
+        configdir_stat = os.stat(_test_conf_dir)
         assert stat.S_ISDIR(configdir_stat.st_mode)
         config = ConfigParser()
         config.read([_TEST_CONF_FILE])
@@ -67,23 +66,16 @@ def _assert_local_config_global_values(config):
 
     for path in ('mig_path', 'certs_path', 'state_path'):
         path_value = config_global_values.get(path)
-        if not is_path_within(path_value, start=_LOCAL_MIG_BASE):
+        if not is_path_within(path_value, start=MIG_BASE):
             raise AssertionError('local config contains bad path: %s=%s' % (path, path_value))
 
     return config_global_values
 
 
-def main(_exit=sys.exit):
+def main(configuration, _exit=sys.exit):
     config = _assert_local_config()
     config_global_values = _assert_local_config_global_values(config)
 
-    from mig.shared.conf import get_configuration_object
-    configuration = get_configuration_object(_TEST_CONF_FILE, skip_log=True,
-                                             disable_auth_log=True)
-    logging.basicConfig(filename=None, level=logging.INFO,
-                        format="%(asctime)s %(levelname)s %(message)s")
-    configuration.logger = logging
-
     print("Running unit test on shared core functions ..")
 
     short_alias = 'email'
@@ -192,4 +184,5 @@ def main(_exit=sys.exit):
     _exit(0)
 
 if __name__ == "__main__":
-    main()
+    from mig.shared.conf import get_configuration_object
+    main(get_configuration_object())
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29bb..bcec2ab8a 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,10 @@
+def _print_identity():
+    """Announce the active MIG_ENV and Python version for this test run."""
+    import os
+    import sys
+    version = sys.version.split(' ')[0]
+    env_name = os.environ.get('MIG_ENV', 'local')
+    print("running with MIG_ENV='%s' under Python %s" % (env_name, version))
+    print("")
+
+_print_identity()
diff --git a/tests/support/__init__.py b/tests/support/__init__.py
index 499ea005c..422182b4a 100644
--- a/tests/support/__init__.py
+++ b/tests/support/__init__.py
@@ -42,27 +42,30 @@
 
 from tests.support.configsupp import FakeConfiguration
 from tests.support.suppconst import MIG_BASE, TEST_BASE, TEST_FIXTURE_DIR, \
-    TEST_OUTPUT_DIR, TEST_DATA_DIR
+    TEST_DATA_DIR, TEST_OUTPUT_DIR, ENVHELP_OUTPUT_DIR
 
-PY2 = (sys.version_info[0] == 2)
+from tests.support._env import MIG_ENV, PY2
 
-# force defaults to a local environment
-os.environ['MIG_ENV'] = 'local'
+# Provide access to a configuration file for the active environment.
 
-# expose the configured environment as a constant
-MIG_ENV = os.environ['MIG_ENV']
-
-if MIG_ENV == 'local':
-    # force testconfig as the conig file path
-    is_py2 = PY2
-    _conf_dir_suffix = "-py%s" % ('2' if is_py2 else '3',)
-    _conf_dir = "testconfs%s" % (_conf_dir_suffix,)
-    _local_conf = os.path.join(
-        MIG_BASE, 'envhelp/output', _conf_dir, 'MiGserver.conf')
+if MIG_ENV in ('local', 'docker'):
+    # force local testconfig
+    _output_dir = os.path.join(MIG_BASE, 'envhelp/output')
+    _conf_dir_name = "testconfs-%s" % (MIG_ENV,)
+    _conf_dir = os.path.join(_output_dir, _conf_dir_name)
+    _local_conf = os.path.join(_conf_dir, 'MiGserver.conf')
     _config_file = os.getenv('MIG_CONF', None)
     if _config_file is None:
         os.environ['MIG_CONF'] = _local_conf
 
+    # adjust the link through which confs are accessed to suit the environment
+    _conf_link = os.path.join(_output_dir, 'testconfs')
+    assert os.path.lexists(_conf_link) # it must already exist
+    os.remove(_conf_link)              # blow it away
+    os.symlink(_conf_dir, _conf_link)  # recreate it using the active MIG_BASE
+else:
+    raise NotImplementedError()
+
 # All MiG related code will at some point include bits from the mig module
 # namespace. Rather than have this knowledge spread through every test file,
 # make the sole responsbility of test files to find the support file and
@@ -75,7 +78,10 @@
     os.mkdir(TEST_OUTPUT_DIR)
 except EnvironmentError as enverr:
     if enverr.errno == errno.EEXIST:  # FileExistsError
-        shutil.rmtree(TEST_OUTPUT_DIR)
+        try:
+            shutil.rmtree(TEST_OUTPUT_DIR)
+        except Exception as exc:
+            raise
         os.mkdir(TEST_OUTPUT_DIR)
 
 # Exports to expose at the top level from the support library.
@@ -146,7 +152,11 @@ def tearDown(self):
             if os.path.islink(path):
                 os.remove(path)
             elif os.path.isdir(path):
-                shutil.rmtree(path)
+                try:
+                    shutil.rmtree(path)
+                except Exception as exc:
+                    print(path)
+                    raise
             elif os.path.exists(path):
                 os.remove(path)
             else:
@@ -164,6 +174,11 @@ def before_each(self):
     def _register_check(self, check_callable):
         self._cleanup_checks.append(check_callable)
 
+    def _register_path(self, cleanup_path):
+        assert os.path.isabs(cleanup_path)
+        self._cleanup_paths.add(cleanup_path)
+        return cleanup_path
+
     def _reset_logging(self, stream):
         root_logger = logging.getLogger()
         root_handler = root_logger.handlers[0]
@@ -188,11 +203,26 @@ def _provide_configuration(self):
     @property
     def configuration(self):
         """Init a fake configuration if not already done"""
-        if self._configuration is None:
-            configuration_to_make = self._provide_configuration()
-            self._configuration = self._make_configuration_instance(
-                configuration_to_make)
-        return self._configuration
+
+        if self._configuration is not None:
+            return self._configuration
+
+        configuration_to_make = self._provide_configuration()
+        configuration_instance = self._make_configuration_instance(
+            configuration_to_make)
+
+        if configuration_to_make == 'testconfig':
+            # use the paths defined by the loaded configuration to create
+            # the directories which are expected to be present by the code
+            os.mkdir(self._register_path(configuration_instance.certs_path))
+            os.mkdir(self._register_path(configuration_instance.state_path))
+            log_path = os.path.join(configuration_instance.state_path, "log")
+            os.mkdir(self._register_path(log_path))
+
+        self._configuration = configuration_instance
+
+        return configuration_instance
+
 
     @property
     def logger(self):
@@ -361,18 +391,31 @@ def fixturepath(relative_path):
     return tmp_path
 
 
-def temppath(relative_path, test_case, ensure_dir=False, skip_clean=False,
-             skip_output_anchor=False):
+def temppath(relative_path, test_case, ensure_dir=False, skip_clean=False):
     """Register relative_path as a temp path and schedule automatic clean up
     after unit tests unless skip_clean is set. Anchors the temp path in
     internal test output dir unless skip_output_anchor is set. Returns
     resulting temp path.
     """
     assert isinstance(test_case, MigTestCase)
-    if not skip_output_anchor:
-        tmp_path = os.path.join(TEST_OUTPUT_DIR, relative_path)
-    else:
+
+    if os.path.isabs(relative_path):
+        # the only permitted paths are those within the output directory set
+        # aside for execution of the test suite: this will be enforced below
+        # so effectively submit the supplied path for scrutiny
         tmp_path = relative_path
+    else:
+        tmp_path = os.path.join(TEST_OUTPUT_DIR, relative_path)
+
+    # failsafe: accept only paths rooted in a known output dir (keep a tuple!)
+    is_tmp_path_within_safe_dir = False
+    for start in (TEST_OUTPUT_DIR, ENVHELP_OUTPUT_DIR):
+        is_tmp_path_within_safe_dir = is_path_within(tmp_path, start=start)
+        if is_tmp_path_within_safe_dir:
+            break
+    if not is_tmp_path_within_safe_dir:
+        raise AssertionError("ABORT: corrupt test path=%s" % (tmp_path,))
+
     if ensure_dir:
         try:
             os.mkdir(tmp_path)
diff --git a/tests/support/_env.py b/tests/support/_env.py
new file mode 100644
index 000000000..2c71386a4
--- /dev/null
+++ b/tests/support/_env.py
@@ -0,0 +1,11 @@
+import os
+import sys
+
+# expose the configured environment as a constant
+MIG_ENV = os.environ.get('MIG_ENV', 'local')
+
+# force the chosen environment globally
+os.environ['MIG_ENV'] = MIG_ENV
+
+# expose a boolean indicating whether we are executing on Python 2
+PY2 = (sys.version_info[0] == 2)
diff --git a/tests/support/suppconst.py b/tests/support/suppconst.py
index 15912e933..148303f0d 100644
--- a/tests/support/suppconst.py
+++ b/tests/support/suppconst.py
@@ -27,11 +27,33 @@
 
 import os
 
+from tests.support._env import MIG_ENV
 
-# Use abspath for __file__ on Py2
-_SUPPORT_DIR = os.path.dirname(os.path.abspath(__file__))
-TEST_BASE = os.path.normpath(os.path.join(_SUPPORT_DIR, ".."))
+if MIG_ENV == 'local':
+    # Use abspath for __file__ on Py2
+    _SUPPORT_DIR = os.path.dirname(os.path.abspath(__file__))
+elif MIG_ENV == 'docker':
+    _SUPPORT_DIR = '/usr/src/app/tests/support'
+else:
+    raise NotImplementedError("ABORT: unsupported environment: %s" % (MIG_ENV,))
+
+MIG_BASE = os.path.realpath(os.path.join(_SUPPORT_DIR, "../.."))
+TEST_BASE = os.path.join(MIG_BASE, "tests")
 TEST_DATA_DIR = os.path.join(TEST_BASE, "data")
 TEST_FIXTURE_DIR = os.path.join(TEST_BASE, "fixture")
 TEST_OUTPUT_DIR = os.path.join(TEST_BASE, "output")
-MIG_BASE = os.path.realpath(os.path.join(TEST_BASE, ".."))
+ENVHELP_DIR = os.path.join(MIG_BASE, "envhelp")
+ENVHELP_OUTPUT_DIR = os.path.join(ENVHELP_DIR, "output")
+
+
+if __name__ == '__main__':
+    def print_root_relative(prefix, path):
+        print("%s = <root>/%s" % (prefix, os.path.relpath(path, MIG_BASE)))
+
+    print("# base paths")
+    print("root=%s" % (MIG_BASE,))
+    print("# envhelp paths")
+    print_root_relative("output", ENVHELP_OUTPUT_DIR)
+    print("# test paths")
+    print_root_relative("fixture", TEST_FIXTURE_DIR)
+    print_root_relative("output", TEST_OUTPUT_DIR)
diff --git a/tests/test_booleans.py b/tests/test_booleans.py
index 3c37c1ce3..5246197ee 100644
--- a/tests/test_booleans.py
+++ b/tests/test_booleans.py
@@ -1,11 +1,14 @@
 from __future__ import print_function
-import sys
 
-from unittest import TestCase
+from tests.support import MigTestCase, testmain
 
-class TestBooleans(TestCase):
+class TestBooleans(MigTestCase):
     def test_true(self):
         self.assertEqual(True, True)
 
     def test_false(self):
         self.assertEqual(False, False)
+
+
+if __name__ == '__main__':
+    testmain()
diff --git a/tests/test_mig_shared_functionality_cat.py b/tests/test_mig_shared_functionality_cat.py
index b7edaab5a..e02af8896 100644
--- a/tests/test_mig_shared_functionality_cat.py
+++ b/tests/test_mig_shared_functionality_cat.py
@@ -75,19 +75,20 @@ def before_each(self):
         test_user_dir = os.path.join(conf_user_home, test_client_dir)
 
         # ensure a user db that includes our test user
+
         conf_user_db_home = ensure_dirs_exist(self.configuration.user_db_home)
-        temppath(conf_user_db_home, self, skip_output_anchor=True)
+        temppath(conf_user_db_home, self)
         db_fixture, db_fixture_file = fixturefile('MiG-users.db--example',
                                                   fixture_format='binary',
                                                   include_path=True)
         test_db_file = temppath(fixturefile_normname('MiG-users.db--example',
                                                      prefix=conf_user_db_home),
-                                self, skip_output_anchor=True)
+                                self)
         shutil.copyfile(db_fixture_file, test_db_file)
 
         # create the test user home directory
         self.test_user_dir = ensure_dirs_exist(test_user_dir)
-        temppath(self.test_user_dir, self, skip_output_anchor=True)
+        temppath(self.test_user_dir, self)
         self.test_environ = create_http_environ(self.configuration)
 
     def assertSingleOutputObject(self, output_objects, with_object_type=None):
diff --git a/tests/test_mig_unittest_testcore.py b/tests/test_mig_unittest_testcore.py
index a7621812a..b27a74d33 100644
--- a/tests/test_mig_unittest_testcore.py
+++ b/tests/test_mig_unittest_testcore.py
@@ -38,6 +38,9 @@
 
 class MigUnittestTestcore(MigTestCase):
 
+    def _provide_configuration(self):
+        return 'testconfig'
+
     def test_existing_main(self):
         def raise_on_error_exit(exit_code, identifying_message=None):
             if exit_code != 0:
@@ -48,7 +51,7 @@ def raise_on_error_exit(exit_code, identifying_message=None):
 
         print("") # account for wrapped tests printing to console
 
-        testcore_main(_exit=raise_on_error_exit)
+        testcore_main(self.configuration, _exit=raise_on_error_exit)
 
 
 if __name__ == '__main__':

From b0e32043d56088122ba150651d252f3d08fdef17 Mon Sep 17 00:00:00 2001
From: Alex Burke <albu@di.ku.dk>
Date: Fri, 18 Oct 2024 14:39:28 +0200
Subject: [PATCH 2/5] Use the correct force_utf8 function based on Python
 version.

Import the experimental branch version of force_utf8 wholesale adding a
_py(2|3) suffix and expose the correct implementation dependent on PY2.

Include forcing InputException messages to a native string as is done in
experimental (also taken directly from that branch) which ensures the
exception message, which may be unicode, becomes a string everywhere.
---
 mig/shared/base.py            | 28 +++++++++++++++-
 mig/shared/safeinput.py       |  4 +--
 tests/test_mig_shared_base.py | 60 +++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_mig_shared_base.py

diff --git a/mig/shared/base.py b/mig/shared/base.py
index 64f12b370..b21d4ae6f 100644
--- a/mig/shared/base.py
+++ b/mig/shared/base.py
@@ -36,6 +36,7 @@
 import re
 
 # IMPORTANT: do not import any other MiG modules here - to avoid import loops
+from mig.shared.compat import PY2
 from mig.shared.defaults import default_str_coding, default_fs_coding, \
     keyword_all, keyword_auto, sandbox_names, _user_invisible_files, \
     _user_invisible_dirs, _vgrid_xgi_scripts, cert_field_order, csrf_field, \
@@ -496,7 +497,7 @@ def is_unicode(val):
     return (type(u"") == type(val))
 
 
-def force_utf8(val, highlight=''):
+def _force_utf8_py2(val, highlight=''):
     """Internal helper to encode unicode strings to utf8 version. Actual
     changes are marked out with the highlight string if given.
     """
@@ -507,6 +508,31 @@ def force_utf8(val, highlight=''):
         return val
     return "%s%s%s" % (highlight, val.encode("utf8"), highlight)
 
+def _force_utf8_py3(val, highlight='', stringify=True):
+    """Internal helper to encode unicode strings to utf8 version. Actual
+    changes are marked out with the highlight string if given.
+    The optional stringify turns ALL values including numbers into string.
+    """
+    # We run into all kind of nasty encoding problems if we mix
+    if not isinstance(val, basestring):
+        if stringify:
+            val = "%s" % val
+        else:
+            return val
+    if not is_unicode(val):
+        return val
+    if is_unicode(highlight):
+        hl_utf = highlight.encode("utf8")
+    else:
+        hl_utf = highlight
+    return (b"%s%s%s" % (hl_utf, val.encode("utf8"), hl_utf))
+
+
+if PY2:
+    force_utf8 = _force_utf8_py2
+else:
+    force_utf8 = _force_utf8_py3
+
 
 def force_utf8_rec(input_obj, highlight=''):
     """Recursive object conversion from unicode to utf8: useful to convert e.g.
diff --git a/mig/shared/safeinput.py b/mig/shared/safeinput.py
index 592250755..e91937d8c 100644
--- a/mig/shared/safeinput.py
+++ b/mig/shared/safeinput.py
@@ -58,7 +58,7 @@
     from html import escape as escape_html
 assert escape_html is not None
 
-from mig.shared.base import force_unicode, force_utf8
+from mig.shared.base import force_unicode, force_native_str
 from mig.shared.defaults import src_dst_sep, username_charset, \
     username_max_length, session_id_charset, session_id_length, \
     subject_id_charset, subject_id_min_length, subject_id_max_length, \
@@ -2294,7 +2294,7 @@ def __init__(self, value):
     def __str__(self):
         """Return string representation"""
 
-        return force_utf8(force_unicode(self.value))
+        return force_native_str(self.value)
 
 
 def main(_exit=sys.exit, _print=print):
diff --git a/tests/test_mig_shared_base.py b/tests/test_mig_shared_base.py
new file mode 100644
index 000000000..82145cb20
--- /dev/null
+++ b/tests/test_mig_shared_base.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+#
+# --- BEGIN_HEADER ---
+#
+# test_mig_shared_base - unit test of the corresponding mig shared module
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+# USA.
+#
+# --- END_HEADER ---
+#
+
+"""Unit test base functions"""
+
+import binascii
+import codecs
+import os
+import sys
+
+from tests.support import PY2, MigTestCase, testmain
+
+from mig.shared.base import force_utf8
+
+DUMMY_STRING = "foo bÆr baz"
+DUMMY_UNICODE = u'UniCode123½¾µßðþđŋħĸþł@ª€£$¥©®'
+
+
+class MigSharedBase(MigTestCase):
+    """Unit tests of functions within the mig.shared.base module."""
+
+    def test_force_utf8_on_string(self):
+        actual = force_utf8(DUMMY_STRING)
+
+        self.assertIsInstance(actual, bytes)
+        self.assertEqual(binascii.hexlify(actual), b'666f6f2062c386722062617a')
+
+    def test_force_utf8_on_unicode(self):
+        actual = force_utf8(DUMMY_UNICODE)
+
+        self.assertIsInstance(actual, bytes)
+        self.assertEqual(actual, codecs.encode(DUMMY_UNICODE, 'utf8'))
+
+
+if __name__ == '__main__':
+    testmain()

From a186fa57b61c5251003a5abef9eb76e822935ee7 Mon Sep 17 00:00:00 2001
From: Alex Burke <albu@di.ku.dk>
Date: Wed, 4 Dec 2024 14:52:57 +0100
Subject: [PATCH 3/5] wsgisupp: implement support code for the testing of WSGI
 handlers/servers

---
 local-requirements.txt    |   1 +
 tests/support/wsgisupp.py | 156 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 tests/support/wsgisupp.py

diff --git a/local-requirements.txt b/local-requirements.txt
index da83dd4b7..7faf3b026 100644
--- a/local-requirements.txt
+++ b/local-requirements.txt
@@ -7,3 +7,4 @@ autopep8;python_version >= "3"
 # NOTE: paramiko-3.0.0 dropped python2 and python3.6 support
 paramiko;python_version >= "3.7"
 paramiko<3;python_version < "3.7"
+werkzeug
diff --git a/tests/support/wsgisupp.py b/tests/support/wsgisupp.py
new file mode 100644
index 000000000..d4f54e96f
--- /dev/null
+++ b/tests/support/wsgisupp.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+#
+# --- BEGIN_HEADER ---
+#
+# wsgisupp - test support library for WSGI
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+# -- END_HEADER ---
+#
+
+from collections import namedtuple
+import codecs
+from io import BytesIO
+from werkzeug.datastructures import MultiDict
+
+from tests.support._env import PY2
+
+if PY2:
+    from urllib import urlencode
+    from urlparse import urlparse
+else:
+    from urllib.parse import urlencode, urlparse
+
+"""Test support library for WSGI."""
+
+
+_PreparedWsgi = namedtuple('_PreparedWsgi', ['environ', 'start_response'])
+
+
+class FakeWsgiStartResponse:
+    """Glue object that conforms to the same interface as the start_response()
+       in the WSGI specs but records the calls to it such that they can be
+       inspected and, for our purposes, asserted against."""
+
+    def __init__(self):
+        self.calls = []
+
+    def __call__(self, status, headers, exc=None):
+        self.calls.append((status, headers, exc))
+
+
+def create_wsgi_environ(configuration, wsgi_url, method='GET', query=None, headers=None, form=None):
+    """Build a minimal WSGI environ dict describing a request to wsgi_url."""
+    parsed_url = urlparse(wsgi_url)
+    if query:
+        method = 'GET'
+
+        request_query = urlencode(query)
+        wsgi_input = ()
+    elif form:
+        method = 'POST'
+        request_query = ''
+
+        body = urlencode(MultiDict(form)).encode('ascii')
+
+        headers = headers or {}
+        if 'Content-Type' not in headers:
+            headers['Content-Type'] = 'application/x-www-form-urlencoded'
+
+        headers['Content-Length'] = str(len(body))
+        wsgi_input = BytesIO(body)
+    else:
+        request_query = parsed_url.query
+        wsgi_input = ()
+
+    environ = {}
+    environ['wsgi.input'] = wsgi_input
+    environ['wsgi.url_scheme'] = parsed_url.scheme
+    environ['wsgi.version'] = (1, 0)
+    environ['MIG_CONF'] = configuration.config_file
+    environ['HTTP_HOST'] = parsed_url.netloc
+    environ['PATH_INFO'] = parsed_url.path
+    environ['QUERY_STRING'] = request_query
+    environ['REQUEST_METHOD'] = method
+    environ['SCRIPT_URI'] = ''.join(('http://', environ['HTTP_HOST'], environ['PATH_INFO']))
+
+    if headers:
+        for k, v in headers.items():
+            header_key = k.replace('-', '_').upper()
+            # Content-* headers must not be prefixed in WSGI; any other
+            # header is exposed under an HTTP_ prefixed key. The key has to
+            # stay a plain string (a stray trailing comma made it a tuple).
+            if not header_key.startswith('CONTENT'):
+                header_key = "HTTP_%s" % (header_key,)
+            environ[header_key] = v
+
+    return environ
+
+
+def create_wsgi_start_response():
+    return FakeWsgiStartResponse()
+
+
+def prepare_wsgi(configuration, url, **kwargs):
+    return _PreparedWsgi(
+        create_wsgi_environ(configuration, url, **kwargs),
+        create_wsgi_start_response()
+    )
+
+
+def _trigger_and_unpack_result(wsgi_result):
+    chunks = list(wsgi_result)
+    assert len(chunks) > 0, "invocation returned no output"
+    complete_value = b''.join(chunks)
+    decoded_value = codecs.decode(complete_value, 'utf8')
+    return decoded_value
+
+
+class WsgiAssertMixin:
+    """Custom assertions for verifying server code executed under test."""
+
+    def assertWsgiResponse(self, wsgi_result, fake_wsgi, expected_status_code):
+        """Verify start_response was called exactly once with the expected
+        status code and return the decoded content and the headers dict."""
+        assert isinstance(fake_wsgi, _PreparedWsgi)
+        content = _trigger_and_unpack_result(wsgi_result)
+        def called_once(fake):
+            assert hasattr(fake, 'calls')
+            return len(fake.calls) == 1
+
+        fake_start_response = fake_wsgi.start_response
+
+        try:
+            self.assertTrue(called_once(fake_start_response))
+        except AssertionError:
+            if len(fake_start_response.calls) == 0:
+                raise AssertionError("WSGI handler did not respond")
+            else:
+                raise AssertionError("WSGI handler responded more than once")
+
+        wsgi_call = fake_start_response.calls[0]
+
+        # check for expected HTTP status code
+        wsgi_status = wsgi_call[0]
+        actual_status_code = int(wsgi_status[0:3])
+        self.assertEqual(actual_status_code, expected_status_code)
+
+        headers = dict(wsgi_call[1])
+
+        return content, headers

From 38b00ac204d818d7c20a3fab381e849092e53206 Mon Sep 17 00:00:00 2001
From: Alex Burke <albu@di.ku.dk>
Date: Fri, 26 Jul 2024 16:39:19 +0200
Subject: [PATCH 4/5] Basic coverage of migwsgi.

This PR does the principal things required to allow exercising the
central component responsible for glueing named MiG "functionality"
files to WSGI and have the result execute to completion under Python 3.

Included is a small tactical change to allow the structural force
recursive functions to iterate tuples preserving their type such that
subsequent output behaves correctly under Py3.

=======

wip

fixup

fixup

update and relocate a comment

start tightening up the code

shift things around a little

work to make it readable with a nod towards further tests

assert the response status

allow programming the response

repair previous

assert that a programmed title ends up in the page

line up naming with other recent work in grid_openid

fixup

fixup

fixup

fixup

split the testing infrastructure across multiple files

collect common default kwargs

use noop for set environ

make the generic WSGI handling setup code more uniform

bring over improvements to htmlsupp from another branch

simplify

fixup
---
 mig/shared/base.py           |  10 ++-
 mig/wsgi-bin/migwsgi.py      |  78 +++++++++++-------
 tests/support/htmlsupp.py    |  84 +++++++++++++++++++
 tests/support/wsgibinsupp.py | 153 +++++++++++++++++++++++++++++++++++
 tests/test_mig_wsgi-bin.py   | 127 +++++++++++++++++++++++++++++
 5 files changed, 420 insertions(+), 32 deletions(-)
 create mode 100644 tests/support/htmlsupp.py
 create mode 100644 tests/support/wsgibinsupp.py
 create mode 100644 tests/test_mig_wsgi-bin.py

diff --git a/mig/shared/base.py b/mig/shared/base.py
index b21d4ae6f..372c9bc30 100644
--- a/mig/shared/base.py
+++ b/mig/shared/base.py
@@ -542,8 +542,9 @@ def force_utf8_rec(input_obj, highlight=''):
     if isinstance(input_obj, dict):
         return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in
                 input_obj.items()}
-    elif isinstance(input_obj, list):
-        return [force_utf8_rec(i, highlight) for i in input_obj]
+    elif isinstance(input_obj, (list, tuple)):
+        thetype = type(input_obj)
+        return thetype(force_utf8_rec(i, highlight) for i in input_obj)
     elif is_unicode(input_obj):
         return force_utf8(input_obj, highlight)
     else:
@@ -570,8 +571,9 @@ def force_unicode_rec(input_obj, highlight=''):
     if isinstance(input_obj, dict):
         return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in
                 input_obj.items()}
-    elif isinstance(input_obj, list):
-        return [force_unicode_rec(i, highlight) for i in input_obj]
+    elif isinstance(input_obj, (list, tuple)):
+        thetype = type(input_obj)
+        return thetype(force_unicode_rec(i, highlight) for i in input_obj)
     elif not is_unicode(input_obj):
         return force_unicode(input_obj, highlight)
     else:
diff --git a/mig/wsgi-bin/migwsgi.py b/mig/wsgi-bin/migwsgi.py
index 73987133e..f1f387476 100755
--- a/mig/wsgi-bin/migwsgi.py
+++ b/mig/wsgi-bin/migwsgi.py
@@ -34,7 +34,7 @@
 from mig.shared import returnvalues
 from mig.shared.bailout import bailout_helper, crash_helper, compact_string
 from mig.shared.base import requested_backend, allow_script, \
-    is_default_str_coding, force_default_str_coding_rec
+    is_default_str_coding, force_default_str_coding_rec, force_utf8
 from mig.shared.defaults import download_block_size, default_fs_coding
 from mig.shared.conf import get_configuration_object
 from mig.shared.objecttypes import get_object_type_info
@@ -43,14 +43,19 @@
 from mig.shared.scriptinput import fieldstorage_to_dict
 
 
+def _import_backend(backend):
+    import_path = 'mig.shared.functionality.%s' % backend
+    module_handle = importlib.import_module(import_path)
+    return module_handle.main
+
+
 def object_type_info(object_type):
     """Lookup object type"""
 
     return get_object_type_info(object_type)
 
 
-def stub(configuration, client_id, import_path, backend, user_arguments_dict,
-         environ):
+def stub(configuration, client_id, user_arguments_dict, environ, _retrieve_handler):
     """Run backend on behalf of client_id with supplied user_arguments_dict.
     I.e. import main from import_path and execute it with supplied arguments.
     """
@@ -61,6 +66,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
     before_time = time.time()
 
     output_objects = []
+    backend = 'UNKNOWN'
     main = dummy_main
 
     # _logger.debug("stub for backend %r" % backend)
@@ -69,10 +75,12 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
     #            NEVER print/output it verbatim before it is validated below.
 
     try:
+        default_page = configuration.site_landing_page # TODO: avoid doing this work a second time
+        backend = requested_backend(environ, fallback=default_page)
         valid_backend_name(backend)
     except InputException as iex:
-        _logger.error("%s refused to import invalid backend %r (%s): %s" %
-                      (_addr, backend, import_path, iex))
+        _logger.error("%s refused to import invalid backend %r: %s" %
+                      (_addr, backend, iex))
         bailout_helper(configuration, backend, output_objects,
                        header_text='User Error')
         output_objects.extend([
@@ -81,18 +89,17 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
             {'object_type': 'link', 'text': 'Go to default interface',
              'destination': configuration.site_landing_page}
         ])
-        return (output_objects, returnvalues.CLIENT_ERROR)
+        return backend, (output_objects, returnvalues.CLIENT_ERROR)
 
     try:
         # Import main from backend module
 
         # _logger.debug("import main from %r" % import_path)
         # NOTE: dynamic module loading to find corresponding main function
-        module_handle = importlib.import_module(import_path)
-        main = module_handle.main
+        main = _retrieve_handler(backend)
     except Exception as err:
-        _logger.error("%s could not import %r (%s): %s" %
-                      (_addr, backend, import_path, err))
+        _logger.error("%s could not import %r: %s" %
+                      (_addr, backend, err))
         bailout_helper(configuration, backend, output_objects)
         output_objects.extend([
             {'object_type': 'error_text', 'text':
@@ -100,22 +107,22 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
             {'object_type': 'link', 'text': 'Go to default interface',
              'destination': configuration.site_landing_page}
         ])
-        return (output_objects, returnvalues.SYSTEM_ERROR)
+        return backend, (output_objects, returnvalues.SYSTEM_ERROR)
 
     # _logger.debug("imported main %s" % main)
 
     # Now backend value is validated to be safe for output
 
     if not isinstance(user_arguments_dict, dict):
-        _logger.error("%s invalid user args %s for %s" % (_addr,
+        _logger.error("%s invalid user args %s for backend %r" % (_addr,
                                                           user_arguments_dict,
-                                                          import_path))
+                                                          backend))
         bailout_helper(configuration, backend, output_objects,
                        header_text='Input Error')
         output_objects.append(
             {'object_type': 'error_text', 'text':
              'User input is not on expected format!'})
-        return (output_objects, returnvalues.INVALID_ARGUMENT)
+        return backend, (output_objects, returnvalues.INVALID_ARGUMENT)
 
     try:
         (output_objects, (ret_code, ret_msg)) = main(client_id,
@@ -125,7 +132,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
         _logger.error("%s script crashed:\n%s" % (_addr,
                                                   traceback.format_exc()))
         crash_helper(configuration, backend, output_objects)
-        return (output_objects, returnvalues.ERROR)
+        return backend, (output_objects, returnvalues.ERROR)
 
     (val_ret, val_msg) = validate(output_objects)
     if not val_ret:
@@ -138,7 +145,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
     after_time = time.time()
     output_objects.append({'object_type': 'timing_info', 'text':
                            "done in %.3fs" % (after_time - before_time)})
-    return (output_objects, (ret_code, ret_msg))
+    return backend, (output_objects, (ret_code, ret_msg))
 
 
 def wrap_wsgi_errors(environ, configuration, max_line_len=100):
@@ -193,6 +200,14 @@ def application(environ, start_response):
     *start_response* is a helper function used to deliver the client response.
     """
 
+    def _set_os_environ(value):
+        os.environ = value
+
+    return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)
+
+
+def _application(configuration, environ, start_response, _set_environ, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=wrap_wsgi_errors, _config_file=None, _skip_log=False):
+
     # NOTE: pass app environ including apache and query args on to sub handlers
     #       through the usual 'os.environ' channel expected in functionality
     #       handlers. Special care is needed to avoid various sub-interpreter
@@ -235,18 +250,20 @@ def application(environ, start_response):
                                                              os_env_value))
 
     # Assign updated environ to LOCAL os.environ for the rest of this session
-    os.environ = environ
+    _set_environ(environ)
 
     # NOTE: redirect stdout to stderr in python 2 only. It breaks logger in 3
     #       and stdout redirection apparently is already handled there.
     if sys.version_info[0] < 3:
         sys.stdout = sys.stderr
 
-    configuration = get_configuration_object()
+    if configuration is None:
+        configuration = get_configuration_object(_config_file, _skip_log)
+
     _logger = configuration.logger
 
     # NOTE: replace default wsgi errors to apache error log with our own logs
-    wrap_wsgi_errors(environ, configuration)
+    _wrap_wsgi_errors(environ, configuration)
 
     for line in env_sync_status:
         _logger.debug(line)
@@ -298,7 +315,6 @@ def application(environ, start_response):
         default_page = configuration.site_landing_page
         script_name = requested_backend(environ, fallback=default_page,
                                         strip_ext=False)
-        backend = requested_backend(environ, fallback=default_page)
         # _logger.debug('DEBUG: wsgi found backend %s and script %s' %
         #              (backend, script_name))
         fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
@@ -307,13 +323,12 @@ def application(environ, start_response):
         if 'output_format' in user_arguments_dict:
             output_format = user_arguments_dict['output_format'][0]
 
-        module_path = 'mig.shared.functionality.%s' % backend
         (allow, msg) = allow_script(configuration, script_name, client_id)
         if allow:
             # _logger.debug("wsgi handling script: %s" % script_name)
-            (output_objs, ret_val) = stub(configuration, client_id,
-                                          module_path, backend,
-                                          user_arguments_dict, environ)
+            backend, (output_objs, ret_val) = stub(configuration, client_id,
+                                          user_arguments_dict, environ,
+                                          _retrieve_handler)
         else:
             _logger.warning("wsgi handling refused script:%s" % script_name)
             (output_objs, ret_val) = reject_main(client_id,
@@ -363,7 +378,7 @@ def application(environ, start_response):
     output_objs.append(wsgi_entry)
 
     _logger.debug("call format %r output to %s" % (backend, output_format))
-    output = format_output(configuration, backend, ret_code, ret_msg,
+    output = _format_output(configuration, backend, ret_code, ret_msg,
                            output_objs, output_format)
     # _logger.debug("formatted %s output to %s" % (backend, output_format))
     # _logger.debug("output:\n%s" % [output])
@@ -372,7 +387,7 @@ def application(environ, start_response):
         _logger.error(
             "Formatted output is NOT on default str coding: %s" % [output[:100]])
         err_mark = '__****__'
-        output = format_output(configuration, backend, ret_code, ret_msg,
+        output = _format_output(configuration, backend, ret_code, ret_msg,
                                force_default_str_coding_rec(
                                    output_objs, highlight=err_mark),
                                output_format)
@@ -396,7 +411,14 @@ def application(environ, start_response):
     # NOTE: send response to client but don't crash e.g. on closed connection
     try:
         start_response(status, response_headers)
+    except IOError as ioe:
+        _logger.warning("WSGI %s for %s could not deliver output: %s" %
+                        (backend, client_id, ioe))
+    except Exception as exc:
+        _logger.error("WSGI %s for %s crashed during response: %s" %
+                      (backend, client_id, exc))
 
+    try:
         # NOTE: we consistently hit download error for archive files reaching ~2GB
         #       with showfreezefile.py on wsgi but the same on cgi does NOT suffer
         #       the problem for the exact same files. It seems wsgi has a limited
@@ -410,12 +432,12 @@ def application(environ, start_response):
             _logger.info("WSGI %s yielding %d output parts (%db)" %
                          (backend, chunk_parts, content_length))
         # _logger.debug("send chunked %r response to client" % backend)
-        for i in xrange(chunk_parts):
+        for i in list(range(chunk_parts)):
             # _logger.debug("WSGI %s yielding part %d / %d output parts" %
             #              (backend, i+1, chunk_parts))
             # end index may be after end of content - but no problem
             part = output[i*download_block_size:(i+1)*download_block_size]
-            yield part
+            yield force_utf8(part)
         if chunk_parts > 1:
             _logger.info("WSGI %s finished yielding all %d output parts" %
                          (backend, chunk_parts))
diff --git a/tests/support/htmlsupp.py b/tests/support/htmlsupp.py
new file mode 100644
index 000000000..61fcadbee
--- /dev/null
+++ b/tests/support/htmlsupp.py
@@ -0,0 +1,84 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# --- BEGIN_HEADER ---
+#
+# htmlsupp - test support library for HTML
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+# -- END_HEADER ---
+#
+
+"""Test support library for HTML."""
+
+
+class HtmlAssertMixin:
+    """Custom assertions for HTML containing strings."""
+
+    def assertHtmlElement(self, value, tag_name):
+        """Check that an occurrence of the specified tag within an HTML input
+        string can be found. Returns the textual content of the first match.
+        """
+
+        self.assertIsValidHtmlDocument(value)
+
+        # TODO: this is definitely a stop-gap way of finding a tag within the HTML
+        #       and is used purely to keep this initial change to a reasonable size.
+
+        tag_open = ''.join(['<', tag_name, '>'])
+        tag_open_index = value.index(tag_open)
+        tag_open_index_after = tag_open_index + len(tag_open)
+
+        tag_close = ''.join(['</', tag_name, '>'])
+        tag_close_index = value.index(tag_close, tag_open_index_after)
+
+        return value[tag_open_index_after:tag_close_index]
+
+    def assertHtmlElementTextContent(self, value, tag_name, expected_text, trim_newlines=True):
+        """Check there is an occurrence of a tag within an HTML input string
+        and check the text it encloses equals exactly the expectation.
+        """
+
+        self.assertIsValidHtmlDocument(value)
+
+        # TODO: this is definitely a stop-gap way of finding a tag within the HTML
+        #       and is used purely to keep this initial change to a reasonable size.
+
+        actual_text = self.assertHtmlElement(value, tag_name)
+        if trim_newlines:
+            actual_text = actual_text.strip('\n')
+        self.assertEqual(actual_text, expected_text)
+
+    def assertIsValidHtmlDocument(self, value):
+        """Check that the input string contains a valid HTML document.
+        """
+
+        assert isinstance(value, type(u"")), "input string was not utf8"
+
+        error = None
+        try:
+            has_doctype = value.startswith("<!DOCTYPE html")
+            assert has_doctype, "no valid document opener"
+            end_html_tag_idx = value.rfind('</html>')
+            maybe_document_end = value[end_html_tag_idx:].rstrip()
+            assert maybe_document_end == '</html>', "no valid document closer"
+        except Exception as exc:
+            error = exc
+        if error:
+            raise AssertionError("failed to verify input string as HTML: %s" % str(error))
diff --git a/tests/support/wsgibinsupp.py b/tests/support/wsgibinsupp.py
new file mode 100644
index 000000000..009e35696
--- /dev/null
+++ b/tests/support/wsgibinsupp.py
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+#
+# --- BEGIN_HEADER ---
+#
+# wsgibinsupp - test support library for WSGI
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+# --- END_HEADER ---
+#
+
+from collections import namedtuple
+import codecs
+from io import BytesIO
+
+from mig.shared.output import format_output
+import mig.shared.returnvalues as returnvalues
+
+
+def _is_return_value(return_value):
+    defined_return_values = returnvalues.__dict__.values()
+    return return_value in defined_return_values
+
+
+def create_instrumented_format_output():
+    def _instrumented_format_output(
+        configuration,
+        backend,
+        ret_val,
+        ret_msg,
+        out_obj,
+        outputformat,
+    ):
+        # record the call args
+        # capture the original before altering it
+        call_args_out_obj = list(out_obj)
+        call_args = (configuration, backend, ret_val, ret_msg,
+                     call_args_out_obj, outputformat,)
+        _instrumented_format_output.calls.append({'args': call_args})
+
+        # FIXME: the following is a workaround for a bug that exists between the WSGI wrapper
+        #        and the output formatter - specifically, the latter adds default header and
+        #        title if start does not exist, but the former ensures that start always exists
+        #        meaning that a default response under WSGI is missing half the HTML.
+        start_obj_idx = next((i for i, obj in enumerate(
+            out_obj) if obj['object_type'] == 'start'))
+        insertion_idx = start_obj_idx
+
+        # FIXME: format_output() is sensitive to ordering and MUST see a title object _before_
+        #        anything else otherwise the preamble ends up written above the header and thus
+        #        an invalid HTML page is served.
+        insertion_idx += 1
+        out_obj.insert(insertion_idx, {
+            'object_type': 'title',
+            'text': _instrumented_format_output.values['title_text'],
+            'meta': '',
+            'style': {},
+            'script': {},
+        })
+
+        insertion_idx += 1
+        out_obj.insert(insertion_idx, {
+            'object_type': 'header',
+            'text': _instrumented_format_output.values['header_text']
+        })
+
+        return format_output(
+            configuration,
+            backend,
+            ret_val,
+            ret_msg,
+            out_obj,
+            outputformat,
+        )
+    _instrumented_format_output.calls = []
+    _instrumented_format_output.values = dict(
+        title_text='',
+        header_text='',
+    )
+
+    def _program_values(**kwargs):
+        _instrumented_format_output.values.update(kwargs)
+
+    _instrumented_format_output.set_values = _program_values
+
+    return _instrumented_format_output
+
+
+def create_instrumented_retrieve_handler():
+    def _simulated_action(*args):
+        return _simulated_action.returning or ([], returnvalues.ERROR)
+    _simulated_action.calls = []
+    _simulated_action.returning = None
+
+    def _program_response(output_objects=None, return_value=None):
+        assert _is_return_value(
+            return_value), "return value must be present in returnvalues"
+        assert isinstance(output_objects, list)
+        _simulated_action.returning = (output_objects, return_value)
+
+    def _instrumented_retrieve_handler(*args):
+        _instrumented_retrieve_handler.calls.append(tuple(args))
+        return _simulated_action
+    _instrumented_retrieve_handler.calls = []
+
+    _instrumented_retrieve_handler.program = _program_response
+    _instrumented_retrieve_handler.simulated = _simulated_action
+
+    return _instrumented_retrieve_handler
+
+
+class WsgibinInstrumentation:
+    def __init__(self):
+        self.format_output = create_instrumented_format_output()
+        self.retrieve_handler = create_instrumented_retrieve_handler()
+
+    def set_response(self, content, returnvalue):
+        self.retrieve_handler.program(content, returnvalue)
+
+
+class WsgibinAssertMixin:
+    def assertWsgibinInstrumentation(self, instrumentation=None):
+        if instrumentation is None:
+            instrumentation = getattr(self, 'wsgibin_instrumentation', None)
+        assert isinstance(instrumentation, WsgibinInstrumentation)
+        # a response must have been programmed before the fakes are checked
+        simulated_action = instrumentation.retrieve_handler.simulated
+        self.assertIsNotNone(simulated_action.returning,
+                             "no response programmed")
+
+        def was_called(fake):
+            assert hasattr(fake, 'calls')
+            return len(fake.calls) > 0
+
+        self.assertTrue(was_called(
+            instrumentation.format_output), "no output generated")
+        self.assertTrue(was_called(
+            instrumentation.retrieve_handler), "no handler retrieved")
diff --git a/tests/test_mig_wsgi-bin.py b/tests/test_mig_wsgi-bin.py
new file mode 100644
index 000000000..7dd8589dd
--- /dev/null
+++ b/tests/test_mig_wsgi-bin.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+#
+# --- BEGIN_HEADER ---
+#
+# Copyright (C) 2003-2024  The MiG Project by the Science HPC Center at UCPH
+#
+# This file is part of MiG.
+#
+# MiG is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MiG is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+# USA.
+#
+# --- END_HEADER ---
+#
+
+"""Unit tests for the MiG WSGI glue."""
+
+import codecs
+from configparser import ConfigParser
+import importlib
+import os
+import stat
+import sys
+
+from tests.support import PY2, MIG_BASE, MigTestCase, testmain, is_path_within
+from tests.support.htmlsupp import HtmlAssertMixin
+from tests.support.wsgisupp import prepare_wsgi, WsgiAssertMixin
+from tests.support.wsgibinsupp import WsgibinInstrumentation, WsgibinAssertMixin
+
+from mig.shared.base import client_id_dir, client_dir_id, get_short_id, \
+    invisible_path, allow_script, brief_list
+import mig.shared.returnvalues as returnvalues
+
+# workaround for files within non-module directories
+
+
+def _import_forcibly(module_name, relative_module_dir=None):
+    module_path = os.path.join(MIG_BASE, 'mig')
+    if relative_module_dir is not None:
+        module_path = os.path.join(module_path, relative_module_dir)
+    sys.path.append(module_path)
+    mod = importlib.import_module(module_name)
+    sys.path.pop(-1)  # do not leave the forced module path
+    return mod
+
+
+migwsgi = _import_forcibly('migwsgi', relative_module_dir='wsgi-bin')
+
+
+def noop(*args):
+    pass
+
+
+class MigWsgibin(MigTestCase, HtmlAssertMixin,
+                 WsgiAssertMixin, WsgibinAssertMixin):
+
+    def _provide_configuration(self):
+        return 'testconfig'
+
+    def before_each(self):
+        self.fake_wsgi = prepare_wsgi(self.configuration, 'http://localhost/')
+        self.wsgibin_instrumentation = WsgibinInstrumentation()
+
+        self.application_args = (
+            self.configuration,
+            self.fake_wsgi.environ,
+            self.fake_wsgi.start_response,
+        )
+        self.application_kwargs = dict(
+            _wrap_wsgi_errors=noop,
+            _format_output=self.wsgibin_instrumentation.format_output,
+            _retrieve_handler=self.wsgibin_instrumentation.retrieve_handler,
+            _set_environ=noop,
+        )
+
+    def test_return_value_ok_returns_status_200(self):
+        self.wsgibin_instrumentation.set_response([], returnvalues.OK)
+
+        wsgi_result = migwsgi._application(
+            *self.application_args,
+            **self.application_kwargs
+        )
+
+        self.assertWsgiResponse(wsgi_result, self.fake_wsgi, 200)
+        self.assertWsgibinInstrumentation()
+
+    def test_return_value_ok_returns_valid_html_page(self):
+        self.wsgibin_instrumentation.set_response([], returnvalues.OK)
+
+        wsgi_result = migwsgi._application(
+            *self.application_args,
+            **self.application_kwargs
+        )
+
+        output, _ = self.assertWsgiResponse(wsgi_result, self.fake_wsgi, 200)
+        self.assertWsgibinInstrumentation()
+        self.assertIsValidHtmlDocument(output)
+
+    def test_return_value_ok_returns_expected_title(self):
+        self.wsgibin_instrumentation.set_response([], returnvalues.OK)
+        self.wsgibin_instrumentation.format_output.set_values(
+            title_text='TEST')
+
+        wsgi_result = migwsgi._application(
+            *self.application_args,
+            **self.application_kwargs
+        )
+
+        output, _ = self.assertWsgiResponse(wsgi_result, self.fake_wsgi, 200)
+        self.assertWsgibinInstrumentation()
+        self.assertHtmlElementTextContent(
+            output, 'title', 'TEST', trim_newlines=True)
+
+
+if __name__ == '__main__':
+    testmain()

From 0270f09b00e702656d30b660431f33b228adf058 Mon Sep 17 00:00:00 2001
From: Alex Burke <albu@di.ku.dk>
Date: Tue, 24 Sep 2024 12:37:44 +0200
Subject: [PATCH 5/5] Make responding with binary data work under PY3.

The code as previously written would unconditionally encode parts as
though they contained text - this went unnoticed on PY2 because strings
and bytes are one and the same thing but blew up on PY3 where a string
is explicitly of type unicode while a binary file would be raw bytes.

Explicitly check the output_format and if instructed to serve a file do
so without touching the chunks of file content bytes being yielded.
---
 mig/wsgi-bin/migwsgi.py      |  9 ++++--
 tests/support/wsgibinsupp.py | 55 +++++++++++++++++++++++++++++++++++-
 tests/support/wsgisupp.py    | 14 ++++++---
 tests/test_mig_wsgi-bin.py   | 21 +++++++++++++-
 4 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/mig/wsgi-bin/migwsgi.py b/mig/wsgi-bin/migwsgi.py
index f1f387476..b7b0bb2ce 100755
--- a/mig/wsgi-bin/migwsgi.py
+++ b/mig/wsgi-bin/migwsgi.py
@@ -206,7 +206,7 @@ def _set_os_environ(value):
     return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)
 
 
-def _application(configuration, environ, start_response, _set_environ, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
+def _application(configuration, environ, start_response, _set_environ, _fieldstorage_to_dict=fieldstorage_to_dict, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
 
     # NOTE: pass app environ including apache and query args on to sub handlers
     #       through the usual 'os.environ' channel expected in functionality
@@ -319,7 +319,7 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
         #              (backend, script_name))
         fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
                                         environ=environ)
-        user_arguments_dict = fieldstorage_to_dict(fieldstorage)
+        user_arguments_dict = _fieldstorage_to_dict(fieldstorage)
         if 'output_format' in user_arguments_dict:
             output_format = user_arguments_dict['output_format'][0]
 
@@ -437,7 +437,10 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
             #              (backend, i+1, chunk_parts))
             # end index may be after end of content - but no problem
             part = output[i*download_block_size:(i+1)*download_block_size]
-            yield force_utf8(part)
+            if output_format == 'file':
+                yield part
+            else:
+                yield force_utf8(part)
         if chunk_parts > 1:
             _logger.info("WSGI %s finished yielding all %d output parts" %
                          (backend, chunk_parts))
diff --git a/tests/support/wsgibinsupp.py b/tests/support/wsgibinsupp.py
index 009e35696..efd98fab1 100644
--- a/tests/support/wsgibinsupp.py
+++ b/tests/support/wsgibinsupp.py
@@ -37,6 +37,22 @@ def _is_return_value(return_value):
     return return_value in defined_return_values
 
 
+def create_instrumented_fieldstorage_to_dict():
+    def _instrumented_fieldstorage_to_dict(fieldstorage):
+        return _instrumented_fieldstorage_to_dict._result
+
+    _instrumented_fieldstorage_to_dict._result = {
+        'output_format': ('html',)
+    }
+
+    def set_result(result):
+        _instrumented_fieldstorage_to_dict._result = result
+
+    _instrumented_fieldstorage_to_dict.set_result = set_result
+
+    return _instrumented_fieldstorage_to_dict
+
+
 def create_instrumented_format_output():
     def _instrumented_format_output(
         configuration,
@@ -53,6 +69,16 @@ def _instrumented_format_output(
                      call_args_out_obj, outputformat,)
         _instrumented_format_output.calls.append({'args': call_args})
 
+        if _instrumented_format_output._file:
+            return format_output(
+                configuration,
+                backend,
+                ret_val,
+                ret_msg,
+                out_obj,
+                outputformat,
+            )
+
         # FIXME: the following is a workaround for a bug that exists between the WSGI wrapper
         #        and the output formatter - specifically, the latter adds default header and
         #        title if start does not exist, but the former ensures that start always exists
@@ -88,11 +114,17 @@ def _instrumented_format_output(
             outputformat,
         )
     _instrumented_format_output.calls = []
+    _instrumented_format_output._file = False
     _instrumented_format_output.values = dict(
         title_text='',
         header_text='',
     )
 
+    def _set_file(is_enabled):
+        _instrumented_format_output._file = is_enabled
+
+    setattr(_instrumented_format_output, 'set_file', _set_file)
+
     def _program_values(**kwargs):
         _instrumented_format_output.values.update(kwargs)
 
@@ -126,12 +158,33 @@ def _instrumented_retrieve_handler(*args):
 
 class WsgibinInstrumentation:
     def __init__(self):
+        self.fieldstorage_to_dict = create_instrumented_fieldstorage_to_dict()
         self.format_output = create_instrumented_format_output()
         self.retrieve_handler = create_instrumented_retrieve_handler()
 
-    def set_response(self, content, returnvalue):
+    def _set_response_content(self, content, returnvalue):
         self.retrieve_handler.program(content, returnvalue)
 
+    def _set_response_file(self, returnbytes, returnvalue):
+        self.fieldstorage_to_dict.set_result({
+            'output_format': ('file',)
+        })
+        self.format_output.set_file(True)
+        file_obj = {'object_type': 'binary', 'data': returnbytes}
+        self.set_response([file_obj], returnvalue)
+
+    def set_response(self, content, returnvalue, responding_with='objects'):
+        """Program the response as output objects or as raw file bytes."""
+
+        if responding_with == 'file':
+            assert isinstance(
+                content, bytes), "file response demands bytes"
+            self._set_response_file(content, returnvalue)
+        elif responding_with == 'objects':
+            self._set_response_content(content, returnvalue)
+        else:
+            raise NotImplementedError()
+
 
 class WsgibinAssertMixin:
     def assertWsgibinInstrumentation(self, instrumentation=None):
diff --git a/tests/support/wsgisupp.py b/tests/support/wsgisupp.py
index d4f54e96f..5776bfce3 100644
--- a/tests/support/wsgisupp.py
+++ b/tests/support/wsgisupp.py
@@ -114,21 +114,27 @@ def prepare_wsgi(configuration, url, **kwargs):
     )
 
 
-def _trigger_and_unpack_result(wsgi_result):
+def _trigger_and_unpack_result(wsgi_result, content_kind='textual'):
+    assert content_kind in ('textual', 'binary')
+
     chunks = list(wsgi_result)
     assert len(chunks) > 0, "invocation returned no output"
     complete_value = b''.join(chunks)
-    decoded_value = codecs.decode(complete_value, 'utf8')
+    if content_kind == 'binary':
+        decoded_value = complete_value
+    else:
+        decoded_value = codecs.decode(complete_value, 'utf8')
     return decoded_value
 
 
 class WsgiAssertMixin:
     """Custom assertions for verifying server code executed under test."""
 
-    def assertWsgiResponse(self, wsgi_result, fake_wsgi, expected_status_code):
+    def assertWsgiResponse(self, wsgi_result, fake_wsgi, expected_status_code,
+                           content_kind='textual'):
         assert isinstance(fake_wsgi, _PreparedWsgi)
 
-        content = _trigger_and_unpack_result(wsgi_result)
+        content = _trigger_and_unpack_result(wsgi_result, content_kind=content_kind)
 
         def called_once(fake):
             assert hasattr(fake, 'calls')
diff --git a/tests/test_mig_wsgi-bin.py b/tests/test_mig_wsgi-bin.py
index 7dd8589dd..4b759cd07 100644
--- a/tests/test_mig_wsgi-bin.py
+++ b/tests/test_mig_wsgi-bin.py
@@ -33,7 +33,8 @@
 import stat
 import sys
 
-from tests.support import PY2, MIG_BASE, MigTestCase, testmain, is_path_within
+from tests.support import PY2, MIG_BASE, TEST_DATA_DIR, \
+    MigTestCase, testmain, is_path_within
 from tests.support.htmlsupp import HtmlAssertMixin
 from tests.support.wsgisupp import prepare_wsgi, WsgiAssertMixin
 from tests.support.wsgibinsupp import WsgibinInstrumentation, WsgibinAssertMixin
@@ -79,6 +80,7 @@ def before_each(self):
         )
         self.application_kwargs = dict(
             _wrap_wsgi_errors=noop,
+            _fieldstorage_to_dict=self.wsgibin_instrumentation.fieldstorage_to_dict,
             _format_output=self.wsgibin_instrumentation.format_output,
             _retrieve_handler=self.wsgibin_instrumentation.retrieve_handler,
             _set_environ=noop,
@@ -122,6 +124,23 @@ def test_return_value_ok_returns_expected_title(self):
         self.assertHtmlElementTextContent(
             output, 'title', 'TEST', trim_newlines=True)
 
+    def test_return_value_ok_serving_a_binary_file(self):
+        test_binary_file = os.path.join(TEST_DATA_DIR, 'loading.gif')
+        with open(test_binary_file, 'rb') as f:
+            test_binary_data = f.read()
+        self.wsgibin_instrumentation.set_response(
+            test_binary_data, returnvalues.OK, responding_with='file')
+
+        wsgi_result = migwsgi._application(
+            *self.application_args,
+            **self.application_kwargs
+        )
+
+        output, _ = self.assertWsgiResponse(
+            wsgi_result, self.fake_wsgi, 200, content_kind='binary')
+        self.assertWsgibinInstrumentation()
+        self.assertEqual(output, test_binary_data)
+
 
 if __name__ == '__main__':
     testmain()