Skip to content

Commit 988a0ed

Browse files
committed
Basic coverage of migwsgi.
This PR does the principal things required to allow exercising the central component responsible for gluing named MiG "functionality" files to WSGI and have the result execute to completion under Python 3. Included is a small tactical change to allow the structural force recursive functions to iterate tuples preserving their type such that subsequent output behaves correctly under Py3. ======= wip fixup fixup update and relocate a comment start tightening up the code shift things around a little work to make it readable with a nod towards further tests assert the response status allow programming the response repair previous assert that a programmed title ends up in the page line naming up with other recent work in grid_openid fixup fixup fixup fixup split the testing infrastructure across multiple files collect common default kwargs use noop for set environ make the generic WSGI handling setup code more uniform bring over improvements to htmlsupp from another branch simplify fixup
1 parent b04835d commit 988a0ed

File tree

6 files changed

+460
-31
lines changed

6 files changed

+460
-31
lines changed

mig/shared/base.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -516,8 +516,9 @@ def force_utf8_rec(input_obj, highlight=''):
516516
if isinstance(input_obj, dict):
517517
return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in
518518
input_obj.items()}
519-
elif isinstance(input_obj, list):
520-
return [force_utf8_rec(i, highlight) for i in input_obj]
519+
elif isinstance(input_obj, (list, tuple)):
520+
thetype = type(input_obj)
521+
return thetype(force_utf8_rec(i, highlight) for i in input_obj)
521522
elif is_unicode(input_obj):
522523
return force_utf8(input_obj, highlight)
523524
else:
@@ -544,8 +545,9 @@ def force_unicode_rec(input_obj, highlight=''):
544545
if isinstance(input_obj, dict):
545546
return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in
546547
input_obj.items()}
547-
elif isinstance(input_obj, list):
548-
return [force_unicode_rec(i, highlight) for i in input_obj]
548+
elif isinstance(input_obj, (list, tuple)):
549+
thetype = type(input_obj)
550+
return thetype(force_utf8_rec(i, highlight) for i in input_obj)
549551
elif not is_unicode(input_obj):
550552
return force_unicode(input_obj, highlight)
551553
else:

mig/wsgi-bin/migwsgi.py

Lines changed: 59 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#
2727

2828
import cgi
29+
import codecs
2930
import importlib
3031
import os
3132
import sys
@@ -35,6 +36,7 @@
3536
from mig.shared.bailout import bailout_helper, crash_helper, compact_string
3637
from mig.shared.base import requested_backend, allow_script, \
3738
is_default_str_coding, force_default_str_coding_rec
39+
from mig.shared.compat import PY2
3840
from mig.shared.defaults import download_block_size, default_fs_coding
3941
from mig.shared.conf import get_configuration_object
4042
from mig.shared.objecttypes import get_object_type_info
@@ -43,14 +45,27 @@
4345
from mig.shared.scriptinput import fieldstorage_to_dict
4446

4547

48+
if PY2:
49+
def _ensure_encoded_string(chunk):
50+
return chunk
51+
else:
52+
def _ensure_encoded_string(chunk):
53+
return codecs.encode(chunk, 'utf8')
54+
55+
56+
def _import_backend(backend):
57+
import_path = 'mig.shared.functionality.%s' % backend
58+
module_handle = importlib.import_module(import_path)
59+
return module_handle.main
60+
61+
4662
def object_type_info(object_type):
    """Return the type information registered for object_type.

    Thin wrapper handing the lookup to get_object_type_info as imported from
    mig.shared.objecttypes.
    """
    type_info = get_object_type_info(object_type)
    return type_info
5066

5167

52-
def stub(configuration, client_id, import_path, backend, user_arguments_dict,
53-
environ):
68+
def stub(configuration, client_id, user_arguments_dict, environ, _retrieve_handler):
5469
"""Run backend on behalf of client_id with supplied user_arguments_dict.
5570
I.e. import main from import_path and execute it with supplied arguments.
5671
"""
@@ -61,6 +76,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
6176
before_time = time.time()
6277

6378
output_objects = []
79+
backend = 'UNKNOWN'
6480
main = dummy_main
6581

6682
# _logger.debug("stub for backend %r" % backend)
@@ -69,10 +85,12 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
6985
# NEVER print/output it verbatim before it is validated below.
7086

7187
try:
88+
default_page = configuration.site_landing_page # TODO: avoid doing this work a second time
89+
backend = requested_backend(environ, fallback=default_page)
7290
valid_backend_name(backend)
7391
except InputException as iex:
74-
_logger.error("%s refused to import invalid backend %r (%s): %s" %
75-
(_addr, backend, import_path, iex))
92+
_logger.error("%s refused to import invalid backend %r: %s" %
93+
(_addr, backend, iex))
7694
bailout_helper(configuration, backend, output_objects,
7795
header_text='User Error')
7896
output_objects.extend([
@@ -81,41 +99,40 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
8199
{'object_type': 'link', 'text': 'Go to default interface',
82100
'destination': configuration.site_landing_page}
83101
])
84-
return (output_objects, returnvalues.CLIENT_ERROR)
102+
return backend, (output_objects, returnvalues.CLIENT_ERROR)
85103

86104
try:
87105
# Import main from backend module
88106

89107
# _logger.debug("import main from %r" % import_path)
90108
# NOTE: dynamic module loading to find corresponding main function
91-
module_handle = importlib.import_module(import_path)
92-
main = module_handle.main
109+
main = _retrieve_handler(backend)
93110
except Exception as err:
94-
_logger.error("%s could not import %r (%s): %s" %
95-
(_addr, backend, import_path, err))
111+
_logger.error("%s could not import %r: %s" %
112+
(_addr, backend, err))
96113
bailout_helper(configuration, backend, output_objects)
97114
output_objects.extend([
98115
{'object_type': 'error_text', 'text':
99116
'Could not load backend: %s' % html_escape(backend)},
100117
{'object_type': 'link', 'text': 'Go to default interface',
101118
'destination': configuration.site_landing_page}
102119
])
103-
return (output_objects, returnvalues.SYSTEM_ERROR)
120+
return backend, (output_objects, returnvalues.SYSTEM_ERROR)
104121

105122
# _logger.debug("imported main %s" % main)
106123

107124
# Now backend value is validated to be safe for output
108125

109126
if not isinstance(user_arguments_dict, dict):
110-
_logger.error("%s invalid user args %s for %s" % (_addr,
127+
_logger.error("%s invalid user args %s for backend %r" % (_addr,
111128
user_arguments_dict,
112-
import_path))
129+
backend))
113130
bailout_helper(configuration, backend, output_objects,
114131
header_text='Input Error')
115132
output_objects.append(
116133
{'object_type': 'error_text', 'text':
117134
'User input is not on expected format!'})
118-
return (output_objects, returnvalues.INVALID_ARGUMENT)
135+
return backend, (output_objects, returnvalues.INVALID_ARGUMENT)
119136

120137
try:
121138
(output_objects, (ret_code, ret_msg)) = main(client_id,
@@ -125,7 +142,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
125142
_logger.error("%s script crashed:\n%s" % (_addr,
126143
traceback.format_exc()))
127144
crash_helper(configuration, backend, output_objects)
128-
return (output_objects, returnvalues.ERROR)
145+
return backend, (output_objects, returnvalues.ERROR)
129146

130147
(val_ret, val_msg) = validate(output_objects)
131148
if not val_ret:
@@ -138,7 +155,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
138155
after_time = time.time()
139156
output_objects.append({'object_type': 'timing_info', 'text':
140157
"done in %.3fs" % (after_time - before_time)})
141-
return (output_objects, (ret_code, ret_msg))
158+
return backend, (output_objects, (ret_code, ret_msg))
142159

143160

144161
def wrap_wsgi_errors(environ, configuration, max_line_len=100):
@@ -193,6 +210,14 @@ def application(environ, start_response):
193210
*start_response* is a helper function used to deliver the client response.
194211
"""
195212

213+
def _set_os_environ(value):
214+
os.environ = value
215+
216+
return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)
217+
218+
219+
def _application(configuration, environ, start_response, _set_environ, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
220+
196221
# NOTE: pass app environ including apache and query args on to sub handlers
197222
# through the usual 'os.environ' channel expected in functionality
198223
# handlers. Special care is needed to avoid various sub-interpreter
@@ -235,18 +260,20 @@ def application(environ, start_response):
235260
os_env_value))
236261

237262
# Assign updated environ to LOCAL os.environ for the rest of this session
238-
os.environ = environ
263+
_set_environ(environ)
239264

240265
# NOTE: redirect stdout to stderr in python 2 only. It breaks logger in 3
241266
# and stdout redirection apparently is already handled there.
242267
if sys.version_info[0] < 3:
243268
sys.stdout = sys.stderr
244269

245-
configuration = get_configuration_object()
270+
if configuration is None:
271+
configuration = get_configuration_object(_config_file, _skip_log)
272+
246273
_logger = configuration.logger
247274

248275
# NOTE: replace default wsgi errors to apache error log with our own logs
249-
wrap_wsgi_errors(environ, configuration)
276+
_wrap_wsgi_errors(environ, configuration)
250277

251278
for line in env_sync_status:
252279
_logger.debug(line)
@@ -298,7 +325,6 @@ def application(environ, start_response):
298325
default_page = configuration.site_landing_page
299326
script_name = requested_backend(environ, fallback=default_page,
300327
strip_ext=False)
301-
backend = requested_backend(environ, fallback=default_page)
302328
# _logger.debug('DEBUG: wsgi found backend %s and script %s' %
303329
# (backend, script_name))
304330
fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
@@ -307,13 +333,12 @@ def application(environ, start_response):
307333
if 'output_format' in user_arguments_dict:
308334
output_format = user_arguments_dict['output_format'][0]
309335

310-
module_path = 'mig.shared.functionality.%s' % backend
311336
(allow, msg) = allow_script(configuration, script_name, client_id)
312337
if allow:
313338
# _logger.debug("wsgi handling script: %s" % script_name)
314-
(output_objs, ret_val) = stub(configuration, client_id,
315-
module_path, backend,
316-
user_arguments_dict, environ)
339+
backend, (output_objs, ret_val) = stub(configuration, client_id,
340+
user_arguments_dict, environ,
341+
_retrieve_handler)
317342
else:
318343
_logger.warning("wsgi handling refused script:%s" % script_name)
319344
(output_objs, ret_val) = reject_main(client_id,
@@ -363,7 +388,7 @@ def application(environ, start_response):
363388
output_objs.append(wsgi_entry)
364389

365390
_logger.debug("call format %r output to %s" % (backend, output_format))
366-
output = format_output(configuration, backend, ret_code, ret_msg,
391+
output = _format_output(configuration, backend, ret_code, ret_msg,
367392
output_objs, output_format)
368393
# _logger.debug("formatted %s output to %s" % (backend, output_format))
369394
# _logger.debug("output:\n%s" % [output])
@@ -372,7 +397,7 @@ def application(environ, start_response):
372397
_logger.error(
373398
"Formatted output is NOT on default str coding: %s" % [output[:100]])
374399
err_mark = '__****__'
375-
output = format_output(configuration, backend, ret_code, ret_msg,
400+
output = _format_output(configuration, backend, ret_code, ret_msg,
376401
force_default_str_coding_rec(
377402
output_objs, highlight=err_mark),
378403
output_format)
@@ -396,7 +421,14 @@ def application(environ, start_response):
396421
# NOTE: send response to client but don't crash e.g. on closed connection
397422
try:
398423
start_response(status, response_headers)
424+
except IOError as ioe:
425+
_logger.warning("WSGI %s for %s could not deliver output: %s" %
426+
(backend, client_id, ioe))
427+
except Exception as exc:
428+
_logger.error("WSGI %s for %s crashed during response: %s" %
429+
(backend, client_id, exc))
399430

431+
try:
400432
# NOTE: we consistently hit download error for archive files reaching ~2GB
401433
# with showfreezefile.py on wsgi but the same on cgi does NOT suffer
402434
# the problem for the exact same files. It seems wsgi has a limited
@@ -410,12 +442,12 @@ def application(environ, start_response):
410442
_logger.info("WSGI %s yielding %d output parts (%db)" %
411443
(backend, chunk_parts, content_length))
412444
# _logger.debug("send chunked %r response to client" % backend)
413-
for i in xrange(chunk_parts):
445+
for i in list(range(chunk_parts)):
414446
# _logger.debug("WSGI %s yielding part %d / %d output parts" %
415447
# (backend, i+1, chunk_parts))
416448
# end index may be after end of content - but no problem
417449
part = output[i*download_block_size:(i+1)*download_block_size]
418-
yield part
450+
yield _ensure_encoded_string(part)
419451
if chunk_parts > 1:
420452
_logger.info("WSGI %s finished yielding all %d output parts" %
421453
(backend, chunk_parts))

tests/support/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,11 @@
8282

8383
from tests.support.assertover import AssertOver
8484
from tests.support.configsupp import FakeConfiguration
85+
from tests.support.htmlsupp import HtmlAssertMixin
8586
from tests.support.loggersupp import FakeLogger
8687
from tests.support.serversupp import make_wrapped_server
88+
from tests.support.wsgisupp import create_wsgi_environ, \
89+
create_wsgi_start_response, ServerAssertMixin
8790

8891

8992
# Basic global logging configuration for testing

tests/support/htmlsupp.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# --- BEGIN_HEADER ---
5+
#
6+
# htmlsupp - test support library for HTML
7+
# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH
8+
#
9+
# This file is part of MiG.
10+
#
11+
# MiG is free software: you can redistribute it and/or modify
12+
# it under the terms of the GNU General Public License as published by
13+
# the Free Software Foundation; either version 2 of the License, or
14+
# (at your option) any later version.
15+
#
16+
# MiG is distributed in the hope that it will be useful,
17+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
18+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19+
# GNU General Public License for more details.
20+
#
21+
# You should have received a copy of the GNU General Public License
22+
# along with this program; if not, write to the Free Software
23+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24+
#
25+
# -- END_HEADER ---
26+
#
27+
28+
"""Test support library for HTML."""
29+
30+
31+
class HtmlAssertMixin:
    """Custom assertions for HTML containing strings.

    Meant to be mixed into a test case class: the text content assertion
    relies on self.assertEqual being provided by the class it is mixed into.
    """

    def assertHtmlElement(self, value, tag_name):
        """Check that an occurrence of the specified tag within an HTML input
        string can be found. Returns the textual content of the first match.

        Raises AssertionError if value is not a valid HTML document or if no
        matching open and close tag pair for tag_name can be found.
        """

        self.assertIsValidHtmlDocument(value)

        # TODO: this is definitely a stop-gap way of finding a tag within the
        #       HTML and is used purely to keep this initial change to a
        #       reasonable size. Only tags written without attributes match.

        tag_open = ''.join(['<', tag_name, '>'])
        tag_open_index = value.find(tag_open)
        if tag_open_index < 0:
            # NOTE: report a missing tag as a test failure, not a ValueError
            raise AssertionError("no <%s> element found in HTML" % tag_name)
        tag_open_index_after = tag_open_index + len(tag_open)

        tag_close = ''.join(['</', tag_name, '>'])
        tag_close_index = value.find(tag_close, tag_open_index_after)
        if tag_close_index < 0:
            raise AssertionError("no closing </%s> found in HTML" % tag_name)

        return value[tag_open_index_after:tag_close_index]

    def assertHtmlElementTextContent(self, value, tag_name, expected_text, trim_newlines=True):
        """Check there is an occurrence of a tag within an HTML input string
        and check the text it encloses equals exactly the expectation.

        With trim_newlines set, leading and trailing newlines are stripped
        from the enclosed text before the comparison.
        """

        # NOTE: document validity is checked as part of assertHtmlElement
        actual_text = self.assertHtmlElement(value, tag_name)
        if trim_newlines:
            actual_text = actual_text.strip('\n')
        self.assertEqual(actual_text, expected_text)

    def assertIsValidHtmlDocument(self, value):
        """Check that the input string contains a valid HTML document.

        Only the outermost structure is inspected: value must be a text
        (unicode) string which starts with an HTML doctype declaration and
        ends with a closing html tag, optionally followed by whitespace.
        Raises AssertionError otherwise.
        """

        # NOTE: raise explicitly so the check survives running with -O
        if not isinstance(value, type(u"")):
            raise AssertionError("input string was not utf8")

        error = None
        if not value.startswith("<!DOCTYPE html"):
            error = "no valid document opener"
        else:
            end_html_tag_idx = value.rfind('</html>')
            if end_html_tag_idx < 0 \
                    or value[end_html_tag_idx:].rstrip() != '</html>':
                error = "no valid document closer"

        if error:
            # NOTE: interpolate the reason - passing it as a second argument
            #       leaves the %s placeholder unformatted in the message.
            raise AssertionError(
                "failed to verify input string as HTML: %s" % error)

0 commit comments

Comments
 (0)