Skip to content

Commit ffa18a4

Browse files
committed
Basic coverage of migwsgi.
This PR does the principal things required to allow exercising the central component responsible for gluing named MiG "functionality" files to WSGI and have the result execute to completion under Python 3. Included is a small tactical change to allow the structural force recursive functions to iterate tuples preserving their type such that subsequent output behaves correctly under Py3. ======= wip fixup fixup update and relocate a comment start tightening up the code shift things around a little work to make it readable with a nod towards further tests assert the response status allow programming the response repair previous assert that a programmed title ends up in the page line naming up with other recent work in grid_openid fixup fixup fixup fixup split the testing infrastructure across multiple files collect common default kwargs use noop for set environ make the generic WSGI handling setup code more uniform bring over improvements to htmlsupp from another branch simplify fixup
1 parent a186fa5 commit ffa18a4

File tree

5 files changed

+408
-32
lines changed

5 files changed

+408
-32
lines changed

mig/shared/base.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -542,8 +542,9 @@ def force_utf8_rec(input_obj, highlight=''):
542542
if isinstance(input_obj, dict):
543543
return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in
544544
input_obj.items()}
545-
elif isinstance(input_obj, list):
546-
return [force_utf8_rec(i, highlight) for i in input_obj]
545+
elif isinstance(input_obj, (list, tuple)):
546+
thetype = type(input_obj)
547+
return thetype(force_utf8_rec(i, highlight) for i in input_obj)
547548
elif is_unicode(input_obj):
548549
return force_utf8(input_obj, highlight)
549550
else:
@@ -570,8 +571,9 @@ def force_unicode_rec(input_obj, highlight=''):
570571
if isinstance(input_obj, dict):
571572
return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in
572573
input_obj.items()}
573-
elif isinstance(input_obj, list):
574-
return [force_unicode_rec(i, highlight) for i in input_obj]
574+
elif isinstance(input_obj, (list, tuple)):
575+
thetype = type(input_obj)
576+
return thetype(force_utf8_rec(i, highlight) for i in input_obj)
575577
elif not is_unicode(input_obj):
576578
return force_unicode(input_obj, highlight)
577579
else:

mig/wsgi-bin/migwsgi.py

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from mig.shared import returnvalues
3535
from mig.shared.bailout import bailout_helper, crash_helper, compact_string
3636
from mig.shared.base import requested_backend, allow_script, \
37-
is_default_str_coding, force_default_str_coding_rec
37+
is_default_str_coding, force_default_str_coding_rec, force_utf8
3838
from mig.shared.defaults import download_block_size, default_fs_coding
3939
from mig.shared.conf import get_configuration_object
4040
from mig.shared.objecttypes import get_object_type_info
@@ -43,14 +43,19 @@
4343
from mig.shared.scriptinput import fieldstorage_to_dict
4444

4545

46+
def _import_backend(backend):
47+
import_path = 'mig.shared.functionality.%s' % backend
48+
module_handle = importlib.import_module(import_path)
49+
return module_handle.main
50+
51+
4652
def object_type_info(object_type):
    """Lookup object type.

    Thin wrapper that hands *object_type* to get_object_type_info and returns
    its result unchanged.
    """
    type_info = get_object_type_info(object_type)
    return type_info
5056

5157

52-
def stub(configuration, client_id, import_path, backend, user_arguments_dict,
53-
environ):
58+
def stub(configuration, client_id, user_arguments_dict, environ, _retrieve_handler):
5459
"""Run backend on behalf of client_id with supplied user_arguments_dict.
5560
I.e. import main from import_path and execute it with supplied arguments.
5661
"""
@@ -61,6 +66,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
6166
before_time = time.time()
6267

6368
output_objects = []
69+
backend = 'UNKNOWN'
6470
main = dummy_main
6571

6672
# _logger.debug("stub for backend %r" % backend)
@@ -69,10 +75,12 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
6975
# NEVER print/output it verbatim before it is validated below.
7076

7177
try:
78+
default_page = configuration.site_landing_page # TODO: avoid doing this work a second time
79+
backend = requested_backend(environ, fallback=default_page)
7280
valid_backend_name(backend)
7381
except InputException as iex:
74-
_logger.error("%s refused to import invalid backend %r (%s): %s" %
75-
(_addr, backend, import_path, iex))
82+
_logger.error("%s refused to import invalid backend %r: %s" %
83+
(_addr, backend, iex))
7684
bailout_helper(configuration, backend, output_objects,
7785
header_text='User Error')
7886
output_objects.extend([
@@ -81,41 +89,40 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
8189
{'object_type': 'link', 'text': 'Go to default interface',
8290
'destination': configuration.site_landing_page}
8391
])
84-
return (output_objects, returnvalues.CLIENT_ERROR)
92+
return backend, (output_objects, returnvalues.CLIENT_ERROR)
8593

8694
try:
8795
# Import main from backend module
8896

8997
# _logger.debug("import main from %r" % import_path)
9098
# NOTE: dynamic module loading to find corresponding main function
91-
module_handle = importlib.import_module(import_path)
92-
main = module_handle.main
99+
main = _retrieve_handler(backend)
93100
except Exception as err:
94-
_logger.error("%s could not import %r (%s): %s" %
95-
(_addr, backend, import_path, err))
101+
_logger.error("%s could not import %r: %s" %
102+
(_addr, backend, err))
96103
bailout_helper(configuration, backend, output_objects)
97104
output_objects.extend([
98105
{'object_type': 'error_text', 'text':
99106
'Could not load backend: %s' % html_escape(backend)},
100107
{'object_type': 'link', 'text': 'Go to default interface',
101108
'destination': configuration.site_landing_page}
102109
])
103-
return (output_objects, returnvalues.SYSTEM_ERROR)
110+
return backend, (output_objects, returnvalues.SYSTEM_ERROR)
104111

105112
# _logger.debug("imported main %s" % main)
106113

107114
# Now backend value is validated to be safe for output
108115

109116
if not isinstance(user_arguments_dict, dict):
110-
_logger.error("%s invalid user args %s for %s" % (_addr,
117+
_logger.error("%s invalid user args %s for backend %r" % (_addr,
111118
user_arguments_dict,
112-
import_path))
119+
backend))
113120
bailout_helper(configuration, backend, output_objects,
114121
header_text='Input Error')
115122
output_objects.append(
116123
{'object_type': 'error_text', 'text':
117124
'User input is not on expected format!'})
118-
return (output_objects, returnvalues.INVALID_ARGUMENT)
125+
return backend, (output_objects, returnvalues.INVALID_ARGUMENT)
119126

120127
try:
121128
(output_objects, (ret_code, ret_msg)) = main(client_id,
@@ -125,7 +132,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
125132
_logger.error("%s script crashed:\n%s" % (_addr,
126133
traceback.format_exc()))
127134
crash_helper(configuration, backend, output_objects)
128-
return (output_objects, returnvalues.ERROR)
135+
return backend, (output_objects, returnvalues.ERROR)
129136

130137
(val_ret, val_msg) = validate(output_objects)
131138
if not val_ret:
@@ -138,7 +145,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
138145
after_time = time.time()
139146
output_objects.append({'object_type': 'timing_info', 'text':
140147
"done in %.3fs" % (after_time - before_time)})
141-
return (output_objects, (ret_code, ret_msg))
148+
return backend, (output_objects, (ret_code, ret_msg))
142149

143150

144151
def wrap_wsgi_errors(environ, configuration, max_line_len=100):
@@ -193,6 +200,14 @@ def application(environ, start_response):
193200
*start_response* is a helper function used to deliver the client response.
194201
"""
195202

203+
def _set_os_environ(value):
204+
os.environ = value
205+
206+
return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)
207+
208+
209+
def _application(configuration, environ, start_response, _set_environ, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
210+
196211
# NOTE: pass app environ including apache and query args on to sub handlers
197212
# through the usual 'os.environ' channel expected in functionality
198213
# handlers. Special care is needed to avoid various sub-interpreter
@@ -235,18 +250,20 @@ def application(environ, start_response):
235250
os_env_value))
236251

237252
# Assign updated environ to LOCAL os.environ for the rest of this session
238-
os.environ = environ
253+
_set_environ(environ)
239254

240255
# NOTE: redirect stdout to stderr in python 2 only. It breaks logger in 3
241256
# and stdout redirection apparently is already handled there.
242257
if sys.version_info[0] < 3:
243258
sys.stdout = sys.stderr
244259

245-
configuration = get_configuration_object()
260+
if configuration is None:
261+
configuration = get_configuration_object(_config_file, _skip_log)
262+
246263
_logger = configuration.logger
247264

248265
# NOTE: replace default wsgi errors to apache error log with our own logs
249-
wrap_wsgi_errors(environ, configuration)
266+
_wrap_wsgi_errors(environ, configuration)
250267

251268
for line in env_sync_status:
252269
_logger.debug(line)
@@ -298,7 +315,6 @@ def application(environ, start_response):
298315
default_page = configuration.site_landing_page
299316
script_name = requested_backend(environ, fallback=default_page,
300317
strip_ext=False)
301-
backend = requested_backend(environ, fallback=default_page)
302318
# _logger.debug('DEBUG: wsgi found backend %s and script %s' %
303319
# (backend, script_name))
304320
fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
@@ -307,13 +323,12 @@ def application(environ, start_response):
307323
if 'output_format' in user_arguments_dict:
308324
output_format = user_arguments_dict['output_format'][0]
309325

310-
module_path = 'mig.shared.functionality.%s' % backend
311326
(allow, msg) = allow_script(configuration, script_name, client_id)
312327
if allow:
313328
# _logger.debug("wsgi handling script: %s" % script_name)
314-
(output_objs, ret_val) = stub(configuration, client_id,
315-
module_path, backend,
316-
user_arguments_dict, environ)
329+
backend, (output_objs, ret_val) = stub(configuration, client_id,
330+
user_arguments_dict, environ,
331+
_retrieve_handler)
317332
else:
318333
_logger.warning("wsgi handling refused script:%s" % script_name)
319334
(output_objs, ret_val) = reject_main(client_id,
@@ -363,7 +378,7 @@ def application(environ, start_response):
363378
output_objs.append(wsgi_entry)
364379

365380
_logger.debug("call format %r output to %s" % (backend, output_format))
366-
output = format_output(configuration, backend, ret_code, ret_msg,
381+
output = _format_output(configuration, backend, ret_code, ret_msg,
367382
output_objs, output_format)
368383
# _logger.debug("formatted %s output to %s" % (backend, output_format))
369384
# _logger.debug("output:\n%s" % [output])
@@ -372,7 +387,7 @@ def application(environ, start_response):
372387
_logger.error(
373388
"Formatted output is NOT on default str coding: %s" % [output[:100]])
374389
err_mark = '__****__'
375-
output = format_output(configuration, backend, ret_code, ret_msg,
390+
output = _format_output(configuration, backend, ret_code, ret_msg,
376391
force_default_str_coding_rec(
377392
output_objs, highlight=err_mark),
378393
output_format)
@@ -396,7 +411,14 @@ def application(environ, start_response):
396411
# NOTE: send response to client but don't crash e.g. on closed connection
397412
try:
398413
start_response(status, response_headers)
414+
except IOError as ioe:
415+
_logger.warning("WSGI %s for %s could not deliver output: %s" %
416+
(backend, client_id, ioe))
417+
except Exception as exc:
418+
_logger.error("WSGI %s for %s crashed during response: %s" %
419+
(backend, client_id, exc))
399420

421+
try:
400422
# NOTE: we consistently hit download error for archive files reaching ~2GB
401423
# with showfreezefile.py on wsgi but the same on cgi does NOT suffer
402424
# the problem for the exact same files. It seems wsgi has a limited
@@ -410,12 +432,12 @@ def application(environ, start_response):
410432
_logger.info("WSGI %s yielding %d output parts (%db)" %
411433
(backend, chunk_parts, content_length))
412434
# _logger.debug("send chunked %r response to client" % backend)
413-
for i in xrange(chunk_parts):
435+
for i in list(range(chunk_parts)):
414436
# _logger.debug("WSGI %s yielding part %d / %d output parts" %
415437
# (backend, i+1, chunk_parts))
416438
# end index may be after end of content - but no problem
417439
part = output[i*download_block_size:(i+1)*download_block_size]
418-
yield part
440+
yield force_utf8(part)
419441
if chunk_parts > 1:
420442
_logger.info("WSGI %s finished yielding all %d output parts" %
421443
(backend, chunk_parts))

tests/support/htmlsupp.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# --- BEGIN_HEADER ---
5+
#
6+
# htmlsupp - test support library for HTML
7+
# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH
8+
#
9+
# This file is part of MiG.
10+
#
11+
# MiG is free software: you can redistribute it and/or modify
12+
# it under the terms of the GNU General Public License as published by
13+
# the Free Software Foundation; either version 2 of the License, or
14+
# (at your option) any later version.
15+
#
16+
# MiG is distributed in the hope that it will be useful,
17+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
18+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19+
# GNU General Public License for more details.
20+
#
21+
# You should have received a copy of the GNU General Public License
22+
# along with this program; if not, write to the Free Software
23+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24+
#
25+
# -- END_HEADER ---
26+
#
27+
28+
"""Test support library for HTML."""
29+
30+
31+
class HtmlAssertMixin:
    """Custom assertions for strings containing HTML.

    Intended to be mixed into a test case class: the text content assertion
    relies on ``self.assertEqual`` being provided by the host class.
    """

    def assertHtmlElement(self, value, tag_name):
        """Check that an occurrence of the specified tag can be found within
        an HTML input string. Returns the textual content of the first match.

        Only a bare opening tag without attributes, i.e. ``<tag_name>``, is
        recognised. Raises AssertionError if value is not a valid HTML
        document and ValueError if the opening or closing tag is missing.
        """

        self.assertIsValidHtmlDocument(value)

        # TODO: this is definitely a stop-gap way of finding a tag within the
        #       HTML and is used purely to keep this initial change to a
        #       reasonable size.

        tag_open = '<' + tag_name + '>'
        content_start = value.index(tag_open) + len(tag_open)

        # Only look for the closing tag after the opening one
        tag_close = '</' + tag_name + '>'
        content_end = value.index(tag_close, content_start)

        return value[content_start:content_end]

    def assertHtmlElementTextContent(self, value, tag_name, expected_text, trim_newlines=True):
        """Check there is an occurrence of a tag within an HTML input string
        and check the text it encloses equals exactly the expectation.

        With trim_newlines set any leading and trailing newline characters
        are stripped from the enclosed text before the comparison.
        """

        # NOTE: assertHtmlElement already verifies that value is a valid HTML
        #       document so there is no need to repeat that check here.
        actual_text = self.assertHtmlElement(value, tag_name)
        if trim_newlines:
            actual_text = actual_text.strip('\n')
        self.assertEqual(actual_text, expected_text)

    def assertIsValidHtmlDocument(self, value):
        """Check that the input string contains a valid HTML document.

        The check is purely textual: value must be a unicode string that
        starts with an HTML doctype declaration and has nothing but
        whitespace after the final closing html tag. Raises AssertionError
        with a single formatted message otherwise.
        """

        # NOTE: raise explicitly rather than rely on assert statements, which
        #       are stripped when python runs with optimizations enabled.
        if not isinstance(value, type(u"")):
            raise AssertionError("input string was not utf8")

        problem = None
        if not value.startswith("<!DOCTYPE html"):
            problem = "no valid document opener"
        elif not value.rstrip().endswith('</html>'):
            problem = "no valid document closer"

        if problem:
            # NOTE: format the detail into the message - passing it as a
            #       second argument would leave an unformatted args tuple.
            raise AssertionError(
                "failed to verify input string as HTML: %s" % problem)

0 commit comments

Comments
 (0)