Skip to content

Commit 88d2d4d

Browse files
authored
Merge pull request #618 from cehbrecht/integrate-lxml-patch
Integrate lxml patch from 4.4 branch
2 parents: ec6f6b5 + 8973087; commit: 88d2d4d

17 files changed

+95
-43
lines changed

pywps/app/WPSRequest.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import logging
77

88
import lxml
9-
import lxml.etree
9+
from pywps import xml_util as etree
1010
from werkzeug.exceptions import MethodNotAllowed
1111
from pywps import get_ElementMakerForVersion
1212
import base64
@@ -106,7 +106,7 @@ def _post_request(self):
106106
json_input = 'json' in content_type
107107
if not json_input:
108108
try:
109-
doc = lxml.etree.fromstring(self.http_request.get_data())
109+
doc = etree.fromstring(self.http_request.get_data())
110110
except Exception as e:
111111
raise NoApplicableCode(e.msg)
112112
operation = doc.tag
@@ -754,7 +754,7 @@ def _get_dataelement_value(value_el):
754754
"""
755755

756756
if isinstance(value_el, lxml.etree._Element):
757-
return lxml.etree.tostring(value_el, encoding=str)
757+
return etree.tostring(value_el, encoding=str)
758758
else:
759759
return value_el
760760

pywps/exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from werkzeug.http import parse_accept_header
1818
from werkzeug.wrappers import Response
1919
from werkzeug.exceptions import HTTPException
20-
from werkzeug.utils import escape
20+
from markupsafe import escape
2121

2222
import logging
2323

pywps/inout/inputs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
##################################################################
55

66
import re
7-
import lxml.etree as etree
7+
from pywps import xml_util as etree
88

99
from pywps.app.Common import Metadata
1010
from pywps.exceptions import InvalidParameterValue

pywps/inout/outputs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
"""
88
from typing import Optional, Sequence, Dict, Union
99

10-
import lxml.etree as etree
10+
from pywps import xml_util as etree
1111
import os
1212
import re
1313
from pywps.app.Common import Metadata

pywps/tests.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,11 @@
66
import tempfile
77
from pathlib import Path
88

9-
import lxml.etree
9+
import lxml
10+
from pywps import xml_util as etree
1011
import requests
1112
from werkzeug.test import Client
12-
from werkzeug.wrappers import BaseResponse
13+
from werkzeug.wrappers import Response
1314
from pywps import __version__
1415
from pywps import Process
1516
from pywps.inout import LiteralInput, LiteralOutput, ComplexInput, ComplexOutput, BoundingBoxInput, BoundingBoxOutput
@@ -94,7 +95,7 @@ class WpsClient(Client):
9495

9596
def post_xml(self, *args, **kwargs):
9697
doc = kwargs.pop('doc')
97-
data = lxml.etree.tostring(doc, pretty_print=True)
98+
data = etree.tostring(doc, pretty_print=True)
9899
kwargs['data'] = data
99100
return self.post(*args, **kwargs)
100101

@@ -109,12 +110,12 @@ def post_json(self, *args, **kwargs):
109110
return self.post(*args, **kwargs)
110111

111112

112-
class WpsTestResponse(BaseResponse):
113+
class WpsTestResponse(Response):
113114

114115
def __init__(self, *args):
115116
super(WpsTestResponse, self).__init__(*args)
116117
if re.match(r'text/xml(;\s*charset=.*)?', self.headers.get('Content-Type')):
117-
self.xml = lxml.etree.fromstring(self.get_data())
118+
self.xml = etree.fromstring(self.get_data())
118119

119120
def xpath(self, path):
120121
version = self.xml.attrib["version"]

pywps/validator/complexvalidator.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111

1212
from pywps.validator.mode import MODE
1313
from pywps.inout.formats import FORMATS
14+
from lxml.etree import XMLSchema
15+
from pywps import xml_util as etree
1416
from urllib.request import urlopen
1517
import mimetypes
1618
import os
@@ -61,13 +63,10 @@ def validategml(data_input, mode):
6163
passed = False
6264

6365
if mode >= MODE.VERYSTRICT:
64-
65-
from lxml import etree
66-
6766
try:
6867
schema_url = data_input.data_format.schema
6968
gmlschema_doc = etree.parse(urlopen(schema_url))
70-
gmlschema = etree.XMLSchema(gmlschema_doc)
69+
gmlschema = XMLSchema(gmlschema_doc)
7170
passed = gmlschema.validate(etree.parse(data_input.stream))
7271
except Exception as e:
7372
LOGGER.warning(e)
@@ -118,13 +117,10 @@ def validategpx(data_input, mode):
118117
passed = False
119118

120119
if mode >= MODE.VERYSTRICT:
121-
122-
from lxml import etree
123-
124120
try:
125121
schema_url = data_input.data_format.schema
126122
gpxschema_doc = etree.parse(urlopen(schema_url))
127-
gpxschema = etree.XMLSchema(gpxschema_doc)
123+
gpxschema = XMLSchema(gpxschema_doc)
128124
passed = gpxschema.validate(etree.parse(data_input.stream))
129125
except Exception as e:
130126
LOGGER.warning(e)
@@ -164,15 +160,13 @@ def validatexml(data_input, mode):
164160
passed = data_input.data_format.mime_type in {mtype, FORMATS.GML.mime_type}
165161

166162
if mode >= MODE.STRICT:
167-
from lxml import etree
168-
169163
# TODO: Raise the actual validation exception to make it easier to spot the error.
170164
# xml = etree.parse(data_input.file)
171165
# schema.assertValid(xml)
172166
try:
173167
fn = os.path.join(_get_schemas_home(), data_input.data_format.schema)
174168
schema_doc = etree.parse(fn)
175-
schema = etree.XMLSchema(schema_doc)
169+
schema = XMLSchema(schema_doc)
176170
passed = schema.validate(etree.parse(data_input.file))
177171
except Exception as e:
178172
LOGGER.warning(e)

pywps/xml_util.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
from lxml import etree as _etree
2+
3+
4+
PARSER = _etree.XMLParser(
5+
resolve_entities=False,
6+
)
7+
8+
tostring = _etree.tostring
9+
10+
11+
def fromstring(text):
12+
return _etree.fromstring(text, parser=PARSER)
13+
14+
15+
def parse(source):
16+
return _etree.parse(source, parser=PARSER)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,5 @@ python-dateutil
66
requests
77
SQLAlchemy
88
werkzeug
9+
MarkupSafe
910
humanize

tests/test_capabilities.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,11 @@
44
##################################################################
55

66
import unittest
7-
import lxml
8-
import lxml.etree
97
from pywps import configuration
108
from pywps.app import Process, Service
119
from pywps.app.Common import Metadata
1210
from pywps import get_ElementMakerForVersion
13-
from pywps.tests import assert_pywps_version, client_for, assert_wps_version
11+
from pywps.tests import client_for, assert_wps_version
1412

1513
WPS, OWS = get_ElementMakerForVersion("1.0.0")
1614

tests/test_complexdata_io.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,12 @@
22

33
import unittest
44
import os
5-
from pywps import get_ElementMakerForVersion, E
5+
from pywps import get_ElementMakerForVersion
66
from pywps.app.basic import get_xpath_ns
77
from pywps import Service, Process, ComplexInput, ComplexOutput, FORMATS
88
from pywps.tests import client_for, assert_response_success
99
from owslib.wps import WPSExecution, ComplexDataInput
10-
from lxml import etree
10+
from pywps import xml_util as etree
1111

1212
VERSION = "1.0.0"
1313
WPS, OWS = get_ElementMakerForVersion(VERSION)

tests/test_exceptions.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,9 @@
44
##################################################################
55

66
import unittest
7-
from pywps import Process, Service, get_ElementMakerForVersion
7+
from pywps import Service, get_ElementMakerForVersion
88
from pywps.app.basic import get_xpath_ns
99
from pywps.tests import assert_pywps_version, client_for
10-
import lxml.etree
1110

1211
import re
1312

tests/test_execute.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,24 +5,22 @@
55

66
import unittest
77
import pytest
8-
import lxml.etree
8+
from pywps import xml_util as etree
99
import json
1010
import tempfile
1111
import os.path
1212
from pywps import Service, Process, LiteralOutput, LiteralInput,\
1313
BoundingBoxOutput, BoundingBoxInput, Format, ComplexInput, ComplexOutput, FORMATS
1414
from pywps.validator.base import emptyvalidator
1515
from pywps.validator.complexvalidator import validategml
16-
from pywps.validator.mode import MODE
1716
from pywps.exceptions import InvalidParameterValue
18-
from pywps import get_inputs_from_xml, get_output_from_xml
17+
from pywps import get_inputs_from_xml
1918
from pywps import E, get_ElementMakerForVersion
2019
from pywps.app.basic import get_xpath_ns
2120
from pywps.tests import client_for, assert_response_success, assert_response_success_json
2221
from pywps import configuration
2322

2423
from io import StringIO
25-
from owslib.ows import BoundingBox
2624

2725
try:
2826
import netCDF4
@@ -228,7 +226,7 @@ def get_output(doc):
228226
output[identifier_el.text] = data_el[0].text
229227
else: # XML children
230228
ch = list(data_el[0])[0]
231-
output[identifier_el.text] = lxml.etree.tostring(ch)
229+
output[identifier_el.text] = etree.tostring(ch)
232230

233231
return output
234232

@@ -700,7 +698,7 @@ def test_complex_input(self):
700698
WPS.ComplexData(the_data, mimeType='text/foobar')))))
701699
rv = get_inputs_from_xml(request_doc)
702700
self.assertEqual(rv['name'][0]['mimeType'], 'text/foobar')
703-
rv_doc = lxml.etree.parse(StringIO(rv['name'][0]['data'])).getroot()
701+
rv_doc = etree.parse(StringIO(rv['name'][0]['data'])).getroot()
704702
self.assertEqual(rv_doc.tag, 'TheData')
705703
self.assertEqual(rv_doc.text, 'hello world')
706704

tests/test_formats.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,8 @@
88
import unittest
99

1010
from pywps.inout.formats import Format, get_format, FORMATS
11-
from lxml import etree
1211
from pywps.app.basic import get_xpath_ns
13-
from pywps.validator.base import emptyvalidator
12+
1413

1514
xpath_ns = get_xpath_ns("1.0.0")
1615

@@ -121,4 +120,3 @@ def load_tests(loader=None, tests=None, pattern=None):
121120
loader.loadTestsFromTestCase(FormatsTest)
122121
]
123122
return unittest.TestSuite(suite_list)
124-

tests/test_inout.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
import requests
1212
import os
1313
import tempfile
14-
import datetime
1514
import unittest
1615
import json
1716
from pywps import inout
@@ -35,7 +34,6 @@
3534
from pywps.tests import service_ok
3635
from pywps.translations import get_translation
3736

38-
from lxml import etree
3937

4038
DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
4139

tests/test_ows.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,6 @@
99
import os
1010
import tempfile
1111
import unittest
12-
import lxml.etree
13-
import sys
1412
from pywps import Service, Process, ComplexInput, ComplexOutput, Format, FORMATS, get_format
1513
from pywps.dependencies import ogr
1614
from pywps.exceptions import NoApplicableCode

tests/test_wpsrequest.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
##################################################################
55

66
import unittest
7-
import lxml.etree
87
from pywps.app import WPSRequest
98
import tempfile
109
import datetime

tests/test_xml_util.py

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,52 @@
1+
from pywps import xml_util as etree
2+
3+
from io import StringIO
4+
5+
6+
XML_EXECUTE = """
7+
<!DOCTYPE foo [
8+
<!ELEMENT foo ANY >
9+
<!ENTITY xxe SYSTEM "file:///PATH/TO/input.txt">
10+
]>
11+
<wps:Execute
12+
service="WPS"
13+
version="1.0.0"
14+
xmlns:wps="http://www.opengis.net/wps/1.0.0"
15+
xmlns:ows="http://www.opengis.net/ows/1.1"
16+
xmlns:xlink="http://www.w3.org/1999/xlink"
17+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
18+
xsi:schemaLocation="http://schemas.opengis.net/wps/1.0.0/wpsExecute_request.xsd">
19+
<ows:Identifier>test_process</ows:Identifier>
20+
<wps:DataInputs>
21+
<wps:Input>
22+
<ows:Identifier>name</ows:Identifier>
23+
<wps:Data>
24+
<wps:LiteralData>&xxe;</wps:LiteralData>
25+
</wps:Data>
26+
</wps:Input>
27+
</wps:DataInputs>
28+
<wps:ResponseForm>
29+
<wps:ResponseDocument
30+
storeExecuteResponse="true"
31+
status="true">
32+
<wps:Output asReference="false">
33+
<ows:Identifier>output</ows:Identifier>
34+
</wps:Output>
35+
</wps:ResponseDocument>
36+
</wps:ResponseForm>
37+
</wps:Execute>
38+
"""
39+
40+
41+
def test_etree_fromstring():
42+
xml = etree.tostring(etree.fromstring(XML_EXECUTE))
43+
# don't replace entities
44+
# https://lxml.de/parsing.html
45+
assert b"<wps:LiteralData>&xxe;</wps:LiteralData>" in xml
46+
47+
48+
def test_etree_parse():
49+
xml = etree.tostring(etree.parse(StringIO(XML_EXECUTE)))
50+
# don't replace entities
51+
# https://lxml.de/parsing.html
52+
assert b"<wps:LiteralData>&xxe;</wps:LiteralData>" in xml

0 commit comments

Comments
 (0)