Skip to content

Commit 2f4a22c

Browse files
authored
Merge pull request #6112 from jorgectf/jorgectf/python/deserialization
Python: Port and extend XXE modeling
2 parents bbb2847 + 0e9da4a commit 2f4a22c

26 files changed

+1768
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
2+
* @name SimpleXMLRPCServer DoS vulnerability
3+
* @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input
4+
* @kind problem
5+
* @problem.severity warning
6+
* @precision high
7+
* @id py/simple-xml-rpc-server-dos
8+
* @tags security
9+
* external/cwe/cwe-776
10+
*/
11+
12+
private import python
13+
private import experimental.semmle.python.Concepts
14+
private import semmle.python.ApiGraphs
15+
16+
// Flags every call that constructs `xmlrpc.server.SimpleXMLRPCServer`; the `where` clause
// places no condition on the call's arguments or on any data flowing into it.
from DataFlow::CallCfgNode call, string kinds
17+
where
18+
call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and
19+
// `kinds` is the ", "-separated concatenation of the Billion Laughs and Quadratic Blowup kinds.
kinds =
20+
strictconcat(ExperimentalXML::XMLVulnerabilityKind kind |
21+
kind.isBillionLaughs() or kind.isQuadraticBlowup()
22+
|
23+
kind, ", "
24+
)
25+
select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "."
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<?xml version="1.0"?>
2+
<!DOCTYPE dt [
3+
<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
4+
<test>&xxe;</test>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from flask import request, Flask
2+
import lxml.etree
3+
import xml.etree.ElementTree
4+
5+
app = Flask(__name__)
6+
7+
# BAD
8+
@app.route("/bad")
9+
def bad():
10+
# XML payload taken directly from the incoming Flask request (untrusted).
xml_content = request.args['xml_content']
11+
12+
# The parser is created with no arguments, so no hardening option is set explicitly.
parser = lxml.etree.XMLParser()
13+
parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
14+
15+
# The parsed root element's text is sent back in the response.
return parsed_xml.text
16+
17+
# GOOD
18+
@app.route("/good")
19+
def good():
20+
# XML payload taken directly from the incoming Flask request (untrusted).
xml_content = request.args['xml_content']
21+
22+
# Only difference from `bad`: the parser is created with `resolve_entities=False`.
parser = lxml.etree.XMLParser(resolve_entities=False)
23+
parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
24+
25+
return parsed_xml.text
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
9+
Billion Laughs, Quadratic Blowup and DTD retrieval.
10+
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
11+
service, or server-side request forgery. Even when the result of parsing is not returned to the user, out-of-band
12+
data retrieval techniques may allow attackers to steal sensitive data. Denial-of-service attacks can also be carried out
13+
in this situation.
14+
</p>
15+
</overview>
16+
17+
<recommendation>
18+
<p>
19+
Use <a href="https://pypi.org/project/defusedxml/">defusedxml</a>, a Python package designed
20+
to prevent any potentially malicious operation.
21+
</p>
22+
</recommendation>
23+
24+
<example>
25+
<p>
26+
The following example calls <code>xml.etree.ElementTree.fromstring</code> using a parser (<code>lxml.etree.XMLParser</code>)
27+
that is not safely configured on untrusted data, and is therefore inherently unsafe.
28+
</p>
29+
<sample src="XmlEntityInjection.py"/>
30+
<p>
31+
When an input (<code>xml_content</code>) like the following XML content is sent to <code>/bad</code>, the response contains the contents of
32+
<code>/etc/passwd</code>.
33+
</p>
34+
<sample src="XXE.xml"/>
35+
</example>
36+
37+
<references>
38+
<li>Python 3 <a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
39+
<li>Python 2 <a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
40+
<li>Python <a href="https://www.edureka.co/blog/python-xml-parser-tutorial/">XML Parsing</a>.</li>
41+
<li>OWASP vulnerability description: <a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.</li>
42+
<li>OWASP guidance on parsing xml files: <a href="https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#python">XXE Prevention Cheat Sheet</a>.</li>
43+
<li>Paper by Timothy Morgan: <a href="https://research.nccgroup.com/2014/05/19/xml-schema-dtd-and-entity-attacks-a-compendium-of-known-techniques/">XML Schema, DTD, and Entity Attacks</a>.</li>
44+
<li>Out-of-band data retrieval: Timur Yunusov &amp; Alexey Osipov, Black hat EU 2013: <a href="https://www.slideshare.net/qqlan/bh-ready-v4">XML Out-Of-Band Data Retrieval</a>.</li>
45+
<li>Denial of service attack (Billion laughs): <a href="https://en.wikipedia.org/wiki/Billion_laughs">Billion Laughs</a>.</li>
46+
</references>
47+
48+
</qhelp>
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/**
2+
* @name XML Entity injection
3+
* @description User input should not be parsed allowing the injection of entities.
4+
* @kind path-problem
5+
* @problem.severity error
6+
* @id py/xml-entity-injection
7+
* @tags security
8+
* external/cwe/cwe-611
9+
* external/cwe/cwe-776
10+
* external/cwe/cwe-827
11+
*/
12+
13+
// TODO: determine the `@precision` value and add it to the query metadata above
14+
import python
15+
import experimental.semmle.python.security.dataflow.XmlEntityInjection
16+
import DataFlow::PathGraph
17+
18+
// Reports each source-to-sink path found by `XmlEntityInjectionConfiguration`, together with
// the vulnerability kinds reported by the sink.
from
19+
XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source,
20+
DataFlow::PathNode sink, string kinds
21+
where
22+
config.hasFlowPath(source, sink) and
23+
// `kinds` is the ", "-separated concatenation of every kind returned by the sink's
// `getVulnerableKind()`.
kinds =
24+
strictconcat(string kind |
25+
kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
26+
|
27+
kind, ", "
28+
)
29+
select sink.getNode(), source, sink,
30+
"$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(),
31+
"This", source.getNode(), "user-provided value"

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,74 @@ private import semmle.python.dataflow.new.RemoteFlowSources
1414
private import semmle.python.dataflow.new.TaintTracking
1515
private import experimental.semmle.python.Frameworks
1616

17+
/**
18+
* Both the standard and the experimental `Concepts` libraries define an XML module,
19+
* so the experimental one is named `ExperimentalXML` to tell the two apart.
20+
*/
21+
module ExperimentalXML {
22+
/**
23+
* A kind of XML vulnerability.
24+
*
25+
* See https://pypi.org/project/defusedxml/#python-xml-libraries
26+
*/
27+
class XMLVulnerabilityKind extends string {
28+
XMLVulnerabilityKind() {
29+
this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"]
30+
}
31+
32+
/** Holds for Billion Laughs vulnerability kind. */
33+
predicate isBillionLaughs() { this = "Billion Laughs" }
34+
35+
/** Holds for Quadratic Blowup vulnerability kind. */
36+
predicate isQuadraticBlowup() { this = "Quadratic Blowup" }
37+
38+
/** Holds for XXE vulnerability kind. */
39+
predicate isXxe() { this = "XXE" }
40+
41+
/** Holds for DTD retrieval vulnerability kind. */
42+
predicate isDtdRetrieval() { this = "DTD retrieval" }
43+
}
44+
45+
/**
46+
* A data-flow node that parses XML.
47+
*
48+
* Extend this class to refine existing API models. If you want to model new APIs,
49+
* extend `XMLParsing::Range` instead.
50+
*/
51+
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
52+
/**
53+
* Gets the argument containing the content to parse.
54+
*/
55+
DataFlow::Node getAnInput() { result = super.getAnInput() }
56+
57+
/**
58+
* Holds if this XML parsing is vulnerable to `kind`.
59+
*/
60+
predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) }
61+
}
62+
63+
/** Provides classes for modeling XML parsing APIs. */
64+
module XMLParsing {
65+
/**
66+
* A data-flow node that parses XML.
67+
*
68+
* Extend this class to model new APIs. If you want to refine existing API models,
69+
* extend `XMLParsing` instead.
70+
*/
71+
abstract class Range extends DataFlow::Node {
72+
/**
73+
* Gets the argument containing the content to parse.
74+
*/
75+
abstract DataFlow::Node getAnInput();
76+
77+
/**
78+
* Holds if this XML parsing is vulnerable to `kind`.
79+
*/
80+
abstract predicate vulnerableTo(XMLVulnerabilityKind kind);
81+
}
82+
}
83+
}
84+
1785
/** Provides classes for modeling LDAP query execution-related APIs. */
1886
module LdapQuery {
1987
/**

python/ql/src/experimental/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44

55
private import experimental.semmle.python.frameworks.Stdlib
6+
private import experimental.semmle.python.frameworks.Xml
67
private import experimental.semmle.python.frameworks.Flask
78
private import experimental.semmle.python.frameworks.Django
89
private import experimental.semmle.python.frameworks.Werkzeug

0 commit comments

Comments
 (0)