Skip to content

Commit 3f48916

Browse files
authored
Merge pull request #7915 from yoff/python/promote-xpath-injection
Python: promote XPath injection query
2 parents c514282 + d0a393e commit 3f48916

File tree

28 files changed

+485
-202
lines changed

28 files changed

+485
-202
lines changed

docs/codeql/support/reusables/frameworks.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,5 @@ Python built-in support
206206
pycryptodomex, Cryptography library
207207
rsa, Cryptography library
208208
MarkupSafe, Escaping Library
209+
libxml2, XML processing library
210+
lxml, XML processing library

python/ql/lib/semmle/python/Concepts.qll

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ module CodeExecution {
334334

335335
/**
336336
* A data-flow node that constructs an SQL statement.
337+
*
337338
* Often, it is worthy of an alert if an SQL statement is constructed such that
338339
* executing it would be a security risk.
339340
*
@@ -355,11 +356,14 @@ class SqlConstruction extends DataFlow::Node {
355356
module SqlConstruction {
356357
/**
357358
* A data-flow node that constructs an SQL statement.
359+
*
358360
* Often, it is worthy of an alert if an SQL statement is constructed such that
359361
* executing it would be a security risk.
360362
*
363+
* If it is important that the SQL statement is indeed executed, then use `SqlExecution`.
364+
*
361365
* Extend this class to model new APIs. If you want to refine existing API models,
362-
* extend `SqlExecution` instead.
366+
* extend `SqlConstruction` instead.
363367
*/
364368
abstract class Range extends DataFlow::Node {
365369
/** Gets the argument that specifies the SQL statements to be constructed. */
@@ -449,6 +453,105 @@ module RegexExecution {
449453
}
450454
}
451455

456+
/** Provides classes for modeling XML-related APIs. */
457+
module XML {
458+
/**
459+
* A data-flow node that constructs an XPath expression.
460+
*
461+
* Often, it is worthy of an alert if an XPath expression is constructed such that
462+
* executing it would be a security risk.
463+
*
464+
* If it is important that the XPath expression is indeed executed, then use `XPathExecution`.
465+
*
466+
* Extend this class to refine existing API models. If you want to model new APIs,
467+
* extend `XPathConstruction::Range` instead.
468+
*/
469+
class XPathConstruction extends DataFlow::Node {
470+
XPathConstruction::Range range;
471+
472+
XPathConstruction() { this = range }
473+
474+
/** Gets the argument that specifies the XPath expressions to be constructed. */
475+
DataFlow::Node getXPath() { result = range.getXPath() }
476+
477+
/**
478+
* Gets the name of this XPath expression construction, typically the name of an executing method.
479+
* This is used for nice alert messages and should include the module if possible.
480+
*/
481+
string getName() { result = range.getName() }
482+
}
483+
484+
/** Provides a class for modeling new XPath construction APIs. */
485+
module XPathConstruction {
486+
/**
487+
* A data-flow node that constructs an XPath expression.
488+
*
489+
* Often, it is worthy of an alert if an XPath expression is constructed such that
490+
* executing it would be a security risk.
491+
*
492+
* Extend this class to model new APIs. If you want to refine existing API models,
493+
* extend `XPathConstruction` instead.
494+
*/
495+
abstract class Range extends DataFlow::Node {
496+
/** Gets the argument that specifies the XPath expressions to be constructed. */
497+
abstract DataFlow::Node getXPath();
498+
499+
/**
500+
* Gets the name of this XPath expression construction, typically the name of an executing method.
501+
* This is used for nice alert messages and should include the module if possible.
502+
*/
503+
abstract string getName();
504+
}
505+
}
506+
507+
/**
508+
* A data-flow node that executes an XPath expression.
509+
*
510+
* If the context of interest is such that merely constructing an XPath expression
511+
* would be valuable to report, then consider using `XPathConstruction`.
512+
*
513+
* Extend this class to refine existing API models. If you want to model new APIs,
514+
* extend `XPathExecution::Range` instead.
515+
*/
516+
class XPathExecution extends DataFlow::Node {
517+
XPathExecution::Range range;
518+
519+
XPathExecution() { this = range }
520+
521+
/** Gets the data flow node for the XPath expression being executed by this node. */
522+
DataFlow::Node getXPath() { result = range.getXPath() }
523+
524+
/**
525+
* Gets the name of this XPath expression execution, typically the name of an executing method.
526+
* This is used for nice alert messages and should include the module if possible.
527+
*/
528+
string getName() { result = range.getName() }
529+
}
530+
531+
/** Provides a class for modeling new XPath execution APIs. */
532+
module XPathExecution {
533+
/**
534+
* A data-flow node that executes an XPath expression.
535+
*
536+
* If the context of interest is such that merely constructing an XPath expression
537+
* would be valuable to report, then consider using `XPathConstruction`.
538+
*
539+
* Extend this class to model new APIs. If you want to refine existing API models,
540+
* extend `XPathExecution` instead.
541+
*/
542+
abstract class Range extends DataFlow::Node {
543+
/** Gets the data flow node for the XPath expression being executed by this node. */
544+
abstract DataFlow::Node getXPath();
545+
546+
/**
547+
* Gets the name of this XPath expression execution, typically the name of an executing method.
548+
* This is used for nice alert messages and should include the module if possible.
549+
*/
550+
abstract string getName();
551+
}
552+
}
553+
}
554+
452555
/** Provides classes for modeling LDAP-related APIs. */
453556
module LDAP {
454557
/**

python/ql/lib/semmle/python/Frameworks.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ private import semmle.python.frameworks.Invoke
2424
private import semmle.python.frameworks.Jmespath
2525
private import semmle.python.frameworks.Ldap
2626
private import semmle.python.frameworks.Ldap3
27+
private import semmle.python.frameworks.Libxml2
28+
private import semmle.python.frameworks.Lxml
2729
private import semmle.python.frameworks.MarkupSafe
2830
private import semmle.python.frameworks.Multidict
2931
private import semmle.python.frameworks.Mysql
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `libxml2` PyPI package.
3+
*
4+
* See
5+
* - https://pypi.org/project/libxml2-python3/
6+
* - http://xmlsoft.org/python.html
7+
*/
8+
9+
private import python
10+
private import semmle.python.dataflow.new.DataFlow
11+
private import semmle.python.Concepts
12+
private import semmle.python.ApiGraphs
13+
14+
/**
15+
* Provides classes modeling security-relevant aspects of the `libxml2` PyPI package.
16+
*
17+
* See
18+
* - https://pypi.org/project/libxml2-python3/
19+
* - http://xmlsoft.org/python.html
20+
*/
21+
private module Libxml2 {
22+
/**
23+
* A call to the `xpathEval` method of a parsed document.
24+
*
25+
* import libxml2
26+
* tree = libxml2.parseFile("file.xml")
27+
* r = tree.xpathEval('`sink`')
28+
*
29+
* See http://xmlsoft.org/python.html
30+
*/
31+
class XpathEvalCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
32+
XpathEvalCall() {
33+
this =
34+
API::moduleImport("libxml2")
35+
.getMember("parseFile")
36+
.getReturn()
37+
.getMember("xpathEval")
38+
.getACall()
39+
}
40+
41+
override DataFlow::Node getXPath() { result = this.getArg(0) }
42+
43+
override string getName() { result = "libxml2" }
44+
}
45+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `lxml` PyPI package.
3+
*
4+
* See
5+
* - https://pypi.org/project/lxml/
6+
* - https://lxml.de/tutorial.html
7+
*/
8+
9+
private import python
10+
private import semmle.python.dataflow.new.DataFlow
11+
private import semmle.python.Concepts
12+
private import semmle.python.ApiGraphs
13+
14+
/**
15+
* Provides classes modeling security-relevant aspects of the `lxml` PyPI package.
16+
*
17+
* See
18+
* - https://pypi.org/project/lxml/
19+
* - https://lxml.de/tutorial.html
20+
*/
21+
private module Lxml {
22+
/**
23+
* A class constructor compiling an XPath expression.
24+
*
25+
* from lxml import etree
26+
* find_text = etree.XPath("`sink`")
27+
* find_text = etree.ETXPath("`sink`")
28+
*
29+
* See
30+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XPath
31+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ETXPath
32+
*/
33+
private class XPathClassCall extends XML::XPathConstruction::Range, DataFlow::CallCfgNode {
34+
XPathClassCall() {
35+
this = API::moduleImport("lxml").getMember("etree").getMember(["XPath", "ETXPath"]).getACall()
36+
}
37+
38+
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("path")] }
39+
40+
override string getName() { result = "lxml.etree" }
41+
}
42+
43+
/**
44+
* A call to the `xpath` method of a parsed document.
45+
*
46+
* from lxml import etree
47+
* root = etree.fromstring(file(XML_DB).read(), XMLParser())
48+
* find_text = root.xpath("`sink`")
49+
*
50+
* See https://lxml.de/apidoc/lxml.etree.html#lxml.etree._ElementTree.xpath
51+
* as well as
52+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.parse
53+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.fromstring
54+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.fromstringlist
55+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.HTML
56+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XML
57+
*/
58+
class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
59+
XPathCall() {
60+
this =
61+
API::moduleImport("lxml")
62+
.getMember("etree")
63+
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
64+
.getReturn()
65+
.getMember("xpath")
66+
.getACall()
67+
}
68+
69+
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("_path")] }
70+
71+
override string getName() { result = "lxml.etree" }
72+
}
73+
74+
class XPathEvaluatorCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
75+
XPathEvaluatorCall() {
76+
this =
77+
API::moduleImport("lxml")
78+
.getMember("etree")
79+
.getMember("XPathEvaluator")
80+
.getReturn()
81+
.getACall()
82+
}
83+
84+
override DataFlow::Node getXPath() { result = this.getArg(0) }
85+
86+
override string getName() { result = "lxml.etree" }
87+
}
88+
}

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2836,6 +2836,70 @@ private module StdlibPrivate {
28362836
override string getKind() { result = Escaping::getRegexKind() }
28372837
}
28382838

2839+
// ---------------------------------------------------------------------------
2840+
// xml.etree.ElementTree
2841+
// ---------------------------------------------------------------------------
2842+
/**
2843+
* An instance of `xml.etree.ElementTree.ElementTree`.
2844+
*
2845+
* See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree
2846+
*/
2847+
private API::Node elementTreeInstance() {
2848+
// parse to a tree
2849+
result =
2850+
API::moduleImport("xml")
2851+
.getMember("etree")
2852+
.getMember("ElementTree")
2853+
.getMember("parse")
2854+
.getReturn()
2855+
or
2856+
// construct a tree without parsing
2857+
result =
2858+
API::moduleImport("xml")
2859+
.getMember("etree")
2860+
.getMember("ElementTree")
2861+
.getMember("ElementTree")
2862+
.getReturn()
2863+
}
2864+
2865+
/**
2866+
* An instance of `xml.etree.ElementTree.Element`.
2867+
*
2868+
* See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
2869+
*/
2870+
private API::Node elementInstance() {
2871+
// parse or go to the root of a tree
2872+
result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn()
2873+
or
2874+
// parse directly to an element
2875+
result =
2876+
API::moduleImport("xml")
2877+
.getMember("etree")
2878+
.getMember("ElementTree")
2879+
.getMember(["fromstring", "fromstringlist", "XML"])
2880+
.getReturn()
2881+
}
2882+
2883+
/**
2884+
* A call to a find method on a tree or an element will execute an XPath expression.
2885+
*/
2886+
private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
2887+
string methodName;
2888+
2889+
ElementTreeFindCall() {
2890+
methodName in ["find", "findall", "findtext"] and
2891+
(
2892+
this = elementTreeInstance().getMember(methodName).getACall()
2893+
or
2894+
this = elementInstance().getMember(methodName).getACall()
2895+
)
2896+
}
2897+
2898+
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("match")] }
2899+
2900+
override string getName() { result = "xml.etree" }
2901+
}
2902+
28392903
// ---------------------------------------------------------------------------
28402904
// urllib
28412905
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)