Skip to content

Commit 8ab5617

Browse files
authored
Merge pull request #10539 from yoff/python/improve-API-graphs
Python: add subscript to API graphs
2 parents 7ffbc73 + a11948b commit 8ab5617

File tree

7 files changed

+120
-146
lines changed

7 files changed

+120
-146
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added the ability to refer to subscript operations in the API graph. It is now possible to write `response().getMember("cookies").getASubscript()` to find code like `resp.cookies["key"]` (assuming `response` returns an API node for reponse objects).

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,12 @@ module API {
243243
*/
244244
Node getAwaited() { result = this.getASuccessor(Label::await()) }
245245

246+
/**
247+
* Gets a node representing a subscript of this node.
248+
* For example `obj[x]` is a subscript of `obj`.
249+
*/
250+
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
251+
246252
/**
247253
* Gets a string representation of the lexicographically least among all shortest access paths
248254
* from the root to this node.
@@ -570,8 +576,6 @@ module API {
570576
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
571577
*/
572578

573-
private import semmle.python.internal.Awaited
574-
575579
cached
576580
newtype TApiNode =
577581
/** The root of the API graph. */
@@ -747,6 +751,14 @@ module API {
747751
lbl = Label::return() and
748752
ref = pred.getACall()
749753
or
754+
// Awaiting a node that is a use of `base`
755+
lbl = Label::await() and
756+
ref = pred.getAnAwaited()
757+
or
758+
// Subscripting a node that is a use of `base`
759+
lbl = Label::subscript() and
760+
ref = pred.getASubscript()
761+
or
750762
// Subclassing a node
751763
lbl = Label::subclass() and
752764
exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) |
@@ -760,13 +772,6 @@ module API {
760772
ref.(DataFlow::ExprNode).getNode().getNode() = clsExpr.getADecoratorCall()
761773
)
762774
)
763-
or
764-
// awaiting
765-
exists(DataFlow::Node awaitedValue |
766-
lbl = Label::await() and
767-
ref = awaited(awaitedValue) and
768-
pred.flowsTo(awaitedValue)
769-
)
770775
)
771776
or
772777
exists(DataFlow::Node def, PY::CallableExpr fn |
@@ -986,6 +991,7 @@ module API {
986991
MkLabelReturn() or
987992
MkLabelSubclass() or
988993
MkLabelAwait() or
994+
MkLabelSubscript() or
989995
MkLabelEntryPoint(EntryPoint ep)
990996

991997
/** A label for a module. */
@@ -1061,6 +1067,11 @@ module API {
10611067
override string toString() { result = "getAwaited()" }
10621068
}
10631069

1070+
/** A label that gets the subscript of a sequence/mapping. */
1071+
class LabelSubscript extends ApiLabel, MkLabelSubscript {
1072+
override string toString() { result = "getASubscript()" }
1073+
}
1074+
10641075
/** A label for entry points. */
10651076
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
10661077
private EntryPoint entry;
@@ -1106,6 +1117,9 @@ module API {
11061117
/** Gets the `await` edge label. */
11071118
LabelAwait await() { any() }
11081119

1120+
/** Gets the `subscript` edge label. */
1121+
LabelSubscript subscript() { any() }
1122+
11091123
/** Gets the label going from the root node to the nodes associated with the given entry point. */
11101124
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
11111125
}

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ private import python
1010
import DataFlowPublic
1111
private import DataFlowPrivate
1212
private import semmle.python.internal.CachedStages
13+
private import semmle.python.internal.Awaited
1314

1415
/**
1516
* A data flow node that is a source of local flow. This includes things like
@@ -95,6 +96,16 @@ class LocalSourceNode extends Node {
9596
*/
9697
CallCfgNode getACall() { Cached::call(this, result) }
9798

99+
/**
100+
* Gets an awaited value from this node.
101+
*/
102+
Node getAnAwaited() { Cached::await(this, result) }
103+
104+
/**
105+
* Gets a subscript of this node.
106+
*/
107+
Node getASubscript() { Cached::subscript(this, result) }
108+
98109
/**
99110
* Gets a call to the method `methodName` on this node.
100111
*
@@ -225,4 +236,26 @@ private module Cached {
225236
n = call.getFunction()
226237
)
227238
}
239+
240+
/**
241+
* Holds if `node` flows to a value that, when awaited, results in `awaited`.
242+
*/
243+
cached
244+
predicate await(LocalSourceNode node, Node awaited) {
245+
exists(Node awaitedValue |
246+
node.flowsTo(awaitedValue) and
247+
awaited = awaited(awaitedValue)
248+
)
249+
}
250+
251+
/**
252+
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript.
253+
*/
254+
cached
255+
predicate subscript(LocalSourceNode node, CfgNode subscript) {
256+
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
257+
node.flowsTo(seq) and
258+
seq.getNode() = subscriptNode.getObject()
259+
)
260+
}
228261
}

python/ql/lib/semmle/python/frameworks/Flask.qll

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -424,22 +424,20 @@ module Flask {
424424
}
425425
}
426426

427+
private API::Node requestFileStorage() {
428+
// TODO: This approach for identifying member-access is very adhoc, and we should
429+
// be able to do something more structured for providing modeling of the members
430+
// of a container-object.
431+
result = request().getMember("files").getASubscript()
432+
or
433+
result = request().getMember("files").getMember("get").getReturn()
434+
or
435+
result = request().getMember("files").getMember("getlist").getReturn().getASubscript()
436+
}
437+
427438
/** An `FileStorage` instance that originates from a flask request. */
428439
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
429-
FlaskRequestFileStorageInstances() {
430-
// TODO: This approach for identifying member-access is very adhoc, and we should
431-
// be able to do something more structured for providing modeling of the members
432-
// of a container-object.
433-
exists(API::Node files | files = request().getMember("files") |
434-
this.asCfgNode().(SubscriptNode).getObject() =
435-
files.getAValueReachableFromSource().asCfgNode()
436-
or
437-
this = files.getMember("get").getACall()
438-
or
439-
this.asCfgNode().(SubscriptNode).getObject() =
440-
files.getMember("getlist").getReturn().getAValueReachableFromSource().asCfgNode()
441-
)
442-
}
440+
FlaskRequestFileStorageInstances() { this = requestFileStorage().asSource() }
443441
}
444442

445443
/** An `Headers` instance that originates from a flask request. */

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1725,39 +1725,21 @@ private module StdlibPrivate {
17251725
API::Node getlistResult() { result = getlistRef().getReturn() }
17261726

17271727
/** Gets a reference to a list of fields. */
1728-
private DataFlow::TypeTrackingNode fieldList(DataFlow::TypeTracker t) {
1729-
t.start() and
1730-
// TODO: Should have better handling of subscripting
1731-
result.asCfgNode().(SubscriptNode).getObject() =
1732-
instance().getAValueReachableFromSource().asCfgNode()
1728+
API::Node fieldList() {
1729+
result = getlistResult()
17331730
or
1734-
exists(DataFlow::TypeTracker t2 | result = fieldList(t2).track(t2, t))
1735-
}
1736-
1737-
/** Gets a reference to a list of fields. */
1738-
DataFlow::Node fieldList() {
1739-
result = getlistResult().getAValueReachableFromSource() or
1740-
result = getvalueResult().getAValueReachableFromSource() or
1741-
fieldList(DataFlow::TypeTracker::end()).flowsTo(result)
1742-
}
1743-
1744-
/** Gets a reference to a field. */
1745-
private DataFlow::TypeTrackingNode field(DataFlow::TypeTracker t) {
1746-
t.start() and
1747-
// TODO: Should have better handling of subscripting
1748-
result.asCfgNode().(SubscriptNode).getObject() =
1749-
[instance().getAValueReachableFromSource(), fieldList()].asCfgNode()
1731+
result = getvalueResult()
17501732
or
1751-
exists(DataFlow::TypeTracker t2 | result = field(t2).track(t2, t))
1733+
result = instance().getASubscript()
17521734
}
17531735

17541736
/** Gets a reference to a field. */
1755-
DataFlow::Node field() {
1756-
result = getfirstResult().getAValueReachableFromSource()
1737+
API::Node field() {
1738+
result = getfirstResult()
17571739
or
1758-
result = getvalueResult().getAValueReachableFromSource()
1740+
result = getvalueResult()
17591741
or
1760-
field(DataFlow::TypeTracker::end()).flowsTo(result)
1742+
result = [instance(), fieldList()].getASubscript()
17611743
}
17621744

17631745
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
@@ -1780,11 +1762,13 @@ private module StdlibPrivate {
17801762
)
17811763
or
17821764
// Indexing
1783-
nodeFrom in [instance().getAValueReachableFromSource(), fieldList()] and
1765+
nodeFrom in [
1766+
instance().getAValueReachableFromSource(), fieldList().getAValueReachableFromSource()
1767+
] and
17841768
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
17851769
or
17861770
// Attributes on Field
1787-
nodeFrom = field() and
1771+
nodeFrom = field().getAValueReachableFromSource() and
17881772
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
17891773
read.getAttributeName() in ["value", "file", "filename"]
17901774
)

python/ql/src/experimental/semmle/python/frameworks/Django.qll

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -85,35 +85,20 @@ private module ExperimentalPrivateDjango {
8585
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
8686

8787
/** Gets a reference to a header instance. */
88-
private DataFlow::LocalSourceNode headerInstance(DataFlow::TypeTracker t) {
89-
t.start() and
90-
(
91-
exists(SubscriptNode subscript |
92-
subscript.getObject() =
93-
baseClassRef().getReturn().getAValueReachableFromSource().asCfgNode() and
94-
result.asCfgNode() = subscript
95-
)
96-
or
97-
result.(DataFlow::AttrRead).getObject() =
98-
baseClassRef().getReturn().getAValueReachableFromSource()
99-
)
88+
API::Node headerInstance() {
89+
result = baseClassRef().getReturn().getASubscript()
10090
or
101-
exists(DataFlow::TypeTracker t2 | result = headerInstance(t2).track(t2, t))
102-
}
103-
104-
/** Gets a reference to a header instance use. */
105-
private DataFlow::Node headerInstance() {
106-
headerInstance(DataFlow::TypeTracker::end()).flowsTo(result)
91+
result = baseClassRef().getReturn().getAMember()
10792
}
10893

10994
/** Gets a reference to a header instance call with `__setitem__`. */
110-
private DataFlow::Node headerSetItemCall() {
95+
API::Node headerSetItem() {
11196
result = headerInstance() and
112-
result.(DataFlow::AttrRead).getAttributeName() = "__setitem__"
97+
result.asSource().(DataFlow::AttrRead).getAttributeName() = "__setitem__"
11398
}
11499

115100
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
116-
DjangoResponseSetItemCall() { this.getFunction() = headerSetItemCall() }
101+
DjangoResponseSetItemCall() { this = headerSetItem().getACall() }
117102

118103
override DataFlow::Node getNameArg() { result = this.getArg(0) }
119104

@@ -124,7 +109,8 @@ private module ExperimentalPrivateDjango {
124109
DataFlow::Node headerInput;
125110

126111
DjangoResponseDefinition() {
127-
this.asCfgNode().(DefinitionNode) = headerInstance().asCfgNode() and
112+
this.asCfgNode().(DefinitionNode) =
113+
headerInstance().getAValueReachableFromSource().asCfgNode() and
128114
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
129115
}
130116

0 commit comments

Comments
 (0)