Merge pull request #9579 from yoff/python/more-logic-tests

RasmusWL · web-flow · commit 1737d08145d8 · 2022-08-01T11:36:11.000+02:00
Python: Improve `BarrierGuard`
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -527,16 +527,55 @@ class StarPatternElementNode extends Node, TStarPatternElementNode {
   override Location getLocation() { result = consumer.getLocation() }
 }
 
+/**
+ * Gets a node that controls whether other nodes are evaluated.
+ *
+ * In the base case, this is the last node of `conditionBlock`, and `flipped` is `false`.
+ * This definition accounts for (short circuting) `and`- and `or`-expressions, as the structure
+ * of basic blocks will reflect their semantics.
+ *
+ * However, in the program
+ * ```python
+ * if not is_safe(path):
+ *   return
+ * ```
+ * the last node in the `ConditionBlock` is `not is_safe(path)`.
+ *
+ * We would like to consider also `is_safe(path)` a guard node, albeit with `flipped` being `true`.
+ * Thus we recurse through `not`-expressions.
+ */
+ControlFlowNode guardNode(ConditionBlock conditionBlock, boolean flipped) {
+  // Base case: the last node truly does determine which successor is chosen
+  result = conditionBlock.getLastNode() and
+  flipped = false
+  or
+  // Recursive case: if a guard node is a `not`-expression,
+  // the operand is also a guard node, but with inverted polarity.
+  exists(UnaryExprNode notNode |
+    result = notNode.getOperand() and
+    notNode.getNode().getOp() instanceof Not
+  |
+    notNode = guardNode(conditionBlock, flipped.booleanNot())
+  )
+}
+
 /**
  * A node that controls whether other nodes are evaluated.
+ *
+ * The field `flipped` allows us to match `GuardNode`s underneath
+ * `not`-expressions and still choose the appropriate branch.
  */
 class GuardNode extends ControlFlowNode {
   ConditionBlock conditionBlock;
+  boolean flipped;
 
-  GuardNode() { this = conditionBlock.getLastNode() }
+  GuardNode() { this = guardNode(conditionBlock, flipped) }
 
   /** Holds if this guard controls block `b` upon evaluating to `branch`. */
-  predicate controlsBlock(BasicBlock b, boolean branch) { conditionBlock.controls(b, branch) }
+  predicate controlsBlock(BasicBlock b, boolean branch) {
+    branch in [true, false] and
+    conditionBlock.controls(b, branch.booleanXor(flipped))
+  }
 }
 
 /**
diff --git a/python/ql/test/experimental/dataflow/tainttracking/commonSanitizer/test_string_const_compare.py b/python/ql/test/experimental/dataflow/tainttracking/commonSanitizer/test_string_const_compare.py
@@ -50,7 +50,7 @@ def test_non_eq2():
     if not ts == "safe":
         ensure_tainted(ts) # $ tainted
     else:
-        ensure_not_tainted(ts) # $ SPURIOUS: tainted
+        ensure_not_tainted(ts)
 
 
 def test_in_list():
@@ -157,7 +157,7 @@ def test_not_in2():
     if not ts in ["safe", "also_safe"]:
         ensure_tainted(ts) # $ tainted
     else:
-        ensure_not_tainted(ts) # $ SPURIOUS: tainted
+        ensure_not_tainted(ts)
 
 
 def is_safe(x):
diff --git a/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/InlineTaintTest.expected b/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/InlineTaintTest.expected
@@ -6,12 +6,20 @@ isSanitizer
 | TestTaintTrackingConfiguration | test.py:34:39:34:39 | ControlFlowNode for s |
 | TestTaintTrackingConfiguration | test.py:52:28:52:28 | ControlFlowNode for s |
 | TestTaintTrackingConfiguration | test.py:66:10:66:29 | ControlFlowNode for emulated_escaping() |
-| TestTaintTrackingConfiguration | test_logical.py:30:28:30:28 | ControlFlowNode for s |
-| TestTaintTrackingConfiguration | test_logical.py:45:28:45:28 | ControlFlowNode for s |
-| TestTaintTrackingConfiguration | test_logical.py:50:28:50:28 | ControlFlowNode for s |
-| TestTaintTrackingConfiguration | test_logical.py:89:28:89:28 | ControlFlowNode for s |
-| TestTaintTrackingConfiguration | test_logical.py:100:28:100:28 | ControlFlowNode for s |
-| TestTaintTrackingConfiguration | test_logical.py:145:28:145:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:33:28:33:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:40:28:40:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:48:28:48:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:53:28:53:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:92:28:92:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:103:28:103:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:111:28:111:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:130:28:130:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:137:28:137:28 | ControlFlowNode for s |
 | TestTaintTrackingConfiguration | test_logical.py:148:28:148:28 | ControlFlowNode for s |
-| TestTaintTrackingConfiguration | test_logical.py:155:28:155:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:151:28:151:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:158:28:158:28 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:167:24:167:24 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:176:24:176:24 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:185:24:185:24 | ControlFlowNode for s |
+| TestTaintTrackingConfiguration | test_logical.py:193:24:193:24 | ControlFlowNode for s |
 | TestTaintTrackingConfiguration | test_reference.py:31:28:31:28 | ControlFlowNode for s |
diff --git a/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/InlineTaintTest.ql b/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/InlineTaintTest.ql
@@ -6,6 +6,12 @@ predicate isSafeCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branc
   branch = true
 }
 
+predicate isUnsafeCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
+  g.(CallNode).getNode().getFunc().(Name).getId() in ["is_unsafe", "emulated_is_unsafe"] and
+  node = g.(CallNode).getAnArg() and
+  branch = false
+}
+
 class CustomSanitizerOverrides extends TestTaintTrackingConfiguration {
   override predicate isSanitizer(DataFlow::Node node) {
     exists(Call call |
@@ -16,6 +22,8 @@ class CustomSanitizerOverrides extends TestTaintTrackingConfiguration {
     node.asExpr().(Call).getFunc().(Name).getId() = "emulated_escaping"
     or
     node = DataFlow::BarrierGuard<isSafeCheck/3>::getABarrierNode()
+    or
+    node = DataFlow::BarrierGuard<isUnsafeCheck/3>::getABarrierNode()
   }
 }
 
diff --git a/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/test_logical.py b/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/test_logical.py
@@ -22,6 +22,9 @@ def random_choice():
 def is_safe(arg):
     return arg == "safe"
 
+def is_unsafe(arg):
+    return arg == TAINTED_STRING
+
 
 def test_basic():
     s = TAINTED_STRING
@@ -34,7 +37,7 @@ def test_basic():
     if not is_safe(s):
         ensure_tainted(s) # $ tainted
     else:
-        ensure_not_tainted(s) # $ SPURIOUS: tainted
+        ensure_not_tainted(s)
 
 
 def test_if_in_depth():
@@ -105,7 +108,7 @@ def test_and():
         ensure_tainted(s) # $ tainted
     else:
         # cannot be tainted
-        ensure_not_tainted(s) # $ SPURIOUS: tainted
+        ensure_not_tainted(s)
 
 
 def test_tricky():
@@ -124,14 +127,14 @@ def test_nesting_not():
     s = TAINTED_STRING
 
     if not(not(is_safe(s))):
-        ensure_not_tainted(s) # $ SPURIOUS: tainted
+        ensure_not_tainted(s)
     else:
         ensure_tainted(s) # $ tainted
 
     if not(not(not(is_safe(s)))):
         ensure_tainted(s) # $ tainted
     else:
-        ensure_not_tainted(s) # $ SPURIOUS: tainted
+        ensure_not_tainted(s)
 
 
 # Adding `and True` makes the sanitizer trigger when it would otherwise not. See output in
@@ -161,7 +164,16 @@ def test_with_return():
     if not is_safe(s):
         return
 
-    ensure_not_tainted(s) # $ SPURIOUS: tainted
+    ensure_not_tainted(s)
+
+
+def test_with_return_neg():
+    s = TAINTED_STRING
+
+    if is_unsafe(s):
+        return
+
+    ensure_not_tainted(s)
 
 
 def test_with_exception():
@@ -170,7 +182,15 @@ def test_with_exception():
     if not is_safe(s):
         raise Exception("unsafe")
 
-    ensure_not_tainted(s) # $ SPURIOUS: tainted
+    ensure_not_tainted(s)
+
+def test_with_exception_neg():
+    s = TAINTED_STRING
+
+    if is_unsafe(s):
+        raise Exception("unsafe")
+
+    ensure_not_tainted(s)
 
 # Make tests runable
 
@@ -182,7 +202,12 @@ def test_with_exception():
 test_nesting_not()
 test_nesting_not_with_and_true()
 test_with_return()
+test_with_return_neg()
 try:
     test_with_exception()
 except:
     pass
+try:
+    test_with_exception_neg()
+except:
+    pass

Original file line number	Diff line number	Diff line change
`@@ -6,6 +6,12 @@ predicate isSafeCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branc`
`6`	`6`	`branch = true`
`7`	`7`	`}`
`8`	`8`
	`9`	`+predicate isUnsafeCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {`
	`10`	`+ g.(CallNode).getNode().getFunc().(Name).getId() in ["is_unsafe", "emulated_is_unsafe"] and`
	`11`	`+ node = g.(CallNode).getAnArg() and`
	`12`	`+ branch = false`
	`13`	`+}`
	`14`	`+`
`9`	`15`	`class CustomSanitizerOverrides extends TestTaintTrackingConfiguration {`
`10`	`16`	`override predicate isSanitizer(DataFlow::Node node) {`
`11`	`17`	`exists(Call call \|`
`@@ -16,6 +22,8 @@ class CustomSanitizerOverrides extends TestTaintTrackingConfiguration {`
`16`	`22`	`node.asExpr().(Call).getFunc().(Name).getId() = "emulated_escaping"`
`17`	`23`	`or`
`18`	`24`	`node = DataFlow::BarrierGuard<isSafeCheck/3>::getABarrierNode()`
	`25`	`+ or`
	`26`	`+ node = DataFlow::BarrierGuard<isUnsafeCheck/3>::getABarrierNode()`
`19`	`27`	`}`
`20`	`28`	`}`
`21`	`29`