Skip to content

Commit 1437aef

Browse files
committed
Ruby: Use taint tracking instead of type tracking to define regExpSource
1 parent d97eaba commit 1437aef

File tree

2 files changed

+45
-19
lines changed

2 files changed

+45
-19
lines changed

ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,20 @@ module ExprNodes {
794794
final override VariableWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
795795
}
796796

797+
/** A control-flow node that wraps a `ConstantReadAccess` AST expression. */
798+
class ConstantReadAccessCfgNode extends ExprCfgNode {
799+
override ConstantReadAccess e;
800+
801+
final override ConstantReadAccess getExpr() { result = ExprCfgNode.super.getExpr() }
802+
}
803+
804+
/** A control-flow node that wraps a `ConstantWriteAccess` AST expression. */
805+
class ConstantWriteAccessCfgNode extends ExprCfgNode {
806+
override ConstantWriteAccess e;
807+
808+
final override ConstantWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
809+
}
810+
797811
/** A control-flow node that wraps a `InstanceVariableWriteAccess` AST expression. */
798812
class InstanceVariableWriteAccessCfgNode extends ExprCfgNode {
799813
override InstanceVariableWriteAccess e;

ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
private import codeql.ruby.ast.Literal as AST
99
private import codeql.Locations
1010
private import codeql.ruby.DataFlow
11-
private import codeql.ruby.TaintTracking
12-
private import codeql.ruby.typetracking.TypeTracker
11+
private import codeql.ruby.controlflow.CfgNodes
1312
private import codeql.ruby.ApiGraphs
13+
private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
1414

1515
/**
1616
* A `StringlikeLiteral` containing a regular expression term, that is, either
@@ -992,30 +992,42 @@ private predicate isInterpretedAsRegExp(DataFlow::Node source) {
992992
// The argument of a call that coerces the argument to a regular expression.
993993
exists(DataFlow::CallNode mce |
994994
mce.getMethodName() = ["match", "match?"] and
995-
source = mce.getArgument(0)
995+
source = mce.getArgument(0) and
996+
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
997+
not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
996998
)
997999
}
9981000

999-
/**
1000-
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
1001-
* as a part of a regular expression.
1002-
*/
1003-
private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
1004-
t.start() and
1005-
re = result and
1006-
isInterpretedAsRegExp(result)
1007-
or
1008-
exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
1009-
t2 = t.smallstep(result, succ)
1010-
or
1011-
TaintTracking::localTaintStep(result, succ) and
1012-
t = t2
1013-
)
1001+
private class RegExpConfiguration extends Configuration {
1002+
RegExpConfiguration() { this = "RegExpConfiguration" }
1003+
1004+
override predicate isSource(DataFlow::Node source) {
1005+
source.asExpr() =
1006+
any(ExprCfgNode e |
1007+
e.getConstantValue().isString(_) and
1008+
not e instanceof ExprNodes::VariableReadAccessCfgNode and
1009+
not e instanceof ExprNodes::ConstantReadAccessCfgNode
1010+
)
1011+
}
1012+
1013+
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
1014+
1015+
override predicate isSanitizer(DataFlow::Node node) {
1016+
// stop flow if `node` is receiver of
1017+
// https://ruby-doc.org/core-2.4.0/String.html#method-i-match
1018+
exists(DataFlow::CallNode mce |
1019+
mce.getMethodName() = ["match", "match?"] and
1020+
node = mce.getReceiver() and
1021+
mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
1022+
)
1023+
}
10141024
}
10151025

10161026
/**
10171027
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
10181028
* as a part of a regular expression.
10191029
*/
10201030
cached
1021-
DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, TypeBackTracker::end()) }
1031+
DataFlow::Node regExpSource(DataFlow::Node re) {
1032+
exists(RegExpConfiguration c | c.hasFlow(result, re))
1033+
}

0 commit comments

Comments
 (0)