Skip to content

Commit 1240c11

Browse files
committed
Ruby: parse some string literals as regex
In addition to regex literals, also parse normal string literals as regular expressions if they somehow "flow" into a method call that is known to interpret string values as regular expressions.
1 parent 5ce6b84 commit 1240c11

File tree

2 files changed

+89
-6
lines changed

2 files changed

+89
-6
lines changed

ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,32 @@
77

88
private import codeql.ruby.ast.Literal as AST
99
private import codeql.Locations
10+
private import codeql.ruby.DataFlow
11+
private import codeql.ruby.TaintTracking
12+
private import codeql.ruby.typetracking.TypeTracker
13+
private import codeql.ruby.ApiGraphs
14+
15+
/**
16+
* A `StringlikeLiteral` containing a regular expression term, that is, either
17+
* a regular expression literal, a string literal used in a context where
18+
* it is parsed as regular expression.
19+
*/
20+
abstract class RegExp extends AST::StringlikeLiteral {
21+
/**
22+
* Holds if this `RegExp` has the `s` flag for multi-line matching.
23+
*/
24+
predicate isDotAll() { none() }
25+
26+
/**
27+
* Holds if this `RegExp` has the `i` flag for case-insensitive matching.
28+
*/
29+
predicate isIgnoreCase() { none() }
30+
31+
/**
32+
* Gets the flags for this `RegExp`, or the empty string if it has no flags.
33+
*/
34+
string getFlags() { result = "" }
1035

11-
class RegExp extends AST::RegExpLiteral {
1236
/**
1337
* Helper predicate for `charSetStart(int start, int end)`.
1438
*
@@ -933,3 +957,63 @@ class RegExp extends AST::RegExpLiteral {
933957
this.lastPart(start, end)
934958
}
935959
}
960+
961+
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
962+
override predicate isDotAll() { this.hasMultilineFlag() }
963+
964+
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
965+
966+
override string getFlags() { result = this.getFlagString() }
967+
}
968+
969+
private class ParsedStringRegExp extends RegExp {
970+
private DataFlow::Node parse;
971+
972+
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
973+
974+
DataFlow::Node getAParse() { result = parse }
975+
976+
override predicate isDotAll() { none() }
977+
978+
override predicate isIgnoreCase() { none() }
979+
980+
override string getFlags() { none() }
981+
}
982+
983+
/**
984+
* Holds if `source` may be interpreted as a regular expression.
985+
*/
986+
cached
987+
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
988+
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
989+
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
990+
or
991+
// The argument of a call that coerces the argument to a regular expression.
992+
exists(DataFlow::CallNode mce |
993+
mce.getMethodName() = ["match", "match?"] and
994+
source = mce.getArgument(0)
995+
)
996+
}
997+
998+
/**
999+
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
1000+
* as a part of a regular expression.
1001+
*/
1002+
private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
1003+
t.start() and
1004+
re = result and
1005+
isInterpretedAsRegExp(result)
1006+
or
1007+
exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
1008+
t2 = t.smallstep(result, succ)
1009+
or
1010+
TaintTracking::localTaintStep(result, succ) and
1011+
t = t2
1012+
)
1013+
}
1014+
1015+
/**
1016+
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
1017+
* as a part of a regular expression.
1018+
*/
1019+
DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, TypeBackTracker::end()) }

ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
private import codeql.ruby.ast.Literal as AST
22
private import codeql.Locations
33
private import ParseRegExp
4-
import codeql.Locations
54

65
/**
76
* Holds if `term` is an ecape class representing e.g. `\d`.
@@ -27,7 +26,7 @@ predicate isEscapeClass(RegExpTerm term, string clazz) {
2726
* Holds if the regular expression should not be considered.
2827
*/
2928
predicate isExcluded(RegExpParent parent) {
30-
parent.(RegExpTerm).getRegExp().hasFreeSpacingFlag() // exclude free-spacing mode regexes
29+
parent.(RegExpTerm).getRegExp().(AST::RegExpLiteral).hasFreeSpacingFlag() // exclude free-spacing mode regexes
3130
}
3231

3332
/**
@@ -93,11 +92,11 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
9392

9493
override RegExpTerm getChild(int i) { i = 0 and result.getRegExp() = re and result.isRootTerm() }
9594

96-
predicate isDotAll() { re.hasMultilineFlag() }
95+
predicate isDotAll() { re.isDotAll() }
9796

98-
predicate isIgnoreCase() { re.hasCaseInsensitiveFlag() }
97+
predicate isIgnoreCase() { re.isIgnoreCase() }
9998

100-
string getFlags() { result = re.getFlagString() }
99+
string getFlags() { result = re.getFlags() }
101100

102101
override string getAPrimaryQlClass() { result = "RegExpLiteral" }
103102
}

0 commit comments

Comments
 (0)