Skip to content

Commit b79d085

Browse files
authored
Merge pull request #8293 from aibaars/regex-pattern-source
Ruby: parse more string literals as regular expressions
2 parents 3fc2f2f + 22b0697 commit b79d085

File tree

6 files changed

+141
-7
lines changed

6 files changed

+141
-7
lines changed

.github/workflows/ruby-qltest.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ jobs:
6363
qltest:
6464
runs-on: ubuntu-latest
6565
strategy:
66+
fail-fast: false
6667
matrix:
6768
slice: ["1/2", "2/2"]
6869
steps:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The `Regex` class is now an abstract class that extends `StringlikeLiteral` with implementations for `RegExpLiteral` and string literals that 'flow' into functions that are known to interpret string arguments as regular expressions such as `Regex.new` and `String.match`.

ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,32 @@
77

88
private import codeql.ruby.ast.Literal as AST
99
private import codeql.Locations
10+
private import codeql.ruby.DataFlow
11+
private import codeql.ruby.TaintTracking
12+
private import codeql.ruby.typetracking.TypeTracker
13+
private import codeql.ruby.ApiGraphs
14+
15+
/**
16+
* A `StringlikeLiteral` containing a regular expression term, that is, either
17+
* a regular expression literal, or a string literal used in a context where
18+
* it is parsed as regular expression.
19+
*/
20+
abstract class RegExp extends AST::StringlikeLiteral {
21+
/**
22+
* Holds if this `RegExp` has the `s` flag for multi-line matching.
23+
*/
24+
predicate isDotAll() { none() }
25+
26+
/**
27+
* Holds if this `RegExp` has the `i` flag for case-insensitive matching.
28+
*/
29+
predicate isIgnoreCase() { none() }
30+
31+
/**
32+
* Gets the flags for this `RegExp`, or the empty string if it has no flags.
33+
*/
34+
string getFlags() { result = "" }
1035

11-
class RegExp extends AST::RegExpLiteral {
1236
/**
1337
* Helper predicate for `charSetStart(int start, int end)`.
1438
*
@@ -933,3 +957,63 @@ class RegExp extends AST::RegExpLiteral {
933957
this.lastPart(start, end)
934958
}
935959
}
960+
961+
private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
962+
override predicate isDotAll() { this.hasMultilineFlag() }
963+
964+
override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
965+
966+
override string getFlags() { result = this.getFlagString() }
967+
}
968+
969+
private class ParsedStringRegExp extends RegExp {
970+
private DataFlow::Node parse;
971+
972+
ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
973+
974+
DataFlow::Node getAParse() { result = parse }
975+
976+
override predicate isDotAll() { none() }
977+
978+
override predicate isIgnoreCase() { none() }
979+
980+
override string getFlags() { none() }
981+
}
982+
983+
/**
984+
* Holds if `source` may be interpreted as a regular expression.
985+
*/
986+
cached
987+
private predicate isInterpretedAsRegExp(DataFlow::Node source) {
988+
// The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
989+
source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
990+
or
991+
// The argument of a call that coerces the argument to a regular expression.
992+
exists(DataFlow::CallNode mce |
993+
mce.getMethodName() = ["match", "match?"] and
994+
source = mce.getArgument(0)
995+
)
996+
}
997+
998+
/**
999+
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
1000+
* as a part of a regular expression.
1001+
*/
1002+
private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
1003+
t.start() and
1004+
re = result and
1005+
isInterpretedAsRegExp(result)
1006+
or
1007+
exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
1008+
t2 = t.smallstep(result, succ)
1009+
or
1010+
TaintTracking::localTaintStep(result, succ) and
1011+
t = t2
1012+
)
1013+
}
1014+
1015+
/**
1016+
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
1017+
* as a part of a regular expression.
1018+
*/
1019+
DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, TypeBackTracker::end()) }

ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
private import codeql.ruby.ast.Literal as AST
2-
private import codeql.Locations
32
private import ParseRegExp
43
import codeql.Locations
4+
private import codeql.ruby.DataFlow
55

66
/**
77
* Holds if `term` is an ecape class representing e.g. `\d`.
@@ -27,7 +27,7 @@ predicate isEscapeClass(RegExpTerm term, string clazz) {
2727
* Holds if the regular expression should not be considered.
2828
*/
2929
predicate isExcluded(RegExpParent parent) {
30-
parent.(RegExpTerm).getRegExp().hasFreeSpacingFlag() // exclude free-spacing mode regexes
30+
parent.(RegExpTerm).getRegExp().(AST::RegExpLiteral).hasFreeSpacingFlag() // exclude free-spacing mode regexes
3131
}
3232

3333
/**
@@ -93,11 +93,11 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
9393

9494
override RegExpTerm getChild(int i) { i = 0 and result.getRegExp() = re and result.isRootTerm() }
9595

96-
predicate isDotAll() { re.hasMultilineFlag() }
96+
predicate isDotAll() { re.isDotAll() }
9797

98-
predicate isIgnoreCase() { re.hasCaseInsensitiveFlag() }
98+
predicate isIgnoreCase() { re.isIgnoreCase() }
9999

100-
string getFlags() { result = re.getFlagString() }
100+
string getFlags() { result = re.getFlags() }
101101

102102
override string getAPrimaryQlClass() { result = "RegExpLiteral" }
103103
}
@@ -795,3 +795,47 @@ class RegExpNamedCharacterProperty extends RegExpTerm, TRegExpNamedCharacterProp
795795
RegExpTerm getParsedRegExp(AST::RegExpLiteral re) {
796796
result.getRegExp() = re and result.isRootTerm()
797797
}
798+
799+
/**
800+
* A node whose value may flow to a position where it is interpreted
801+
* as a part of a regular expression.
802+
*/
803+
abstract class RegExpPatternSource extends DataFlow::Node {
804+
/**
805+
* Gets a node where the pattern of this node is parsed as a part of
806+
* a regular expression.
807+
*/
808+
abstract DataFlow::Node getAParse();
809+
810+
/**
811+
* Gets the root term of the regular expression parsed from this pattern.
812+
*/
813+
abstract RegExpTerm getRegExpTerm();
814+
}
815+
816+
/**
817+
* A regular expression literal, viewed as the pattern source for itself.
818+
*/
819+
private class RegExpLiteralPatternSource extends RegExpPatternSource {
820+
private AST::RegExpLiteral astNode;
821+
822+
RegExpLiteralPatternSource() { astNode = this.asExpr().getExpr() }
823+
824+
override DataFlow::Node getAParse() { result = this }
825+
826+
override RegExpTerm getRegExpTerm() { result = astNode.getParsed() }
827+
}
828+
829+
/**
830+
* A node whose string value may flow to a position where it is interpreted
831+
* as a part of a regular expression.
832+
*/
833+
private class StringRegExpPatternSource extends RegExpPatternSource {
834+
private DataFlow::Node parse;
835+
836+
StringRegExpPatternSource() { this = regExpSource(parse) }
837+
838+
override DataFlow::Node getAParse() { result = parse }
839+
840+
override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
841+
}

ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
| tst.rb:74:10:74:17 | (b\|a?b)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
2121
| tst.rb:77:10:77:17 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
2222
| tst.rb:83:10:83:16 | (.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
23+
| tst.rb:89:21:89:28 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
2324
| tst.rb:95:11:95:24 | ([\\S\\s]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
2425
| tst.rb:101:11:101:19 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
2526
| tst.rb:107:11:107:19 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |

ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/tst.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
# GOOD
8686
good8 = /([\w.]+)*/
8787

88-
# BAD - we don't yet parse regexps constructed from strings
88+
# NOT GOOD
8989
bad17 = Regexp.new '(a|aa?)*b'
9090

9191
# GOOD - not used as regexp

0 commit comments

Comments
 (0)