Skip to content

Commit b9e96fb

Browse files
committed
sync changes to other languages
1 parent 97681ea commit b9e96fb

File tree

10 files changed

+45
-87
lines changed

10 files changed

+45
-87
lines changed

java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10271027
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
10281028
isReDoSCandidate(fork, w) and
10291029
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
1030-
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
1030+
not getProcessPrevious(fork, _, w) = acceptsAnySuffix() // we stop `process(..)` early if we can, check here if it happened.
10311031
}
10321032

10331033
/**
@@ -1284,7 +1284,7 @@ module Concretizer<CharTree Impl> {
12841284
private predicate isRelevant(Node n) {
12851285
isARelevantEnd(n)
12861286
or
1287-
exists(Node prev | isRelevant(prev) | n = getPrev(prev))
1287+
exists(Node succ | isRelevant(succ) | n = getPrev(succ))
12881288
}
12891289

12901290
/** Holds if `n` is a root with no predecessors. */
@@ -1299,19 +1299,15 @@ module Concretizer<CharTree Impl> {
12991299
}
13001300

13011301
/** Gets an ancestor of `end`, where `end` is a node that should have a result in `concretize`. */
1302-
private Node getANodeInLongChain(Node end) {
1303-
isARelevantEnd(end) and result = end
1304-
or
1305-
exists(Node prev | prev = getANodeInLongChain(end) | result = getPrev(prev))
1306-
}
1302+
private Node getAnAncestor(Node end) { isARelevantEnd(end) and result = getPrev*(end) }
13071303

13081304
/** Gets the `i`th character on the path from the root to `n`. */
13091305
pragma[noinline]
13101306
private string getPrefixChar(Node n, int i) {
1311-
exists(Node prev |
1312-
result = getChar(prev) and
1313-
prev = getANodeInLongChain(n) and
1314-
i = nodeDepth(prev)
1307+
exists(Node ancestor |
1308+
result = getChar(ancestor) and
1309+
ancestor = getAnAncestor(n) and
1310+
i = nodeDepth(ancestor)
13151311
)
13161312
}
13171313

java/ql/lib/semmle/code/java/security/regexp/SuperlinearBackTracking.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump
351351
/**
352352
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
353353
*/
354-
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
354+
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
355355
exists(State s, State pivot |
356356
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
357357
isPumpable(pivot, s, _) and
@@ -363,7 +363,7 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
363363
* Gets a message for why `term` can cause polynomial backtracking.
364364
*/
365365
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
366-
polynimalReDoS(term, pump, prefixMsg, prev) and
366+
polynomialReDoS(term, pump, prefixMsg, prev) and
367367
result =
368368
"Strings " + prefixMsg + "with many repetitions of '" + pump +
369369
"' can start matching anywhere after the start of the preceeding " + prev

python/ql/lib/semmle/python/security/BadTagFilterQuery.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ class HtmlMatchingRegExp extends RootTerm {
4949
}
5050
}
5151

52+
/** DEPRECATED: Alias for HtmlMatchingRegExp */
53+
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
54+
5255
/**
5356
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
5457
*

python/ql/lib/semmle/python/security/regexp/NfaUtils.qll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10271027
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
10281028
isReDoSCandidate(fork, w) and
10291029
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
1030-
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
1030+
not getProcessPrevious(fork, _, w) = acceptsAnySuffix() // we stop `process(..)` early if we can, check here if it happened.
10311031
}
10321032

10331033
/**
@@ -1284,7 +1284,7 @@ module Concretizer<CharTree Impl> {
12841284
private predicate isRelevant(Node n) {
12851285
isARelevantEnd(n)
12861286
or
1287-
exists(Node prev | isRelevant(prev) | n = getPrev(prev))
1287+
exists(Node succ | isRelevant(succ) | n = getPrev(succ))
12881288
}
12891289

12901290
/** Holds if `n` is a root with no predecessors. */
@@ -1299,19 +1299,15 @@ module Concretizer<CharTree Impl> {
12991299
}
13001300

13011301
/** Gets an ancestor of `end`, where `end` is a node that should have a result in `concretize`. */
1302-
private Node getANodeInLongChain(Node end) {
1303-
isARelevantEnd(end) and result = end
1304-
or
1305-
exists(Node prev | prev = getANodeInLongChain(end) | result = getPrev(prev))
1306-
}
1302+
private Node getAnAncestor(Node end) { isARelevantEnd(end) and result = getPrev*(end) }
13071303

13081304
/** Gets the `i`th character on the path from the root to `n`. */
13091305
pragma[noinline]
13101306
private string getPrefixChar(Node n, int i) {
1311-
exists(Node prev |
1312-
result = getChar(prev) and
1313-
prev = getANodeInLongChain(n) and
1314-
i = nodeDepth(prev)
1307+
exists(Node ancestor |
1308+
result = getChar(ancestor) and
1309+
ancestor = getAnAncestor(n) and
1310+
i = nodeDepth(ancestor)
13151311
)
13161312
}
13171313

python/ql/lib/semmle/python/security/regexp/RegexpMatching.qll

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,18 +28,10 @@ signature predicate isRegexpMatchingCandidateSig(
2828
* A module for determining if a regexp matches a given string,
2929
* and reasoning about which capture groups are filled by a given string.
3030
*
31-
* The module parameter `isRegexpMatchingCandidateSig` determines which string should be tested,
31+
* The module parameter `isCandidate` determines which strings should be tested,
3232
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
3333
*/
3434
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
35-
private predicate test(RootTerm reg, string str, boolean ignorePrefix) {
36-
isCandidate(reg, str, ignorePrefix, false)
37-
}
38-
39-
private predicate testWithGroups(RootTerm reg, string str, boolean ignorePrefix) {
40-
isCandidate(reg, str, ignorePrefix, true)
41-
}
42-
4335
/**
4436
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
4537
* The regular expression is modeled as a non-determistic finite automaton,
@@ -50,11 +42,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
5042
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
5143
// start state, the -1 position before any chars have been matched
5244
i = -1 and
53-
(
54-
test(reg, str, ignorePrefix)
55-
or
56-
testWithGroups(reg, str, ignorePrefix)
57-
) and
45+
isCandidate(reg, str, ignorePrefix, _) and
5846
result.getRepr().getRootTerm() = reg and
5947
isStartState(result)
6048
or
@@ -109,13 +97,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
10997
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
11098
pragma[noinline]
11199
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
112-
exists(string s, RootTerm r |
113-
test(r, s, _)
114-
or
115-
testWithGroups(r, s, _)
116-
|
117-
char = s.charAt(_)
118-
) and
100+
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
119101
result = getAnInputSymbolMatching(char)
120102
}
121103

@@ -124,7 +106,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
124106
* Starts with an accepting state as found by `getAState` and searches backwards
125107
* to the start state through the reachable states (as found by `getAState`).
126108
*
127-
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
109+
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
128110
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
129111
* The result state is therefore always on a valid path where `reg` accepts `str`.
130112
*
@@ -133,7 +115,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
133115
*/
134116
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
135117
// base case, reaches an accepting state from the last state in `getAState(..)`
136-
testWithGroups(reg, str, ignorePrefix) and
118+
isCandidate(reg, str, ignorePrefix, true) and
137119
i = str.length() - 1 and
138120
result = getAState(reg, i, str, ignorePrefix) and
139121
epsilonSucc*(result) = Accept(_)
@@ -154,7 +136,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
154136
}
155137

156138
/**
157-
* Holds if `reg` matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
139+
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
158140
*/
159141
predicate matches(RootTerm reg, string str) {
160142
exists(State state | state = getAState(reg, str.length() - 1, str, _) |

python/ql/lib/semmle/python/security/regexp/SuperlinearBackTracking.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump
351351
/**
352352
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
353353
*/
354-
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
354+
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
355355
exists(State s, State pivot |
356356
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
357357
isPumpable(pivot, s, _) and
@@ -363,7 +363,7 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
363363
* Gets a message for why `term` can cause polynomial backtracking.
364364
*/
365365
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
366-
polynimalReDoS(term, pump, prefixMsg, prev) and
366+
polynomialReDoS(term, pump, prefixMsg, prev) and
367367
result =
368368
"Strings " + prefixMsg + "with many repetitions of '" + pump +
369369
"' can start matching anywhere after the start of the preceeding " + prev

ruby/ql/lib/codeql/ruby/security/BadTagFilterQuery.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ class HtmlMatchingRegExp extends RootTerm {
4949
}
5050
}
5151

52+
/** DEPRECATED: Alias for HtmlMatchingRegExp */
53+
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
54+
5255
/**
5356
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
5457
*

ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10271027
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
10281028
isReDoSCandidate(fork, w) and
10291029
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
1030-
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
1030+
not getProcessPrevious(fork, _, w) = acceptsAnySuffix() // we stop `process(..)` early if we can, check here if it happened.
10311031
}
10321032

10331033
/**
@@ -1284,7 +1284,7 @@ module Concretizer<CharTree Impl> {
12841284
private predicate isRelevant(Node n) {
12851285
isARelevantEnd(n)
12861286
or
1287-
exists(Node prev | isRelevant(prev) | n = getPrev(prev))
1287+
exists(Node succ | isRelevant(succ) | n = getPrev(succ))
12881288
}
12891289

12901290
/** Holds if `n` is a root with no predecessors. */
@@ -1299,19 +1299,15 @@ module Concretizer<CharTree Impl> {
12991299
}
13001300

13011301
/** Gets an ancestor of `end`, where `end` is a node that should have a result in `concretize`. */
1302-
private Node getANodeInLongChain(Node end) {
1303-
isARelevantEnd(end) and result = end
1304-
or
1305-
exists(Node prev | prev = getANodeInLongChain(end) | result = getPrev(prev))
1306-
}
1302+
private Node getAnAncestor(Node end) { isARelevantEnd(end) and result = getPrev*(end) }
13071303

13081304
/** Gets the `i`th character on the path from the root to `n`. */
13091305
pragma[noinline]
13101306
private string getPrefixChar(Node n, int i) {
1311-
exists(Node prev |
1312-
result = getChar(prev) and
1313-
prev = getANodeInLongChain(n) and
1314-
i = nodeDepth(prev)
1307+
exists(Node ancestor |
1308+
result = getChar(ancestor) and
1309+
ancestor = getAnAncestor(n) and
1310+
i = nodeDepth(ancestor)
13151311
)
13161312
}
13171313

ruby/ql/lib/codeql/ruby/security/regexp/RegexpMatching.qll

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,18 +28,10 @@ signature predicate isRegexpMatchingCandidateSig(
2828
* A module for determining if a regexp matches a given string,
2929
* and reasoning about which capture groups are filled by a given string.
3030
*
31-
* The module parameter `isRegexpMatchingCandidateSig` determines which string should be tested,
31+
* The module parameter `isCandidate` determines which strings should be tested,
3232
* and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
3333
*/
3434
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
35-
private predicate test(RootTerm reg, string str, boolean ignorePrefix) {
36-
isCandidate(reg, str, ignorePrefix, false)
37-
}
38-
39-
private predicate testWithGroups(RootTerm reg, string str, boolean ignorePrefix) {
40-
isCandidate(reg, str, ignorePrefix, true)
41-
}
42-
4335
/**
4436
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
4537
* The regular expression is modeled as a non-determistic finite automaton,
@@ -50,11 +42,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
5042
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
5143
// start state, the -1 position before any chars have been matched
5244
i = -1 and
53-
(
54-
test(reg, str, ignorePrefix)
55-
or
56-
testWithGroups(reg, str, ignorePrefix)
57-
) and
45+
isCandidate(reg, str, ignorePrefix, _) and
5846
result.getRepr().getRootTerm() = reg and
5947
isStartState(result)
6048
or
@@ -109,13 +97,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
10997
// The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
11098
pragma[noinline]
11199
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
112-
exists(string s, RootTerm r |
113-
test(r, s, _)
114-
or
115-
testWithGroups(r, s, _)
116-
|
117-
char = s.charAt(_)
118-
) and
100+
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
119101
result = getAnInputSymbolMatching(char)
120102
}
121103

@@ -124,7 +106,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
124106
* Starts with an accepting state as found by `getAState` and searches backwards
125107
* to the start state through the reachable states (as found by `getAState`).
126108
*
127-
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
109+
* This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
128110
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
129111
* The result state is therefore always on a valid path where `reg` accepts `str`.
130112
*
@@ -133,7 +115,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
133115
*/
134116
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
135117
// base case, reaches an accepting state from the last state in `getAState(..)`
136-
testWithGroups(reg, str, ignorePrefix) and
118+
isCandidate(reg, str, ignorePrefix, true) and
137119
i = str.length() - 1 and
138120
result = getAState(reg, i, str, ignorePrefix) and
139121
epsilonSucc*(result) = Accept(_)
@@ -154,7 +136,7 @@ module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
154136
}
155137

156138
/**
157-
* Holds if `reg` matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
139+
* Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
158140
*/
159141
predicate matches(RootTerm reg, string str) {
160142
exists(State state | state = getAState(reg, str.length() - 1, str, _) |

ruby/ql/lib/codeql/ruby/security/regexp/SuperlinearBackTracking.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump
351351
/**
352352
* Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
353353
*/
354-
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
354+
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
355355
exists(State s, State pivot |
356356
ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg) and
357357
isPumpable(pivot, s, _) and
@@ -363,7 +363,7 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
363363
* Gets a message for why `term` can cause polynomial backtracking.
364364
*/
365365
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
366-
polynimalReDoS(term, pump, prefixMsg, prev) and
366+
polynomialReDoS(term, pump, prefixMsg, prev) and
367367
result =
368368
"Strings " + prefixMsg + "with many repetitions of '" + pump +
369369
"' can start matching anywhere after the start of the preceeding " + prev

0 commit comments

Comments
 (0)