Skip to content

Commit ba1ad00

Browse files
authored
Merge pull request #10062 from erik-krogh/redosPrefix
JS: use the shared regular expression libraries in `js/case-sensitive-middleware-path`
2 parents b0ae128 + f1799ae commit ba1ad00

File tree

7 files changed

+606
-485
lines changed

7 files changed

+606
-485
lines changed

java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll

Lines changed: 124 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
9393
* Holds if this root term is relevant to the ReDoS analysis.
9494
*/
9595
predicate isRelevant() {
96-
// there is at least one repetition
97-
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
9896
// is actually used as a RegExp
9997
this.isUsedAsRegExp() and
10098
// not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
877875
*/
878876
signature predicate isCandidateSig(State state, string pump);
879877

878+
/**
879+
* Holds if `state` is a candidate for ReDoS.
880+
*/
881+
signature predicate isCandidateSig(State state);
882+
883+
/**
884+
* Predicates for constructing a prefix string that leads to a given state.
885+
*/
886+
module PrefixConstruction<isCandidateSig/1 isCandidate> {
887+
/**
888+
* Holds if `state` is the textually last start state for the regular expression.
889+
*/
890+
private predicate lastStartState(State state) {
891+
exists(RegExpRoot root |
892+
state =
893+
max(State s, Location l |
894+
s = stateInRelevantRegexp() and
895+
isStartState(s) and
896+
getRoot(s.getRepr()) = root and
897+
l = s.getRepr().getLocation()
898+
|
899+
s
900+
order by
901+
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
902+
l.getEndLine()
903+
)
904+
)
905+
}
906+
907+
/**
908+
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
909+
*/
910+
private predicate existsTransition(State a, State b) { delta(a, _, b) }
911+
912+
/**
913+
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
914+
*/
915+
int prefixLength(State start, State state) =
916+
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
917+
918+
/**
919+
* Gets the minimum number of transitions it takes to reach `state` from the start state.
920+
*/
921+
private int lengthFromStart(State state) { result = prefixLength(_, state) }
922+
923+
/**
924+
* Gets a string for which the regular expression will reach `state`.
925+
*
926+
* Has at most one result for any given `state`.
927+
* This predicate will not always have a result even if there is a ReDoS issue in
928+
* the regular expression.
929+
*/
930+
string prefix(State state) {
931+
lastStartState(state) and
932+
result = ""
933+
or
934+
// only consider states that are no further from the start state than the furthest ReDoS candidate state.
935+
lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
936+
exists(State prev |
937+
// select a unique predecessor (by an arbitrary measure)
938+
prev =
939+
min(State s, Location loc |
940+
lengthFromStart(s) = lengthFromStart(state) - 1 and
941+
loc = s.getRepr().getLocation() and
942+
delta(s, _, state)
943+
|
944+
s
945+
order by
946+
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
947+
s.getRepr().toString()
948+
)
949+
|
950+
// greedy search for the shortest prefix
951+
result = prefix(prev) and delta(prev, Epsilon(), state)
952+
or
953+
not delta(prev, Epsilon(), state) and
954+
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
955+
)
956+
}
957+
958+
/**
959+
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
960+
*/
961+
private string getCanonicalEdgeChar(State prev, State next) {
962+
result =
963+
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
964+
}
965+
966+
/** Gets a state within a regular expression that contains a candidate state. */
967+
pragma[noinline]
968+
State stateInRelevantRegexp() {
969+
exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
970+
}
971+
}
972+
880973
/**
881974
* A module for pruning candidate ReDoS states.
882975
* The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
9101003
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
9111004
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
9121005

913-
/**
914-
* Predicates for constructing a prefix string that leads to a given state.
915-
*/
916-
private module PrefixConstruction {
917-
/**
918-
* Holds if `state` is the textually last start state for the regular expression.
919-
*/
920-
private predicate lastStartState(State state) {
921-
exists(RegExpRoot root |
922-
state =
923-
max(State s, Location l |
924-
s = stateInPumpableRegexp() and
925-
isStartState(s) and
926-
getRoot(s.getRepr()) = root and
927-
l = s.getRepr().getLocation()
928-
|
929-
s
930-
order by
931-
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
932-
l.getEndLine()
933-
)
934-
)
935-
}
936-
937-
/**
938-
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
939-
*/
940-
private predicate existsTransition(State a, State b) { delta(a, _, b) }
941-
942-
/**
943-
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
944-
*/
945-
int prefixLength(State start, State state) =
946-
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
947-
948-
/**
949-
* Gets the minimum number of transitions it takes to reach `state` from the start state.
950-
*/
951-
private int lengthFromStart(State state) { result = prefixLength(_, state) }
952-
953-
/**
954-
* Gets a string for which the regular expression will reach `state`.
955-
*
956-
* Has at most one result for any given `state`.
957-
* This predicate will not always have a result even if there is a ReDoS issue in
958-
* the regular expression.
959-
*/
960-
string prefix(State state) {
961-
lastStartState(state) and
962-
result = ""
963-
or
964-
// the search stops past the last redos candidate state.
965-
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
966-
exists(State prev |
967-
// select a unique predecessor (by an arbitrary measure)
968-
prev =
969-
min(State s, Location loc |
970-
lengthFromStart(s) = lengthFromStart(state) - 1 and
971-
loc = s.getRepr().getLocation() and
972-
delta(s, _, state)
973-
|
974-
s
975-
order by
976-
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
977-
s.getRepr().toString()
978-
)
979-
|
980-
// greedy search for the shortest prefix
981-
result = prefix(prev) and delta(prev, Epsilon(), state)
982-
or
983-
not delta(prev, Epsilon(), state) and
984-
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
985-
)
986-
}
1006+
predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
9871007

988-
/**
989-
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
990-
*/
991-
private string getCanonicalEdgeChar(State prev, State next) {
992-
result =
993-
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
994-
}
995-
996-
/** Gets a state within a regular expression that has a pumpable state. */
997-
pragma[noinline]
998-
State stateInPumpableRegexp() {
999-
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
1000-
}
1001-
}
1008+
import PrefixConstruction<isCandidateState/1> as Prefix
10021009

10031010
/**
10041011
* Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10181025
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
10191026
*/
10201027
private module SuffixConstruction {
1021-
import PrefixConstruction
1022-
10231028
/**
10241029
* Holds if all states reachable from `fork` by repeating `w`
10251030
* are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10361041
*/
10371042
pragma[noinline]
10381043
private predicate isLikelyRejectable(State s) {
1039-
s = stateInPumpableRegexp() and
1044+
s = Prefix::stateInRelevantRegexp() and
10401045
(
10411046
// exists a reject edge with some char.
10421047
hasRejectEdge(s)
@@ -1052,15 +1057,15 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10521057
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
10531058
*/
10541059
predicate isRejectState(State s) {
1055-
s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
1060+
s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
10561061
}
10571062

10581063
/**
10591064
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
10601065
*/
10611066
pragma[noopt]
10621067
predicate hasEdgeToLikelyRejectable(State s) {
1063-
s = stateInPumpableRegexp() and
1068+
s = Prefix::stateInRelevantRegexp() and
10641069
// all edges (at least one) with some char lead to another state that is rejectable.
10651070
// the `next` states might not share a common suffix, which can cause FPs.
10661071
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10761081
*/
10771082
pragma[noinline]
10781083
private string hasEdgeToLikelyRejectableHelper(State s) {
1079-
s = stateInPumpableRegexp() and
1084+
s = Prefix::stateInRelevantRegexp() and
10801085
not hasRejectEdge(s) and
10811086
not isRejectState(s) and
10821087
deltaClosedChar(s, result, _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10881093
* `prev` to `next` that matches the character symbol `char`.
10891094
*/
10901095
predicate deltaClosedChar(State prev, string char, State next) {
1091-
prev = stateInPumpableRegexp() and
1092-
next = stateInPumpableRegexp() and
1096+
prev = Prefix::stateInRelevantRegexp() and
1097+
next = Prefix::stateInRelevantRegexp() and
10931098
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
10941099
}
10951100

@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10991104
result = getAnInputSymbolMatching(char)
11001105
}
11011106

1107+
pragma[noinline]
1108+
RegExpRoot relevantRoot() {
1109+
exists(RegExpTerm term, State s |
1110+
s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
1111+
)
1112+
}
1113+
11021114
/**
11031115
* Gets a char used for finding possible suffixes inside `root`.
11041116
*/
11051117
pragma[noinline]
11061118
private string relevant(RegExpRoot root) {
1107-
exists(ascii(result)) and exists(root)
1108-
or
1109-
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
1110-
or
1111-
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
1112-
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
1113-
result = ["|", "\n", "Z"] and exists(root)
1119+
root = relevantRoot() and
1120+
(
1121+
exists(ascii(result)) and exists(root)
1122+
or
1123+
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
1124+
or
1125+
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
1126+
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
1127+
result = ["|", "\n", "Z"] and exists(root)
1128+
)
11141129
}
11151130

11161131
/**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
12081223
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
12091224
isReDoSAttackable(t, pump, s) and
12101225
(
1211-
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
1212-
not PrefixConstruction::prefix(s) = ""
1226+
prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
1227+
not Prefix::prefix(s) = ""
12131228
or
1214-
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
1229+
Prefix::prefix(s) = "" and prefixMsg = ""
12151230
or
1216-
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
1231+
not exists(Prefix::prefix(s)) and prefixMsg = ""
12171232
)
12181233
}
12191234

0 commit comments

Comments
 (0)