@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
93
93
* Holds if this root term is relevant to the ReDoS analysis.
94
94
*/
95
95
predicate isRelevant ( ) {
96
- // there is at least one repetition
97
- getRoot ( any ( InfiniteRepetitionQuantifier q ) ) = this and
98
96
// is actually used as a RegExp
99
97
this .isUsedAsRegExp ( ) and
100
98
// not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
877
875
*/
878
876
signature predicate isCandidateSig ( State state , string pump ) ;
879
877
878
+ /**
879
+ * Holds if `state` is a candidate for ReDoS.
880
+ */
881
+ signature predicate isCandidateSig ( State state ) ;
882
+
883
+ /**
884
+ * Predicates for constructing a prefix string that leads to a given state.
885
+ */
886
+ module PrefixConstruction< isCandidateSig / 1 isCandidate> {
887
+ /**
888
+ * Holds if `state` is the textually last start state for the regular expression.
889
+ */
890
+ private predicate lastStartState ( State state ) {
891
+ exists ( RegExpRoot root |
892
+ state =
893
+ max ( State s , Location l |
894
+ s = stateInRelevantRegexp ( ) and
895
+ isStartState ( s ) and
896
+ getRoot ( s .getRepr ( ) ) = root and
897
+ l = s .getRepr ( ) .getLocation ( )
898
+ |
899
+ s
900
+ order by
901
+ l .getStartLine ( ) , l .getStartColumn ( ) , s .getRepr ( ) .toString ( ) , l .getEndColumn ( ) ,
902
+ l .getEndLine ( )
903
+ )
904
+ )
905
+ }
906
+
907
+ /**
908
+ * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
909
+ */
910
+ private predicate existsTransition ( State a , State b ) { delta ( a , _, b ) }
911
+
912
+ /**
913
+ * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
914
+ */
915
+ int prefixLength ( State start , State state ) =
916
+ shortestDistances( lastStartState / 1 , existsTransition / 2 ) ( start , state , result )
917
+
918
+ /**
919
+ * Gets the minimum number of transitions it takes to reach `state` from the start state.
920
+ */
921
+ private int lengthFromStart ( State state ) { result = prefixLength ( _, state ) }
922
+
923
+ /**
924
+ * Gets a string for which the regular expression will reach `state`.
925
+ *
926
+ * Has at most one result for any given `state`.
927
+ * This predicate will not always have a result even if there is a ReDoS issue in
928
+ * the regular expression.
929
+ */
930
+ string prefix ( State state ) {
931
+ lastStartState ( state ) and
932
+ result = ""
933
+ or
934
+ // the search stops past the last redos candidate state.
935
+ lengthFromStart ( state ) <= max ( lengthFromStart ( any ( State s | isCandidate ( s ) ) ) ) and
936
+ exists ( State prev |
937
+ // select a unique predecessor (by an arbitrary measure)
938
+ prev =
939
+ min ( State s , Location loc |
940
+ lengthFromStart ( s ) = lengthFromStart ( state ) - 1 and
941
+ loc = s .getRepr ( ) .getLocation ( ) and
942
+ delta ( s , _, state )
943
+ |
944
+ s
945
+ order by
946
+ loc .getStartLine ( ) , loc .getStartColumn ( ) , loc .getEndLine ( ) , loc .getEndColumn ( ) ,
947
+ s .getRepr ( ) .toString ( )
948
+ )
949
+ |
950
+ // greedy search for the shortest prefix
951
+ result = prefix ( prev ) and delta ( prev , Epsilon ( ) , state )
952
+ or
953
+ not delta ( prev , Epsilon ( ) , state ) and
954
+ result = prefix ( prev ) + getCanonicalEdgeChar ( prev , state )
955
+ )
956
+ }
957
+
958
+ /**
959
+ * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
960
+ */
961
+ private string getCanonicalEdgeChar ( State prev , State next ) {
962
+ result =
963
+ min ( string c | delta ( prev , any ( InputSymbol symbol | c = intersect ( Any ( ) , symbol ) ) , next ) )
964
+ }
965
+
966
+ /** Gets a state within a regular expression that contains a candidate state. */
967
+ pragma [ noinline]
968
+ State stateInRelevantRegexp ( ) {
969
+ exists ( State s | isCandidate ( s ) | getRoot ( s .getRepr ( ) ) = getRoot ( result .getRepr ( ) ) )
970
+ }
971
+ }
972
+
880
973
/**
881
974
* A module for pruning candidate ReDoS states.
882
975
* The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
910
1003
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
911
1004
private State acceptsAnySuffix ( ) { epsilonSucc * ( result ) = AcceptAnySuffix ( _) }
912
1005
913
- /**
914
- * Predicates for constructing a prefix string that leads to a given state.
915
- */
916
- private module PrefixConstruction {
917
- /**
918
- * Holds if `state` is the textually last start state for the regular expression.
919
- */
920
- private predicate lastStartState ( State state ) {
921
- exists ( RegExpRoot root |
922
- state =
923
- max ( State s , Location l |
924
- s = stateInPumpableRegexp ( ) and
925
- isStartState ( s ) and
926
- getRoot ( s .getRepr ( ) ) = root and
927
- l = s .getRepr ( ) .getLocation ( )
928
- |
929
- s
930
- order by
931
- l .getStartLine ( ) , l .getStartColumn ( ) , s .getRepr ( ) .toString ( ) , l .getEndColumn ( ) ,
932
- l .getEndLine ( )
933
- )
934
- )
935
- }
936
-
937
- /**
938
- * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
939
- */
940
- private predicate existsTransition ( State a , State b ) { delta ( a , _, b ) }
941
-
942
- /**
943
- * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
944
- */
945
- int prefixLength ( State start , State state ) =
946
- shortestDistances( lastStartState / 1 , existsTransition / 2 ) ( start , state , result )
947
-
948
- /**
949
- * Gets the minimum number of transitions it takes to reach `state` from the start state.
950
- */
951
- private int lengthFromStart ( State state ) { result = prefixLength ( _, state ) }
952
-
953
- /**
954
- * Gets a string for which the regular expression will reach `state`.
955
- *
956
- * Has at most one result for any given `state`.
957
- * This predicate will not always have a result even if there is a ReDoS issue in
958
- * the regular expression.
959
- */
960
- string prefix ( State state ) {
961
- lastStartState ( state ) and
962
- result = ""
963
- or
964
- // the search stops past the last redos candidate state.
965
- lengthFromStart ( state ) <= max ( lengthFromStart ( any ( State s | isReDoSCandidate ( s , _) ) ) ) and
966
- exists ( State prev |
967
- // select a unique predecessor (by an arbitrary measure)
968
- prev =
969
- min ( State s , Location loc |
970
- lengthFromStart ( s ) = lengthFromStart ( state ) - 1 and
971
- loc = s .getRepr ( ) .getLocation ( ) and
972
- delta ( s , _, state )
973
- |
974
- s
975
- order by
976
- loc .getStartLine ( ) , loc .getStartColumn ( ) , loc .getEndLine ( ) , loc .getEndColumn ( ) ,
977
- s .getRepr ( ) .toString ( )
978
- )
979
- |
980
- // greedy search for the shortest prefix
981
- result = prefix ( prev ) and delta ( prev , Epsilon ( ) , state )
982
- or
983
- not delta ( prev , Epsilon ( ) , state ) and
984
- result = prefix ( prev ) + getCanonicalEdgeChar ( prev , state )
985
- )
986
- }
1006
+ predicate isCandidateState ( State s ) { isReDoSCandidate ( s , _) }
987
1007
988
- /**
989
- * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
990
- */
991
- private string getCanonicalEdgeChar ( State prev , State next ) {
992
- result =
993
- min ( string c | delta ( prev , any ( InputSymbol symbol | c = intersect ( Any ( ) , symbol ) ) , next ) )
994
- }
995
-
996
- /** Gets a state within a regular expression that has a pumpable state. */
997
- pragma [ noinline]
998
- State stateInPumpableRegexp ( ) {
999
- exists ( State s | isReDoSCandidate ( s , _) | getRoot ( s .getRepr ( ) ) = getRoot ( result .getRepr ( ) ) )
1000
- }
1001
- }
1008
+ import PrefixConstruction< isCandidateState / 1 > as Prefix
1002
1009
1003
1010
/**
1004
1011
* Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
1018
1025
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
1019
1026
*/
1020
1027
private module SuffixConstruction {
1021
- import PrefixConstruction
1022
-
1023
1028
/**
1024
1029
* Holds if all states reachable from `fork` by repeating `w`
1025
1030
* are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
1036
1041
*/
1037
1042
pragma [ noinline]
1038
1043
private predicate isLikelyRejectable ( State s ) {
1039
- s = stateInPumpableRegexp ( ) and
1044
+ s = Prefix :: stateInRelevantRegexp ( ) and
1040
1045
(
1041
1046
// exists a reject edge with some char.
1042
1047
hasRejectEdge ( s )
@@ -1052,15 +1057,15 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
1052
1057
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
1053
1058
*/
1054
1059
predicate isRejectState ( State s ) {
1055
- s = stateInPumpableRegexp ( ) and not epsilonSucc * ( s ) = Accept ( _)
1060
+ s = Prefix :: stateInRelevantRegexp ( ) and not epsilonSucc * ( s ) = Accept ( _)
1056
1061
}
1057
1062
1058
1063
/**
1059
1064
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
1060
1065
*/
1061
1066
pragma [ noopt]
1062
1067
predicate hasEdgeToLikelyRejectable ( State s ) {
1063
- s = stateInPumpableRegexp ( ) and
1068
+ s = Prefix :: stateInRelevantRegexp ( ) and
1064
1069
// all edges (at least one) with some char leads to another state that is rejectable.
1065
1070
// the `next` states might not share a common suffix, which can cause FPs.
1066
1071
exists ( string char | char = hasEdgeToLikelyRejectableHelper ( s ) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
1076
1081
*/
1077
1082
pragma [ noinline]
1078
1083
private string hasEdgeToLikelyRejectableHelper ( State s ) {
1079
- s = stateInPumpableRegexp ( ) and
1084
+ s = Prefix :: stateInRelevantRegexp ( ) and
1080
1085
not hasRejectEdge ( s ) and
1081
1086
not isRejectState ( s ) and
1082
1087
deltaClosedChar ( s , result , _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
1088
1093
* `prev` to `next` that the character symbol `char`.
1089
1094
*/
1090
1095
predicate deltaClosedChar ( State prev , string char , State next ) {
1091
- prev = stateInPumpableRegexp ( ) and
1092
- next = stateInPumpableRegexp ( ) and
1096
+ prev = Prefix :: stateInRelevantRegexp ( ) and
1097
+ next = Prefix :: stateInRelevantRegexp ( ) and
1093
1098
deltaClosed ( prev , getAnInputSymbolMatchingRelevant ( char ) , next )
1094
1099
}
1095
1100
@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
1099
1104
result = getAnInputSymbolMatching ( char )
1100
1105
}
1101
1106
1107
+ pragma [ noinline]
1108
+ RegExpRoot relevantRoot ( ) {
1109
+ exists ( RegExpTerm term , State s |
1110
+ s .getRepr ( ) = term and isCandidateState ( s ) and result = term .getRootTerm ( )
1111
+ )
1112
+ }
1113
+
1102
1114
/**
1103
1115
* Gets a char used for finding possible suffixes inside `root`.
1104
1116
*/
1105
1117
pragma [ noinline]
1106
1118
private string relevant ( RegExpRoot root ) {
1107
- exists ( ascii ( result ) ) and exists ( root )
1108
- or
1109
- exists ( InputSymbol s | belongsTo ( s , root ) | result = intersect ( s , _) )
1110
- or
1111
- // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
1112
- // The three chars must be kept in sync with `hasSimpleRejectEdge`.
1113
- result = [ "|" , "\n" , "Z" ] and exists ( root )
1119
+ root = relevantRoot ( ) and
1120
+ (
1121
+ exists ( ascii ( result ) ) and exists ( root )
1122
+ or
1123
+ exists ( InputSymbol s | belongsTo ( s , root ) | result = intersect ( s , _) )
1124
+ or
1125
+ // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
1126
+ // The three chars must be kept in sync with `hasSimpleRejectEdge`.
1127
+ result = [ "|" , "\n" , "Z" ] and exists ( root )
1128
+ )
1114
1129
}
1115
1130
1116
1131
/**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
1208
1223
predicate hasReDoSResult ( RegExpTerm t , string pump , State s , string prefixMsg ) {
1209
1224
isReDoSAttackable ( t , pump , s ) and
1210
1225
(
1211
- prefixMsg = "starting with '" + escape ( PrefixConstruction :: prefix ( s ) ) + "' and " and
1212
- not PrefixConstruction :: prefix ( s ) = ""
1226
+ prefixMsg = "starting with '" + escape ( Prefix :: prefix ( s ) ) + "' and " and
1227
+ not Prefix :: prefix ( s ) = ""
1213
1228
or
1214
- PrefixConstruction :: prefix ( s ) = "" and prefixMsg = ""
1229
+ Prefix :: prefix ( s ) = "" and prefixMsg = ""
1215
1230
or
1216
- not exists ( PrefixConstruction :: prefix ( s ) ) and prefixMsg = ""
1231
+ not exists ( Prefix :: prefix ( s ) ) and prefixMsg = ""
1217
1232
)
1218
1233
}
1219
1234
0 commit comments