Skip to content

Commit 9078e13

Browse files
Apply review suggestions
- make java imports private - qdoc fixes - reorder predicates - simplifications
1 parent b854a21 commit 9078e13

File tree

3 files changed

+82
-86
lines changed

3 files changed

+82
-86
lines changed

java/ql/lib/semmle/code/java/regex/RegexTreeView.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
22

3-
import java
3+
private import java
44
private import semmle.code.java.regex.regex
55

66
/**

java/ql/lib/semmle/code/java/regex/regex.qll

Lines changed: 80 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ abstract class RegexString extends StringLiteral {
6262

6363
/**
6464
* Helper predicate for `quote`.
65-
* Holds if the char at `pos` is the one-based `index`th occourence of a quote delimiter (`\Q` or `\E`)
65+
* Holds if the char at `pos` is the one-based `index`th occurrence of a quote delimiter (`\Q` or `\E`)
6666
* Result is `true` for `\Q` and `false` for `\E`.
6767
*/
6868
private boolean quoteDelimiter(int index, int pos) {
@@ -73,7 +73,7 @@ abstract class RegexString extends StringLiteral {
7373
/** Holds if a quoted sequence is found between `start` and `end` */
7474
predicate quote(int start, int end) { this.quote(start, end, _, _) }
7575

76-
/** Holds if a quoted sequence is found between `start` and `end`, with ontent found between `inner_start` and `inner_end`. */
76+
/** Holds if a quoted sequence is found between `start` and `end`, with content found between `inner_start` and `inner_end`. */
7777
predicate quote(int start, int end, int inner_start, int inner_end) {
7878
exists(int index |
7979
this.quoteDelimiter(index, start) = true and
@@ -98,7 +98,7 @@ abstract class RegexString extends StringLiteral {
9898
}
9999

100100
/**
101-
* A control sequence, `\cx`
101+
* Holds if there is a control sequence, `\cx`, between `start` and `end`.
102102
* `x` may be any ascii character including special characters.
103103
*/
104104
predicate controlEscape(int start, int end) {
@@ -107,6 +107,65 @@ abstract class RegexString extends StringLiteral {
107107
end = start + 3
108108
}
109109

110+
pragma[inline]
111+
private predicate isOctal(int index) { this.getChar(index) = [0 .. 7].toString() }
112+
113+
/** An escape sequence that includes braces, such as named characters (\N{degree sign}), named classes (\p{Lower}), or hex values (\x{h..h}) */
114+
private predicate escapedBraces(int start, int end) {
115+
this.escapingChar(start) and
116+
this.getChar(start + 1) = ["N", "p", "P", "x"] and
117+
this.getChar(start + 2) = "{" and
118+
end = min(int i | start + 2 < i and this.getChar(i - 1) = "}")
119+
}
120+
121+
/**
122+
* Holds if an escaped character is found between `start` and `end`.
123+
* Escaped characters include hex values, octal values and named escapes,
124+
* but excludes backreferences.
125+
*/
126+
predicate escapedCharacter(int start, int end) {
127+
this.escapingChar(start) and
128+
not this.backreference(start, _) and
129+
(
130+
// hex value \xhh
131+
this.getChar(start + 1) = "x" and
132+
this.getChar(start + 2) != "{" and
133+
end = start + 4
134+
or
135+
// octal value \0o, \0oo, or \0ooo. Max of 0377.
136+
this.getChar(start + 1) = "0" and
137+
this.isOctal(start + 2) and
138+
(
139+
if this.isOctal(start + 3)
140+
then
141+
if this.isOctal(start + 4) and this.getChar(start + 2) in ["0", "1", "2", "3"]
142+
then end = start + 5
143+
else end = start + 4
144+
else end = start + 3
145+
)
146+
or
147+
// 16-bit hex value \uhhhh
148+
this.getChar(start + 1) = "u" and end = start + 6
149+
or
150+
this.escapedBraces(start, end)
151+
or
152+
// Boundary matchers \b, \b{g}
153+
this.getChar(start + 1) = "b" and
154+
(
155+
if this.getText().substring(start + 2, start + 5) = "{g}"
156+
then end = start + 5
157+
else end = start + 2
158+
)
159+
or
160+
this.controlEscape(start, end)
161+
or
162+
// escape not handled above, update when adding a new case
163+
not this.getChar(start + 1) in ["x", "0", "u", "p", "P", "N", "b", "c"] and
164+
not exists(this.getChar(start + 1).toInt()) and
165+
end = start + 2
166+
)
167+
}
168+
110169
private string nonEscapedCharAt(int i) {
111170
result = this.getChar(i) and
112171
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) and
@@ -128,7 +187,7 @@ abstract class RegexString extends StringLiteral {
128187

129188
/**
130189
* Holds if the character at `pos` starts a character set delimiter.
131-
* Result is 1 for `[` and 0 for `]`.
190+
* Result is 1 for `[` and -1 for `]`.
132191
*/
133192
private int charSetDelimiter(int pos) {
134193
result = 1 and this.charSetStart0(pos, _)
@@ -145,17 +204,14 @@ abstract class RegexString extends StringLiteral {
145204
pos = rank[index](int p | exists(this.charSetDelimiter(p)))
146205
}
147206

148-
bindingset[x]
149-
private int max_zero(int x) { result = max([x, 0]) }
150-
151207
/**
152208
* Gets the nesting depth of character classes after position `pos`,
153209
* where `pos` is the position of a character set delimiter.
154210
*/
155211
private int charSetDepth(int index, int pos) {
156-
index = 1 and result = max_zero(charSetDelimiter(index, pos))
212+
index = 1 and result = 0.maximum(this.charSetDelimiter(index, pos))
157213
or
158-
result = max_zero(charSetDelimiter(index, pos) + charSetDepth(index - 1, _))
214+
result = 0.maximum(this.charSetDelimiter(index, pos) + this.charSetDepth(index - 1, _))
159215
}
160216

161217
/** Holds if a top-level character set starts between `start` and `end`. */
@@ -209,26 +265,10 @@ abstract class RegexString extends StringLiteral {
209265

210266
/** An indexed version of `charSetToken/3` */
211267
private predicate charSetToken(int charset_start, int index, int token_start, int token_end) {
212-
token_start =
213-
rank[index](int start, int end | this.charSetToken(charset_start, start, end) | start) and
268+
token_start = rank[index](int start | this.charSetToken(charset_start, start, _) | start) and
214269
this.charSetToken(charset_start, token_start, token_end)
215270
}
216271

217-
/**
218-
* Holds if the character set starting at `charset_start` contains either
219-
* a character or a range found between `start` and `end`.
220-
*/
221-
predicate charSetChild(int charset_start, int start, int end) {
222-
this.charSetToken(charset_start, start, end) and
223-
not exists(int range_start, int range_end |
224-
this.charRange(charset_start, range_start, _, _, range_end) and
225-
range_start <= start and
226-
range_end >= end
227-
)
228-
or
229-
this.charRange(charset_start, start, _, _, end)
230-
}
231-
232272
/**
233273
* Helper predicate for `charRange`.
234274
* We can determine where character ranges end by a left to right sweep.
@@ -272,63 +312,19 @@ abstract class RegexString extends StringLiteral {
272312
)
273313
}
274314

275-
pragma[inline]
276-
private predicate isOctal(int index) { this.getChar(index) = [0 .. 7].toString() }
277-
278-
/** An escape sequence that includes braces, such as named characters (\N{degree sign}), named classes (\p{Lower}), or hex values (\x{h..h}) */
279-
private predicate escapedBraces(int start, int end) {
280-
this.escapingChar(start) and
281-
this.getChar(start + 1) = ["N", "p", "P", "x"] and
282-
this.getChar(start + 2) = "{" and
283-
end = min(int i | start + 2 < i and this.getChar(i - 1) = "}")
284-
}
285-
286315
/**
287-
* Holds if an escaped character is found between `start` and `end`.
288-
* Escaped characters include hex values, octal values and named escapes,
289-
* but excludes backreferences.
316+
* Holds if the character set starting at `charset_start` contains either
317+
* a character or a range found between `start` and `end`.
290318
*/
291-
predicate escapedCharacter(int start, int end) {
292-
this.escapingChar(start) and
293-
not this.backreference(start, _) and
294-
(
295-
// hex value \xhh
296-
this.getChar(start + 1) = "x" and
297-
this.getChar(start + 2) != "{" and
298-
end = start + 4
299-
or
300-
// octal value \0o, \0oo, or \0ooo. Max of 0377.
301-
this.getChar(start + 1) = "0" and
302-
this.isOctal(start + 2) and
303-
(
304-
if this.isOctal(start + 3)
305-
then
306-
if this.isOctal(start + 4) and this.getChar(start + 2) in ["0", "1", "2", "3"]
307-
then end = start + 5
308-
else end = start + 4
309-
else end = start + 3
310-
)
311-
or
312-
// 16-bit hex value \uhhhh
313-
this.getChar(start + 1) = "u" and end = start + 6
314-
or
315-
this.escapedBraces(start, end)
316-
or
317-
// Boundary matchers \b, \b{g}
318-
this.getChar(start + 1) = "b" and
319-
(
320-
if this.getText().substring(start + 2, start + 5) = "{g}"
321-
then end = start + 5
322-
else end = start + 2
323-
)
324-
or
325-
this.controlEscape(start, end)
326-
or
327-
// escape not handled above, update when adding a new case
328-
not this.getChar(start + 1) in ["x", "0", "u", "p", "P", "N", "b", "c"] and
329-
not exists(this.getChar(start + 1).toInt()) and
330-
end = start + 2
319+
predicate charSetChild(int charset_start, int start, int end) {
320+
this.charSetToken(charset_start, start, end) and
321+
not exists(int range_start, int range_end |
322+
this.charRange(charset_start, range_start, _, _, range_end) and
323+
range_start <= start and
324+
range_end >= end
331325
)
326+
or
327+
this.charRange(charset_start, start, _, _, end)
332328
}
333329

334330
/** Holds if `index` is inside a character set. */
@@ -871,9 +867,9 @@ abstract class RegexString extends StringLiteral {
871867
* Holds if a character is represented between `start` and `end` in the source literal.
872868
*/
873869
private predicate sourceCharacter(int start, int end) {
874-
sourceEscapedCharacter(start, end)
870+
this.sourceEscapedCharacter(start, end)
875871
or
876-
sourceNonEscapedCharacter(start) and
872+
this.sourceNonEscapedCharacter(start) and
877873
end = start + 1
878874
}
879875

@@ -885,8 +881,8 @@ abstract class RegexString extends StringLiteral {
885881
*/
886882
predicate sourceCharacter(int pos, int start, int end) {
887883
exists(this.getChar(pos)) and
888-
sourceCharacter(start, end) and
889-
start = rank[pos + 2](int s | sourceCharacter(s, _))
884+
this.sourceCharacter(start, end) and
885+
start = rank[pos + 2](int s | this.sourceCharacter(s, _))
890886
}
891887
}
892888

java/ql/lib/semmle/code/java/security/performance/ReDoSUtilSpecific.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* This is the interface to the shared ReDoS library.
44
*/
55

6-
import java
6+
private import java
77
import semmle.code.java.regex.RegexTreeView
88

99
/**

0 commit comments

Comments
 (0)