Skip to content

Commit 6bd9616

Browse files
committed
Ruby: interpret string escape sequences in getConstantValue()
1 parent bcdbfef commit 6bd9616

File tree

13 files changed

+712
-58
lines changed

13 files changed

+712
-58
lines changed

ruby/ql/lib/codeql/NumberUtils.qll

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/**
2+
* Provides predicates for working with numeric values and their string
3+
* representations.
4+
*/
5+
6+
/**
 * Gets the integer value of `hex` when interpreted as a hexadecimal number.
 *
 * `hex` must consist solely of hexadecimal digits (in either case) and be
 * between 1 and 6 digits long; the length limit keeps the result well inside
 * the range of an `int`. The predicate has no result for any other input,
 * rather than silently skipping the characters it cannot interpret.
 *
 * ```
 * "0"    => 0
 * "FF"   => 255
 * "f00d" => 61453
 * ```
 */
bindingset[hex]
int parseHexInt(string hex) {
  // Reject empty, over-long or malformed input up front: the aggregate below
  // would otherwise drop every non-hex character and sum only the remainder,
  // yielding a plausible-looking but wrong value.
  hex.regexpMatch("[0-9a-fA-F]{1,6}") and
  result =
    sum(int index, string c |
      c = hex.charAt(index)
    |
      // The digit at `index` is weighted by its distance from the right end.
      sixteenToThe(hex.length() - 1 - index) * toHex(c)
    )
}
27+
28+
/**
 * Gets the integer value of `octal` when interpreted as an octal number.
 *
 * `octal` must consist solely of the digits `0` to `7` and be between 1 and
 * 10 digits long; the length limit keeps the result inside the range of an
 * `int`. The predicate has no result for any other input, rather than
 * silently skipping the characters it cannot interpret.
 *
 * ```
 * "0"        => 0
 * "77"       => 63
 * "76543210" => 16434824
 * ```
 */
bindingset[octal]
int parseOctalInt(string octal) {
  // Reject empty, over-long or malformed input up front: the aggregate below
  // would otherwise drop every non-octal character and sum only the
  // remainder, yielding a plausible-looking but wrong value.
  octal.regexpMatch("[0-7]{1,10}") and
  result =
    sum(int index, string c |
      c = octal.charAt(index)
    |
      // The digit at `index` is weighted by its distance from the right end.
      eightToThe(octal.length() - 1 - index) * toOctal(c)
    )
}
49+
50+
/**
 * Gets the numeric value (0 to 15) of the single hexadecimal digit `hex`,
 * which may be written in either lower or upper case.
 */
private int toHex(string hex) {
  result in [0 .. 15] and
  exists(string lowerDigit |
    // The position of a digit in this string is its numeric value.
    lowerDigit = "0123456789abcdef".charAt(result)
  |
    hex = [lowerDigit, lowerDigit.toUpperCase()]
  )
}
67+
68+
/** Gets the numeric value (0 to 7) of the single octal digit `octal`. */
private int toOctal(string octal) {
  // An octal digit is exactly the decimal rendering of its own value.
  result in [0 .. 7] and
  octal = result.toString()
}
86+
87+
/** Gets 16 raised to the power `n`, for `n` between 0 and 7 inclusive. */
int sixteenToThe(int n) {
  // 16**7 is the largest power of 16 that an int can hold, hence the bound.
  n in [0 .. 7] and
  // A hex digit is four bits wide, so 16**n is 1 shifted left by 4 * n bits.
  exists(int shift | shift = 4 * n | result = 1.bitShiftLeft(shift))
}
92+
93+
/** Gets 8 raised to the power `n`, for `n` between 0 and 10 inclusive. */
int eightToThe(int n) {
  // 8**10 is the largest power of 8 that an int can hold, hence the bound.
  n in [0 .. 10] and
  // An octal digit is three bits wide, so 8**n is 1 shifted left by 3 * n bits.
  exists(int shift | shift = 3 * n | result = 1.bitShiftLeft(shift))
}

ruby/ql/lib/codeql/ruby/ast/Literal.qll

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,18 @@ class StringTextComponent extends StringComponent, TStringTextComponentNonRegexp
230230

231231
StringTextComponent() { this = TStringTextComponentNonRegexp(g) }
232232

233-
final override string toString() { result = g.getValue() }
233+
final override string toString() { result = this.getRawText() }
234234

235235
final override ConstantValue::ConstantStringValue getConstantValue() {
236-
result.isString(g.getValue())
236+
result.isString(this.getUnescapedText())
237237
}
238238

239239
final override string getAPrimaryQlClass() { result = "StringTextComponent" }
240+
241+
/** Gets the text of this component as it appears in the source code. */
242+
final string getRawText() { result = g.getValue() }
243+
244+
final private string getUnescapedText() { result = unescapeTextComponent(this.getRawText()) }
240245
}
241246

242247
/**
@@ -247,13 +252,18 @@ class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequen
247252

248253
StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponentNonRegexp(g) }
249254

250-
final override string toString() { result = g.getValue() }
255+
final override string toString() { result = this.getRawText() }
251256

252257
final override ConstantValue::ConstantStringValue getConstantValue() {
253-
result.isString(g.getValue())
258+
result.isString(this.getUnescapedText())
254259
}
255260

256261
final override string getAPrimaryQlClass() { result = "StringEscapeSequenceComponent" }
262+
263+
/** Gets the text of this component as it appears in the source code. */
264+
final string getRawText() { result = g.getValue() }
265+
266+
final private string getUnescapedText() { result = unescapeEscapeSequence(this.getRawText()) }
257267
}
258268

259269
/**

ruby/ql/lib/codeql/ruby/ast/internal/Literal.qll

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ private import AST
33
private import Constant
44
private import TreeSitter
55
private import codeql.ruby.controlflow.CfgNodes
6+
private import codeql.NumberUtils
67

78
int parseInteger(Ruby::Integer i) {
89
exists(string s | s = i.getValue().toLowerCase().replaceAll("_", "") |
@@ -148,16 +149,85 @@ private class RequiredFileLiteralConstantValue extends RequiredConstantValue {
148149

149150
private class RequiredStringTextComponentConstantValue extends RequiredConstantValue {
150151
override predicate requiredString(string s) {
151-
s = any(Ruby::Token t | exists(TStringTextComponentNonRegexp(t))).getValue()
152+
s =
153+
unescapeTextComponent(any(Ruby::Token t | exists(TStringTextComponentNonRegexp(t))).getValue())
152154
}
153155
}
154156

155157
private class RequiredStringEscapeSequenceComponentConstantValue extends RequiredConstantValue {
156158
override predicate requiredString(string s) {
157-
s = any(Ruby::Token t | exists(TStringEscapeSequenceComponentNonRegexp(t))).getValue()
159+
s =
160+
unescapeEscapeSequence(any(Ruby::Token t | exists(TStringEscapeSequenceComponentNonRegexp(t)))
161+
.getValue())
158162
}
159163
}
160164

165+
/**
 * Gets the string denoted by the escape sequence `escaped`, which is expected
 * to start with a backslash. For example:
 *
 * ```
 * \\     => \
 * \141   => a
 * \u0078 => x
 * ```
 */
bindingset[escaped]
string unescapeEscapeSequence(string escaped) {
  if exists(unescapeKnownEscapeSequence(escaped))
  then result = unescapeKnownEscapeSequence(escaped)
  else
    // Not a recognised sequence: the backslash is dropped and whatever
    // follows it is kept verbatim.
    result = escaped.suffix(1)
}
182+
183+
/**
 * Gets the string denoted by `escaped` if it is one of the escape sequences
 * this predicate recognises: a fixed single-character escape, `\c?` / `\C-?`,
 * an octal escape of one to three digits, a `\x` escape of one or two hex
 * digits, or a `\uXXXX` / `\u{X...}` escape. Has no result otherwise.
 */
bindingset[escaped]
private string unescapeKnownEscapeSequence(string escaped) {
  // Escapes whose replacement is written directly as a string literal.
  escaped = "\\\\" and result = "\\"
  or
  escaped = "\\'" and result = "'"
  or
  escaped = "\\\"" and result = "\""
  or
  escaped = "\\t" and result = "\t"
  or
  escaped = "\\n" and result = "\n"
  or
  escaped = "\\r" and result = "\r"
  or
  escaped = "\\s" and result = " "
  or
  // Escapes whose replacement is obtained from a code point.
  exists(int codePoint |
    escaped = "\\a" and codePoint = 7
    or
    escaped = "\\b" and codePoint = 8
    or
    escaped = "\\v" and codePoint = 11
    or
    escaped = "\\f" and codePoint = 12
    or
    escaped = "\\e" and codePoint = 27
    or
    escaped = ["\\c?", "\\C-?"] and codePoint = 127
    or
    // Numeric escapes: the captured digits are parsed in the matching radix.
    codePoint = parseOctalInt(escaped.regexpCapture("\\\\([0-7]{1,3})", 1))
    or
    codePoint = parseHexInt(escaped.regexpCapture("\\\\x([0-9a-fA-F]{1,2})", 1))
    or
    codePoint = parseHexInt(escaped.regexpCapture("\\\\u([0-9a-fA-F]{4})", 1))
    or
    codePoint = parseHexInt(escaped.regexpCapture("\\\\u\\{([0-9a-fA-F]{1,6})\\}", 1))
  |
    result = codePoint.toUnicode()
  )
}
219+
220+
/**
 * Gets the text of a string text component after unescaping: every `\\`
 * becomes `\` and every `\'` becomes `'`; all other characters are left
 * untouched.
 *
 * ```rb
 * 'foo\\bar \'baz\'' # foo\bar 'baz'
 * ```
 */
bindingset[text]
string unescapeTextComponent(string text) {
  exists(string escapedQuoteOrBackslash |
    // A backslash followed by a single quote or another backslash; the
    // character after the backslash is captured as group 1.
    escapedQuoteOrBackslash = "\\\\(['\\\\])"
  |
    result = text.regexpReplaceAll(escapedQuoteOrBackslash, "$1")
  )
}
230+
161231
class TRegExpComponent =
162232
TStringTextComponentRegexp or TStringEscapeSequenceComponentRegexp or
163233
TStringInterpolationComponentRegexp;

ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
private import codeql.ruby.ast.Literal as AST
22
private import ParseRegExp
3+
private import codeql.NumberUtils
34
import codeql.Locations
45
private import codeql.ruby.DataFlow
56

@@ -423,48 +424,15 @@ class RegExpEscape extends RegExpNormalChar {
423424
* E.g. for `\u0061` this returns "a".
424425
*/
425426
private string getUnicode() {
426-
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
427-
result = codepoint.toUnicode()
428-
)
429-
}
430-
431-
/**
432-
* Gets int value for the `index`th char in the hex number of the unicode escape.
433-
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
434-
*/
435-
private int getHexValueFromUnicode(int index) {
436427
this.isUnicode() and
437-
exists(string hex, string char | hex = this.getText().suffix(2) |
438-
char = hex.charAt(index) and
439-
result = 16.pow(hex.length() - index - 1) * toHex(char)
440-
)
428+
result = parseHexInt(this.getText().suffix(2)).toUnicode()
441429
}
442430

443431
string getUnescaped() { result = this.getText().suffix(1) }
444432

445433
override string getAPrimaryQlClass() { result = "RegExpEscape" }
446434
}
447435

448-
/**
449-
* Gets the hex number for the `hex` char.
450-
*/
451-
private int toHex(string hex) {
452-
hex = [0 .. 9].toString() and
453-
result = hex.toInt()
454-
or
455-
result = 10 and hex = ["a", "A"]
456-
or
457-
result = 11 and hex = ["b", "B"]
458-
or
459-
result = 12 and hex = ["c", "C"]
460-
or
461-
result = 13 and hex = ["d", "D"]
462-
or
463-
result = 14 and hex = ["e", "E"]
464-
or
465-
result = 15 and hex = ["f", "F"]
466-
}
467-
468436
/**
469437
* A word boundary, that is, a regular expression term of the form `\b`.
470438
*/

0 commit comments

Comments
 (0)