Skip to content

Commit 70ec709

Browse files
authored
Merge pull request #8142 from github/hmac/incomplete-multi-char-sanitization
2 parents e93ff86 + 1f4dad4 commit 70ec709

15 files changed

+724
-198
lines changed

config/identical-files.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,5 +597,9 @@
597597
"Swift patterns test file": [
598598
"swift/ql/test/extractor-tests/patterns/patterns.swift",
599599
"swift/ql/test/library-tests/parent/patterns.swift"
600+
],
601+
"IncompleteMultiCharacterSanitization JS/Ruby": [
602+
"javascript/ql/lib/semmle/javascript/security/IncompleteMultiCharacterSanitizationQuery.qll",
603+
"ruby/ql/lib/codeql/ruby/security/IncompleteMultiCharacterSanitizationQuery.qll"
600604
]
601605
}
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
/**
2+
* Provides shared predicates for reasoning about improper multi-character sanitization.
3+
*/
4+
5+
import IncompleteMultiCharacterSanitizationSpecific
6+
7+
/**
8+
* A prefix that may be dangerous to sanitize explicitly.
9+
*
10+
* Note that this class exists solely as a (necessary) optimization for this query.
11+
*/
12+
private class DangerousPrefix extends string {
13+
DangerousPrefix() {
14+
this = ["/..", "../"] or
15+
this = "<!--" or
16+
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
17+
}
18+
}
19+
20+
/**
21+
* A substring of a prefix that may be dangerous to sanitize explicitly.
22+
*/
23+
private class DangerousPrefixSubstring extends string {
24+
DangerousPrefixSubstring() {
25+
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
26+
}
27+
}
28+
29+
/**
30+
* Gets a substring of a dangerous prefix (possibly the empty string) that is matched by `t`.
31+
*/
32+
pragma[noinline]
33+
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
34+
t.isNullable() and result = ""
35+
or
36+
result = t.getAMatchedString()
37+
or
38+
// A substring matched by some character class. This is only used to match the "word" part of an HTML tag (e.g. "iframe" in "<iframe").
39+
exists(NfaUtils::CharacterClass cc |
40+
cc = NfaUtils::getCanonicalCharClass(t) and
41+
cc.matches(result) and
42+
result.regexpMatch("\\w") and
43+
// excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
44+
not cc.matches(">")
45+
)
46+
or
47+
t instanceof RegExpDot and
48+
result.length() = 1
49+
or
50+
(
51+
t instanceof RegExpOpt or
52+
t instanceof RegExpStar or
53+
t instanceof RegExpPlus or
54+
t instanceof RegExpGroup or
55+
t instanceof RegExpAlt
56+
) and
57+
result = getADangerousMatchedChar(t.getAChild())
58+
}
59+
60+
/**
61+
* Gets a dangerous prefix that is in the prefix language of `t`.
62+
*/
63+
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
64+
result = getADangerousMatchedPrefixSubstring(t) and
65+
not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
66+
}
67+
68+
/**
69+
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
70+
*
71+
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
72+
*/
73+
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
74+
result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
75+
or
76+
result = getADangerousMatchedChar(t)
77+
or
78+
// loop around for repetitions (only considering word characters, i.e. `\w`, in the repetition)
79+
exists(RepetitionMatcher repetition | t = repetition |
80+
result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
81+
)
82+
}
83+
84+
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
85+
string char;
86+
87+
pragma[noinline]
88+
RepetitionMatcher() {
89+
(this instanceof RegExpPlus or this instanceof RegExpStar) and
90+
char = getADangerousMatchedChar(this.getAChild()) and
91+
char.regexpMatch("\\w")
92+
}
93+
94+
pragma[noinline]
95+
string getAChar() { result = char }
96+
}
97+
98+
/**
99+
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerability of kind `kind`.
100+
*/
101+
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
102+
prefix = getADangerousMatchedPrefix(t) and
103+
(
104+
kind = "path injection" and
105+
prefix = ["/..", "../"] and
106+
// If the regex is matching explicit path components, it is unlikely that it's being used as a sanitizer.
107+
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*")
108+
or
109+
kind = "HTML element injection" and
110+
(
111+
// comments
112+
prefix = "<!--" and
113+
// If the regex is matching explicit textual content of an HTML comment, it is unlikely that it's being used as a sanitizer.
114+
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*")
115+
or
116+
// specific tags
117+
// the `cript|scrip` case has been observed in the wild several times
118+
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
119+
)
120+
)
121+
or
122+
kind = "HTML attribute injection" and
123+
prefix =
124+
[
125+
// ordinary event handler prefix
126+
"on",
127+
// angular prefixes
128+
"ng-", "ng:", "data-ng-", "x-ng-"
129+
] and
130+
(
131+
// explicit matching: `onclick` and `ng-bind`
132+
t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
133+
or
134+
// regexp-based matching: `on[a-z]+`
135+
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
136+
start.getAMatchedString().regexpMatch("(?i)[^a-z]*" + prefix) and
137+
isCommonWordMatcher(start.getSuccessor())
138+
)
139+
)
140+
}
141+
142+
/**
143+
* Holds if `t` is a common pattern for matching words.
144+
*/
145+
private predicate isCommonWordMatcher(RegExpTerm t) {
146+
exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) |
147+
// [a-z]+ and similar
148+
quantified
149+
.(RegExpCharacterClass)
150+
.getAChild()
151+
.(RegExpCharacterRange)
152+
.isRange(["a", "A"], ["z", "Z"])
153+
or
154+
// \w+ or [\w]+
155+
[quantified, quantified.(RegExpCharacterClass).getAChild()]
156+
.(RegExpCharacterClassEscape)
157+
.getValue() = "w"
158+
)
159+
}
160+
161+
/**
162+
* Holds if `replace` has a pattern argument containing a regular expression
163+
* `dangerous` which matches a dangerous string beginning with `prefix`, in an
164+
* attempt to avoid a vulnerability of kind `kind`.
165+
*/
166+
predicate isResult(
167+
StringSubstitutionCall replace, EmptyReplaceRegExpTerm dangerous, string prefix, string kind
168+
) {
169+
exists(EmptyReplaceRegExpTerm regexp |
170+
replace = regexp.getCall() and
171+
dangerous.getRootTerm() = regexp and
172+
// skip leading optional elements
173+
not dangerous.isNullable() and
174+
// only warn about the longest match
175+
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length(), m) and
176+
// only warn once per kind
177+
not exists(EmptyReplaceRegExpTerm other |
178+
other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
179+
|
180+
matchesDangerousPrefix(other, _, kind) and
181+
not other.isNullable()
182+
) and
183+
// avoid anchored terms
184+
not exists(RegExpAnchor a | regexp = a.getRootTerm()) and
185+
// Don't flag replace operations that are called repeatedly in a loop, as they can actually work correctly.
186+
not replace.flowsTo(replace.getReceiver+())
187+
)
188+
}
189+
190+
/**
191+
* Holds if `replace` has a pattern argument containing a regular expression
192+
* `dangerous` which matches a dangerous string beginning with `prefix`. `msg`
193+
* is the alert we report.
194+
*/
195+
query predicate problems(
196+
StringSubstitutionCall replace, string msg, EmptyReplaceRegExpTerm dangerous, string prefix
197+
) {
198+
exists(string kind |
199+
isResult(replace, dangerous, prefix, kind) and
200+
msg = "This string may still contain $@, which may cause a " + kind + " vulnerability."
201+
)
202+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
2+
* Provides language-specific predicates for reasoning about improper multi-character sanitization.
3+
*/
4+
5+
import javascript
6+
import semmle.javascript.security.regexp.NfaUtils as NfaUtils
7+
8+
class StringSubstitutionCall = StringReplaceCall;
9+
10+
/**
11+
* A regexp term that matches substrings that should be replaced with the empty string.
12+
*/
13+
class EmptyReplaceRegExpTerm extends RegExpTerm {
14+
EmptyReplaceRegExpTerm() {
15+
exists(StringReplaceCall replace |
16+
[replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
17+
this = replace.getRegExp().getRoot().getAChild*()
18+
)
19+
}
20+
21+
/**
22+
* Gets the substitution call that uses this regexp term.
23+
*/
24+
StringSubstitutionCall getCall() { this = result.getRegExp().getRoot() }
25+
}

0 commit comments

Comments
 (0)