Skip to content

Commit 230192d

Browse files
authored
Merge pull request #9267 from hmac/hmac/improper-memoization
Ruby: Add Improper Memoization query
2 parents e95194c + 3112964 commit 230192d

File tree

7 files changed

+331
-0
lines changed

7 files changed

+331
-0
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* Provides predicates for reasoning about improper memoization methods.
3+
*/
4+
5+
private import ruby
6+
private import codeql.ruby.DataFlow
7+
private import codeql.ruby.dataflow.internal.DataFlowDispatch
8+
9+
/**
10+
* A `||=` statement that memoizes a value by storing it in an instance variable.
11+
*/
12+
private class MemoStmt extends AssignLogicalOrExpr {
13+
private InstanceVariableAccess instanceVariable;
14+
15+
MemoStmt() { instanceVariable.getParent*() = this.getLeftOperand() }
16+
17+
/**
18+
* Gets the variable access on the LHS of this statement.
19+
* This is the `@a` in `@a ||= e`.
20+
*/
21+
VariableAccess getVariableAccess() { result = instanceVariable }
22+
}
23+
24+
/**
25+
* A `||=` statement that stores a value at a particular location in a Hash,
26+
* which itself is stored in an instance variable.
27+
* For example:
28+
* ```rb
29+
* @a[key] ||= e
30+
* ```
31+
*/
32+
private class HashMemoStmt extends MemoStmt {
33+
HashMemoStmt() {
34+
exists(ElementReference e |
35+
e = this.getLeftOperand() and this.getVariableAccess().getParent+() = e
36+
)
37+
}
38+
}
39+
40+
/**
41+
* A method that may be performing memoization.
42+
*/
43+
private class MemoCandidate extends Method {
44+
MemoCandidate() { this = any(MemoStmt m).getEnclosingMethod() }
45+
}
46+
47+
/**
48+
* Holds if parameter `p` of `m` is read in the right hand side of `assign`.
49+
*/
50+
private predicate parameterUsedInMemoValue(Method m, Parameter p, MemoStmt a) {
51+
p = m.getAParameter() and
52+
a.getEnclosingMethod() = m and
53+
p.getAVariable().getAnAccess().getParent*() = a.getRightOperand()
54+
}
55+
56+
/**
57+
* Holds if parameter `p` of `m` is read in the left hand side of `assign`.
58+
*/
59+
private predicate parameterUsedInMemoKey(Method m, Parameter p, HashMemoStmt a) {
60+
p = m.getAParameter() and
61+
a.getEnclosingMethod() = m and
62+
p.getAVariable().getAnAccess().getParent+() = a.getLeftOperand()
63+
}
64+
65+
/**
66+
* Holds if the assignment `s` is returned from its parent method `m`.
67+
*/
68+
private predicate memoReturnedFromMethod(Method m, MemoStmt s) {
69+
exists(DataFlow::Node n | n.asExpr().getExpr() = s and exprNodeReturnedFrom(n, m))
70+
or
71+
// If we don't have flow (e.g. due to the dataflow library not supporting instance variable flow yet),
72+
// fall back to a syntactic heuristic: does the last statement in the method mention the memoization variable?
73+
m.getLastStmt().getAChild*().(InstanceVariableReadAccess).getVariable() =
74+
s.getVariableAccess().getVariable()
75+
}
76+
77+
/**
78+
* Holds if `m` is a memoization method with a parameter `p` which is not used in the memoization key.
79+
* This can cause stale or incorrect values to be returned when the method is called with different arguments.
80+
*/
81+
predicate isImproperMemoizationMethod(Method m, Parameter p, AssignLogicalOrExpr s) {
82+
m instanceof MemoCandidate and
83+
m.getName() != "initialize" and
84+
parameterUsedInMemoValue(m, p, s) and
85+
not parameterUsedInMemoKey(m, p, s) and
86+
memoReturnedFromMethod(m, s)
87+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/improper-memoization`. The query finds cases where the parameter of a memoization method is not used in the memoization key.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>
7+
A common pattern in Ruby is to memoize the result of a method by storing
8+
it in an instance variable. If the method has no parameters, it can simply
9+
be stored directly in a variable.
10+
</p>
11+
12+
<sample language="ruby">
13+
def answer
14+
@answer ||= calculate_answer
15+
end
16+
</sample>
17+
18+
<p>
19+
If the method takes parameters, then there are multiple results to store
20+
(one for each combination of parameter value). In this case the values
21+
should be stored in a hash, keyed by the parameter values.
22+
</p>
23+
24+
<sample language="ruby">
25+
def answer(x, y)
26+
@answer ||= {}
27+
@answer[x] ||= {}
28+
@answer[x][y] ||= calculate_answer
29+
end
30+
</sample>
31+
32+
<p>
33+
If a memoization method takes parameters but does not include them in the
34+
memoization key, subsequent calls to the method with different parameter
35+
values may incorrectly return the same result. This can lead to the method
36+
returning stale data, or leaking sensitive information.
37+
</p>
38+
</overview>
39+
40+
<example>
41+
<p>
42+
In this example, the method does not include its parameters in the
43+
memoization key. The first call to this method will cache the result, and
44+
subsequent calls will return the same result regardless of what arguments
45+
are given.
46+
</p>
47+
48+
<sample language="ruby">
49+
def answer(x, y)
50+
@answer ||= calculate_answer(x, y)
51+
end
52+
</sample>
53+
54+
<p>
55+
This can be fixed by storing the result of <code>calculate_answer</code>
56+
in a hash, keyed by the parameter values.
57+
</p>
58+
59+
<sample language="ruby">
60+
def answer(x, y)
61+
@answer ||= {}
62+
@answer[x] ||= {}
63+
@answer[x][y] ||= calculate_answer(x, y)
64+
end
65+
</sample>
66+
67+
<p>
68+
Note that if the result of <code>calculate_answer</code> is
69+
<code>false</code> or <code>nil</code>, then it will not be cached.
70+
To cache these values you can use a different pattern:
71+
</p>
72+
73+
<sample language="ruby">
74+
def answer(x, y)
75+
@answer ||= Hash.new do |h1, x|
76+
h1[x] = Hash.new do |h2, y|
77+
h2[y] = calculate_answer(x, y)
78+
end
79+
end
80+
@answer[x][y]
81+
end
82+
</sample>
83+
</example>
84+
</qhelp>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/**
2+
* @name Improper Memoization
3+
* @description Omitting a parameter from the key of a memoization method can lead to reading stale or incorrect data.
4+
* @kind problem
5+
* @problem.severity warning
6+
* @precision high
7+
* @tags security
8+
* @id rb/improper-memoization
9+
*/
10+
11+
import ruby
12+
import codeql.ruby.security.ImproperMemoizationQuery
13+
14+
from Method m, Parameter p, AssignLogicalOrExpr s
15+
where isImproperMemoizationMethod(m, p, s)
16+
select m, "A $@ in this memoization method does not form part of the memoization key.", p,
17+
"parameter"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
failures
2+
| improper_memoization.rb:100:1:104:3 | m14 | Unexpected result: result=BAD |
3+
#select
4+
| improper_memoization.rb:50:1:55:3 | m7 | improper_memoization.rb:50:8:50:10 | arg | improper_memoization.rb:51:3:53:5 | ... \|\|= ... |
5+
| improper_memoization.rb:58:1:63:3 | m8 | improper_memoization.rb:58:8:58:10 | arg | improper_memoization.rb:59:3:61:5 | ... \|\|= ... |
6+
| improper_memoization.rb:66:1:68:3 | m9 | improper_memoization.rb:66:8:66:10 | arg | improper_memoization.rb:67:3:67:34 | ... \|\|= ... |
7+
| improper_memoization.rb:71:1:73:3 | m10 | improper_memoization.rb:71:9:71:12 | arg1 | improper_memoization.rb:72:3:72:42 | ... \|\|= ... |
8+
| improper_memoization.rb:71:1:73:3 | m10 | improper_memoization.rb:71:15:71:18 | arg2 | improper_memoization.rb:72:3:72:42 | ... \|\|= ... |
9+
| improper_memoization.rb:76:1:79:3 | m11 | improper_memoization.rb:76:15:76:18 | arg2 | improper_memoization.rb:78:3:78:48 | ... \|\|= ... |
10+
| improper_memoization.rb:82:1:87:3 | m12 | improper_memoization.rb:82:15:82:18 | arg2 | improper_memoization.rb:83:3:85:5 | ... \|\|= ... |
11+
| improper_memoization.rb:90:1:97:3 | m13 | improper_memoization.rb:90:9:90:10 | id | improper_memoization.rb:91:3:95:5 | ... \|\|= ... |
12+
| improper_memoization.rb:100:1:104:3 | m14 | improper_memoization.rb:100:9:100:11 | arg | improper_memoization.rb:103:3:103:40 | ... \|\|= ... |
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import ruby
2+
import TestUtilities.InlineExpectationsTest
3+
import codeql.ruby.security.ImproperMemoizationQuery
4+
5+
class ImproperMemoizationTest extends InlineExpectationsTest {
6+
ImproperMemoizationTest() { this = "ImproperMemoizationTest" }
7+
8+
override string getARelevantTag() { result = "BAD" }
9+
10+
override predicate hasActualResult(Location location, string element, string tag, string value) {
11+
tag = "result" and
12+
value = "BAD" and
13+
exists(Expr e |
14+
isImproperMemoizationMethod(e, _, _) and
15+
location = e.getLocation() and
16+
element = e.toString()
17+
)
18+
}
19+
}
20+
21+
from Method m, Parameter p, AssignLogicalOrExpr s
22+
where isImproperMemoizationMethod(m, p, s)
23+
select m, p, s
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# GOOD - Should not trigger CodeQL rule
2+
3+
# No arguments passed to method
4+
def m1
5+
@m1 ||= long_running_method
6+
end
7+
8+
# No arguments passed to method
9+
def m2
10+
@m2 ||= begin
11+
long_running_method
12+
end
13+
end
14+
15+
# OK: argument used in key.
16+
# May be incorrect if arg is `false` or `nil`.
17+
def m3(arg)
18+
@m3 ||= {}
19+
@m3[arg] ||= long_running_method(arg)
20+
end
21+
22+
# OK: both arguments used in key.
23+
# May be incorrect if either arg is `false` or `nil`.
24+
def m4(arg1, arg2)
25+
@m4 ||= {}
26+
@m4[[arg1, arg2]] ||= result(arg1, arg2)
27+
end
28+
29+
# OK: argument used in key.
30+
# Still correct if arg is `false` or `nil`.
31+
def m5(arg)
32+
@m5 ||= Hash.new do |h1, key|
33+
h1[key] = long_running_method(key)
34+
end
35+
@m5[arg]
36+
end
37+
38+
# OK: both arguments used in key.
39+
# Still correct if either arg is `false` or `nil`.
40+
def m6(arg1, arg2)
41+
@m6 ||= Hash.new do |h1, arg1|
42+
h1[arg1] = Hash.new do |h2, arg2|
43+
h2[arg2] = result(arg1, arg2)
44+
end
45+
end
46+
@m6[arg1][arg2]
47+
end
48+
49+
# Bad: method has parameter but only one result is memoized.
50+
def m7(arg) # $result=BAD
51+
@m7 ||= begin
52+
arg += 3
53+
end
54+
@m7
55+
end
56+
57+
# Bad: method has parameter but only one result is memoized.
58+
def m8(arg) # $result=BAD
59+
@m8 ||= begin
60+
long_running_method(arg)
61+
end
62+
@m8
63+
end
64+
65+
# Bad: method has parameter but only one result is memoized.
66+
def m9(arg) # $result=BAD
67+
@m9 ||= long_running_method(arg)
68+
end
69+
70+
# Bad: method has parameter but only one result is memoized.
71+
def m10(arg1, arg2) # $result=BAD
72+
@m10 ||= long_running_method(arg1, arg2)
73+
end
74+
75+
# Bad: `arg2` not used in key.
76+
def m11(arg1, arg2) # $result=BAD
77+
@m11 ||= {}
78+
@m11[arg1] ||= long_running_method(arg1, arg2)
79+
end
80+
81+
# Bad: `arg2` not used in key.
82+
def m12(arg1, arg2) # $result=BAD
83+
@m12 ||= Hash.new do |h1, arg1|
84+
h1[arg1] = result(arg1, arg2)
85+
end
86+
@m12[arg1]
87+
end
88+
89+
# Bad: arg not used in key.
90+
def m13(id:) # $result=BAD
91+
@m13 ||= Rails.cache.fetch("product_sku/#{id}", expires_in: 30.minutes) do
92+
ActiveRecord::Base.transaction do
93+
ProductSku.find_by(id: id)
94+
end
95+
end
96+
@m13
97+
end
98+
99+
# Good (FP): arg is used in key via string interpolation.
100+
def m14(arg)
101+
@m14 ||= {}
102+
key = "foo/#{arg}"
103+
@m14[key] ||= long_running_method(arg)
104+
end

0 commit comments

Comments
 (0)