Skip to content

Commit 4482669

Browse files
committed
C++: Add a new 'InvalidPointerDeref' query to experimental.
1 parent b8a5aa5 commit 4482669

File tree

6 files changed

+1181
-0
lines changed

6 files changed

+1181
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
void *malloc(unsigned);
2+
unsigned get_size();
3+
void write_data(const unsigned char*, const unsigned char*);
4+
5+
int main(int argc, char* argv[]) {
6+
unsigned size = get_size();
7+
8+
{
9+
unsigned char *begin = (unsigned char*)malloc(size);
10+
if(!begin) return -1;
11+
12+
unsigned char* end = begin + size;
13+
write_data(begin, end);
14+
*end = '\0'; // BAD: Out-of-bounds write
15+
}
16+
17+
{
18+
unsigned char *begin = (unsigned char*)malloc(size);
19+
if(!begin) return -1;
20+
21+
unsigned char* end = begin + size;
22+
write_data(begin, end);
23+
*(end - 1) = '\0'; // GOOD: writing to the last byte
24+
}
25+
26+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>The program performs an out-of-bounds read or write operation. In addition to causing program instability, techniques exist which may allow an attacker to use this vulnerability to execute arbitrary code.</p>
7+
8+
</overview>
9+
<recommendation>
10+
11+
<p>Ensure that pointer dereferences are properly guarded to ensure that they cannot be used to read or write past the end of the allocation.</p>
12+
13+
</recommendation>
14+
<example>
15+
<p>The first example allocates a buffer of size <code>size</code> and creates a local variable that stores the location that is one byte past the end of the allocation.
16+
This local variable is then dereferenced which results in an out-of-bounds write.
17+
The second example subtracts one from the <code>end</code> variable before dereferencing it. This subtraction ensures that the write correctly updates the final byte of the allocation.</p>
18+
<sample src="InvalidPointerDeref.cpp" />
19+
20+
</example>
21+
<references>
22+
23+
<li>CERT C Coding Standard:
24+
<a href="https://wiki.sei.cmu.edu/confluence/display/c/ARR30-C.+Do+not+form+or+use+out-of-bounds+pointers+or+array+subscripts">ARR30-C. Do not form or use out-of-bounds pointers or array subscripts</a>.</li>
25+
<li>
26+
OWASP:
27+
<a href="https://owasp.org/www-community/vulnerabilities/Buffer_Overflow">Buffer Overflow</a>.
28+
</li>
29+
30+
</references>
31+
</qhelp>
Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
/**
2+
* @name Invalid pointer dereference
3+
* @description Dereferencing a pointer that points past it allocation is undefined behavior
4+
* and may lead to security vulnerabilities.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @precision high
8+
* @id cpp/invalid-pointer-deref
9+
* @tags reliability
10+
* security
11+
* external/cwe/cwe-119
12+
* external/cwe/cwe-125
13+
* external/cwe/cwe-193
14+
* external/cwe/cwe-787
15+
*/
16+
17+
import cpp
18+
import experimental.semmle.code.cpp.dataflow.ProductFlow
19+
import experimental.semmle.code.cpp.ir.dataflow.DataFlow3
20+
import experimental.semmle.code.cpp.semantic.analysis.RangeAnalysis
21+
import experimental.semmle.code.cpp.semantic.SemanticBound
22+
import experimental.semmle.code.cpp.semantic.SemanticExprSpecific
23+
import semmle.code.cpp.ir.IR
24+
25+
pragma[nomagic]
26+
Instruction getABoundIn(SemBound b, IRFunction func) {
27+
result = b.getExpr(0) and
28+
result.getEnclosingIRFunction() = func
29+
}
30+
31+
/**
32+
* Holds if `i <= b + delta`.
33+
*/
34+
pragma[nomagic]
35+
predicate bounded(Instruction i, Instruction b, int delta) {
36+
exists(SemBound bound, IRFunction func |
37+
semBounded(getSemanticExpr(i), bound, delta, true, _) and
38+
b = getABoundIn(bound, func) and
39+
i.getEnclosingIRFunction() = func
40+
)
41+
}
42+
43+
/**
44+
* Holds if the combination of `n` and `state` represents an appropriate
45+
* source for the expression `e` suitable for use-use flow.
46+
*/
47+
private predicate hasSizeImpl(Expr e, DataFlow::Node n, string state) {
48+
// The simple case: If the size is a variable access with no qualifier we can just use the
49+
// dataflow node for that expression and no state.
50+
exists(VariableAccess va |
51+
va = e and
52+
not va instanceof FieldAccess and
53+
n.asConvertedExpr() = va.getFullyConverted() and
54+
state = "0"
55+
)
56+
or
57+
// If the size is a choice between two expressions we allow both to be nodes representing the size.
58+
exists(ConditionalExpr cond | cond = e | hasSizeImpl([cond.getThen(), cond.getElse()], n, state))
59+
or
60+
// If the size is an expression plus a constant, we pick the dataflow node of the expression and
61+
// remember the constant in the state.
62+
exists(Expr const, Expr nonconst |
63+
e.(AddExpr).hasOperands(const, nonconst) and
64+
state = const.getValue() and
65+
hasSizeImpl(nonconst, n, _)
66+
)
67+
or
68+
exists(Expr const, Expr nonconst |
69+
e.(SubExpr).hasOperands(const, nonconst) and
70+
state = "-" + const.getValue() and
71+
hasSizeImpl(nonconst, n, _)
72+
)
73+
}
74+
75+
/**
76+
* Holds if `(n, state)` pair represents the source of flow for the size
77+
* expression associated with `alloc`.
78+
*/
79+
predicate hasSize(AllocationExpr alloc, DataFlow::Node n, string state) {
80+
hasSizeImpl(alloc.getSizeExpr(), n, state)
81+
}
82+
83+
/**
84+
* A product-flow configuration for flow from an (allocation, size) pair to a
85+
* pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`.
86+
*/
87+
class AllocToInvalidPointerConf extends ProductFlow::Configuration {
88+
AllocToInvalidPointerConf() { this = "AllocToInvalidPointerConf" }
89+
90+
override predicate isSourcePair(
91+
DataFlow::Node source1, string state1, DataFlow::Node source2, string state2
92+
) {
93+
state1 = "" and
94+
hasSize(source1.asConvertedExpr(), source2, state2)
95+
}
96+
97+
override predicate isSinkPair(
98+
DataFlow::Node sink1, DataFlow::FlowState state1, DataFlow::Node sink2,
99+
DataFlow::FlowState state2
100+
) {
101+
state1 = "" and
102+
exists(int delta |
103+
isSinkImpl(_, sink1, sink2, delta) and
104+
state2 = delta.toString()
105+
)
106+
}
107+
}
108+
109+
pragma[nomagic]
110+
predicate pointerAddInstructionHasOperands(
111+
PointerAddInstruction pai, Instruction left, Instruction right
112+
) {
113+
pai.getLeft() = left and
114+
pai.getRight() = right
115+
}
116+
117+
/**
118+
* Holds if `pai` is non-strictly upper bounded by `b + delta` and `sink1` is the
119+
* non-constant operand of the pointer-arithmetic operation.
120+
*/
121+
pragma[nomagic]
122+
predicate pointerAddInstructionHasBounds(
123+
PointerAddInstruction pai, DataFlow::Node sink1, Instruction sink2, int delta
124+
) {
125+
exists(Instruction right |
126+
pointerAddInstructionHasOperands(pai, sink1.asInstruction(), right) and
127+
bounded(right, sink2, delta)
128+
)
129+
}
130+
131+
/**
132+
* Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the
133+
* non-constant operand of the pointer-arithmetic operation.
134+
*/
135+
predicate isSinkImpl(
136+
PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta
137+
) {
138+
pointerAddInstructionHasBounds(pai, sink1, sink2.asInstruction(), delta)
139+
}
140+
141+
/**
142+
* Holds if `sink` is a sink for `InvalidPointerToDerefConf` and `i` is a `StoreInstruction` that
143+
* writes to an address that non-strictly upper-bounds `sink`, or `i` is a `LoadInstruction` that
144+
* reads from an address that non-strictly upper-bounds `sink`.
145+
*/
146+
predicate isInvalidPointerDerefSink(DataFlow::Node sink, Instruction i, string operation) {
147+
exists(AddressOperand addr, int delta |
148+
bounded(addr.getDef(), sink.asInstruction(), delta) and
149+
delta >= 0 and
150+
i.getAnOperand() = addr
151+
|
152+
i instanceof StoreInstruction and
153+
operation = "write"
154+
or
155+
i instanceof LoadInstruction and
156+
operation = "read"
157+
)
158+
}
159+
160+
/**
161+
* A configuration to track flow from a pointer-arithmetic operation found
162+
* by `AllocToInvalidPointerConf` to a dereference of the pointer.
163+
*/
164+
class InvalidPointerToDerefConf extends DataFlow3::Configuration {
165+
InvalidPointerToDerefConf() { this = "InvalidPointerToDerefConf" }
166+
167+
override predicate isSource(DataFlow::Node source) { invalidPointerToDerefSource(_, source, _) }
168+
169+
override predicate isSink(DataFlow::Node sink) { isInvalidPointerDerefSink(sink, _, _) }
170+
}
171+
172+
predicate invalidPointerToDerefSource(
173+
PointerArithmeticInstruction pai, DataFlow::Node source, int delta
174+
) {
175+
exists(ProductFlow::Configuration conf, DataFlow::PathNode p, DataFlow::Node sink1 |
176+
p.getNode() = sink1 and
177+
conf.hasFlowPath(_, _, p, _) and
178+
isSinkImpl(pai, sink1, _, _) and
179+
bounded(source.asInstruction(), pai, delta) and
180+
delta >= 0
181+
)
182+
}
183+
184+
newtype TMergedPathNode =
185+
// The path nodes computed by the first projection of `AllocToInvalidPointerConf`
186+
TPathNode1(DataFlow::PathNode p) or
187+
// The path nodes computed by `InvalidPointerToDerefConf`
188+
TPathNode3(DataFlow3::PathNode p) or
189+
// The read/write that uses the invalid pointer identified by `InvalidPointerToDerefConf`.
190+
// This one is needed because the sink identified by `InvalidPointerToDerefConf` is the
191+
// pointer, but we want to raise an alert at the dereference.
192+
TPathNodeSink(Instruction i) {
193+
exists(DataFlow::Node n |
194+
any(InvalidPointerToDerefConf conf).hasFlow(_, n) and
195+
isInvalidPointerDerefSink(n, i, _)
196+
)
197+
}
198+
199+
class MergedPathNode extends TMergedPathNode {
200+
string toString() { none() }
201+
202+
final DataFlow::PathNode asPathNode1() { this = TPathNode1(result) }
203+
204+
final DataFlow3::PathNode asPathNode3() { this = TPathNode3(result) }
205+
206+
final Instruction asSinkNode() { this = TPathNodeSink(result) }
207+
208+
predicate hasLocationInfo(
209+
string filepath, int startline, int startcolumn, int endline, int endcolumn
210+
) {
211+
none()
212+
}
213+
}
214+
215+
class PathNode1 extends MergedPathNode, TPathNode1 {
216+
override string toString() {
217+
exists(DataFlow::PathNode p |
218+
this = TPathNode1(p) and
219+
result = p.toString()
220+
)
221+
}
222+
223+
override predicate hasLocationInfo(
224+
string filepath, int startline, int startcolumn, int endline, int endcolumn
225+
) {
226+
this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
227+
}
228+
}
229+
230+
class PathNode3 extends MergedPathNode, TPathNode3 {
231+
override string toString() {
232+
exists(DataFlow3::PathNode p |
233+
this = TPathNode3(p) and
234+
result = p.toString()
235+
)
236+
}
237+
238+
override predicate hasLocationInfo(
239+
string filepath, int startline, int startcolumn, int endline, int endcolumn
240+
) {
241+
this.asPathNode3().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
242+
}
243+
}
244+
245+
class PathSinkNode extends MergedPathNode, TPathNodeSink {
246+
override string toString() {
247+
exists(Instruction i |
248+
this = TPathNodeSink(i) and
249+
result = i.toString()
250+
)
251+
}
252+
253+
override predicate hasLocationInfo(
254+
string filepath, int startline, int startcolumn, int endline, int endcolumn
255+
) {
256+
this.asSinkNode()
257+
.getLocation()
258+
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
259+
}
260+
}
261+
262+
query predicate edges(MergedPathNode node1, MergedPathNode node2) {
263+
node1.asPathNode1().getASuccessor() = node2.asPathNode1()
264+
or
265+
joinOn1(_, node1.asPathNode1(), node2.asPathNode3())
266+
or
267+
node1.asPathNode3().getASuccessor() = node2.asPathNode3()
268+
or
269+
joinOn2(node1.asPathNode3(), node2.asSinkNode(), _)
270+
}
271+
272+
/**
273+
* Holds if `p1` is a sink of `AllocToInvalidPointerConf` and `p2` is a source
274+
* of `InvalidPointerToDerefConf`, and they are connected through `pai`.
275+
*/
276+
predicate joinOn1(PointerArithmeticInstruction pai, DataFlow::PathNode p1, DataFlow3::PathNode p2) {
277+
isSinkImpl(pai, p1.getNode(), _, _) and
278+
invalidPointerToDerefSource(pai, p2.getNode(), _)
279+
}
280+
281+
/**
282+
* Holds if `p1` is a sink of `InvalidPointerToDerefConf` and `i` is the instruction
283+
* that dereferences `p1`. The string `operation` describes whether the `i` is
284+
* a `StoreInstruction` or `LoadInstruction`.
285+
*/
286+
predicate joinOn2(DataFlow3::PathNode p1, Instruction i, string operation) {
287+
isInvalidPointerDerefSink(p1.getNode(), i, operation)
288+
}
289+
290+
predicate hasFlowPath(
291+
MergedPathNode source1, MergedPathNode sink, DataFlow3::PathNode source3,
292+
PointerArithmeticInstruction pai, string operation
293+
) {
294+
exists(
295+
AllocToInvalidPointerConf conf1, InvalidPointerToDerefConf conf2, DataFlow3::PathNode sink3,
296+
DataFlow::PathNode sink1
297+
|
298+
conf1.hasFlowPath(source1.asPathNode1(), _, sink1, _) and
299+
joinOn1(pai, sink1, source3) and
300+
conf2.hasFlowPath(source3, sink3) and
301+
joinOn2(sink3, sink.asSinkNode(), operation)
302+
)
303+
}
304+
305+
from
306+
MergedPathNode source, MergedPathNode sink, int k, string kstr, DataFlow3::PathNode source3,
307+
PointerArithmeticInstruction pai, string operation, Expr offset, DataFlow::Node n
308+
where
309+
hasFlowPath(source, sink, source3, pai, operation) and
310+
invalidPointerToDerefSource(pai, source3.getNode(), k) and
311+
offset = pai.getRight().getUnconvertedResultExpression() and
312+
n = source.asPathNode1().getNode() and
313+
if k = 0 then kstr = "" else kstr = " + " + k
314+
select sink, source, sink,
315+
"This " + operation + " might be out of bounds, as the pointer might be equal to $@ + $@" + kstr +
316+
".", n, n.toString(), offset, offset.toString()

0 commit comments

Comments
 (0)