Skip to content

Commit db8a310

Browse files
authored
Merge pull request #9089 from ihsinme/ihsinme-patch-87
CPP: Add query for CWE-125 Out-of-bounds Read with different interpretation of the string when use mbtowc
2 parents 975edac + 4fdf4b2 commit db8a310

File tree

9 files changed

+723
-0
lines changed

9 files changed

+723
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
2+
...
3+
mbtowc(&wc, ptr, 4)); // BAD:we can get unpredictable results
4+
...
5+
mbtowc(&wc, ptr, MB_LEN_MAX); // GOOD
6+
...
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p> Using a function to convert multibyte or wide characters with an invalid length argument may result in an out-of-range access error or unexpected results.</p>
7+
8+
</overview>
9+
10+
<example>
11+
<p>The following example shows the erroneous and corrected method of using function mbtowc.</p>
12+
<sample src="DangerousWorksWithMultibyteOrWideCharacters.cpp" />
13+
14+
</example>
15+
<references>
16+
17+
<li>
18+
CERT Coding Standard:
19+
<a href="https://wiki.sei.cmu.edu/confluence/display/c/ARR30-C.+Do+not+form+or+use+out-of-bounds+pointers+or+array+subscripts">ARR30-C. Do not form or use out-of-bounds pointers or array subscripts - SEI CERT C Coding Standard - Confluence</a>.
20+
</li>
21+
22+
</references>
23+
</qhelp>
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
/**
2+
* @name Dangerous use convert function.
3+
* @description Using convert function with an invalid length argument can result in an out-of-bounds access error or unexpected result.
4+
* @kind problem
5+
* @id cpp/dangerous-use-convert-function
6+
* @problem.severity warning
7+
* @precision medium
8+
* @tags correctness
9+
* security
10+
* external/cwe/cwe-125
11+
*/
12+
13+
import cpp
14+
import semmle.code.cpp.valuenumbering.GlobalValueNumbering
15+
16+
/** Holds if there are indications that the variable is treated as a string. */
17+
predicate exprMayBeString(Expr exp) {
18+
(
19+
exists(StringLiteral sl | globalValueNumber(exp) = globalValueNumber(sl))
20+
or
21+
exists(FunctionCall fctmp |
22+
(
23+
fctmp.getAnArgument().(VariableAccess).getTarget() = exp.(VariableAccess).getTarget() or
24+
globalValueNumber(fctmp.getAnArgument()) = globalValueNumber(exp)
25+
) and
26+
fctmp.getTarget().hasName(["strlen", "strcat", "strncat", "strcpy", "sptintf", "printf"])
27+
)
28+
or
29+
exists(AssignExpr astmp |
30+
astmp.getRValue().getValue() = "0" and
31+
astmp.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() =
32+
exp.(VariableAccess).getTarget()
33+
)
34+
or
35+
exists(ComparisonOperation cotmp, Expr exptmp1, Expr exptmp2 |
36+
exptmp1.getValue() = "0" and
37+
(
38+
exptmp2.(PointerDereferenceExpr).getOperand().(VariableAccess).getTarget() =
39+
exp.(VariableAccess).getTarget() or
40+
exptmp2.(ArrayExpr).getArrayBase().(VariableAccess).getTarget() =
41+
exp.getAChild().(VariableAccess).getTarget()
42+
) and
43+
cotmp.hasOperands(exptmp1, exptmp2)
44+
)
45+
)
46+
}
47+
48+
/** Holds if expression is constant or operator call `sizeof`. */
49+
predicate argConstOrSizeof(Expr exp) {
50+
exp.getValue().toInt() > 1 or
51+
exp.(SizeofTypeOperator).getTypeOperand().getSize() > 1
52+
}
53+
54+
/** Holds if expression is macro. */
55+
predicate argMacro(Expr exp) {
56+
exists(MacroInvocation matmp |
57+
exp = matmp.getExpr() and
58+
(
59+
matmp.getMacroName() = "MB_LEN_MAX" or
60+
matmp.getMacroName() = "MB_CUR_MAX"
61+
)
62+
)
63+
}
64+
65+
/** Holds if erroneous situations of using functions `mbtowc` and `mbrtowc` are detected. */
66+
predicate findUseCharacterConversion(Expr exp, string msg) {
67+
exists(FunctionCall fc |
68+
fc = exp and
69+
(
70+
exists(Loop lptmp | lptmp = fc.getEnclosingStmt().getParentStmt*()) and
71+
fc.getTarget().hasName(["mbtowc", "mbrtowc", "_mbtowc_l"]) and
72+
not fc.getArgument(0).isConstant() and
73+
not fc.getArgument(1).isConstant() and
74+
(
75+
exprMayBeString(fc.getArgument(1)) and
76+
argConstOrSizeof(fc.getArgument(2)) and
77+
fc.getArgument(2).getValue().toInt() < 5 and
78+
not argMacro(fc.getArgument(2)) and
79+
msg = "Size can be less than maximum character length, use macro MB_CUR_MAX."
80+
or
81+
not exprMayBeString(fc.getArgument(1)) and
82+
(
83+
argConstOrSizeof(fc.getArgument(2))
84+
or
85+
argMacro(fc.getArgument(2))
86+
or
87+
exists(DecrementOperation dotmp |
88+
globalValueNumber(dotmp.getAnOperand()) = globalValueNumber(fc.getArgument(2)) and
89+
not exists(AssignSubExpr aetmp |
90+
(
91+
aetmp.getLValue().(VariableAccess).getTarget() =
92+
fc.getArgument(2).(VariableAccess).getTarget() or
93+
globalValueNumber(aetmp.getLValue()) = globalValueNumber(fc.getArgument(2))
94+
) and
95+
globalValueNumber(aetmp.getRValue()) = globalValueNumber(fc)
96+
)
97+
)
98+
) and
99+
msg =
100+
"Access beyond the allocated memory is possible, the length can change without changing the pointer."
101+
or
102+
exists(AssignPointerAddExpr aetmp |
103+
(
104+
aetmp.getLValue().(VariableAccess).getTarget() =
105+
fc.getArgument(0).(VariableAccess).getTarget() or
106+
globalValueNumber(aetmp.getLValue()) = globalValueNumber(fc.getArgument(0))
107+
) and
108+
globalValueNumber(aetmp.getRValue()) = globalValueNumber(fc)
109+
) and
110+
msg = "Maybe you're using the function's return value incorrectly."
111+
)
112+
)
113+
)
114+
}
115+
116+
/** Holds if detecting erroneous situations of working with multibyte characters. */
117+
predicate findUseMultibyteCharacter(Expr exp, string msg) {
118+
exists(ArrayType arrayType, ArrayExpr arrayExpr |
119+
arrayExpr = exp and
120+
arrayExpr.getArrayBase().getType() = arrayType and
121+
(
122+
exists(AssignExpr assZero, SizeofExprOperator sizeofArray, Expr oneValue |
123+
oneValue.getValue() = "1" and
124+
sizeofArray.getExprOperand().getType() = arrayType and
125+
assZero.getLValue() = arrayExpr and
126+
arrayExpr.getArrayOffset().(SubExpr).hasOperands(sizeofArray, oneValue) and
127+
assZero.getRValue().getValue() = "0"
128+
) and
129+
arrayType.getArraySize() != arrayType.getByteSize() and
130+
msg =
131+
"The size of the array element is greater than one byte, so the offset will point outside the array."
132+
or
133+
exists(FunctionCall mbFunction |
134+
(
135+
mbFunction.getTarget().getName().matches("_mbs%") or
136+
mbFunction.getTarget().getName().matches("mbs%") or
137+
mbFunction.getTarget().getName().matches("_mbc%") or
138+
mbFunction.getTarget().getName().matches("mbc%")
139+
) and
140+
mbFunction.getAnArgument().(VariableAccess).getTarget().getADeclarationEntry().getType() =
141+
arrayType
142+
) and
143+
exists(Loop loop, SizeofExprOperator sizeofArray, AssignExpr assignExpr |
144+
arrayExpr.getEnclosingStmt().getParentStmt*() = loop and
145+
sizeofArray.getExprOperand().getType() = arrayType and
146+
assignExpr.getLValue() = arrayExpr and
147+
loop.getCondition().(LTExpr).getLeftOperand().(VariableAccess).getTarget() =
148+
arrayExpr.getArrayOffset().getAChild*().(VariableAccess).getTarget() and
149+
loop.getCondition().(LTExpr).getRightOperand() = sizeofArray
150+
) and
151+
msg =
152+
"This buffer may contain multibyte characters, so attempting to copy may result in part of the last character being lost."
153+
)
154+
)
155+
or
156+
exists(FunctionCall mbccpy, Loop loop, SizeofExprOperator sizeofOp |
157+
mbccpy.getTarget().hasName("_mbccpy") and
158+
mbccpy.getArgument(0) = exp and
159+
exp.getEnclosingStmt().getParentStmt*() = loop and
160+
sizeofOp.getExprOperand().getType() =
161+
exp.getAChild*().(VariableAccess).getTarget().getADeclarationEntry().getType() and
162+
loop.getCondition().(LTExpr).getLeftOperand().(VariableAccess).getTarget() =
163+
exp.getAChild*().(VariableAccess).getTarget() and
164+
loop.getCondition().(LTExpr).getRightOperand() = sizeofOp and
165+
msg =
166+
"This buffer may contain multibyte characters, so an attempt to copy may result in an overflow."
167+
)
168+
}
169+
170+
/** Holds if erroneous situations of using functions `MultiByteToWideChar` and `WideCharToMultiByte` or `mbstowcs` and `_mbstowcs_l` and `mbsrtowcs` are detected. */
171+
predicate findUseStringConversion(
172+
Expr exp, string msg, int posBufSrc, int posBufDst, int posSizeDst, string nameCalls
173+
) {
174+
exists(FunctionCall fc |
175+
fc = exp and
176+
posBufSrc in [0 .. fc.getNumberOfArguments() - 1] and
177+
posSizeDst in [0 .. fc.getNumberOfArguments() - 1] and
178+
(
179+
fc.getTarget().hasName(nameCalls) and
180+
(
181+
globalValueNumber(fc.getArgument(posBufDst)) = globalValueNumber(fc.getArgument(posBufSrc)) and
182+
msg =
183+
"According to the definition of the functions, if the source buffer and the destination buffer are the same, undefined behavior is possible."
184+
or
185+
exists(ArrayType arrayDst |
186+
fc.getArgument(posBufDst).(VariableAccess).getTarget().getADeclarationEntry().getType() =
187+
arrayDst and
188+
fc.getArgument(posSizeDst).getValue().toInt() >= arrayDst.getArraySize() and
189+
not exists(AssignExpr assZero |
190+
assZero.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() =
191+
fc.getArgument(posBufDst).(VariableAccess).getTarget() and
192+
assZero.getRValue().getValue() = "0"
193+
) and
194+
not exists(Expr someExp, FunctionCall checkSize |
195+
checkSize.getASuccessor*() = fc and
196+
checkSize.getTarget().hasName(nameCalls) and
197+
checkSize.getArgument(posSizeDst).getValue() = "0" and
198+
globalValueNumber(checkSize) = globalValueNumber(someExp) and
199+
someExp.getEnclosingStmt().getParentStmt*() instanceof IfStmt
200+
) and
201+
exprMayBeString(fc.getArgument(posBufDst)) and
202+
msg =
203+
"According to the definition of the functions, it is not guaranteed to write a null character at the end of the string, so access beyond the bounds of the destination buffer is possible."
204+
)
205+
or
206+
exists(FunctionCall allocMem |
207+
allocMem.getTarget().hasName(["calloc", "malloc"]) and
208+
globalValueNumber(fc.getArgument(posBufDst)) = globalValueNumber(allocMem) and
209+
(
210+
allocMem.getArgument(allocMem.getNumberOfArguments() - 1).getValue() = "1" or
211+
not exists(SizeofOperator sizeofOperator |
212+
globalValueNumber(allocMem
213+
.getArgument(allocMem.getNumberOfArguments() - 1)
214+
.getAChild*()) = globalValueNumber(sizeofOperator)
215+
)
216+
) and
217+
msg =
218+
"The buffer destination has a type other than char, you need to take this into account when allocating memory."
219+
)
220+
or
221+
fc.getArgument(posBufDst).getValue() = "0" and
222+
fc.getArgument(posSizeDst).getValue() != "0" and
223+
msg =
224+
"If the destination buffer is NULL and its size is not 0, then undefined behavior is possible."
225+
)
226+
)
227+
)
228+
}
229+
230+
from Expr exp, string msg
231+
where
232+
findUseCharacterConversion(exp, msg) or
233+
findUseMultibyteCharacter(exp, msg) or
234+
findUseStringConversion(exp, msg, 1, 0, 2, ["mbstowcs", "_mbstowcs_l", "mbsrtowcs"]) or
235+
findUseStringConversion(exp, msg, 2, 4, 5, ["MultiByteToWideChar", "WideCharToMultiByte"])
236+
select exp, msg
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
| test1.cpp:28:5:28:23 | call to WideCharToMultiByte | According to the definition of the functions, if the source buffer and the destination buffer are the same, undefined behavior is possible. |
2+
| test1.cpp:29:5:29:23 | call to MultiByteToWideChar | According to the definition of the functions, if the source buffer and the destination buffer are the same, undefined behavior is possible. |
3+
| test1.cpp:45:3:45:21 | call to WideCharToMultiByte | According to the definition of the functions, it is not guaranteed to write a null character at the end of the string, so access beyond the bounds of the destination buffer is possible. |
4+
| test1.cpp:58:3:58:21 | call to MultiByteToWideChar | The buffer destination has a type other than char, you need to take this into account when allocating memory. |
5+
| test1.cpp:70:3:70:21 | call to MultiByteToWideChar | The buffer destination has a type other than char, you need to take this into account when allocating memory. |
6+
| test1.cpp:76:10:76:28 | call to WideCharToMultiByte | If the destination buffer is NULL and its size is not 0, then undefined behavior is possible. |
7+
| test1.cpp:93:5:93:23 | call to WideCharToMultiByte | According to the definition of the functions, it is not guaranteed to write a null character at the end of the string, so access beyond the bounds of the destination buffer is possible. |
8+
| test2.cpp:15:5:15:12 | call to mbstowcs | According to the definition of the functions, if the source buffer and the destination buffer are the same, undefined behavior is possible. |
9+
| test2.cpp:17:5:17:15 | call to _mbstowcs_l | According to the definition of the functions, if the source buffer and the destination buffer are the same, undefined behavior is possible. |
10+
| test2.cpp:19:5:19:13 | call to mbsrtowcs | According to the definition of the functions, if the source buffer and the destination buffer are the same, undefined behavior is possible. |
11+
| test2.cpp:35:3:35:10 | call to mbstowcs | According to the definition of the functions, it is not guaranteed to write a null character at the end of the string, so access beyond the bounds of the destination buffer is possible. |
12+
| test2.cpp:48:3:48:10 | call to mbstowcs | The buffer destination has a type other than char, you need to take this into account when allocating memory. |
13+
| test2.cpp:60:3:60:10 | call to mbstowcs | The buffer destination has a type other than char, you need to take this into account when allocating memory. |
14+
| test2.cpp:66:10:66:17 | call to mbstowcs | If the destination buffer is NULL and its size is not 0, then undefined behavior is possible. |
15+
| test2.cpp:80:3:80:10 | call to mbstowcs | According to the definition of the functions, it is not guaranteed to write a null character at the end of the string, so access beyond the bounds of the destination buffer is possible. |
16+
| test3.cpp:16:5:16:13 | access to array | This buffer may contain multibyte characters, so attempting to copy may result in part of the last character being lost. |
17+
| test3.cpp:36:13:36:18 | ... + ... | This buffer may contain multibyte characters, so an attempt to copy may result in an overflow. |
18+
| test3.cpp:47:3:47:24 | access to array | The size of the array element is greater than one byte, so the offset will point outside the array. |
19+
| test.cpp:66:27:66:32 | call to mbtowc | Size can be less than maximum character length, use macro MB_CUR_MAX. |
20+
| test.cpp:76:27:76:32 | call to mbtowc | Size can be less than maximum character length, use macro MB_CUR_MAX. |
21+
| test.cpp:106:11:106:16 | call to mbtowc | Access beyond the allocated memory is possible, the length can change without changing the pointer. |
22+
| test.cpp:123:11:123:16 | call to mbtowc | Access beyond the allocated memory is possible, the length can change without changing the pointer. |
23+
| test.cpp:140:11:140:16 | call to mbtowc | Access beyond the allocated memory is possible, the length can change without changing the pointer. |
24+
| test.cpp:158:11:158:16 | call to mbtowc | Access beyond the allocated memory is possible, the length can change without changing the pointer. |
25+
| test.cpp:181:11:181:16 | call to mbtowc | Access beyond the allocated memory is possible, the length can change without changing the pointer. |
26+
| test.cpp:197:11:197:16 | call to mbtowc | Maybe you're using the function's return value incorrectly. |
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/Security/CWE/CWE-125/DangerousWorksWithMultibyteOrWideCharacters.ql

0 commit comments

Comments
 (0)