Skip to content

Commit dab2222

Browse files
committed
STR37-C: Handle macros in <ctype.h>
Implementations of <ctype.h> commonly provide its APIs as macros, as functions, or as some combination of the two. Our query assumed that only functions were used, whereas in practice macros are used with both gcc and clang, and which form is used can vary depending on compiler flags. The CharFunctions.qll library now provides a unified interface from which to get a unique expression for each use of an API in the library, hopefully regardless of whether it is a macro or a function. To do this we have had to hard-code assumptions about the structure of the macros; however, our matrix compiler testing should flag if these assumptions are broken with a particular version of a supported compiler.
1 parent 71c5ae5 commit dab2222

File tree

3 files changed

+124
-51
lines changed

3 files changed

+124
-51
lines changed

c/cert/src/rules/STR37-C/ToCharacterHandlingFunctionsRepresentableAsUChar.ql

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,11 @@ import cpp
1616
import codingstandards.c.cert
1717
import codingstandards.cpp.CharFunctions
1818

19-
from FunctionCall fc, Expr arg
19+
from UseOfToOrIsChar useOfCharAPI, Expr arg
2020
where
21-
not isExcluded(fc, Strings2Package::toCharacterHandlingFunctionsRepresentableAsUCharQuery()) and
22-
// examine all impacted functions
23-
fc.getTarget() instanceof CToOrIsCharFunction and
24-
arg = fc.getArgument(0).getFullyConverted() and
25-
// report on cases where either the explicit or implicit cast
26-
// on the parameter type is not unsigned
27-
not arg.(CStyleCast).getExpr().getType() instanceof UnsignedCharType
28-
select fc, "$@ to character-handling function may not be representable as an unsigned char.", arg,
29-
"Argument"
21+
not isExcluded(useOfCharAPI,
22+
Strings2Package::toCharacterHandlingFunctionsRepresentableAsUCharQuery()) and
23+
arg = useOfCharAPI.getConvertedArgument() and
24+
not arg.getType() instanceof UnsignedCharType
25+
select useOfCharAPI,
26+
"$@ to character-handling function may not be representable as an unsigned char.", arg, "Argument"
Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,28 @@
1-
| test.c:7:3:7:9 | call to isalnum | $@ to character-handling function may not be representable as an unsigned char. | test.c:7:11:7:12 | (int)... | Argument |
2-
| test.c:8:3:8:13 | call to isalpha | $@ to character-handling function may not be representable as an unsigned char. | test.c:8:11:8:12 | (int)... | Argument |
3-
| test.c:10:3:10:9 | call to isblank | $@ to character-handling function may not be representable as an unsigned char. | test.c:10:11:10:12 | (int)... | Argument |
4-
| test.c:11:3:11:9 | call to iscntrl | $@ to character-handling function may not be representable as an unsigned char. | test.c:11:11:11:12 | (int)... | Argument |
5-
| test.c:12:3:12:13 | call to isdigit | $@ to character-handling function may not be representable as an unsigned char. | test.c:12:11:12:12 | (int)... | Argument |
6-
| test.c:13:3:13:13 | call to isgraph | $@ to character-handling function may not be representable as an unsigned char. | test.c:13:11:13:12 | (int)... | Argument |
7-
| test.c:14:3:14:13 | call to islower | $@ to character-handling function may not be representable as an unsigned char. | test.c:14:11:14:12 | (int)... | Argument |
8-
| test.c:15:3:15:13 | call to isprint | $@ to character-handling function may not be representable as an unsigned char. | test.c:15:11:15:12 | (int)... | Argument |
9-
| test.c:16:3:16:9 | call to ispunct | $@ to character-handling function may not be representable as an unsigned char. | test.c:16:11:16:12 | (int)... | Argument |
10-
| test.c:17:3:17:13 | call to __isspace | $@ to character-handling function may not be representable as an unsigned char. | test.c:17:11:17:12 | (int)... | Argument |
11-
| test.c:18:3:18:13 | call to isupper | $@ to character-handling function may not be representable as an unsigned char. | test.c:18:11:18:12 | (int)... | Argument |
12-
| test.c:19:3:19:10 | call to isxdigit | $@ to character-handling function may not be representable as an unsigned char. | test.c:19:12:19:13 | (int)... | Argument |
13-
| test.c:21:3:21:9 | call to toupper | $@ to character-handling function may not be representable as an unsigned char. | test.c:21:11:21:12 | (int)... | Argument |
14-
| test.c:22:3:22:9 | call to tolower | $@ to character-handling function may not be representable as an unsigned char. | test.c:22:11:22:12 | (int)... | Argument |
1+
| test.c:7:3:7:9 | call to isalnum | $@ to character-handling function may not be representable as an unsigned char. | test.c:7:11:7:12 | * ... | Argument |
2+
| test.c:8:3:8:13 | isalpha(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:8:11:8:12 | * ... | Argument |
3+
| test.c:10:3:10:9 | call to isblank | $@ to character-handling function may not be representable as an unsigned char. | test.c:10:11:10:12 | * ... | Argument |
4+
| test.c:11:3:11:9 | call to iscntrl | $@ to character-handling function may not be representable as an unsigned char. | test.c:11:11:11:12 | * ... | Argument |
5+
| test.c:12:3:12:13 | isdigit(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:12:3:12:13 | (...) | Argument |
6+
| test.c:13:3:13:13 | isgraph(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:13:3:13:13 | (...) | Argument |
7+
| test.c:14:3:14:13 | islower(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:14:3:14:13 | (...) | Argument |
8+
| test.c:15:3:15:13 | isprint(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:15:3:15:13 | (...) | Argument |
9+
| test.c:16:3:16:9 | call to ispunct | $@ to character-handling function may not be representable as an unsigned char. | test.c:16:11:16:12 | * ... | Argument |
10+
| test.c:17:3:17:13 | call to __isspace | $@ to character-handling function may not be representable as an unsigned char. | test.c:17:11:17:12 | * ... | Argument |
11+
| test.c:18:3:18:13 | isupper(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:18:3:18:13 | (...) | Argument |
12+
| test.c:19:3:19:10 | call to isxdigit | $@ to character-handling function may not be representable as an unsigned char. | test.c:19:12:19:13 | * ... | Argument |
13+
| test.c:21:3:21:9 | call to toupper | $@ to character-handling function may not be representable as an unsigned char. | test.c:21:11:21:12 | * ... | Argument |
14+
| test.c:22:3:22:9 | call to tolower | $@ to character-handling function may not be representable as an unsigned char. | test.c:22:11:22:12 | * ... | Argument |
1515
| test.c:70:3:70:9 | call to isalnum | $@ to character-handling function may not be representable as an unsigned char. | test.c:70:11:70:11 | t | Argument |
16-
| test.c:71:3:71:12 | call to isalpha | $@ to character-handling function may not be representable as an unsigned char. | test.c:71:11:71:11 | t | Argument |
16+
| test.c:71:3:71:12 | isalpha(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:71:11:71:11 | t | Argument |
1717
| test.c:73:3:73:9 | call to isblank | $@ to character-handling function may not be representable as an unsigned char. | test.c:73:11:73:11 | t | Argument |
1818
| test.c:74:3:74:9 | call to iscntrl | $@ to character-handling function may not be representable as an unsigned char. | test.c:74:11:74:11 | t | Argument |
19-
| test.c:75:3:75:12 | call to isdigit | $@ to character-handling function may not be representable as an unsigned char. | test.c:75:11:75:11 | t | Argument |
20-
| test.c:76:3:76:12 | call to isgraph | $@ to character-handling function may not be representable as an unsigned char. | test.c:76:11:76:11 | t | Argument |
21-
| test.c:77:3:77:12 | call to islower | $@ to character-handling function may not be representable as an unsigned char. | test.c:77:11:77:11 | t | Argument |
22-
| test.c:78:3:78:12 | call to isprint | $@ to character-handling function may not be representable as an unsigned char. | test.c:78:11:78:11 | t | Argument |
19+
| test.c:75:3:75:12 | isdigit(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:75:3:75:12 | (...) | Argument |
20+
| test.c:76:3:76:12 | isgraph(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:76:3:76:12 | (...) | Argument |
21+
| test.c:77:3:77:12 | islower(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:77:3:77:12 | (...) | Argument |
22+
| test.c:78:3:78:12 | isprint(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:78:3:78:12 | (...) | Argument |
2323
| test.c:79:3:79:9 | call to ispunct | $@ to character-handling function may not be representable as an unsigned char. | test.c:79:11:79:11 | t | Argument |
2424
| test.c:80:3:80:12 | call to __isspace | $@ to character-handling function may not be representable as an unsigned char. | test.c:80:11:80:11 | t | Argument |
25-
| test.c:81:3:81:12 | call to isupper | $@ to character-handling function may not be representable as an unsigned char. | test.c:81:11:81:11 | t | Argument |
25+
| test.c:81:3:81:12 | isupper(a) | $@ to character-handling function may not be representable as an unsigned char. | test.c:81:3:81:12 | (...) | Argument |
2626
| test.c:82:3:82:10 | call to isxdigit | $@ to character-handling function may not be representable as an unsigned char. | test.c:82:12:82:12 | t | Argument |
2727
| test.c:84:3:84:9 | call to toupper | $@ to character-handling function may not be representable as an unsigned char. | test.c:84:11:84:11 | t | Argument |
2828
| test.c:85:3:85:9 | call to tolower | $@ to character-handling function may not be representable as an unsigned char. | test.c:85:11:85:11 | t | Argument |
Lines changed: 97 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,107 @@
11
import cpp
22

3-
/**
4-
* Models a class of functions that are either testers of characters
5-
* or standard library conversion functions.
6-
*/
7-
class CToOrIsCharFunction extends Function {
8-
CToOrIsCharFunction() {
9-
this instanceof CIsCharFunction or
10-
this instanceof CToCharFunction
11-
}
3+
private string getCToOrIsName() {
4+
result =
5+
[
6+
"isalnum", "isalpha", "isascii", "isblank", "iscntrl", "isdigit", "isgraph", "islower",
7+
"isprint", "ispunct", "isspace", "isupper", "isxdigit", "__isspace", "toascii", "toupper",
8+
"tolower"
9+
]
1210
}
1311

1412
/**
15-
* Models a class of functions that test characters.
13+
* A use of one of the APIs in the `<ctype.h>` header that test or convert characters.
14+
*
15+
* Note: these operations are commonly implemented as either a function or a macro. This class
16+
* abstracts away from those details, providing a `getConvertedArgument` predicate to get the
17+
* argument after any conversions specified by the user, excluding any conversions induced by
18+
* the structure of the macro, or any implicit conversion to the parameter type of the function.
1619
*/
17-
class CIsCharFunction extends Function {
18-
CIsCharFunction() {
19-
getName() in [
20-
"isalnum", "isalpha", "isascii", "isblank", "iscntrl", "isdigit", "isgraph", "islower",
21-
"isprint", "ispunct", "isspace", "isupper", "isxdigit", "__isspace"
22-
]
20+
abstract class UseOfToOrIsChar extends Element {
21+
/** Gets the argument of this use, after any conversions specified by the user. */
22+
abstract Expr getConvertedArgument();
23+
}
24+
25+
private class CToOrIsCharFunctionCall extends FunctionCall, UseOfToOrIsChar {
26+
CToOrIsCharFunctionCall() {
27+
getTarget().getName() = getCToOrIsName() and
28+
// Some library implementations, such as musl, include a "dead" call to the same function
29+
// that has also been implemented as a macro, in order to retain the right types. We exclude
30+
// this call because it does not appear in the control flow or data flow graph. However,
31+
// isspace directly calls __isspace, which is allowed
32+
(
33+
getTarget().getName() = "__isspace" or
34+
not any(CToOrIsCharMacroInvocation mi).getAnExpandedElement() = this
35+
)
2336
}
37+
38+
override Expr getConvertedArgument() { result = getArgument(0).getExplicitlyConverted() }
2439
}
2540

26-
/**
27-
* Models a class of functions convert characters.
28-
*/
29-
class CToCharFunction extends Function {
30-
CToCharFunction() { getName() in ["toascii", "toupper", "tolower"] }
41+
private class CToOrIsCharMacroInvocation extends MacroInvocation, UseOfToOrIsChar {
42+
CToOrIsCharMacroInvocation() { getMacroName() = getCToOrIsName() }
43+
44+
override Expr getConvertedArgument() {
45+
/*
46+
* There is no common approach to how the macros are defined, so we handle
47+
* each compiler/library case individually. Fortunately, there's no conflict
48+
* between different compilers.
49+
*/
50+
51+
// For the "is" APIs, if clang and gcc use a macro, then it expands to an
52+
// array access on the left hand side of an &
53+
exists(ArrayExpr ae | ae = getExpr().(BitwiseAndExpr).getLeftOperand() |
54+
// Cast to an explicit (int), so we want to unwind only a single conversion
55+
result = ae.getArrayOffset().getFullyConverted().(Conversion).getExpr()
56+
)
57+
or
58+
// For the tolower/toupper cases, a secondary macro is expanded
59+
exists(MacroInvocation mi |
60+
mi.getParentInvocation() = this and
61+
mi.getMacroName() = "__tobody"
62+
|
63+
/*
64+
* tolower and toupper can be defined by macros which:
65+
* - if the size of the type is greater than 1
66+
* - then check if it's a compile time constant
67+
* - then use c < -128 || c > 255 ? c : (a)[c]
68+
* - else call the function
69+
* - else (a)[c]
70+
*/
71+
72+
exists(ArrayExpr ae |
73+
ae = mi.getAnExpandedElement() and
74+
result = ae.getArrayOffset() and
75+
// There are two array accesses, but only one should be reachable
76+
result.getBasicBlock().isReachable()
77+
)
78+
or
79+
exists(ConditionalExpr ce |
80+
ce = mi.getAnExpandedElement() and
81+
result = ce.getThen() and
82+
result.getBasicBlock().isReachable()
83+
)
84+
)
85+
or
86+
// musl uses a conditional expression as the expansion
87+
exists(ConditionalExpr ce | ce = getExpr() |
88+
// for most macro expansions, the else is a subtraction inside a `<`
89+
exists(SubExpr s |
90+
not getMacroName() = "isalpha" and
91+
s = ce.getElse().(LTExpr).getLeftOperand() and
92+
// Cast to an explicit (int), so we want to unwind only a single conversion
93+
result = s.getLeftOperand().getFullyConverted().(Conversion).getExpr()
94+
)
95+
or
96+
// for isalpha, the else is a bitwise or inside a subtraction inside a `<`
97+
exists(BitwiseOrExpr bo |
98+
// Cast to an explicit (unsigned)
99+
getMacroName() = "isalpha" and
100+
bo = ce.getElse().(LTExpr).getLeftOperand().(SubExpr).getLeftOperand() and
101+
// Cast to an explicit (int), so we want to unwind only a single conversion
102+
result =
103+
bo.getLeftOperand().getFullyConverted().(Conversion).getExpr().(ParenthesisExpr).getExpr()
104+
)
105+
)
106+
}
31107
}

0 commit comments

Comments
 (0)