|
1 | 1 | import cpp
|
2 | 2 |
|
3 |
| -/** |
4 |
| - * Models a class of functions that are either testers of characters |
5 |
| - * or standard library conversion functions. |
6 |
| - */ |
7 |
| -class CToOrIsCharFunction extends Function { |
8 |
| - CToOrIsCharFunction() { |
9 |
| - this instanceof CIsCharFunction or |
10 |
| - this instanceof CToCharFunction |
11 |
| - } |
| 3 | +/** Gets the name of one of the character testing or conversion APIs modeled in this file. */ private string getCToOrIsName() { |
| 4 | + result = |
| 5 | + [ |
| 6 | + "isalnum", "isalpha", "isascii", "isblank", "iscntrl", "isdigit", "isgraph", "islower", |
| 7 | + "isprint", "ispunct", "isspace", "isupper", "isxdigit", "__isspace", "toascii", "toupper", |
| 8 | + "tolower" |
| 9 | + ] |
12 | 10 | }
|
13 | 11 |
|
14 | 12 | /**
|
15 |
| - * Models a class of functions that test characters. |
| 13 | + * A use of one of the APIs in the `<ctype.h>` header that test or convert characters. |
| 14 | + * |
| 15 | + * Note: these operations are commonly implemented as either a function or a macro. This class |
| 16 | + * abstracts away from those details, providing a `getConvertedArgument` predicate to get the |
| 17 | + * argument after any conversions specified by the user, excluding any conversions induced by |
| 18 | + * the structure of the macro. |
16 | 19 | */
|
17 |
| -class CIsCharFunction extends Function { |
18 |
| - CIsCharFunction() { |
19 |
| - getName() in [ |
20 |
| - "isalnum", "isalpha", "isascii", "isblank", "iscntrl", "isdigit", "isgraph", "islower", |
21 |
| - "isprint", "ispunct", "isspace", "isupper", "isxdigit", "__isspace" |
22 |
| - ] |
| 20 | +abstract class UseOfToOrIsChar extends Element { |
| 21 | + /** Gets the argument of this use, after any conversions explicitly specified by the user. */ |
| 22 | + abstract Expr getConvertedArgument(); |
| 23 | +} |
| 24 | + |
| 25 | +/** A call to a function whose name is given by `getCToOrIsName()`. */ private class CToOrIsCharFunctionCall extends FunctionCall, UseOfToOrIsChar { |
| 26 | + CToOrIsCharFunctionCall() { |
| 27 | + getTarget().getName() = getCToOrIsName() and |
| 28 | + // Some library implementations, such as musl, include a "dead" call to the same function |
| 29 | + // that has also been implemented as a macro, in order to retain the right types. We exclude |
| 30 | + // such calls because they do not appear in the control flow or data flow graph. However, |
| 31 | + // isspace directly calls __isspace, so calls to __isspace are always kept. |
| 32 | + ( |
| 33 | + getTarget().getName() = "__isspace" or |
| 34 | + not any(CToOrIsCharMacroInvocation mi).getAnExpandedElement() = this |
| 35 | + ) |
23 | 36 | }
|
| 37 | + |
| 38 | + override Expr getConvertedArgument() { result = getArgument(0).getExplicitlyConverted() } |
24 | 39 | }
|
25 | 40 |
|
26 |
| -/** |
27 |
| - * Models a class of functions convert characters. |
28 |
| - */ |
29 |
| -class CToCharFunction extends Function { |
30 |
| - CToCharFunction() { getName() in ["toascii", "toupper", "tolower"] } |
| 41 | +/** An invocation of a macro whose name is given by `getCToOrIsName()`. */ private class CToOrIsCharMacroInvocation extends MacroInvocation, UseOfToOrIsChar { |
| 42 | + CToOrIsCharMacroInvocation() { getMacroName() = getCToOrIsName() } |
| 43 | + |
| 44 | + override Expr getConvertedArgument() { |
| 45 | + /* |
| 46 | + * There is no common approach to how the macros are defined, so we handle |
| 47 | + * each compiler/library case individually. Fortunately, there's no conflict |
| 48 | + * between different compilers, so the cases are simply combined with `or`. |
| 49 | + */ |
| 50 | + |
| 51 | + // For the "is" APIs, if clang and gcc use a macro, then it expands to an |
| 52 | + // array access on the left hand side of an & |
| 53 | + exists(ArrayExpr ae | ae = getExpr().(BitwiseAndExpr).getLeftOperand() | |
| 54 | + // Cast to an explicit (int), so we want to unwind only a single conversion |
| 55 | + result = ae.getArrayOffset().getFullyConverted().(Conversion).getExpr() |
| 56 | + ) |
| 57 | + or |
| 58 | + // For the tolower/toupper cases, a secondary macro (`__tobody`) is expanded |
| 59 | + exists(MacroInvocation mi | |
| 60 | + mi.getParentInvocation() = this and |
| 61 | + mi.getMacroName() = "__tobody" |
| 62 | + | |
| 63 | + /* |
| 64 | + * tolower and toupper can be defined by macros which: |
| 65 | + * - if the size of the type is greater than 1 |
| 66 | + * - then check if it's a compile time constant |
| 67 | + * - then use c < -128 || c > 255 ? c : (a)[c] |
| 68 | + * - else call the function |
| 69 | + * - else (a)[c] |
| 70 | + */ |
| 71 | + |
| 72 | + exists(ArrayExpr ae | |
| 73 | + ae = mi.getAnExpandedElement() and |
| 74 | + result = ae.getArrayOffset() and |
| 75 | + // There are two array accesses, but only one should be reachable |
| 76 | + result.getBasicBlock().isReachable() |
| 77 | + ) |
| 78 | + or |
| 79 | + exists(ConditionalExpr ce | |
| 80 | + ce = mi.getAnExpandedElement() and |
| 81 | + result = ce.getThen() and |
| 82 | + result.getBasicBlock().isReachable() |
| 83 | + ) |
| 84 | + ) |
| 85 | + or |
| 86 | + // musl uses a conditional expression as the expansion |
| 87 | + exists(ConditionalExpr ce | ce = getExpr() | |
| 88 | + // for most macro expansions, the else is a subtraction inside a `<` |
| 89 | + exists(SubExpr s | |
| 90 | + not getMacroName() = "isalpha" and |
| 91 | + s = ce.getElse().(LTExpr).getLeftOperand() and |
| 92 | + // Cast to an explicit (int), so we want to unwind only a single conversion |
| 93 | + result = s.getLeftOperand().getFullyConverted().(Conversion).getExpr() |
| 94 | + ) |
| 95 | + or |
| 96 | + // for isalpha, the else is a bitwise or inside a subtraction inside a `<` |
| 97 | + exists(BitwiseOrExpr bo | |
| 98 | + // The left operand of the bitwise or is cast to an explicit (unsigned) |
| 99 | + getMacroName() = "isalpha" and |
| 100 | + bo = ce.getElse().(LTExpr).getLeftOperand().(SubExpr).getLeftOperand() and |
| 101 | + // Unwind only that single conversion, then the parentheses it was applied to |
| 102 | + result = |
| 103 | + bo.getLeftOperand().getFullyConverted().(Conversion).getExpr().(ParenthesisExpr).getExpr() |
| 104 | + ) |
| 105 | + ) |
| 106 | + } |
31 | 107 | }
|
0 commit comments