Skip to content

Commit f68c0a2

Browse files
committed
[analyzer] Add path note tags to standard library function summaries.
The patch is straightforward except the tiny fix in BugReporterVisitors.cpp that suppresses a default note for "Assuming pointer value is null" when a note tag from the checker is present. This is probably the right thing to do but also definitely not a complete solution to the problem of different sources of path notes being unaware of each other, which is a large and annoying issue that we have to deal with. Note tags really help there because they're nicely introspectable. The problem is demonstrated by the newly added getenv() test. Differential Revision: https://reviews.llvm.org/D122285
1 parent 11d3e31 commit f68c0a2

File tree

4 files changed

+191
-67
lines changed

4 files changed

+191
-67
lines changed

clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp

Lines changed: 122 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,6 @@
3838
// Non-pure functions, for which only partial improvement over the default
3939
// behavior is expected, are modeled via check::PostCall, non-intrusively.
4040
//
41-
// The following standard C functions are currently supported:
42-
//
43-
// fgetc getline isdigit isupper toascii
44-
// fread isalnum isgraph isxdigit
45-
// fwrite isalpha islower read
46-
// getc isascii isprint write
47-
// getchar isblank ispunct toupper
48-
// getdelim iscntrl isspace tolower
49-
//
5041
//===----------------------------------------------------------------------===//
5142

5243
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
@@ -384,7 +375,43 @@ class StdLibraryFunctionsChecker
384375
};
385376

386377
/// The complete list of constraints that defines a single branch.
387-
typedef std::vector<ValueConstraintPtr> ConstraintSet;
378+
using ConstraintSet = std::vector<ValueConstraintPtr>;
379+
380+
/// A single branch of a function summary.
381+
///
382+
/// A branch is defined by a series of constraints - "assumptions" -
383+
/// that together form a single possible outcome of invoking the function.
384+
/// When static analyzer considers a branch, it tries to introduce
385+
/// a child node in the Exploded Graph. The child node has to include
386+
/// constraints that define the branch. If the constraints contradict
387+
/// existing constraints in the state, the node is not created and the branch
388+
/// is dropped; otherwise it's queued for future exploration.
389+
/// The branch is accompanied by a note text that may be displayed
390+
/// to the user when a bug is found on a path that takes this branch.
391+
///
392+
/// For example, consider the branches in `isalpha(x)`:
393+
/// Branch 1)
394+
/// x is in range ['A', 'Z'] or in ['a', 'z']
395+
/// then the return value is not 0. (I.e. out-of-range [0, 0])
396+
/// and the note may say "Assuming the character is alphabetical"
397+
/// Branch 2)
398+
/// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
399+
/// then the return value is 0
400+
/// and the note may say "Assuming the character is non-alphabetical".
401+
class SummaryCase {
402+
ConstraintSet Constraints;
403+
StringRef Note;
404+
405+
public:
406+
SummaryCase(ConstraintSet &&Constraints, StringRef Note)
407+
: Constraints(std::move(Constraints)), Note(Note) {}
408+
409+
SummaryCase(const ConstraintSet &Constraints, StringRef Note)
410+
: Constraints(Constraints), Note(Note) {}
411+
412+
const ConstraintSet &getConstraints() const { return Constraints; }
413+
StringRef getNote() const { return Note; }
414+
};
388415

389416
using ArgTypes = std::vector<Optional<QualType>>;
390417
using RetType = Optional<QualType>;
@@ -451,23 +478,12 @@ class StdLibraryFunctionsChecker
451478
return T;
452479
}
453480

454-
using Cases = std::vector<ConstraintSet>;
481+
using SummaryCases = std::vector<SummaryCase>;
455482

456483
/// A summary includes information about
457484
/// * function prototype (signature)
458485
/// * approach to invalidation,
459-
/// * a list of branches - a list of list of ranges -
460-
/// A branch represents a path in the exploded graph of a function (which
461-
/// is a tree). So, a branch is a series of assumptions. In other words,
462-
/// branches represent split states and additional assumptions on top of
463-
/// the splitting assumption.
464-
/// For example, consider the branches in `isalpha(x)`
465-
/// Branch 1)
466-
/// x is in range ['A', 'Z'] or in ['a', 'z']
467-
/// then the return value is not 0. (I.e. out-of-range [0, 0])
468-
/// Branch 2)
469-
/// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
470-
/// then the return value is 0.
486+
/// * a list of branches - so, a list of list of ranges,
471487
/// * a list of argument constraints, that must be true on every branch.
472488
/// If these constraints are not satisfied that means a fatal error
473489
/// usually resulting in undefined behaviour.
@@ -482,7 +498,7 @@ class StdLibraryFunctionsChecker
482498
/// signature is matched.
483499
class Summary {
484500
const InvalidationKind InvalidationKd;
485-
Cases CaseConstraints;
501+
SummaryCases Cases;
486502
ConstraintSet ArgConstraints;
487503

488504
// The function to which the summary applies. This is set after lookup and
@@ -492,12 +508,12 @@ class StdLibraryFunctionsChecker
492508
public:
493509
Summary(InvalidationKind InvalidationKd) : InvalidationKd(InvalidationKd) {}
494510

495-
Summary &Case(ConstraintSet &&CS) {
496-
CaseConstraints.push_back(std::move(CS));
511+
Summary &Case(ConstraintSet &&CS, StringRef Note = "") {
512+
Cases.push_back(SummaryCase(std::move(CS), Note));
497513
return *this;
498514
}
499-
Summary &Case(const ConstraintSet &CS) {
500-
CaseConstraints.push_back(CS);
515+
Summary &Case(const ConstraintSet &CS, StringRef Note = "") {
516+
Cases.push_back(SummaryCase(CS, Note));
501517
return *this;
502518
}
503519
Summary &ArgConstraint(ValueConstraintPtr VC) {
@@ -508,7 +524,7 @@ class StdLibraryFunctionsChecker
508524
}
509525

510526
InvalidationKind getInvalidationKd() const { return InvalidationKd; }
511-
const Cases &getCaseConstraints() const { return CaseConstraints; }
527+
const SummaryCases &getCases() const { return Cases; }
512528
const ConstraintSet &getArgConstraints() const { return ArgConstraints; }
513529

514530
QualType getArgType(ArgNo ArgN) const {
@@ -530,8 +546,8 @@ class StdLibraryFunctionsChecker
530546
// Once we know the exact type of the function then do validation check on
531547
// all the given constraints.
532548
bool validateByConstraints(const FunctionDecl *FD) const {
533-
for (const ConstraintSet &Case : CaseConstraints)
534-
for (const ValueConstraintPtr &Constraint : Case)
549+
for (const SummaryCase &Case : Cases)
550+
for (const ValueConstraintPtr &Constraint : Case.getConstraints())
535551
if (!Constraint->checkValidity(FD))
536552
return false;
537553
for (const ValueConstraintPtr &Constraint : ArgConstraints)
@@ -842,18 +858,29 @@ void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
842858
// Now apply the constraints.
843859
const Summary &Summary = *FoundSummary;
844860
ProgramStateRef State = C.getState();
861+
const ExplodedNode *Node = C.getPredecessor();
845862

846863
// Apply case/branch specifications.
847-
for (const ConstraintSet &Case : Summary.getCaseConstraints()) {
864+
for (const SummaryCase &Case : Summary.getCases()) {
848865
ProgramStateRef NewState = State;
849-
for (const ValueConstraintPtr &Constraint : Case) {
866+
for (const ValueConstraintPtr &Constraint : Case.getConstraints()) {
850867
NewState = Constraint->apply(NewState, Call, Summary, C);
851868
if (!NewState)
852869
break;
853870
}
854871

855-
if (NewState && NewState != State)
856-
C.addTransition(NewState);
872+
if (NewState && NewState != State) {
873+
StringRef Note = Case.getNote();
874+
const NoteTag *Tag = C.getNoteTag(
875+
// Sorry couldn't help myself.
876+
[Node, Note]() {
877+
// Don't emit "Assuming..." note when we ended up
878+
// knowing in advance which branch is taken.
879+
return (Node->succ_size() > 1) ? Note.str() : "";
880+
},
881+
/*IsPrunable=*/true);
882+
C.addTransition(NewState, Tag);
883+
}
857884
}
858885
}
859886

@@ -1211,7 +1238,8 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
12111238
// Boils down to isupper() or islower() or isdigit().
12121239
.Case({ArgumentCondition(0U, WithinRange,
12131240
{{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
1214-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1241+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1242+
"Assuming the character is alphanumeric")
12151243
// The locale-specific range.
12161244
// No post-condition. We are completely unaware of
12171245
// locale-specific return values.
@@ -1220,65 +1248,81 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
12201248
{ArgumentCondition(
12211249
0U, OutOfRange,
12221250
{{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
1223-
ReturnValueCondition(WithinRange, SingleValue(0))})
1251+
ReturnValueCondition(WithinRange, SingleValue(0))},
1252+
"Assuming the character is non-alphanumeric")
12241253
.ArgConstraint(ArgumentCondition(
12251254
0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})));
12261255
addToFunctionSummaryMap(
12271256
"isalpha", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12281257
Summary(EvalCallAsPure)
12291258
.Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}),
1230-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1259+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1260+
"Assuming the character is alphabetical")
12311261
// The locale-specific range.
12321262
.Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
12331263
.Case({ArgumentCondition(
12341264
0U, OutOfRange,
12351265
{{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
1236-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1266+
ReturnValueCondition(WithinRange, SingleValue(0))},
1267+
"Assuming the character is non-alphabetical"));
12371268
addToFunctionSummaryMap(
12381269
"isascii", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12391270
Summary(EvalCallAsPure)
12401271
.Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
1241-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1272+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1273+
"Assuming the character is an ASCII character")
12421274
.Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
1243-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1275+
ReturnValueCondition(WithinRange, SingleValue(0))},
1276+
"Assuming the character is not an ASCII character"));
12441277
addToFunctionSummaryMap(
12451278
"isblank", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12461279
Summary(EvalCallAsPure)
12471280
.Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}),
1248-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1281+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1282+
"Assuming the character is a blank character")
12491283
.Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}),
1250-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1284+
ReturnValueCondition(WithinRange, SingleValue(0))},
1285+
"Assuming the character is not a blank character"));
12511286
addToFunctionSummaryMap(
12521287
"iscntrl", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12531288
Summary(EvalCallAsPure)
12541289
.Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}),
1255-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1290+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1291+
"Assuming the character is a control character")
12561292
.Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
1257-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1293+
ReturnValueCondition(WithinRange, SingleValue(0))},
1294+
"Assuming the character is not a control character"));
12581295
addToFunctionSummaryMap(
12591296
"isdigit", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12601297
Summary(EvalCallAsPure)
12611298
.Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
1262-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1299+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1300+
"Assuming the character is a digit")
12631301
.Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
1264-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1302+
ReturnValueCondition(WithinRange, SingleValue(0))},
1303+
"Assuming the character is not a digit"));
12651304
addToFunctionSummaryMap(
12661305
"isgraph", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12671306
Summary(EvalCallAsPure)
12681307
.Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
1269-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1270-
.Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
1271-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1308+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1309+
"Assuming the character has graphical representation")
1310+
.Case(
1311+
{ArgumentCondition(0U, OutOfRange, Range(33, 126)),
1312+
ReturnValueCondition(WithinRange, SingleValue(0))},
1313+
"Assuming the character does not have graphical representation"));
12721314
addToFunctionSummaryMap(
12731315
"islower", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12741316
Summary(EvalCallAsPure)
12751317
// Is certainly lowercase.
12761318
.Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
1277-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1319+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1320+
"Assuming the character is a lowercase letter")
12781321
// Is ascii but not lowercase.
12791322
.Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
12801323
ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
1281-
ReturnValueCondition(WithinRange, SingleValue(0))})
1324+
ReturnValueCondition(WithinRange, SingleValue(0))},
1325+
"Assuming the character is not a lowercase letter")
12821326
// The locale-specific range.
12831327
.Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
12841328
// Is not an unsigned char.
@@ -1288,52 +1332,62 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
12881332
"isprint", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12891333
Summary(EvalCallAsPure)
12901334
.Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
1291-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1335+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1336+
"Assuming the character is printable")
12921337
.Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
1293-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1338+
ReturnValueCondition(WithinRange, SingleValue(0))},
1339+
"Assuming the character is non-printable"));
12941340
addToFunctionSummaryMap(
12951341
"ispunct", Signature(ArgTypes{IntTy}, RetType{IntTy}),
12961342
Summary(EvalCallAsPure)
12971343
.Case({ArgumentCondition(
12981344
0U, WithinRange,
12991345
{{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
1300-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1346+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1347+
"Assuming the character is a punctuation mark")
13011348
.Case({ArgumentCondition(
13021349
0U, OutOfRange,
13031350
{{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
1304-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1351+
ReturnValueCondition(WithinRange, SingleValue(0))},
1352+
"Assuming the character is not a punctuation mark"));
13051353
addToFunctionSummaryMap(
13061354
"isspace", Signature(ArgTypes{IntTy}, RetType{IntTy}),
13071355
Summary(EvalCallAsPure)
13081356
// Space, '\f', '\n', '\r', '\t', '\v'.
13091357
.Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}),
1310-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1358+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1359+
"Assuming the character is a whitespace character")
13111360
// The locale-specific range.
13121361
.Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
13131362
.Case({ArgumentCondition(0U, OutOfRange,
13141363
{{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
1315-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1364+
ReturnValueCondition(WithinRange, SingleValue(0))},
1365+
"Assuming the character is not a whitespace character"));
13161366
addToFunctionSummaryMap(
13171367
"isupper", Signature(ArgTypes{IntTy}, RetType{IntTy}),
13181368
Summary(EvalCallAsPure)
13191369
// Is certainly uppercase.
13201370
.Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
1321-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1371+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1372+
"Assuming the character is an uppercase letter")
13221373
// The locale-specific range.
13231374
.Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
13241375
// Other.
13251376
.Case({ArgumentCondition(0U, OutOfRange,
13261377
{{'A', 'Z'}, {128, UCharRangeMax}}),
1327-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1378+
ReturnValueCondition(WithinRange, SingleValue(0))},
1379+
"Assuming the character is not an uppercase letter"));
13281380
addToFunctionSummaryMap(
13291381
"isxdigit", Signature(ArgTypes{IntTy}, RetType{IntTy}),
13301382
Summary(EvalCallAsPure)
13311383
.Case({ArgumentCondition(0U, WithinRange,
13321384
{{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
1333-
ReturnValueCondition(OutOfRange, SingleValue(0))})
1385+
ReturnValueCondition(OutOfRange, SingleValue(0))},
1386+
"Assuming the character is a hexadecimal digit")
13341387
.Case({ArgumentCondition(0U, OutOfRange,
13351388
{{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
1336-
ReturnValueCondition(WithinRange, SingleValue(0))}));
1389+
ReturnValueCondition(WithinRange, SingleValue(0))},
1390+
"Assuming the character is not a hexadecimal digit"));
13371391
addToFunctionSummaryMap(
13381392
"toupper", Signature(ArgTypes{IntTy}, RetType{IntTy}),
13391393
Summary(EvalCallAsPure)
@@ -1435,12 +1489,14 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
14351489
GetLineSummary);
14361490

14371491
{
1438-
Summary GetenvSummary = Summary(NoEvalCall)
1439-
.ArgConstraint(NotNull(ArgNo(0)))
1440-
.Case({NotNull(Ret)});
1492+
Summary GetenvSummary =
1493+
Summary(NoEvalCall)
1494+
.ArgConstraint(NotNull(ArgNo(0)))
1495+
.Case({NotNull(Ret)}, "Assuming the environment variable exists");
14411496
// In untrusted environments the envvar might not exist.
14421497
if (!ShouldAssumeControlledEnvironment)
1443-
GetenvSummary.Case({NotNull(Ret)->negate()});
1498+
GetenvSummary.Case({NotNull(Ret)->negate()},
1499+
"Assuming the environment variable does not exist");
14441500

14451501
// char *getenv(const char *name);
14461502
addToFunctionSummaryMap(

clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,6 +1695,13 @@ PathDiagnosticPieceRef TrackConstraintBRVisitor::VisitNode(
16951695

16961696
// Construct a new PathDiagnosticPiece.
16971697
ProgramPoint P = N->getLocation();
1698+
1699+
// If this node already have a specialized note, it's probably better
1700+
// than our generic note.
1701+
// FIXME: This only looks for note tags, not for other ways to add a note.
1702+
if (isa_and_nonnull<NoteTag>(P.getTag()))
1703+
return nullptr;
1704+
16981705
PathDiagnosticLocation L =
16991706
PathDiagnosticLocation::create(P, BRC.getSourceManager());
17001707
if (!L.isValid())

clang/test/Analysis/std-c-library-functions-arg-constraints.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ void test_alnum_concrete(int v) {
3838
}
3939

4040
void test_alnum_symbolic(int x) {
41-
int ret = isalnum(x);
41+
int ret = isalnum(x); // \
42+
// bugpath-note{{Assuming the character is non-alphanumeric}}
4243
(void)ret;
4344

4445
clang_analyzer_eval(EOF <= x && x <= 255); // \

0 commit comments

Comments
 (0)