Skip to content

Commit 7d935d0

Browse files
committed
[dataflow] improve determinism of generated SAT system
Fixes two places where we relied on map iteration order when processing values, which leaked nondeterminism into the generated SAT formulas. Adds a couple of tests that directly assert that the SAT system is equivalent on each run. It's desirable that the formulas are deterministic based on the input: - our SAT solver is naive and perfermance is sensitive to even simple semantics-preserving transformations like A|B to B|A. (e.g. it's likely to choose a different variable to split on). Timeout failures are bad, but *flaky* ones are terrible to debug. - similarly when debugging, it's important to have a consistent understanding of what e.g. "V23" means across runs. --- Both changes in this patch were isolated from a nullability analysis of real-world code which was extremely slow, spending ages in the SAT solver at "random" points that varied on each run. I've included a reduced version of the code as a regression test. One of the changes shows up directly as flow-condition nondeterminism with a no-op analysis, the other relied on bits of the nullability analysis but I found a synthetic example to show the problem. Differential Revision: https://reviews.llvm.org/D154948
1 parent 304e974 commit 7d935d0

File tree

5 files changed

+146
-6
lines changed

5 files changed

+146
-6
lines changed

clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "clang/Analysis/FlowSensitive/Value.h"
2929
#include "llvm/ADT/DenseMap.h"
3030
#include "llvm/ADT/DenseSet.h"
31+
#include "llvm/ADT/MapVector.h"
3132
#include "llvm/Support/Compiler.h"
3233
#include "llvm/Support/ErrorHandling.h"
3334
#include <memory>
@@ -634,8 +635,9 @@ class Environment {
634635
// block.
635636
llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
636637
llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
637-
638-
llvm::DenseMap<const StorageLocation *, Value *> LocToVal;
638+
// We preserve insertion order so that join/widen process values in
639+
// deterministic sequence. This in turn produces deterministic SAT formulas.
640+
llvm::MapVector<const StorageLocation *, Value *> LocToVal;
639641

640642
// Maps locations of struct members to symbolic values of the structs that own
641643
// them and the decls of the struct members.

clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "clang/Analysis/FlowSensitive/Value.h"
2121
#include "llvm/ADT/DenseMap.h"
2222
#include "llvm/ADT/DenseSet.h"
23+
#include "llvm/ADT/MapVector.h"
2324
#include "llvm/ADT/STLExtras.h"
2425
#include "llvm/Support/Casting.h"
2526
#include "llvm/Support/ErrorHandling.h"
@@ -505,7 +506,7 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
505506
assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
506507
// assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size());
507508

508-
llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal;
509+
llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
509510
for (auto &Entry : LocToVal) {
510511
const StorageLocation *Loc = Entry.first;
511512
assert(Loc != nullptr);

clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,8 +275,7 @@ class JoinedStateBuilder {
275275
/// `std::nullopt` represent basic blocks that are not evaluated yet.
276276
static TypeErasedDataflowAnalysisState
277277
computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
278-
llvm::DenseSet<const CFGBlock *> Preds;
279-
Preds.insert(Block.pred_begin(), Block.pred_end());
278+
std::vector<const CFGBlock *> Preds(Block.pred_begin(), Block.pred_end());
280279
if (Block.getTerminator().isTemporaryDtorsBranch()) {
281280
// This handles a special case where the code that produced the CFG includes
282281
// a conditional operator with a branch that constructs a temporary and
@@ -305,7 +304,7 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
305304
auto &StmtToBlock = AC.CFCtx.getStmtToBlock();
306305
auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
307306
assert(StmtBlock != StmtToBlock.end());
308-
Preds.erase(StmtBlock->getSecond());
307+
llvm::erase_value(Preds, StmtBlock->getSecond());
309308
}
310309
}
311310

clang/unittests/Analysis/FlowSensitive/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests
1010
DataflowAnalysisContextTest.cpp
1111
DataflowEnvironmentTest.cpp
1212
DebugSupportTest.cpp
13+
DeterminismTest.cpp
1314
LoggerTest.cpp
1415
MapLatticeTest.cpp
1516
MatchSwitchTest.cpp
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//===- unittests/Analysis/FlowSensitive/DeterminismTest.cpp ---------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "TestingSupport.h"
10+
#include "clang/AST/Decl.h"
11+
#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
12+
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
13+
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
14+
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
15+
#include "clang/Analysis/FlowSensitive/Formula.h"
16+
#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
17+
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
18+
#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
19+
#include "clang/Basic/LLVM.h"
20+
#include "clang/Testing/TestAST.h"
21+
#include "llvm/Support/Error.h"
22+
#include "llvm/Support/raw_ostream.h"
23+
#include "gtest/gtest.h"
24+
#include <memory>
25+
#include <string>
26+
27+
namespace clang::dataflow {
28+
29+
// Run a no-op analysis, and return a textual representation of the
30+
// flow-condition at function exit.
31+
std::string analyzeAndPrintExitCondition(llvm::StringRef Code) {
32+
DataflowAnalysisContext DACtx(std::make_unique<WatchedLiteralsSolver>());
33+
clang::TestAST AST(Code);
34+
const auto *Target =
35+
cast<FunctionDecl>(test::findValueDecl(AST.context(), "target"));
36+
Environment InitEnv(DACtx, *Target);
37+
auto CFCtx = cantFail(ControlFlowContext::build(*Target));
38+
39+
NoopAnalysis Analysis(AST.context(), DataflowAnalysisOptions{});
40+
41+
auto Result = runDataflowAnalysis(CFCtx, Analysis, InitEnv);
42+
EXPECT_FALSE(!Result) << Result.takeError();
43+
44+
Atom FinalFC = (*Result)[CFCtx.getCFG().getExit().getBlockID()]
45+
->Env.getFlowConditionToken();
46+
std::string Textual;
47+
llvm::raw_string_ostream OS(Textual);
48+
DACtx.dumpFlowCondition(FinalFC, OS);
49+
return Textual;
50+
}
51+
52+
TEST(DeterminismTest, NestedSwitch) {
53+
// Example extracted from real-world code that had wildly nondeterministic
54+
// analysis times.
55+
// Its flow condition depends on the order we join predecessor blocks.
56+
const char *Code = R"cpp(
57+
struct Tree;
58+
struct Rep {
59+
Tree *tree();
60+
int length;
61+
};
62+
struct Tree {
63+
int height();
64+
Rep *edge(int);
65+
int length;
66+
};
67+
struct RetVal {};
68+
int getInt();
69+
bool maybe();
70+
71+
RetVal make(int size);
72+
inline RetVal target(int size, Tree& self) {
73+
Tree* tree = &self;
74+
const int height = self.height();
75+
Tree* n1 = tree;
76+
Tree* n2 = tree;
77+
switch (height) {
78+
case 3:
79+
tree = tree->edge(0)->tree();
80+
if (maybe()) return {};
81+
n2 = tree;
82+
case 2:
83+
tree = tree->edge(0)->tree();
84+
n1 = tree;
85+
if (maybe()) return {};
86+
case 1:
87+
tree = tree->edge(0)->tree();
88+
if (maybe()) return {};
89+
case 0:
90+
Rep* edge = tree->edge(0);
91+
if (maybe()) return {};
92+
int avail = getInt();
93+
if (avail == 0) return {};
94+
int delta = getInt();
95+
RetVal span = {};
96+
edge->length += delta;
97+
switch (height) {
98+
case 3:
99+
n1->length += delta;
100+
case 2:
101+
n1->length += delta;
102+
case 1:
103+
n1->length += delta;
104+
case 0:
105+
n1->length += delta;
106+
return span;
107+
}
108+
break;
109+
}
110+
return make(size);
111+
}
112+
)cpp";
113+
114+
std::string Cond = analyzeAndPrintExitCondition(Code);
115+
for (unsigned I = 0; I < 10; ++I)
116+
EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code));
117+
}
118+
119+
TEST(DeterminismTest, ValueMergeOrder) {
120+
// Artificial example whose final flow condition variable numbering depends
121+
// on the order in which we merge a, b, and c.
122+
const char *Code = R"cpp(
123+
bool target(bool a, bool b, bool c) {
124+
if (a)
125+
b = c;
126+
else
127+
c = b;
128+
return a && b && c;
129+
}
130+
)cpp";
131+
132+
std::string Cond = analyzeAndPrintExitCondition(Code);
133+
for (unsigned I = 0; I < 10; ++I)
134+
EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code));
135+
}
136+
137+
} // namespace clang::dataflow

0 commit comments

Comments
 (0)