Skip to content

Commit 820403c

Browse files
authored
[analyzer] Never create Regions wrapping reference TypedValueRegions (NFCI) (llvm#118096)
Like in the test case: ```c++ struct String { String(const String &) {} }; struct MatchComponent { unsigned numbers[2]; String prerelease; MatchComponent(MatchComponent const &) = default; }; MatchComponent get(); void consume(MatchComponent const &); MatchComponent parseMatchComponent() { MatchComponent component = get(); component.numbers[0] = 10; component.numbers[1] = 20; return component; // We should have no stack addr escape warning here. } void top() { consume(parseMatchComponent()); } ``` When calling `consume(parseMatchComponent())` the `parseMatchComponent()` would return a copy of a temporary of `component`. That copy would invoke the `MatchComponent::MatchComponent(const MatchComponent &)` ctor. That ctor would have a (reference typed) ParamVarRegion, holding the location (lvalue) of the object we are about to copy (&component). So far so good, but just before evaluating the binding operation for initializing the `numbers` field of the temporary, we evaluate the ArrayInitLoopExpr representing the by-value elementwise copy of the array `component.numbers`. This is represented by a LazyCompoundVal, because we (usually) don't just copy large arrays and bind many individual direct bindings. Rather, we take a snapshot by using a LCV. However, notice that the LCV representing this copy would look like this: lazyCompoundVal{ParamVarRegion{"reference param of cctor"}.numbers} Notice that it refers to the numbers field of a reference. It would be much better to desugar the reference to the actual object, thus it should be: `lazyCompoundVal{component.numbers}` Actually, when binding the result of the ArrayInitLoopExpr to the `temp_object.numbers` in the compiler-generated member initializer of the cctor, we should have two individual direct bindings because this is a "small array": ``` binding &Element{temp_object.numbers, 0} <- loadFrom(&Element{component.numbers, 0}) binding &Element{temp_object.numbers, 1} <- loadFrom(&Element{component.numbers, 1}) ``` Where `loadFrom(...)` would be: ``` loadFrom(&Element{component.numbers, 0}): 10 U32b loadFrom(&Element{component.numbers, 1}): 20 U32b ``` So the store should look like this, after PostInitializer of `temp_object.numbers`: ``` temp_object at offset 0: 10 U32b temp_object at offset 32: 20 U32b ``` The lesson is that it's okay to have TypedValueRegions of references as long as we don't form subregions of those. If we ever want to refer to a subregion of a "reference" we actually meant to "desugar" the reference and slice a subregion of the pointee of the reference instead. Once this canonicalization is in place, we can also drop the special handling of references in `ProcessInitializer`, because now reference TypedValueRegions are eagerly desugared into their referee region when forming a subregion of it. There should be no practical differences, but there are of course bugs that this patch may surface.
1 parent 9ebab70 commit 820403c

File tree

5 files changed

+48
-9
lines changed

5 files changed

+48
-9
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,7 @@ class ProgramState : public llvm::FoldingSetNode {
487487
friend void ProgramStateRetain(const ProgramState *state);
488488
friend void ProgramStateRelease(const ProgramState *state);
489489

490+
SVal desugarReference(SVal Val) const;
490491
SVal wrapSymbolicRegion(SVal Base) const;
491492
};
492493

clang/lib/StaticAnalyzer/Core/ExprEngine.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,15 +1206,7 @@ void ExprEngine::ProcessInitializer(const CFGInitializer CFGInit,
12061206
while ((ASE = dyn_cast<ArraySubscriptExpr>(Init)))
12071207
Init = ASE->getBase()->IgnoreImplicit();
12081208

1209-
SVal LValue = State->getSVal(Init, stackFrame);
1210-
if (!Field->getType()->isReferenceType()) {
1211-
if (std::optional<Loc> LValueLoc = LValue.getAs<Loc>()) {
1212-
InitVal = State->getSVal(*LValueLoc);
1213-
} else if (auto CV = LValue.getAs<nonloc::CompoundVal>()) {
1214-
// Initializer list for an array.
1215-
InitVal = *CV;
1216-
}
1217-
}
1209+
InitVal = State->getSVal(Init, stackFrame);
12181210

12191211
// If we fail to get the value for some reason, use a symbolic value.
12201212
if (InitVal.isUnknownOrUndef()) {

clang/lib/StaticAnalyzer/Core/MemRegion.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ using namespace ento;
6565
// MemRegion Construction.
6666
//===----------------------------------------------------------------------===//
6767

68+
[[maybe_unused]] static bool isAReferenceTypedValueRegion(const MemRegion *R) {
69+
const auto *TyReg = llvm::dyn_cast<TypedValueRegion>(R);
70+
return TyReg && TyReg->getValueType()->isReferenceType();
71+
}
72+
6873
template <typename RegionTy, typename SuperTy, typename Arg1Ty>
6974
RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1,
7075
const SuperTy *superRegion) {
@@ -76,6 +81,7 @@ RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1,
7681
if (!R) {
7782
R = new (A) RegionTy(arg1, superRegion);
7883
Regions.InsertNode(R, InsertPos);
84+
assert(!isAReferenceTypedValueRegion(superRegion));
7985
}
8086

8187
return R;
@@ -92,6 +98,7 @@ RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1, const Arg2Ty arg2,
9298
if (!R) {
9399
R = new (A) RegionTy(arg1, arg2, superRegion);
94100
Regions.InsertNode(R, InsertPos);
101+
assert(!isAReferenceTypedValueRegion(superRegion));
95102
}
96103

97104
return R;
@@ -110,6 +117,7 @@ RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1, const Arg2Ty arg2,
110117
if (!R) {
111118
R = new (A) RegionTy(arg1, arg2, arg3, superRegion);
112119
Regions.InsertNode(R, InsertPos);
120+
assert(!isAReferenceTypedValueRegion(superRegion));
113121
}
114122

115123
return R;

clang/lib/StaticAnalyzer/Core/ProgramState.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,16 @@ ProgramStateRef ProgramState::killBinding(Loc LV) const {
205205
return makeWithStore(newStore);
206206
}
207207

208+
/// We should never form a MemRegion that would wrap a TypedValueRegion of a
209+
/// reference type. What we actually wanted was to create a MemRegion refering
210+
/// to the pointee of that reference.
211+
SVal ProgramState::desugarReference(SVal Val) const {
212+
const auto *TyReg = dyn_cast_or_null<TypedValueRegion>(Val.getAsRegion());
213+
if (!TyReg || !TyReg->getValueType()->isReferenceType())
214+
return Val;
215+
return getSVal(TyReg);
216+
}
217+
208218
/// SymbolicRegions are expected to be wrapped by an ElementRegion as a
209219
/// canonical representation. As a canonical representation, SymbolicRegions
210220
/// should be wrapped by ElementRegions before getting a FieldRegion.
@@ -445,12 +455,14 @@ void ProgramState::setStore(const StoreRef &newStore) {
445455
}
446456

447457
SVal ProgramState::getLValue(const FieldDecl *D, SVal Base) const {
458+
Base = desugarReference(Base);
448459
Base = wrapSymbolicRegion(Base);
449460
return getStateManager().StoreMgr->getLValueField(D, Base);
450461
}
451462

452463
SVal ProgramState::getLValue(const IndirectFieldDecl *D, SVal Base) const {
453464
StoreManager &SM = *getStateManager().StoreMgr;
465+
Base = desugarReference(Base);
454466
Base = wrapSymbolicRegion(Base);
455467

456468
// FIXME: This should work with `SM.getLValueField(D->getAnonField(), Base)`,

clang/test/Analysis/initializer.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,29 @@ void testI() {
366366
clang_analyzer_eval(B::b == 2); // expected-warning{{TRUE}}
367367
}
368368
} // namespace dont_skip_vbase_initializers_in_most_derived_class
369+
370+
namespace elementwise_copy_small_array_from_post_initializer_of_cctor {
371+
struct String {
372+
String(const String &) {}
373+
};
374+
375+
struct MatchComponent {
376+
unsigned numbers[2];
377+
String prerelease;
378+
MatchComponent(MatchComponent const &) = default;
379+
};
380+
381+
MatchComponent get();
382+
void consume(MatchComponent const &);
383+
384+
MatchComponent parseMatchComponent() {
385+
MatchComponent component = get();
386+
component.numbers[0] = 10;
387+
component.numbers[1] = 20;
388+
return component; // We should have no stack addr escape warning here.
389+
}
390+
391+
void top() {
392+
consume(parseMatchComponent());
393+
}
394+
} // namespace elementwise_copy_small_array_from_post_initializer_of_cctor

0 commit comments

Comments
 (0)