Revert "[AssumptionCache] caches @llvm.experimental.guard's"

xortator · xortator · commit 0cbb8ec030e2 · 2023-02-20T18:38:07.000+07:00
This reverts commit f9599bb. For some reason it caused us a huge compile time regression in downstream workloads. Not sure whether the source of it is in upstream code ir not. Temporarily reverting until investigated. Differential Revision: https://reviews.llvm.org/D142330
diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -26,7 +26,7 @@
 
 namespace llvm {
 
-class CondGuardInst;
+class AssumeInst;
 class Function;
 class raw_ostream;
 class TargetTransformInfo;
@@ -120,15 +120,15 @@ class AssumptionCache {
   ///
   /// The call passed in must be an instruction within this function and must
   /// not already be in the cache.
-  void registerAssumption(CondGuardInst *CI);
+  void registerAssumption(AssumeInst *CI);
 
   /// Remove an \@llvm.assume intrinsic from this function's cache if it has
   /// been added to the cache earlier.
-  void unregisterAssumption(CondGuardInst *CI);
+  void unregisterAssumption(AssumeInst *CI);
 
   /// Update the cache of values being affected by this assumption (i.e.
   /// the values about which this assumption provides information).
-  void updateAffectedValues(CondGuardInst *CI);
+  void updateAffectedValues(AssumeInst *CI);
 
   /// Clear the cache of \@llvm.assume intrinsics for a function.
   ///
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1513,20 +1513,9 @@ class GCResultInst : public GCProjectionInst {
   }
 };
 
-/// This represents intrinsics that guard a condition
-class CondGuardInst : public IntrinsicInst {
-public:
-  static bool classof(const IntrinsicInst *I) {
-    return I->getIntrinsicID() == Intrinsic::assume ||
-           I->getIntrinsicID() == Intrinsic::experimental_guard;
-  }
-  static bool classof(const Value *V) {
-    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
-  }
-};
 
 /// This represents the llvm.assume intrinsic.
-class AssumeInst : public CondGuardInst {
+class AssumeInst : public IntrinsicInst {
 public:
   static bool classof(const IntrinsicInst *I) {
     return I->getIntrinsicID() == Intrinsic::assume;
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -162,7 +162,7 @@ llvm::getKnowledgeForValue(const Value *V,
     return RetainedKnowledge::none();
   if (AC) {
     for (AssumptionCache::ResultElem &Elem : AC->assumptionsFor(V)) {
-      auto *II = dyn_cast_or_null<AssumeInst>(Elem.Assume);
+      auto *II = cast_or_null<AssumeInst>(Elem.Assume);
       if (!II || Elem.Index == AssumptionCache::ExprResultIdx)
         continue;
       if (RetainedKnowledge RK = getKnowledgeFromBundle(
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains a pass that keeps track of @llvm.assume and
-// @llvm.experimental.guard intrinsics in the functions of a module.
+// This file contains a pass that keeps track of @llvm.assume intrinsics in
+// the functions of a module.
 //
 //===----------------------------------------------------------------------===//
 
@@ -140,7 +140,7 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
   }
 }
 
-void AssumptionCache::updateAffectedValues(CondGuardInst *CI) {
+void AssumptionCache::updateAffectedValues(AssumeInst *CI) {
   SmallVector<AssumptionCache::ResultElem, 16> Affected;
   findAffectedValues(CI, TTI, Affected);
 
@@ -153,7 +153,7 @@ void AssumptionCache::updateAffectedValues(CondGuardInst *CI) {
   }
 }
 
-void AssumptionCache::unregisterAssumption(CondGuardInst *CI) {
+void AssumptionCache::unregisterAssumption(AssumeInst *CI) {
   SmallVector<AssumptionCache::ResultElem, 16> Affected;
   findAffectedValues(CI, TTI, Affected);
 
@@ -217,18 +217,18 @@ void AssumptionCache::scanFunction() {
   // to this cache.
   for (BasicBlock &B : F)
     for (Instruction &I : B)
-      if (isa<CondGuardInst>(&I))
+      if (isa<AssumeInst>(&I))
         AssumeHandles.push_back({&I, ExprResultIdx});
 
   // Mark the scan as complete.
   Scanned = true;
 
   // Update affected values.
   for (auto &A : AssumeHandles)
-    updateAffectedValues(cast<CondGuardInst>(A));
+    updateAffectedValues(cast<AssumeInst>(A));
 }
 
-void AssumptionCache::registerAssumption(CondGuardInst *CI) {
+void AssumptionCache::registerAssumption(AssumeInst *CI) {
   // If we haven't scanned the function yet, just drop this assumption. It will
   // be found when we scan later.
   if (!Scanned)
@@ -238,9 +238,9 @@ void AssumptionCache::registerAssumption(CondGuardInst *CI) {
 
 #ifndef NDEBUG
   assert(CI->getParent() &&
-         "Cannot a register CondGuardInst not in a basic block");
+         "Cannot register @llvm.assume call not in a basic block");
   assert(&F == CI->getParent()->getParent() &&
-         "Cannot a register CondGuardInst not in this function");
+         "Cannot register @llvm.assume call not in this function");
 
   // We expect the number of assumptions to be small, so in an asserts build
   // check that we don't accumulate duplicates and that all assumptions point
@@ -252,8 +252,8 @@ void AssumptionCache::registerAssumption(CondGuardInst *CI) {
 
     assert(&F == cast<Instruction>(VH)->getParent()->getParent() &&
            "Cached assumption not inside this function!");
-    assert(isa<CondGuardInst>(VH) &&
-           "Cached something other than CondGuardInst!");
+    assert(match(cast<CallInst>(VH), m_Intrinsic<Intrinsic::assume>()) &&
+           "Cached something other than a call to @llvm.assume!");
     assert(AssumptionSet.insert(VH).second &&
            "Cache contains multiple copies of a call!");
   }
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1771,7 +1771,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
       // these to compute max backedge taken counts, but can still use
       // these to prove lack of overflow.  Use this fact to avoid
       // doing extra work that may not pay off.
-      if (!isa<SCEVCouldNotCompute>(MaxBECount) || !AC.assumptions().empty()) {
+      if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
+          !AC.assumptions().empty()) {
 
         auto NewFlags = proveNoUnsignedWrapViaInduction(AR);
         setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
@@ -5147,7 +5148,8 @@ ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) {
   // these to prove lack of overflow.  Use this fact to avoid
   // doing extra work that may not pay off.
 
-  if (isa<SCEVCouldNotCompute>(MaxBECount) && AC.assumptions().empty())
+  if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
+      AC.assumptions().empty())
     return Result;
 
   // If the backedge is guarded by a comparison with the pre-inc  value the
@@ -5200,7 +5202,8 @@ ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) {
   // these to prove lack of overflow.  Use this fact to avoid
   // doing extra work that may not pay off.
 
-  if (isa<SCEVCouldNotCompute>(MaxBECount) && AC.assumptions().empty())
+  if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
+      AC.assumptions().empty())
     return Result;
 
   // If the backedge is guarded by a comparison with the pre-inc  value the
@@ -11388,7 +11391,7 @@ bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB,
                                         ICmpInst::Predicate Pred,
                                         const SCEV *LHS, const SCEV *RHS) {
   // No need to even try if we know the module has no guards.
-  if (AC.assumptions().empty())
+  if (!HasGuards)
     return false;
 
   return any_of(*BB, [&](const Instruction &I) {
@@ -11598,6 +11601,15 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
       return true;
   }
 
+  // Check conditions due to any @llvm.experimental.guard intrinsics.
+  auto *GuardDecl = F.getParent()->getFunction(
+      Intrinsic::getName(Intrinsic::experimental_guard));
+  if (GuardDecl)
+    for (const auto *GU : GuardDecl->users())
+      if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
+        if (Guard->getFunction() == BB->getParent() && DT.dominates(Guard, BB))
+          if (ProveViaCond(Guard->getArgOperand(0), false))
+            return true;
   return false;
 }
 
@@ -13470,11 +13482,25 @@ ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
                                  LoopInfo &LI)
     : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
       CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64),
-      LoopDispositions(64), BlockDispositions(64) {}
+      LoopDispositions(64), BlockDispositions(64) {
+  // To use guards for proving predicates, we need to scan every instruction in
+  // relevant basic blocks, and not just terminators.  Doing this is a waste of
+  // time if the IR does not actually contain any calls to
+  // @llvm.experimental.guard, so do a quick check and remember this beforehand.
+  //
+  // This pessimizes the case where a pass that preserves ScalarEvolution wants
+  // to _add_ guards to the module when there weren't any before, and wants
+  // ScalarEvolution to optimize based on those guards.  For now we prefer to be
+  // efficient in lieu of being smart in that rather obscure case.
+
+  auto *GuardDecl = F.getParent()->getFunction(
+      Intrinsic::getName(Intrinsic::experimental_guard));
+  HasGuards = GuardDecl && !GuardDecl->use_empty();
+}
 
 ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
-    : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
-      CouldNotCompute(std::move(Arg.CouldNotCompute)),
+    : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
+      LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
       ValueExprMap(std::move(Arg.ValueExprMap)),
       PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
       PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
@@ -15166,7 +15192,16 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
     Terms.emplace_back(AssumeI->getOperand(0), true);
   }
 
-  // Second, collect conditions from dominating branches. Starting at the loop
+  // Second, collect information from llvm.experimental.guards dominating the loop.
+  auto *GuardDecl = F.getParent()->getFunction(
+      Intrinsic::getName(Intrinsic::experimental_guard));
+  if (GuardDecl)
+    for (const auto *GU : GuardDecl->users())
+      if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
+        if (Guard->getFunction() == Header->getParent() && DT.dominates(Guard, Header))
+          Terms.emplace_back(Guard->getArgOperand(0), true);
+
+  // Third, collect conditions from dominating branches. Starting at the loop
   // predecessor, climb up the predecessor chain, as long as there are
   // predecessors that can be found that have unique successors leading to the
   // original header.
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
@@ -616,14 +616,17 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
   for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
     if (!AssumeVH)
       continue;
-    CondGuardInst *I = cast<CondGuardInst>(AssumeVH);
+    CallInst *I = cast<CallInst>(AssumeVH);
     assert(I->getFunction() == Q.CxtI->getFunction() &&
            "Got assumption for the wrong function!");
 
     // Warning: This loop can end up being somewhat performance sensitive.
     // We're running this loop for once for each value queried resulting in a
     // runtime of ~O(#assumes * #values).
 
+    assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+           "must be an assume intrinsic");
+
     Value *RHS;
     CmpInst::Predicate Pred;
     auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
@@ -661,14 +664,17 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
   for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
     if (!AssumeVH)
       continue;
-    CondGuardInst *I = cast<CondGuardInst>(AssumeVH);
+    CallInst *I = cast<CallInst>(AssumeVH);
     assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
            "Got assumption for the wrong function!");
 
     // Warning: This loop can end up being somewhat performance sensitive.
     // We're running this loop for once for each value queried resulting in a
     // runtime of ~O(#assumes * #values).
 
+    assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+           "must be an assume intrinsic");
+
     Value *Arg = I->getArgOperand(0);
 
     if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
@@ -7492,9 +7498,11 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
     for (auto &AssumeVH : AC->assumptionsFor(V)) {
       if (!AssumeVH)
         continue;
-      IntrinsicInst *I = cast<IntrinsicInst>(AssumeVH);
+      CallInst *I = cast<CallInst>(AssumeVH);
       assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
              "Got assumption for the wrong function!");
+      assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+             "must be an assume intrinsic");
 
       if (!isValidAssumeForContext(I, CtxI, DT))
         continue;
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1649,14 +1649,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
     }
   }
 
-  // Remove CondGuardInsts that will be moved to the new function from the old
-  // function's assumption cache.
+  // Remove @llvm.assume calls that will be moved to the new function from the
+  // old function's assumption cache.
   for (BasicBlock *Block : Blocks) {
     for (Instruction &I : llvm::make_early_inc_range(*Block)) {
-      if (auto *CI = dyn_cast<CondGuardInst>(&I)) {
+      if (auto *AI = dyn_cast<AssumeInst>(&I)) {
         if (AC)
-          AC->unregisterAssumption(CI);
-        CI->eraseFromParent();
+          AC->unregisterAssumption(AI);
+        AI->eraseFromParent();
       }
     }
   }
@@ -1850,7 +1850,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
                                           const Function &NewFunc,
                                           AssumptionCache *AC) {
   for (auto AssumeVH : AC->assumptions()) {
-    auto *I = dyn_cast_or_null<CondGuardInst>(AssumeVH);
+    auto *I = dyn_cast_or_null<CallInst>(AssumeVH);
     if (!I)
       continue;
 
@@ -1862,7 +1862,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
     // that were previously in the old function, but that have now been moved
     // to the new function.
     for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
-      auto *AffectedCI = dyn_cast_or_null<CondGuardInst>(AffectedValVH);
+      auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
       if (!AffectedCI)
         continue;
       if (AffectedCI->getFunction() != &OldFunc)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2333,7 +2333,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
       for (BasicBlock &NewBlock :
            make_range(FirstNewBlock->getIterator(), Caller->end()))
         for (Instruction &I : NewBlock)
-          if (auto *II = dyn_cast<CondGuardInst>(&I))
+          if (auto *II = dyn_cast<AssumeInst>(&I))
             IFI.GetAssumptionCache(*Caller).registerAssumption(II);
   }
 
diff --git a/llvm/test/Analysis/AssumptionCache/basic.ll b/llvm/test/Analysis/AssumptionCache/basic.ll
@@ -3,15 +3,12 @@
 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
 
 declare void @llvm.assume(i1)
-declare void @llvm.experimental.guard(i1, ...)
 
 define void @test1(i32 %a) {
 ; CHECK-LABEL: Cached assumptions for function: test1
 ; CHECK-NEXT: icmp ne i32 %{{.*}}, 0
 ; CHECK-NEXT: icmp slt i32 %{{.*}}, 0
 ; CHECK-NEXT: icmp sgt i32 %{{.*}}, 0
-; CHECK-NEXT: icmp ult i32 %{{.*}}, 0
-; CHECK-NEXT: icmp ugt i32 %{{.*}}, 0
 
 entry:
   %cond1 = icmp ne i32 %a, 0
@@ -20,10 +17,6 @@ entry:
   call void @llvm.assume(i1 %cond2)
   %cond3 = icmp sgt i32 %a, 0
   call void @llvm.assume(i1 %cond3)
-  %cond4 = icmp ult i32 %a, 0
-  call void (i1, ...) @llvm.experimental.guard(i1 %cond4) [ "deopt"() ]
-  %cond5 = icmp ugt i32 %a, 0
-  call void (i1, ...) @llvm.experimental.guard(i1 %cond5) [ "deopt"() ]
 
   ret void
 }
diff --git a/llvm/test/Analysis/ScalarEvolution/guards.ll b/llvm/test/Analysis/ScalarEvolution/guards.ll
@@ -86,7 +86,7 @@ entry:
 loop:
 ; CHECK: loop:
 ; CHECK:  call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
-; CHECK:  %iv.inc.cmp = icmp ult i32 %iv.inc, %len
+; CHECK:  %iv.inc.cmp = icmp slt i32 %iv.inc, %len
 ; CHECK:  call void (i1, ...) @llvm.experimental.guard(i1 %iv.inc.cmp) [ "deopt"() ]
 ; CHECK: leave:
   %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
@@ -129,7 +129,7 @@ left:
 
 be:
 ; CHECK: be:
-; CHECK-NEXT:  %iv.cmp = icmp ult i32 %iv, %len
+; CHECK-NEXT:  %iv.cmp = icmp slt i32 %iv, %len
 ; CHECK-NEXT:  call void (i1, ...) @llvm.experimental.guard(i1 %iv.cmp) [ "deopt"() ]
 ; CHECK: leave:
 
diff --git a/llvm/test/Transforms/EarlyCSE/guards.ll b/llvm/test/Transforms/EarlyCSE/guards.ll
@@ -83,10 +83,13 @@ define i32 @test3.unhandled(i32 %val) {
 ; CHECK-LABEL: @test3.unhandled(
 ; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[VAL:%.*]], 40
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[COND0]]) [ "deopt"() ]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    [[COND1:%.*]] = icmp sge i32 [[VAL]], 40
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[COND1]]) [ "deopt"() ]
 ; CHECK-NEXT:    ret i32 0
 ;
 
+; Demonstrates a case we do not yet handle (it is legal to fold %cond2
+; to false)
   %cond0 = icmp slt i32 %val, 40
   call void(i1,...) @llvm.experimental.guard(i1 %cond0) [ "deopt"() ]
   %cond1 = icmp sge i32 %val, 40

Original file line number	Diff line number	Diff line change
`@@ -2333,7 +2333,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,`
`2333`	`2333`	`for (BasicBlock &NewBlock :`
`2334`	`2334`	`make_range(FirstNewBlock->getIterator(), Caller->end()))`
`2335`	`2335`	`for (Instruction &I : NewBlock)`
`2336`		`- if (auto *II = dyn_cast<CondGuardInst>(&I))`
	`2336`	`+ if (auto *II = dyn_cast<AssumeInst>(&I))`
`2337`	`2337`	`IFI.GetAssumptionCache(*Caller).registerAssumption(II);`
`2338`	`2338`	`}`
`2339`	`2339`