codeplaysoftware
diff --git a/‎bolt/lib/Core/DIEBuilder.cpp
Lines changed: 9 additions & 7 deletions b/‎bolt/lib/Core/DIEBuilder.cpp
Lines changed: 9 additions & 7 deletions
diff --git a/‎bolt/lib/Passes/SplitFunctions.cpp
Lines changed: 64 additions & 36 deletions b/‎bolt/lib/Passes/SplitFunctions.cpp
Lines changed: 64 additions & 36 deletions
diff --git a/‎bolt/test/X86/cdsplit-call-scale.s
Lines changed: 4 additions & 5 deletions b/‎bolt/test/X86/cdsplit-call-scale.s
Lines changed: 4 additions & 5 deletions
diff --git a/‎clang-tools-extra/clangd/index/SymbolCollector.cpp
Lines changed: 21 additions & 17 deletions b/‎clang-tools-extra/clangd/index/SymbolCollector.cpp
Lines changed: 21 additions & 17 deletions
diff --git a/‎clang-tools-extra/clangd/index/SymbolCollector.h
Lines changed: 8 additions & 1 deletion b/‎clang-tools-extra/clangd/index/SymbolCollector.h
Lines changed: 8 additions & 1 deletion
diff --git a/‎clang-tools-extra/clangd/unittests/IndexActionTests.cpp
Lines changed: 30 additions & 0 deletions b/‎clang-tools-extra/clangd/unittests/IndexActionTests.cpp
Lines changed: 30 additions & 0 deletions
diff --git a/‎clang/docs/ControlFlowIntegrityDesign.rst
Lines changed: 1 addition & 1 deletion b/‎clang/docs/ControlFlowIntegrityDesign.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎clang/docs/LanguageExtensions.rst
Lines changed: 3 additions & 3 deletions b/‎clang/docs/LanguageExtensions.rst
Lines changed: 3 additions & 3 deletions
@@ -266,13 +266,11 @@ void DIEBuilder::buildCompileUnits(const bool Init) {
 }
 void DIEBuilder::buildCompileUnits(const std::vector<DWARFUnit *> &CUs) {
   BuilderState.reset(new State());
-  // Initializing to full size because there could be cross CU references with
-  // different abbrev offsets. LLVM happens to output CUs that have cross CU
-  // references with the same abbrev table. So destinations end up in the first
-  // set, even if they themselves don't have src cross cu ref. We could have
-  // cases where this is not the case. In which case this container needs to be
-  // big enough for all.
-  getState().CloneUnitCtxMap.resize(DwarfContext->getNumCompileUnits());
+  // Allocating enough for current batch being processed.
+  // In real use cases we either processing a batch of CUs with no cross
+  // references, or if they do have them it is due to LTO. With clang they will
+  // share the same abbrev table. In either case this vector will not grow.
+  getState().CloneUnitCtxMap.resize(CUs.size());
   getState().Type = ProcessingType::CUs;
   for (DWARFUnit *CU : CUs)
     registerUnit(*CU, false);
@@ -897,6 +895,10 @@ void DIEBuilder::registerUnit(DWARFUnit &DU, bool NeedSort) {
                 });
   }
   getState().UnitIDMap[getHash(DU)] = getState().DUList.size();
+  // This handles the case where we do have cross cu references, but CUs do not
+  // share the same abbrev table.
+  if (getState().DUList.size() == getState().CloneUnitCtxMap.size())
+    getState().CloneUnitCtxMap.emplace_back();
   getState().DUList.push_back(&DU);
 }
 
 
@@ -175,8 +175,12 @@ struct SplitCacheDirected final : public SplitStrategy {
   void fragment(const BlockIt Start, const BlockIt End) override {
     BasicBlockOrder BlockOrder(Start, End);
     BinaryFunction &BF = *BlockOrder.front()->getFunction();
+    // No need to re-split small functions.
+    if (BlockOrder.size() <= 2)
+      return;
 
     size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
+    assert(BestSplitIndex < BlockOrder.size());
 
     // Assign fragments based on the computed best split index.
     // All basic blocks with index up to the best split index become hot.
@@ -200,10 +204,12 @@ struct SplitCacheDirected final : public SplitStrategy {
   };
 
   struct SplitScore {
-    size_t SplitIndex;
+    size_t SplitIndex = size_t(-1);
     size_t HotSizeReduction = 0;
     double LocalScore = 0;
     double CoverCallScore = 0;
+
+    double sum() const { return LocalScore + CoverCallScore; }
   };
 
   // Auxiliary variables used by the algorithm.
@@ -303,7 +309,7 @@ struct SplitCacheDirected final : public SplitStrategy {
                              const size_t SplitIndex) {
     assert(SplitIndex < BlockOrder.size() && "Invalid split index");
 
-    // Update function layout assuming hot-warm splitting at SplitIndex
+    // Update function layout assuming hot-warm splitting at SplitIndex.
     for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
       BinaryBasicBlock *BB = BlockOrder[Index];
       if (BB->getFragmentNum() == FragmentNum::cold())
@@ -319,8 +325,8 @@ struct SplitCacheDirected final : public SplitStrategy {
     // Populate BB.OutputAddressRange with estimated new start and end addresses
     // and compute the old end address of the hot section and the new end
     // address of the hot section.
-    size_t OldHotEndAddr;
-    size_t NewHotEndAddr;
+    size_t OldHotEndAddr{0};
+    size_t NewHotEndAddr{0};
     size_t CurrentAddr = BBOffsets[BlockOrder[0]];
     for (BinaryBasicBlock *BB : BlockOrder) {
       // We only care about new addresses of blocks in hot/warm.
@@ -492,20 +498,15 @@ struct SplitCacheDirected final : public SplitStrategy {
   }
 
   /// Compute the split score of splitting a function at a given index.
-  /// The split score consists of local score and cover score. Cover call score
-  /// is expensive to compute. As a result, we pass in a \p ReferenceScore and
-  /// compute cover score only when the local score exceeds that in the
-  /// ReferenceScore or that the size reduction of the hot fragment is larger
-  /// than that achieved by the split index of the ReferenceScore. This function
-  /// returns \p Score of SplitScore type. It contains the local score and cover
-  /// score (if computed) of the current splitting index. For easier book
-  /// keeping and comparison, it also stores the split index and the resulting
-  /// reduction in hot fragment size.
+  /// The split score consists of local score and cover score. This function
+  /// returns \p Score of SplitScore type. It contains the local score and
+  /// cover score of the current splitting index. For easier book keeping and
+  /// comparison, it also stores the split index and the resulting reduction
+  /// in hot fragment size.
   SplitScore computeSplitScore(const BinaryFunction &BF,
                                const BasicBlockOrder &BlockOrder,
                                const size_t SplitIndex,
-                               const std::vector<CallInfo> &CoverCalls,
-                               const SplitScore &ReferenceScore) {
+                               const std::vector<CallInfo> &CoverCalls) {
     // Populate BinaryBasicBlock::OutputAddressRange with estimated
     // new start and end addresses after hot-warm splitting at SplitIndex.
     size_t OldHotEnd;
@@ -533,47 +534,74 @@ struct SplitCacheDirected final : public SplitStrategy {
     // increamented in place.
     computeJumpScore(BlockOrder, SplitIndex, Score);
 
-    // There is no need to compute CoverCallScore if we have already found
-    // another split index with a bigger LocalScore and bigger HotSizeReduction.
-    if (Score.LocalScore <= ReferenceScore.LocalScore &&
-        Score.HotSizeReduction <= ReferenceScore.HotSizeReduction)
-      return Score;
-
     // Compute CoverCallScore and store in Score in place.
     computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score);
     return Score;
   }
 
+  /// Find the most likely successor of a basic block when it has one or two
+  /// successors. Return nullptr otherwise.
+  const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) {
+    if (BB->succ_size() == 1)
+      return BB->getSuccessor();
+    if (BB->succ_size() == 2) {
+      uint64_t TakenCount = BB->getTakenBranchInfo().Count;
+      assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
+      uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
+      assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
+      if (TakenCount > NonTakenCount)
+        return BB->getConditionalSuccessor(true);
+      else if (TakenCount < NonTakenCount)
+        return BB->getConditionalSuccessor(false);
+    }
+    return nullptr;
+  }
+
   /// Find the best index for splitting. The returned value is the index of the
   /// last hot basic block. Hence, "no splitting" is equivalent to returning the
   /// value which is one less than the size of the function.
   size_t findSplitIndex(const BinaryFunction &BF,
                         const BasicBlockOrder &BlockOrder) {
+    assert(BlockOrder.size() > 2);
     // Find all function calls that can be shortened if we move blocks of the
     // current function to warm/cold
     const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF);
 
-    // Try all possible split indices (blocks with Index <= SplitIndex are in
-    // hot) and find the one maximizing the splitting score.
+    // Find the existing hot-cold splitting index.
+    size_t HotColdIndex = 0;
+    while (HotColdIndex + 1 < BlockOrder.size()) {
+      if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold())
+        break;
+      HotColdIndex++;
+    }
+    assert(HotColdIndex + 1 == BlockOrder.size() ||
+           (BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() &&
+            BlockOrder[HotColdIndex + 1]->getFragmentNum() ==
+                FragmentNum::cold()));
+
+    // Try all possible split indices up to HotColdIndex (blocks that have
+    // Index <= SplitIndex are in hot) and find the one maximizing the
+    // splitting score.
     SplitScore BestScore;
-    double BestScoreSum = -1.0;
-    SplitScore ReferenceScore;
-    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
+    for (size_t Index = 0; Index <= HotColdIndex; Index++) {
       const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
-      // No need to keep cold blocks in the hot section.
-      if (LastHotBB->getFragmentNum() == FragmentNum::cold())
-        break;
+      assert(LastHotBB->getFragmentNum() != FragmentNum::cold());
+
+      // Do not break jump to the most likely successor.
+      if (Index + 1 < BlockOrder.size() &&
+          BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB))
+        continue;
+
       const SplitScore Score =
-          computeSplitScore(BF, BlockOrder, Index, CoverCalls, ReferenceScore);
-      double ScoreSum = Score.LocalScore + Score.CoverCallScore;
-      if (ScoreSum > BestScoreSum) {
-        BestScoreSum = ScoreSum;
+          computeSplitScore(BF, BlockOrder, Index, CoverCalls);
+      if (Score.sum() > BestScore.sum())
         BestScore = Score;
-      }
-      if (Score.LocalScore > ReferenceScore.LocalScore)
-        ReferenceScore = Score;
     }
 
+    // If we don't find a good splitting point, fallback to the original one.
+    if (BestScore.SplitIndex == size_t(-1))
+      return HotColdIndex;
+
     return BestScore.SplitIndex;
   }
 };
 
@@ -2,8 +2,9 @@
 # When -call-scale=0.0, the tested function is 2-way splitted.
 # When -call-scale=1.0, the tested function is 3-way splitted with 5 blocks
 # in warm because of the increased benefit of shortening the call edges.
-# When -call-scale=1000.0, the tested function is 3-way splitted with 7 blocks
-# in warm because of the strong benefit of shortening the call edges.
+# When -call-scale=1000.0, the tested function is still 3-way splitted with
+# 5 blocks in warm because cdsplit does not allow hot-warm splitting to break
+# a fall through branch from a basic block to its most likely successor.
 
 # RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
 # RUN: link_fdata %s %t.o %t.fdata
@@ -39,12 +40,10 @@
 # MEDINCENTIVE: {{^\.Ltmp5}}
 
 # HIGHINCENTIVE: Binary Function "chain" after split-functions
-# HIGHINCENTIVE: {{^\.LBB00}}
+# HIGHINCENTIVE: {{^\.Ltmp1}}
 # HIGHINCENTIVE: -------   HOT-COLD SPLIT POINT   -------
 # HIGHINCENTIVE: {{^\.LFT1}}
 # HIGHINCENTIVE: -------   HOT-COLD SPLIT POINT   -------
-# HIGHINCENTIVE: {{^\.LFT0}}
-# HIGHINCENTIVE: {{^\.Ltmp1}}
 # HIGHINCENTIVE: {{^\.Ltmp0}}
 # HIGHINCENTIVE: {{^\.Ltmp2}}
 # HIGHINCENTIVE: {{^\.Ltmp3}}
 
@@ -826,22 +826,8 @@ void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc,
   // We update providers for a symbol with each occurence, as SymbolCollector
   // might run while parsing, rather than at the end of a translation unit.
   // Hence we see more and more redecls over time.
-  auto [It, Inserted] = SymbolProviders.try_emplace(S.ID);
-  auto Headers =
+  SymbolProviders[S.ID] =
       include_cleaner::headersForSymbol(Sym, SM, Opts.PragmaIncludes);
-  if (Headers.empty())
-    return;
-
-  auto *HeadersIter = Headers.begin();
-  include_cleaner::Header H = *HeadersIter;
-  while (HeadersIter != Headers.end() &&
-         H.kind() == include_cleaner::Header::Physical &&
-         !tooling::isSelfContainedHeader(H.physical(), SM,
-                                         PP->getHeaderSearchInfo())) {
-    H = *HeadersIter;
-    HeadersIter++;
-  }
-  It->second = H;
 }
 
 llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) {
@@ -889,7 +875,7 @@ void SymbolCollector::finish() {
   llvm::DenseMap<include_cleaner::Header, std::string> HeaderSpelling;
   // Fill in IncludeHeaders.
   // We delay this until end of TU so header guards are all resolved.
-  for (const auto &[SID, OptionalProvider] : SymbolProviders) {
+  for (const auto &[SID, Providers] : SymbolProviders) {
     const Symbol *S = Symbols.find(SID);
     if (!S)
       continue;
@@ -931,9 +917,27 @@ void SymbolCollector::finish() {
       continue;
     }
 
-    assert(Directives == Symbol::Include);
     // For #include's, use the providers computed by the include-cleaner
     // library.
+    assert(Directives == Symbol::Include);
+    // Ignore providers that are not self-contained, this is especially
+    // important for symbols defined in the main-file. We want to prefer the
+    // header, if possible.
+    // TODO: Limit this to specifically ignore main file, when we're indexing a
+    // non-header file?
+    auto SelfContainedProvider =
+        [this](llvm::ArrayRef<include_cleaner::Header> Providers)
+        -> std::optional<include_cleaner::Header> {
+      for (const auto &H : Providers) {
+        if (H.kind() != include_cleaner::Header::Physical)
+          return H;
+        if (tooling::isSelfContainedHeader(H.physical(), PP->getSourceManager(),
+                                           PP->getHeaderSearchInfo()))
+          return H;
+      }
+      return std::nullopt;
+    };
+    const auto OptionalProvider = SelfContainedProvider(Providers);
     if (!OptionalProvider)
       continue;
     const auto &H = *OptionalProvider;
 
@@ -15,18 +15,25 @@
 #include "index/Relation.h"
 #include "index/Symbol.h"
 #include "index/SymbolID.h"
+#include "index/SymbolLocation.h"
 #include "index/SymbolOrigin.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include <functional>
 #include <memory>
 #include <optional>
+#include <string>
+#include <utility>
 
 namespace clang {
 namespace clangd {
@@ -177,7 +184,7 @@ class SymbolCollector : public index::IndexDataConsumer {
 
   // Providers for Symbol.IncludeHeaders.
   // The final spelling is calculated in finish().
-  llvm::DenseMap<SymbolID, std::optional<include_cleaner::Header>>
+  llvm::DenseMap<SymbolID, llvm::SmallVector<include_cleaner::Header>>
       SymbolProviders;
   // Files which contain ObjC symbols.
   // This is finalized and used in finish().
 
@@ -341,6 +341,36 @@ TEST_F(IndexActionTest, SymbolFromCC) {
                   hasName("foo"),
                   includeHeader(URI::create(testPath("main.h")).toString()))));
 }
+
+TEST_F(IndexActionTest, IncludeHeaderForwardDecls) {
+  std::string MainFilePath = testPath("main.cpp");
+  addFile(MainFilePath, R"cpp(
+#include "fwd.h"
+#include "full.h"
+ )cpp");
+  addFile(testPath("fwd.h"), R"cpp(
+#ifndef _FWD_H_
+#define _FWD_H_
+struct Foo;
+#endif
+ )cpp");
+  addFile(testPath("full.h"), R"cpp(
+#ifndef _FULL_H_
+#define _FULL_H_
+struct Foo {};
+
+// This decl is important, as otherwise we detect control macro for the file,
+// before handling definition of Foo.
+void other();
+#endif
+ )cpp");
+  IndexFileIn IndexFile = runIndexingAction(MainFilePath);
+  EXPECT_THAT(*IndexFile.Symbols,
+              testing::Contains(AllOf(
+                  hasName("Foo"),
+                  includeHeader(URI::create(testPath("full.h")).toString()))))
+      << *IndexFile.Symbols->begin();
+}
 } // namespace
 } // namespace clangd
 } // namespace clang
@@ -349,7 +349,7 @@ address point. Note that libraries like libcxxabi do assume this property.
 
 (2) virtual function entry layout property
 
-For each virtual function the distance between an virtual table entry for this function and the corresponding
+For each virtual function the distance between a virtual table entry for this function and the corresponding
 address point is always the same. This property ensures that dynamic dispatch still works with the interleaving layout.
 
 Note that the interleaving scheme in the CFI implementation guarantees both properties above whereas the original scheme proposed
 
@@ -2019,7 +2019,7 @@ would be +1.  ``ns_returns_autoreleased`` specifies that the returned object is
 autorelease pool.
 
 **Usage**: The ``ns_consumed`` and ``cf_consumed`` attributes can be placed on
-an parameter declaration; they specify that the argument is expected to have a
+a parameter declaration; they specify that the argument is expected to have a
 +1 retain count, which will be balanced in some way by the function or method.
 The ``ns_consumes_self`` attribute can only be placed on an Objective-C
 method; it specifies that the method expects its ``self`` parameter to have a
@@ -3622,7 +3622,7 @@ scalar calls of ``__builtin_isfpclass`` applied to the input elementwise.
 The result of ``__builtin_isfpclass`` is a boolean value, if the first argument
 is a scalar, or an integer vector with the same element count as the first
 argument. The element type in this vector has the same bit length as the
-element of the the first argument type.
+element of the first argument type.
 
 This function never raises floating-point exceptions and does not canonicalize
 its input. The floating-point argument is not promoted, its data class is
@@ -4980,7 +4980,7 @@ Clang supports the following match rules:
 - ``record(unless(is_union))``: Can be used to apply attributes only to
   ``struct`` and ``class`` declarations.
 
-- ``enum``: Can be be used to apply attributes to enumeration declarations.
+- ``enum``: Can be used to apply attributes to enumeration declarations.
 
 - ``enum_constant``: Can be used to apply attributes to enumerators.