Skip to content

Commit 297704c

Browse files
author
iclsrc
committed
Merge from 'main' to 'sycl-web' (269 commits)
CONFLICT (content): Merge conflict in openmp/libomptarget/test/lit.cfg
2 parents b296dfd + 12250c4 commit 297704c

File tree

858 files changed

+66688
-11964
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

858 files changed

+66688
-11964
lines changed

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -266,13 +266,11 @@ void DIEBuilder::buildCompileUnits(const bool Init) {
266266
}
267267
void DIEBuilder::buildCompileUnits(const std::vector<DWARFUnit *> &CUs) {
268268
BuilderState.reset(new State());
269-
// Initializing to full size because there could be cross CU references with
270-
// different abbrev offsets. LLVM happens to output CUs that have cross CU
271-
// references with the same abbrev table. So destinations end up in the first
272-
// set, even if they themselves don't have src cross cu ref. We could have
273-
// cases where this is not the case. In which case this container needs to be
274-
// big enough for all.
275-
getState().CloneUnitCtxMap.resize(DwarfContext->getNumCompileUnits());
269+
// Allocating enough for current batch being processed.
270+
// In real use cases we are either processing a batch of CUs with no cross
271+
// references, or if they do have them it is due to LTO. With clang they will
272+
// share the same abbrev table. In either case this vector will not grow.
273+
getState().CloneUnitCtxMap.resize(CUs.size());
276274
getState().Type = ProcessingType::CUs;
277275
for (DWARFUnit *CU : CUs)
278276
registerUnit(*CU, false);
@@ -897,6 +895,10 @@ void DIEBuilder::registerUnit(DWARFUnit &DU, bool NeedSort) {
897895
});
898896
}
899897
getState().UnitIDMap[getHash(DU)] = getState().DUList.size();
898+
// This handles the case where we do have cross cu references, but CUs do not
899+
// share the same abbrev table.
900+
if (getState().DUList.size() == getState().CloneUnitCtxMap.size())
901+
getState().CloneUnitCtxMap.emplace_back();
900902
getState().DUList.push_back(&DU);
901903
}
902904

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,12 @@ struct SplitCacheDirected final : public SplitStrategy {
175175
void fragment(const BlockIt Start, const BlockIt End) override {
176176
BasicBlockOrder BlockOrder(Start, End);
177177
BinaryFunction &BF = *BlockOrder.front()->getFunction();
178+
// No need to re-split small functions.
179+
if (BlockOrder.size() <= 2)
180+
return;
178181

179182
size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
183+
assert(BestSplitIndex < BlockOrder.size());
180184

181185
// Assign fragments based on the computed best split index.
182186
// All basic blocks with index up to the best split index become hot.
@@ -200,10 +204,12 @@ struct SplitCacheDirected final : public SplitStrategy {
200204
};
201205

202206
struct SplitScore {
203-
size_t SplitIndex;
207+
size_t SplitIndex = size_t(-1);
204208
size_t HotSizeReduction = 0;
205209
double LocalScore = 0;
206210
double CoverCallScore = 0;
211+
212+
double sum() const { return LocalScore + CoverCallScore; }
207213
};
208214

209215
// Auxiliary variables used by the algorithm.
@@ -303,7 +309,7 @@ struct SplitCacheDirected final : public SplitStrategy {
303309
const size_t SplitIndex) {
304310
assert(SplitIndex < BlockOrder.size() && "Invalid split index");
305311

306-
// Update function layout assuming hot-warm splitting at SplitIndex
312+
// Update function layout assuming hot-warm splitting at SplitIndex.
307313
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
308314
BinaryBasicBlock *BB = BlockOrder[Index];
309315
if (BB->getFragmentNum() == FragmentNum::cold())
@@ -319,8 +325,8 @@ struct SplitCacheDirected final : public SplitStrategy {
319325
// Populate BB.OutputAddressRange with estimated new start and end addresses
320326
// and compute the old end address of the hot section and the new end
321327
// address of the hot section.
322-
size_t OldHotEndAddr;
323-
size_t NewHotEndAddr;
328+
size_t OldHotEndAddr{0};
329+
size_t NewHotEndAddr{0};
324330
size_t CurrentAddr = BBOffsets[BlockOrder[0]];
325331
for (BinaryBasicBlock *BB : BlockOrder) {
326332
// We only care about new addresses of blocks in hot/warm.
@@ -492,20 +498,15 @@ struct SplitCacheDirected final : public SplitStrategy {
492498
}
493499

494500
/// Compute the split score of splitting a function at a given index.
495-
/// The split score consists of local score and cover score. Cover call score
496-
/// is expensive to compute. As a result, we pass in a \p ReferenceScore and
497-
/// compute cover score only when the local score exceeds that in the
498-
/// ReferenceScore or that the size reduction of the hot fragment is larger
499-
/// than that achieved by the split index of the ReferenceScore. This function
500-
/// returns \p Score of SplitScore type. It contains the local score and cover
501-
/// score (if computed) of the current splitting index. For easier book
502-
/// keeping and comparison, it also stores the split index and the resulting
503-
/// reduction in hot fragment size.
501+
/// The split score consists of local score and cover score. This function
502+
/// returns \p Score of SplitScore type. It contains the local score and
503+
/// cover score of the current splitting index. For easier book keeping and
504+
/// comparison, it also stores the split index and the resulting reduction
505+
/// in hot fragment size.
504506
SplitScore computeSplitScore(const BinaryFunction &BF,
505507
const BasicBlockOrder &BlockOrder,
506508
const size_t SplitIndex,
507-
const std::vector<CallInfo> &CoverCalls,
508-
const SplitScore &ReferenceScore) {
509+
const std::vector<CallInfo> &CoverCalls) {
509510
// Populate BinaryBasicBlock::OutputAddressRange with estimated
510511
// new start and end addresses after hot-warm splitting at SplitIndex.
511512
size_t OldHotEnd;
@@ -533,47 +534,74 @@ struct SplitCacheDirected final : public SplitStrategy {
533534
// incremented in place.
534535
computeJumpScore(BlockOrder, SplitIndex, Score);
535536

536-
// There is no need to compute CoverCallScore if we have already found
537-
// another split index with a bigger LocalScore and bigger HotSizeReduction.
538-
if (Score.LocalScore <= ReferenceScore.LocalScore &&
539-
Score.HotSizeReduction <= ReferenceScore.HotSizeReduction)
540-
return Score;
541-
542537
// Compute CoverCallScore and store in Score in place.
543538
computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score);
544539
return Score;
545540
}
546541

542+
/// Find the most likely successor of a basic block when it has one or two
543+
/// successors. Return nullptr otherwise.
544+
const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) {
545+
if (BB->succ_size() == 1)
546+
return BB->getSuccessor();
547+
if (BB->succ_size() == 2) {
548+
uint64_t TakenCount = BB->getTakenBranchInfo().Count;
549+
assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
550+
uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
551+
assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
552+
if (TakenCount > NonTakenCount)
553+
return BB->getConditionalSuccessor(true);
554+
else if (TakenCount < NonTakenCount)
555+
return BB->getConditionalSuccessor(false);
556+
}
557+
return nullptr;
558+
}
559+
547560
/// Find the best index for splitting. The returned value is the index of the
548561
/// last hot basic block. Hence, "no splitting" is equivalent to returning the
549562
/// value which is one less than the size of the function.
550563
size_t findSplitIndex(const BinaryFunction &BF,
551564
const BasicBlockOrder &BlockOrder) {
565+
assert(BlockOrder.size() > 2);
552566
// Find all function calls that can be shortened if we move blocks of the
553567
// current function to warm/cold
554568
const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF);
555569

556-
// Try all possible split indices (blocks with Index <= SplitIndex are in
557-
// hot) and find the one maximizing the splitting score.
570+
// Find the existing hot-cold splitting index.
571+
size_t HotColdIndex = 0;
572+
while (HotColdIndex + 1 < BlockOrder.size()) {
573+
if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold())
574+
break;
575+
HotColdIndex++;
576+
}
577+
assert(HotColdIndex + 1 == BlockOrder.size() ||
578+
(BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() &&
579+
BlockOrder[HotColdIndex + 1]->getFragmentNum() ==
580+
FragmentNum::cold()));
581+
582+
// Try all possible split indices up to HotColdIndex (blocks that have
583+
// Index <= SplitIndex are in hot) and find the one maximizing the
584+
// splitting score.
558585
SplitScore BestScore;
559-
double BestScoreSum = -1.0;
560-
SplitScore ReferenceScore;
561-
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
586+
for (size_t Index = 0; Index <= HotColdIndex; Index++) {
562587
const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
563-
// No need to keep cold blocks in the hot section.
564-
if (LastHotBB->getFragmentNum() == FragmentNum::cold())
565-
break;
588+
assert(LastHotBB->getFragmentNum() != FragmentNum::cold());
589+
590+
// Do not break jump to the most likely successor.
591+
if (Index + 1 < BlockOrder.size() &&
592+
BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB))
593+
continue;
594+
566595
const SplitScore Score =
567-
computeSplitScore(BF, BlockOrder, Index, CoverCalls, ReferenceScore);
568-
double ScoreSum = Score.LocalScore + Score.CoverCallScore;
569-
if (ScoreSum > BestScoreSum) {
570-
BestScoreSum = ScoreSum;
596+
computeSplitScore(BF, BlockOrder, Index, CoverCalls);
597+
if (Score.sum() > BestScore.sum())
571598
BestScore = Score;
572-
}
573-
if (Score.LocalScore > ReferenceScore.LocalScore)
574-
ReferenceScore = Score;
575599
}
576600

601+
// If we don't find a good splitting point, fallback to the original one.
602+
if (BestScore.SplitIndex == size_t(-1))
603+
return HotColdIndex;
604+
577605
return BestScore.SplitIndex;
578606
}
579607
};

bolt/test/X86/cdsplit-call-scale.s

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
# When -call-scale=0.0, the tested function is split 2-way.
33
# When -call-scale=1.0, the tested function is split 3-way with 5 blocks
44
# in warm because of the increased benefit of shortening the call edges.
5-
# When -call-scale=1000.0, the tested function is 3-way splitted with 7 blocks
6-
# in warm because of the strong benefit of shortening the call edges.
5+
# When -call-scale=1000.0, the tested function is still split 3-way with
6+
# 5 blocks in warm because cdsplit does not allow hot-warm splitting to break
7+
# a fall through branch from a basic block to its most likely successor.
78

89
# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
910
# RUN: link_fdata %s %t.o %t.fdata
@@ -39,12 +40,10 @@
3940
# MEDINCENTIVE: {{^\.Ltmp5}}
4041

4142
# HIGHINCENTIVE: Binary Function "chain" after split-functions
42-
# HIGHINCENTIVE: {{^\.LBB00}}
43+
# HIGHINCENTIVE: {{^\.Ltmp1}}
4344
# HIGHINCENTIVE: ------- HOT-COLD SPLIT POINT -------
4445
# HIGHINCENTIVE: {{^\.LFT1}}
4546
# HIGHINCENTIVE: ------- HOT-COLD SPLIT POINT -------
46-
# HIGHINCENTIVE: {{^\.LFT0}}
47-
# HIGHINCENTIVE: {{^\.Ltmp1}}
4847
# HIGHINCENTIVE: {{^\.Ltmp0}}
4948
# HIGHINCENTIVE: {{^\.Ltmp2}}
5049
# HIGHINCENTIVE: {{^\.Ltmp3}}

clang-tools-extra/clangd/index/SymbolCollector.cpp

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -826,22 +826,8 @@ void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc,
826826
// We update providers for a symbol with each occurrence, as SymbolCollector
827827
// might run while parsing, rather than at the end of a translation unit.
828828
// Hence we see more and more redecls over time.
829-
auto [It, Inserted] = SymbolProviders.try_emplace(S.ID);
830-
auto Headers =
829+
SymbolProviders[S.ID] =
831830
include_cleaner::headersForSymbol(Sym, SM, Opts.PragmaIncludes);
832-
if (Headers.empty())
833-
return;
834-
835-
auto *HeadersIter = Headers.begin();
836-
include_cleaner::Header H = *HeadersIter;
837-
while (HeadersIter != Headers.end() &&
838-
H.kind() == include_cleaner::Header::Physical &&
839-
!tooling::isSelfContainedHeader(H.physical(), SM,
840-
PP->getHeaderSearchInfo())) {
841-
H = *HeadersIter;
842-
HeadersIter++;
843-
}
844-
It->second = H;
845831
}
846832

847833
llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) {
@@ -889,7 +875,7 @@ void SymbolCollector::finish() {
889875
llvm::DenseMap<include_cleaner::Header, std::string> HeaderSpelling;
890876
// Fill in IncludeHeaders.
891877
// We delay this until end of TU so header guards are all resolved.
892-
for (const auto &[SID, OptionalProvider] : SymbolProviders) {
878+
for (const auto &[SID, Providers] : SymbolProviders) {
893879
const Symbol *S = Symbols.find(SID);
894880
if (!S)
895881
continue;
@@ -931,9 +917,27 @@ void SymbolCollector::finish() {
931917
continue;
932918
}
933919

934-
assert(Directives == Symbol::Include);
935920
// For #include's, use the providers computed by the include-cleaner
936921
// library.
922+
assert(Directives == Symbol::Include);
923+
// Ignore providers that are not self-contained, this is especially
924+
// important for symbols defined in the main-file. We want to prefer the
925+
// header, if possible.
926+
// TODO: Limit this to specifically ignore main file, when we're indexing a
927+
// non-header file?
928+
auto SelfContainedProvider =
929+
[this](llvm::ArrayRef<include_cleaner::Header> Providers)
930+
-> std::optional<include_cleaner::Header> {
931+
for (const auto &H : Providers) {
932+
if (H.kind() != include_cleaner::Header::Physical)
933+
return H;
934+
if (tooling::isSelfContainedHeader(H.physical(), PP->getSourceManager(),
935+
PP->getHeaderSearchInfo()))
936+
return H;
937+
}
938+
return std::nullopt;
939+
};
940+
const auto OptionalProvider = SelfContainedProvider(Providers);
937941
if (!OptionalProvider)
938942
continue;
939943
const auto &H = *OptionalProvider;

clang-tools-extra/clangd/index/SymbolCollector.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,25 @@
1515
#include "index/Relation.h"
1616
#include "index/Symbol.h"
1717
#include "index/SymbolID.h"
18+
#include "index/SymbolLocation.h"
1819
#include "index/SymbolOrigin.h"
1920
#include "clang/AST/ASTContext.h"
2021
#include "clang/AST/Decl.h"
22+
#include "clang/Basic/LLVM.h"
2123
#include "clang/Basic/SourceLocation.h"
2224
#include "clang/Basic/SourceManager.h"
2325
#include "clang/Index/IndexDataConsumer.h"
2426
#include "clang/Index/IndexSymbol.h"
2527
#include "clang/Sema/CodeCompleteConsumer.h"
2628
#include "llvm/ADT/DenseMap.h"
29+
#include "llvm/ADT/DenseSet.h"
30+
#include "llvm/ADT/SmallVector.h"
31+
#include "llvm/ADT/StringRef.h"
2732
#include <functional>
2833
#include <memory>
2934
#include <optional>
35+
#include <string>
36+
#include <utility>
3037

3138
namespace clang {
3239
namespace clangd {
@@ -177,7 +184,7 @@ class SymbolCollector : public index::IndexDataConsumer {
177184

178185
// Providers for Symbol.IncludeHeaders.
179186
// The final spelling is calculated in finish().
180-
llvm::DenseMap<SymbolID, std::optional<include_cleaner::Header>>
187+
llvm::DenseMap<SymbolID, llvm::SmallVector<include_cleaner::Header>>
181188
SymbolProviders;
182189
// Files which contain ObjC symbols.
183190
// This is finalized and used in finish().

clang-tools-extra/clangd/unittests/IndexActionTests.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,36 @@ TEST_F(IndexActionTest, SymbolFromCC) {
341341
hasName("foo"),
342342
includeHeader(URI::create(testPath("main.h")).toString()))));
343343
}
344+
345+
TEST_F(IndexActionTest, IncludeHeaderForwardDecls) {
346+
std::string MainFilePath = testPath("main.cpp");
347+
addFile(MainFilePath, R"cpp(
348+
#include "fwd.h"
349+
#include "full.h"
350+
)cpp");
351+
addFile(testPath("fwd.h"), R"cpp(
352+
#ifndef _FWD_H_
353+
#define _FWD_H_
354+
struct Foo;
355+
#endif
356+
)cpp");
357+
addFile(testPath("full.h"), R"cpp(
358+
#ifndef _FULL_H_
359+
#define _FULL_H_
360+
struct Foo {};
361+
362+
// This decl is important, as otherwise we detect control macro for the file,
363+
// before handling definition of Foo.
364+
void other();
365+
#endif
366+
)cpp");
367+
IndexFileIn IndexFile = runIndexingAction(MainFilePath);
368+
EXPECT_THAT(*IndexFile.Symbols,
369+
testing::Contains(AllOf(
370+
hasName("Foo"),
371+
includeHeader(URI::create(testPath("full.h")).toString()))))
372+
<< *IndexFile.Symbols->begin();
373+
}
344374
} // namespace
345375
} // namespace clangd
346376
} // namespace clang

clang/docs/ControlFlowIntegrityDesign.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ address point. Note that libraries like libcxxabi do assume this property.
349349

350350
(2) virtual function entry layout property
351351

352-
For each virtual function the distance between an virtual table entry for this function and the corresponding
352+
For each virtual function the distance between a virtual table entry for this function and the corresponding
353353
address point is always the same. This property ensures that dynamic dispatch still works with the interleaving layout.
354354

355355
Note that the interleaving scheme in the CFI implementation guarantees both properties above whereas the original scheme proposed

clang/docs/LanguageExtensions.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,7 +2019,7 @@ would be +1. ``ns_returns_autoreleased`` specifies that the returned object is
20192019
autorelease pool.
20202020
20212021
**Usage**: The ``ns_consumed`` and ``cf_consumed`` attributes can be placed on
2022-
an parameter declaration; they specify that the argument is expected to have a
2022+
a parameter declaration; they specify that the argument is expected to have a
20232023
+1 retain count, which will be balanced in some way by the function or method.
20242024
The ``ns_consumes_self`` attribute can only be placed on an Objective-C
20252025
method; it specifies that the method expects its ``self`` parameter to have a
@@ -3622,7 +3622,7 @@ scalar calls of ``__builtin_isfpclass`` applied to the input elementwise.
36223622
The result of ``__builtin_isfpclass`` is a boolean value, if the first argument
36233623
is a scalar, or an integer vector with the same element count as the first
36243624
argument. The element type in this vector has the same bit length as the
3625-
element of the the first argument type.
3625+
element of the first argument type.
36263626
36273627
This function never raises floating-point exceptions and does not canonicalize
36283628
its input. The floating-point argument is not promoted, its data class is
@@ -4980,7 +4980,7 @@ Clang supports the following match rules:
49804980
- ``record(unless(is_union))``: Can be used to apply attributes only to
49814981
``struct`` and ``class`` declarations.
49824982
4983-
- ``enum``: Can be be used to apply attributes to enumeration declarations.
4983+
- ``enum``: Can be used to apply attributes to enumeration declarations.
49844984
49854985
- ``enum_constant``: Can be used to apply attributes to enumerators.
49864986

0 commit comments

Comments
 (0)