Skip to content

Commit 8e06fbd

Browse files
authored
Merge pull request llvm#448 from AMD-Lightning-Internal/amd/merge/upstream_merge_20250202223002
merge main into amd-staging
2 parents 5416cfe + ee8a45e commit 8e06fbd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1484
-330
lines changed

clang/lib/Sema/SemaTemplateDeductionGuide.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1227,11 +1227,14 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
12271227
NewParam->setScopeInfo(0, I);
12281228
FPTL.setParam(I, NewParam);
12291229
}
1230-
auto *Transformed = cast<FunctionDecl>(buildDeductionGuide(
1230+
auto *Transformed = cast<CXXDeductionGuideDecl>(buildDeductionGuide(
12311231
SemaRef, AliasTemplate, /*TemplateParams=*/nullptr,
12321232
/*Constructor=*/nullptr, DG->getExplicitSpecifier(), FunctionType,
12331233
AliasTemplate->getBeginLoc(), AliasTemplate->getLocation(),
12341234
AliasTemplate->getEndLoc(), DG->isImplicit()));
1235+
Transformed->setSourceDeductionGuide(DG);
1236+
Transformed->setSourceDeductionGuideKind(
1237+
CXXDeductionGuideDecl::SourceDeductionGuideKind::Alias);
12351238

12361239
// FIXME: Here the synthesized deduction guide is not a templated
12371240
// function. Per [dcl.decl]p4, the requires-clause shall be present only

clang/unittests/AST/ASTImporterTest.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8193,6 +8193,29 @@ TEST_P(ImportFunctions, CTADAliasTemplate) {
81938193
EXPECT_TRUE(ToD->getSourceDeductionGuide());
81948194
}
81958195

8196+
// Imports the deduction guide named "<deduction guide for B>" whose signature
// matches the user-written guide `A(int) -> A<double>` and checks that the
// imported declaration reports the Alias source-deduction-guide kind and has a
// non-null source deduction guide.
TEST_P(ImportFunctions, CTADAliasTemplateWithExplicitSourceDeductionGuide) {
8197+
Decl *TU = getTuDecl(
8198+
R"(
8199+
template <typename T> struct A {
8200+
A(T);
8201+
};
8202+
template<typename T>
8203+
using B = A<T>;
8204+
A(int) -> A<double>; // explicit
8205+
B b{(int)0};
8206+
)",
8207+
Lang_CXX20, "input.cc");
8208+
// Pick the guide for alias B by name, by its single `int` parameter and by
// its `A<double>` return type, i.e. the shape of the explicit guide above.
auto *FromD = FirstDeclMatcher<CXXDeductionGuideDecl>().match(
8209+
TU, cxxDeductionGuideDecl(hasParameter(0, hasType(asString("int"))),
8210+
hasName("<deduction guide for B>"),
8211+
hasReturnTypeLoc(loc(asString("A<double>")))));
8212+
auto *ToD = Import(FromD, Lang_CXX20);
8213+
// The import itself must succeed before the properties are inspected.
ASSERT_TRUE(ToD);
8214+
EXPECT_TRUE(ToD->getSourceDeductionGuideKind() ==
8215+
CXXDeductionGuideDecl::SourceDeductionGuideKind::Alias);
8216+
EXPECT_TRUE(ToD->getSourceDeductionGuide());
8217+
}
8218+
81968219
TEST_P(ImportFunctions, ParmVarDeclDeclContext) {
81978220
constexpr auto FromTUCode = R"(
81988221
void f(int P);

lld/ELF/BPSectionOrderer.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
//===- BPSectionOrderer.cpp -----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "BPSectionOrderer.h"
10+
#include "InputFiles.h"
11+
#include "InputSection.h"
12+
#include "SymbolTable.h"
13+
#include "Symbols.h"
14+
#include "lld/Common/BPSectionOrdererBase.inc"
15+
#include "llvm/Support/Endian.h"
16+
17+
using namespace llvm;
18+
using namespace lld::elf;
19+
20+
// Forward-declare the ELF orderer inside the anonymous namespace so the traits
// specialization below can name it before the struct itself is defined.
namespace {
21+
struct BPOrdererELF;
22+
}
23+
// Traits for BPOrdererELF: the section and symbol types the ELF orderer works
// on. NOTE(review): presumably consumed by lld::BPOrderer (declared in
// BPSectionOrdererBase.inc, not visible here) — confirm against that file.
template <> struct lld::BPOrdererTraits<struct BPOrdererELF> {
24+
using Section = elf::InputSectionBase;
25+
using Defined = elf::Defined;
26+
};
27+
namespace {
28+
// ELF implementation of the balanced-partitioning orderer. It supplies the
// per-section and per-symbol accessors declared below to the
// lld::BPOrderer<BPOrdererELF> base class.
struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
29+
// One Defined symbol per section. runBalancedPartitioning fills this with
// try_emplace, so the first symbol seen for a section is the one kept.
DenseMap<const InputSectionBase *, Defined *> secToSym;
30+
31+
static uint64_t getSize(const Section &sec) { return sec.getSize(); }
32+
// A section counts as code when its flags include SHF_EXECINSTR.
static bool isCodeSection(const Section &sec) {
33+
return sec.flags & llvm::ELF::SHF_EXECINSTR;
34+
}
35+
// Returns the symbol recorded for `sec` in secToSym, or an empty range when
// the section has no entry.
ArrayRef<Defined *> getSymbols(const Section &sec) {
36+
auto it = secToSym.find(&sec);
37+
if (it == secToSym.end())
38+
return {};
39+
return ArrayRef(it->second);
40+
}
41+
42+
// Appends content hashes of `sec` to `hashes`, then sorts and deduplicates
// the whole vector. `sectionToIdx` is not used by this implementation.
static void
43+
getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
44+
const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
45+
constexpr unsigned windowSize = 4;
46+
47+
// Calculate content hashes: k-mers and the last k-1 bytes.
48+
ArrayRef<uint8_t> data = sec.content();
49+
// The size check keeps `data.size() - windowSize` from wrapping around when
// the section is shorter than one window.
if (data.size() >= windowSize)
50+
for (size_t i = 0; i <= data.size() - windowSize; ++i)
51+
hashes.push_back(llvm::support::endian::read32le(data.data() + i));
52+
for (uint8_t byte : data.take_back(windowSize - 1))
53+
hashes.push_back(byte);
54+
55+
llvm::sort(hashes);
56+
hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
57+
}
58+
59+
// Plain accessors for the name, value and size of a Defined symbol.
static StringRef getSymName(const Defined &sym) { return sym.getName(); }
60+
static uint64_t getSymValue(const Defined &sym) { return sym.value; }
61+
static uint64_t getSymSize(const Defined &sym) { return sym.size; }
62+
};
63+
} // namespace
64+
65+
// Gathers candidate input sections from the global symbol table and from each
// object file's local symbols, then hands them to BPOrdererELF::computeOrder
// together with the caller's options and returns its result.
DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
66+
Ctx &ctx, StringRef profilePath, bool forFunctionCompression,
67+
bool forDataCompression, bool compressionSortStartupFunctions,
68+
bool verbose) {
69+
// Collect candidate sections and associated symbols.
70+
SmallVector<InputSectionBase *> sections;
71+
DenseMap<CachedHashStringRef, DenseSet<unsigned>> rootSymbolToSectionIdxs;
72+
BPOrdererELF orderer;
73+
74+
// Registers the section that `sym` is defined in. A symbol is ignored when it
// is not a Defined, when its section is null or not an InputSectionBase,
// when that section has size 0, or when the section was already registered
// through an earlier symbol.
auto addSection = [&](Symbol &sym) {
75+
auto *d = dyn_cast<Defined>(&sym);
76+
if (!d)
77+
return;
78+
auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
79+
if (!sec || sec->size == 0 || !orderer.secToSym.try_emplace(sec, d).second)
80+
return;
81+
// Record the index this section is about to get in `sections` under the
// symbol's root name.
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
82+
.insert(sections.size());
83+
sections.emplace_back(sec);
84+
};
85+
86+
// Symbols from the global symbol table are visited first, then the local
// symbols of each object file.
for (Symbol *sym : ctx.symtab->getSymbols())
87+
addSection(*sym);
88+
for (ELFFileBase *file : ctx.objectFiles)
89+
for (Symbol *sym : file->getLocalSymbols())
90+
addSection(*sym);
91+
return orderer.computeOrder(profilePath, forFunctionCompression,
92+
forDataCompression,
93+
compressionSortStartupFunctions, verbose,
94+
sections, rootSymbolToSectionIdxs);
95+
}

lld/ELF/BPSectionOrderer.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===- BPSectionOrderer.h -------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// This file uses Balanced Partitioning to order sections to improve startup
10+
/// time and compressed size.
11+
///
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLD_ELF_BPSECTION_ORDERER_H
15+
#define LLD_ELF_BPSECTION_ORDERER_H
16+
17+
#include "llvm/ADT/DenseMap.h"
18+
#include "llvm/ADT/StringRef.h"
19+
20+
namespace lld::elf {
21+
struct Ctx;
22+
class InputSectionBase;
23+
24+
/// Run Balanced Partitioning to find the optimal function and data order to
25+
/// improve startup time and compressed size.
26+
///
27+
/// It is important that -ffunction-sections and -fdata-sections compiler flags
28+
/// are used to ensure functions and data are in their own sections and thus
29+
/// can be reordered.
///
/// \param ctx the link context whose symbol table and object files are
///        scanned for candidate sections.
/// \param profilePath path of the profile given by --irpgo-profile; the
///        caller in Writer.cpp passes an empty string unless
///        --bp-startup-sort=function is enabled.
/// \param forFunctionCompression set by --bp-compression-sort=function|both.
/// \param forDataCompression set by --bp-compression-sort=data|both.
/// \param compressionSortStartupFunctions set by
///        --bp-compression-sort-startup-functions.
/// \param verbose set by --verbose-bp-section-orderer.
/// \returns a map from input section to an integer that the writer uses as
///          its section order.
30+
llvm::DenseMap<const InputSectionBase *, int>
31+
runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
32+
bool forFunctionCompression, bool forDataCompression,
33+
bool compressionSortStartupFunctions, bool verbose);
34+
35+
} // namespace lld::elf
36+
37+
#endif

lld/ELF/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ add_lld_library(lldELF
3737
Arch/X86.cpp
3838
Arch/X86_64.cpp
3939
ARMErrataFix.cpp
40+
BPSectionOrderer.cpp
4041
CallGraphSort.cpp
4142
DWARF.cpp
4243
Driver.cpp
@@ -72,6 +73,7 @@ add_lld_library(lldELF
7273
Object
7374
Option
7475
Passes
76+
ProfileData
7577
Support
7678
TargetParser
7779
TransformUtils

lld/ELF/Config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,12 @@ struct Config {
264264
bool armBe8 = false;
265265
BsymbolicKind bsymbolic = BsymbolicKind::None;
266266
CGProfileSortKind callGraphProfileSort;
267+
llvm::StringRef irpgoProfilePath;
268+
bool bpStartupFunctionSort = false;
269+
bool bpCompressionSortStartupFunctions = false;
270+
bool bpFunctionOrderForCompression = false;
271+
bool bpDataOrderForCompression = false;
272+
bool bpVerboseSectionOrderer = false;
267273
bool checkSections;
268274
bool checkDynamicRelocs;
269275
std::optional<llvm::DebugCompressionType> compressDebugSections;

lld/ELF/Driver.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,53 @@ static CGProfileSortKind getCGProfileSortKind(Ctx &ctx,
11181118
return CGProfileSortKind::None;
11191119
}
11201120

1121+
// Reads the balanced-partitioning options (--bp-compression-sort=,
// --bp-startup-sort=, --[no-]bp-compression-sort-startup-functions,
// --verbose-bp-section-orderer, --irpgo-profile) into ctx.arg and reports
// invalid values and unsupported option combinations via ErrAlways.
static void parseBPOrdererOptions(Ctx &ctx, opt::InputArgList &args) {
1122+
if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
1123+
StringRef s = arg->getValue();
1124+
if (s == "function") {
1125+
ctx.arg.bpFunctionOrderForCompression = true;
1126+
} else if (s == "data") {
1127+
ctx.arg.bpDataOrderForCompression = true;
1128+
} else if (s == "both") {
1129+
ctx.arg.bpFunctionOrderForCompression = true;
1130+
ctx.arg.bpDataOrderForCompression = true;
1131+
} else if (s != "none") {
1132+
// Anything other than none/function/data/both is rejected.
ErrAlways(ctx) << arg->getSpelling()
1133+
<< ": expected [none|function|data|both]";
1134+
}
1135+
// Any value except "none" (including an invalid one) conflicts with
// --call-graph-ordering-file.
if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
1136+
ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
1137+
"--call-graph-ordering-file";
1138+
}
1139+
if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
1140+
StringRef s = arg->getValue();
1141+
if (s == "function") {
1142+
ctx.arg.bpStartupFunctionSort = true;
1143+
} else if (s != "none") {
1144+
ErrAlways(ctx) << arg->getSpelling() << ": expected [none|function]";
1145+
}
1146+
// Same incompatibility rule as for --bp-compression-sort above.
if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
1147+
ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
1148+
"--call-graph-ordering-file";
1149+
}
1150+
1151+
// Boolean flag pair; defaults to false when neither form is given.
ctx.arg.bpCompressionSortStartupFunctions =
1152+
args.hasFlag(OPT_bp_compression_sort_startup_functions,
1153+
OPT_no_bp_compression_sort_startup_functions, false);
1154+
ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
1155+
1156+
ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
1157+
// Options that need a profile are errors when --irpgo-profile is absent or
// empty.
if (ctx.arg.irpgoProfilePath.empty()) {
1158+
if (ctx.arg.bpStartupFunctionSort)
1159+
ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
1160+
"--irpgo-profile";
1161+
if (ctx.arg.bpCompressionSortStartupFunctions)
1162+
ErrAlways(ctx)
1163+
<< "--bp-compression-sort-startup-functions must be used with "
1164+
"--irpgo-profile";
1165+
}
1166+
}
1167+
11211168
static DebugCompressionType getCompressionType(Ctx &ctx, StringRef s,
11221169
StringRef option) {
11231170
DebugCompressionType type = StringSwitch<DebugCompressionType>(s)
@@ -1259,6 +1306,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
12591306
ctx.arg.bsymbolic = BsymbolicKind::All;
12601307
}
12611308
ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
1309+
parseBPOrdererOptions(ctx, args);
12621310
ctx.arg.checkSections =
12631311
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
12641312
ctx.arg.chroot = args.getLastArgValue(OPT_chroot);

lld/ELF/Options.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,19 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
141141
def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
142142
Flags<[HelpHidden]>;
143143

144+
defm irpgo_profile: EEq<"irpgo-profile",
145+
"Read a temporal profile file for use with --bp-startup-sort=">;
146+
def bp_compression_sort: JJ<"bp-compression-sort=">, MetaVarName<"[none,function,data,both]">,
147+
HelpText<"Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size">;
148+
def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">,
149+
HelpText<"Utilize a temporal profile file to reduce page faults during program startup">;
150+
151+
// Auxiliary options related to balanced partition
152+
defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
153+
"When --irpgo-profile is specified, prioritize function similarity for compression in addition to startup time", "">;
154+
def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
155+
HelpText<"Print information on balanced partitioning">;
156+
144157
// --chroot doesn't have a help text because it is an internal option.
145158
def chroot: Separate<["--"], "chroot">;
146159

lld/ELF/Writer.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Writer.h"
1010
#include "AArch64ErrataFix.h"
1111
#include "ARMErrataFix.h"
12+
#include "BPSectionOrderer.h"
1213
#include "CallGraphSort.h"
1314
#include "Config.h"
1415
#include "InputFiles.h"
@@ -1082,8 +1083,18 @@ static void maybeShuffle(Ctx &ctx,
10821083
// that don't appear in the order file.
10831084
static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
10841085
DenseMap<const InputSectionBase *, int> sectionOrder;
1085-
if (!ctx.arg.callGraphProfile.empty())
1086+
if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
1087+
ctx.arg.bpDataOrderForCompression) {
1088+
TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
1089+
sectionOrder = runBalancedPartitioning(
1090+
ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
1091+
ctx.arg.bpFunctionOrderForCompression,
1092+
ctx.arg.bpDataOrderForCompression,
1093+
ctx.arg.bpCompressionSortStartupFunctions,
1094+
ctx.arg.bpVerboseSectionOrderer);
1095+
} else if (!ctx.arg.callGraphProfile.empty()) {
10861096
sectionOrder = computeCallGraphProfileOrder(ctx);
1097+
}
10871098

10881099
if (ctx.arg.symbolOrderingFile.empty())
10891100
return sectionOrder;

lld/MachO/BPSectionOrderer.cpp

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,16 @@ struct BPOrdererMachO;
2626
}
2727
template <> struct lld::BPOrdererTraits<struct BPOrdererMachO> {
2828
using Section = macho::InputSection;
29-
using Symbol = macho::Symbol;
29+
using Defined = macho::Defined;
3030
};
3131
namespace {
3232
struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
3333
static uint64_t getSize(const Section &sec) { return sec.getSize(); }
3434
static bool isCodeSection(const Section &sec) {
3535
return macho::isCodeSection(&sec);
3636
}
37-
static SmallVector<Symbol *, 0> getSymbols(const Section &sec) {
38-
SmallVector<Symbol *, 0> symbols;
39-
for (auto *sym : sec.symbols)
40-
if (auto *d = llvm::dyn_cast_or_null<Defined>(sym))
41-
symbols.emplace_back(d);
42-
return symbols;
37+
static ArrayRef<Defined *> getSymbols(const Section &sec) {
38+
return sec.symbols;
4339
}
4440

4541
// Linkage names can be prefixed with "_" or "l_" on Mach-O. See
@@ -80,17 +76,11 @@ struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
8076
hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
8177
}
8278

83-
static llvm::StringRef getSymName(const Symbol &sym) { return sym.getName(); }
84-
static uint64_t getSymValue(const Symbol &sym) {
85-
if (auto *d = dyn_cast<Defined>(&sym))
86-
return d->value;
87-
return 0;
88-
}
89-
static uint64_t getSymSize(const Symbol &sym) {
90-
if (auto *d = dyn_cast<Defined>(&sym))
91-
return d->size;
92-
return 0;
79+
static llvm::StringRef getSymName(const Defined &sym) {
80+
return sym.getName();
9381
}
82+
static uint64_t getSymValue(const Defined &sym) { return sym.value; }
83+
static uint64_t getSymSize(const Defined &sym) { return sym.size; }
9484

9585
private:
9686
static uint64_t
@@ -141,8 +131,8 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
141131
}
142132
}
143133

144-
return BPOrdererMachO::computeOrder(profilePath, forFunctionCompression,
145-
forDataCompression,
146-
compressionSortStartupFunctions, verbose,
147-
sections, rootSymbolToSectionIdxs);
134+
return BPOrdererMachO().computeOrder(profilePath, forFunctionCompression,
135+
forDataCompression,
136+
compressionSortStartupFunctions, verbose,
137+
sections, rootSymbolToSectionIdxs);
148138
}

0 commit comments

Comments
 (0)