Skip to content

Commit 38d83bf

Browse files
committed
Merge branch 'main' into vp-arm-mve-transform
2 parents 2fbdc7c + c34dc9a commit 38d83bf

File tree

1,034 files changed

+69271
-128230
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,034 files changed

+69271
-128230
lines changed

.ci/compute-projects.sh

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@ function compute-projects-to-test() {
1818
shift
1919
projects=${@}
2020
for project in ${projects}; do
21+
echo "${project}"
2122
case ${project} in
2223
lld)
23-
for p in lld bolt cross-project-tests; do
24+
for p in bolt cross-project-tests; do
2425
echo $p
2526
done
2627
;;
2728
llvm)
28-
for p in llvm bolt clang clang-tools-extra lld lldb mlir polly; do
29+
for p in bolt clang clang-tools-extra lld lldb mlir polly; do
2930
echo $p
3031
done
3132
# Flang is not stable in Windows CI at the moment
@@ -35,30 +36,21 @@ function compute-projects-to-test() {
3536
;;
3637
clang)
3738
# lldb is temporarily removed to alleviate Linux pre-commit CI waiting times
38-
for p in clang clang-tools-extra compiler-rt cross-project-tests; do
39+
for p in clang-tools-extra compiler-rt cross-project-tests; do
3940
echo $p
4041
done
4142
;;
4243
clang-tools-extra)
43-
for p in clang-tools-extra libc; do
44-
echo $p
45-
done
44+
echo libc
4645
;;
4746
mlir)
48-
echo mlir
49-
# Flang is not stable in Windows CI at the moment
50-
if [[ $isForWindows == 0 ]]; then
51-
echo flang
52-
fi
53-
;;
54-
flang-rt)
5547
# Flang is not stable in Windows CI at the moment
5648
if [[ $isForWindows == 0 ]]; then
5749
echo flang
5850
fi
5951
;;
6052
*)
61-
echo "${project}"
53+
# Nothing to do
6254
;;
6355
esac
6456
done
@@ -73,11 +65,6 @@ function compute-runtimes-to-test() {
7365
echo $p
7466
done
7567
;;
76-
flang)
77-
for p in flang-rt; do
78-
echo $p
79-
done
80-
;;
8168
*)
8269
# Nothing to do
8370
;;

.ci/generate-buildkite-pipeline-premerge

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ fi
7373
# needs while letting them run on the infrastructure provided by LLVM.
7474

7575
# Figure out which projects need to be built on each platform
76-
all_projects="bolt clang clang-tools-extra compiler-rt cross-project-tests flang flang-rt libc libclc lld lldb llvm mlir openmp polly pstl"
76+
all_projects="bolt clang clang-tools-extra compiler-rt cross-project-tests flang libc libclc lld lldb llvm mlir openmp polly pstl"
7777
modified_projects="$(keep-modified-projects ${all_projects})"
7878

7979
linux_projects_to_test=$(exclude-linux $(compute-projects-to-test 0 ${modified_projects}))

.ci/metrics/metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
# This means we essentially have a list of workflows sorted by creation date,
4444
# and that's all we can deduce from it. So for each iteration, we'll blindly
4545
# process the last N workflows.
46-
GITHUB_WORKFLOWS_MAX_PROCESS_COUNT = 1000
46+
GITHUB_WORKFLOWS_MAX_PROCESS_COUNT = 2000
4747
# Second reason for the cut: reaching a workflow older than X.
4848
# This means we will miss long-tails (exceptional jobs running for more than
4949
# X hours), but that's also the case with the count cutoff above.

.ci/monolithic-linux.sh

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
6565
-D CMAKE_CXX_FLAGS=-gmlt \
6666
-D LLVM_CCACHE_BUILD=ON \
6767
-D MLIR_ENABLE_BINDINGS_PYTHON=ON \
68-
-D FLANG_ENABLE_FLANG_RT=OFF \
6968
-D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}"
7069

7170
echo "--- ninja"
@@ -96,9 +95,6 @@ if [[ "${runtimes}" != "" ]]; then
9695
cmake -S "${MONOREPO_ROOT}/runtimes" -B "${RUNTIMES_BUILD_DIR}" -GNinja \
9796
-D CMAKE_C_COMPILER="${INSTALL_DIR}/bin/clang" \
9897
-D CMAKE_CXX_COMPILER="${INSTALL_DIR}/bin/clang++" \
99-
-D CMAKE_Fortran_COMPILER="${BUILD_DIR}/bin/flang" \
100-
-D CMAKE_Fortran_COMPILER_WORKS=ON \
101-
-D LLVM_BINARY_DIR="${BUILD_DIR}" \
10298
-D LLVM_ENABLE_RUNTIMES="${runtimes}" \
10399
-D LIBCXX_CXX_ABI=libcxxabi \
104100
-D CMAKE_BUILD_TYPE=RelWithDebInfo \
@@ -117,9 +113,6 @@ if [[ "${runtimes}" != "" ]]; then
117113
cmake -S "${MONOREPO_ROOT}/runtimes" -B "${RUNTIMES_BUILD_DIR}" -GNinja \
118114
-D CMAKE_C_COMPILER="${INSTALL_DIR}/bin/clang" \
119115
-D CMAKE_CXX_COMPILER="${INSTALL_DIR}/bin/clang++" \
120-
-D CMAKE_Fortran_COMPILER="${BUILD_DIR}/bin/flang" \
121-
-D CMAKE_Fortran_COMPILER_WORKS=ON \
122-
-D LLVM_BINARY_DIR="${BUILD_DIR}" \
123116
-D LLVM_ENABLE_RUNTIMES="${runtimes}" \
124117
-D LIBCXX_CXX_ABI=libcxxabi \
125118
-D CMAKE_BUILD_TYPE=RelWithDebInfo \
@@ -138,9 +131,6 @@ if [[ "${runtimes}" != "" ]]; then
138131
cmake -S "${MONOREPO_ROOT}/runtimes" -B "${RUNTIMES_BUILD_DIR}" -GNinja \
139132
-D CMAKE_C_COMPILER="${INSTALL_DIR}/bin/clang" \
140133
-D CMAKE_CXX_COMPILER="${INSTALL_DIR}/bin/clang++" \
141-
-D CMAKE_Fortran_COMPILER="${BUILD_DIR}/bin/flang" \
142-
-D CMAKE_Fortran_COMPILER_WORKS=ON \
143-
-D LLVM_BINARY_DIR="${BUILD_DIR}" \
144134
-D LLVM_ENABLE_RUNTIMES="${runtimes}" \
145135
-D LIBCXX_CXX_ABI=libcxxabi \
146136
-D CMAKE_BUILD_TYPE=RelWithDebInfo \

bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,15 +125,39 @@ AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
125125
// instruction pairs and will perform necessary adjustments.
126126
ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
127127
assert(SymbolValue && "Symbol value should be set");
128-
(void)SymbolValue;
129-
130-
AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
131-
AdjustedRel.Addend = Rel.Value;
128+
const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
129+
130+
// Check if defined symbol and GOT are on the same page. If they are not,
131+
// disambiguate the operand.
132+
if (BC.MIB->isADRP(Inst) && Rel.Addend == 0 &&
133+
SymbolPageAddr == Rel.Value &&
134+
!isPageAddressValidForGOT(SymbolPageAddr)) {
135+
AdjustedRel.Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
136+
} else {
137+
AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
138+
AdjustedRel.Addend = Rel.Value;
139+
}
132140
}
133141

134142
return AdjustedRel;
135143
}
136144

145+
bool AArch64MCSymbolizer::isPageAddressValidForGOT(uint64_t PageAddress) const {
146+
assert(!(PageAddress & 0xfffULL) && "Page address not aligned at 4KB");
147+
148+
ErrorOr<BinarySection &> GOT =
149+
Function.getBinaryContext().getUniqueSectionByName(".got");
150+
if (!GOT || !GOT->getSize())
151+
return false;
152+
153+
const uint64_t GOTFirstPageAddress = GOT->getAddress() & ~0xfffULL;
154+
const uint64_t GOTLastPageAddress =
155+
(GOT->getAddress() + GOT->getSize() - 1) & ~0xfffULL;
156+
157+
return PageAddress >= GOTFirstPageAddress &&
158+
PageAddress <= GOTLastPageAddress;
159+
}
160+
137161
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
138162
int64_t Value,
139163
uint64_t Address) {}

bolt/lib/Target/AArch64/AArch64MCSymbolizer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ class AArch64MCSymbolizer : public MCSymbolizer {
2828
std::optional<Relocation> adjustRelocation(const Relocation &Rel,
2929
const MCInst &Inst) const;
3030

31+
/// Return true if \p PageAddress is a valid page address for .got section.
32+
bool isPageAddressValidForGOT(uint64_t PageAddress) const;
33+
3134
public:
3235
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
3336
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),

bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
244244
Inst.clear();
245245
Inst.addOperand(MCOperand::createExpr(RISCVMCExpr::create(
246246
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
247-
RISCVMCExpr::VK_RISCV_CALL, *Ctx)));
247+
RISCVMCExpr::VK_CALL, *Ctx)));
248248
}
249249

250250
void createCall(MCInst &Inst, const MCSymbol *Target,
@@ -434,19 +434,19 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
434434
case ELF::R_RISCV_TLS_GOT_HI20:
435435
// The GOT is reused so no need to create GOT relocations
436436
case ELF::R_RISCV_PCREL_HI20:
437-
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_PCREL_HI, Ctx);
437+
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_PCREL_HI, Ctx);
438438
case ELF::R_RISCV_PCREL_LO12_I:
439439
case ELF::R_RISCV_PCREL_LO12_S:
440-
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);
440+
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_PCREL_LO, Ctx);
441441
case ELF::R_RISCV_HI20:
442-
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_HI, Ctx);
442+
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_HI, Ctx);
443443
case ELF::R_RISCV_LO12_I:
444444
case ELF::R_RISCV_LO12_S:
445-
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_LO, Ctx);
445+
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_LO, Ctx);
446446
case ELF::R_RISCV_CALL:
447-
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_CALL, Ctx);
447+
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_CALL, Ctx);
448448
case ELF::R_RISCV_CALL_PLT:
449-
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_CALL_PLT, Ctx);
449+
return RISCVMCExpr::create(Expr, RISCVMCExpr::VK_CALL_PLT, Ctx);
450450
}
451451
}
452452

@@ -471,8 +471,8 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
471471
switch (cast<RISCVMCExpr>(ImmExpr)->getKind()) {
472472
default:
473473
return false;
474-
case RISCVMCExpr::VK_RISCV_CALL:
475-
case RISCVMCExpr::VK_RISCV_CALL_PLT:
474+
case RISCVMCExpr::VK_CALL:
475+
case RISCVMCExpr::VK_CALL_PLT:
476476
return true;
477477
}
478478
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
## Check that BOLT symbolizer properly handles loads from GOT, including
2+
## instruction sequences changed/relaxed by the linker.
3+
4+
# RUN: split-file %s %t
5+
6+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/main.s \
7+
# RUN: -o %t/main.o
8+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/near.s \
9+
# RUN: -o %t/near.o
10+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/far.s \
11+
# RUN: -o %t/far.o
12+
# RUN: %clang %cflags %t/main.o %t/near.o %t/far.o -o %t/main.exe -Wl,-q -static
13+
# RUN: llvm-bolt %t/main.exe -o %t/main.bolt --keep-nops --print-disasm \
14+
# RUN: --print-only=_start | FileCheck %s
15+
16+
#--- main.s
17+
18+
.text
19+
.globl _start
20+
.p2align 2
21+
.type _start, @function
22+
# CHECK-LABEL: _start
23+
_start:
24+
25+
## Function address load relaxable into nop+adr.
26+
# CHECK: nop
27+
# CHECK-NEXT: adr x0, near
28+
adrp x0, :got:near
29+
ldr x0, [x0, :got_lo12:near]
30+
31+
## Function address load relaxable into adrp+add.
32+
# CHECK-NEXT: adrp x1, far
33+
# CHECK-NEXT: add x1, x1, :lo12:far
34+
adrp x1, :got:far
35+
ldr x1, [x1, :got_lo12:far]
36+
37+
## Non-relaxable due to the instruction in-between.
38+
# CHECK-NEXT: adrp x2, __BOLT_got_zero
39+
# CHECK-NEXT: nop
40+
# CHECK-NEXT: ldr x2, [x2, :lo12:__BOLT_got_zero{{.*}}]
41+
adrp x2, :got:near
42+
nop
43+
ldr x2, [x2, :got_lo12:near]
44+
45+
## Load data object with local visibility. Relaxable into adrp+add.
46+
# CHECK-NEXT: adrp x3, "local_far_data/1"
47+
# CHECK-NEXT: add x3, x3, :lo12:"local_far_data/1"
48+
adrp x3, :got:local_far_data
49+
ldr x3, [x3, :got_lo12:local_far_data]
50+
51+
## Global data reference relaxable into adrp+add.
52+
# CHECK-NEXT: adrp x4, far_data
53+
# CHECK-NEXT: add x4, x4, :lo12:far_data
54+
adrp x4, :got:far_data
55+
ldr x4, [x4, :got_lo12:far_data]
56+
57+
ret
58+
.size _start, .-_start
59+
60+
.weak near
61+
.weak far
62+
.weak far_data
63+
64+
## Data object separated by more than 1MB from _start.
65+
.data
66+
.type local_far_data, @object
67+
local_far_data:
68+
.xword 0
69+
.size local_far_data, .-local_far_data
70+
71+
#--- near.s
72+
73+
.text
74+
.globl near
75+
.type near, @function
76+
near:
77+
ret
78+
.size near, .-near
79+
80+
#--- far.s
81+
82+
.text
83+
84+
## Insert 1MB of empty space to make objects after it unreachable by adr
85+
## instructions in _start.
86+
.space 0x100000
87+
88+
.globl far
89+
.type far, @function
90+
far:
91+
ret
92+
.size far, .-far
93+
94+
.data
95+
.globl far_data
96+
.type far_data, @object
97+
far_data:
98+
.xword 0
99+
.size far_data, .-far_data
100+

clang/docs/ClangOffloadBundler.rst

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -266,15 +266,14 @@ without differentiation based on offload kind.
266266
The target triple of the code object. See `Target Triple
267267
<https://clang.llvm.org/docs/CrossCompilation.html#target-triple>`_.
268268

269-
The bundler accepts target triples with or without the optional environment
270-
field:
269+
LLVM target triples can be written with or without the optional environment field:
271270

272271
``<arch><sub>-<vendor>-<sys>``, or
273272
``<arch><sub>-<vendor>-<sys>-<env>``
274273

275-
However, in order to standardize outputs for tools that consume bitcode
276-
bundles, bundles written by the bundler internally use only the 4-field
277-
target triple:
274+
However, in order to standardize outputs for tools that consume bitcode bundles
275+
and to parse target IDs containing dashes, the bundler only accepts target
276+
triples in the 4-field format:
278277

279278
``<arch><sub>-<vendor>-<sys>-<env>``
280279

@@ -543,4 +542,4 @@ The compressed offload bundle begins with a header followed by the compressed bi
543542
- **Compressed Data**:
544543
The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.
545544

546-
> **Note**: Version 3 of the format is under development. It uses 64-bit fields for Total File Size and Uncompressed Binary Size to support files larger than 4GB. To experiment with version 3, set the environment variable `COMPRESSED_BUNDLE_FORMAT_VERSION=3`. This support is experimental and not recommended for production use.
545+
> **Note**: Version 3 of the format is under development. It uses 64-bit fields for Total File Size and Uncompressed Binary Size to support files larger than 4GB. To experiment with version 3, set the environment variable `COMPRESSED_BUNDLE_FORMAT_VERSION=3`. This support is experimental and not recommended for production use.

clang/docs/LanguageExtensions.rst

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1652,6 +1652,7 @@ Designated initializers (N494) C
16521652
Array & element qualification (N2607) C23 C89
16531653
Attributes (N2335) C23 C89
16541654
``#embed`` (N3017) C23 C89, C++
1655+
Octal literals prefixed with ``0o`` or ``0O`` C2y C89, C++
16551656
============================================= ================================ ============= =============
16561657

16571658
Builtin type aliases
@@ -1911,6 +1912,40 @@ A simplistic usage example as might be seen in standard C++ headers follows:
19111912
// Emulate type trait for compatibility with other compilers.
19121913
#endif
19131914

1915+
1916+
.. _builtin_structured_binding_size-doc:
1917+
1918+
__builtin_structured_binding_size (C++)
1919+
---------------------------------------
1920+
1921+
The ``__builtin_structured_binding_size(T)`` type trait returns
1922+
the *structured binding size* ([dcl.struct.bind]) of type ``T``.
1923+
1924+
This is equivalent to the size of the pack ``p`` in ``auto&& [...p] = declval<T&>();``.
1925+
If the argument cannot be decomposed, ``__builtin_structured_binding_size(T)``
1926+
is not a valid expression (``__builtin_structured_binding_size`` is SFINAE-friendly).
1927+
1928+
Builtin arrays, builtin SIMD vectors,
1929+
builtin complex types, *tuple-like* types, and decomposable class types
1930+
are decomposable types.
1931+
1932+
A type is considered a valid *tuple-like* if ``std::tuple_size_v<T>`` is a valid expression,
1933+
even if there is no valid ``std::tuple_element`` specialization or suitable
1934+
``get`` function for that type.
1935+
1936+
.. code-block:: c++
1937+
1938+
template<std::size_t Idx, typename T>
1939+
requires (Idx < __builtin_structured_binding_size(T))
1940+
decltype(auto) constexpr get_binding(T&& obj) {
1941+
auto && [...p] = std::forward<T>(obj);
1942+
return p...[Idx];
1943+
}
1944+
struct S { int a = 0, b = 42; };
1945+
static_assert(__builtin_structured_binding_size(S) == 2);
1946+
static_assert(get_binding<1>(S{}) == 42);
1947+
1948+
19141949
Blocks
19151950
======
19161951

0 commit comments

Comments
 (0)