Skip to content

Commit d164fd9

Browse files
author
Andrew Lamzed-Short
authored
[SYCL] Reimplemented -f[no]sycl-early-optimizations flag (#7701)
Reimplemented the `-f[no]sycl-early-optimizations` compiler flag to separate it from the meaning of `-disable-llvm-passes` for more fidelity. This required a change to its definition, setting of a new codegen option behind-the-scenes, and small logic changes to the optimization pipeline to factor in the new flag. Existing tests all still pass.
1 parent 4713aeb commit d164fd9

File tree

11 files changed

+165
-91
lines changed

11 files changed

+165
-91
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,9 @@ CODEGENOPT(OpaquePointers, 1, 0)
513513
/// non-deleting destructors. (No effect on Microsoft ABI.)
514514
CODEGENOPT(CtorDtorReturnThis, 1, 0)
515515

516+
/// Whether to disable the standard optimization pipeline for the SYCL device compiler.
517+
CODEGENOPT(DisableSYCLEarlyOpts, 1, 0)
518+
516519
#undef CODEGENOPT
517520
#undef ENUM_CODEGENOPT
518521
#undef VALUE_CODEGENOPT

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5110,7 +5110,8 @@ def : Flag<["-"], "fno-sycl-explicit-simd">,
51105110
Flags<[CoreOption, Deprecated]>,
51115111
Group<clang_ignored_legacy_options_Group>,
51125112
HelpText<"Disable SYCL explicit SIMD extension. (deprecated)">;
5113-
defm sycl_early_optimizations : OptOutCC1FFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>;
5113+
defm sycl_early_optimizations : OptOutCC1FFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>,
5114+
MarshallingInfoFlag<CodeGenOpts<"DisableSYCLEarlyOpts">>;
51145115
def fsycl_dead_args_optimization : Flag<["-"], "fsycl-dead-args-optimization">,
51155116
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Enables "
51165117
"elimination of DPC++ dead kernel arguments">;

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -916,11 +916,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
916916

917917
ModulePassManager MPM;
918918

919-
// FIXME: Change this when -fno-sycl-early-optimizations is not tied to
920-
// -disable-llvm-passes.
921-
if (CodeGenOpts.DisableLLVMPasses && LangOpts.SYCLIsDevice)
922-
MPM.addPass(SYCLPropagateAspectsUsagePass());
923-
924919
if (!CodeGenOpts.DisableLLVMPasses) {
925920
// Map our optimization levels into one of the distinct levels used to
926921
// configure the pipeline.
@@ -1021,7 +1016,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
10211016
MPM.addPass(InstrProfiling(*Options, false));
10221017
});
10231018

1024-
if (CodeGenOpts.OptimizationLevel == 0) {
1019+
if (CodeGenOpts.DisableSYCLEarlyOpts) {
1020+
MPM =
1021+
PB.buildO0DefaultPipeline(OptimizationLevel::O0, IsLTO || IsThinLTO);
1022+
} else if (CodeGenOpts.OptimizationLevel == 0) {
10251023
MPM = PB.buildO0DefaultPipeline(Level, IsLTO || IsThinLTO);
10261024
} else if (IsThinLTO) {
10271025
MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level);
@@ -1035,31 +1033,26 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
10351033
MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
10361034
MPM.addPass(ModuleMemProfilerPass());
10371035
}
1038-
}
1039-
if (LangOpts.SYCLIsDevice) {
1040-
MPM.addPass(SYCLMutatePrintfAddrspacePass());
1041-
if (!CodeGenOpts.DisableLLVMPasses && LangOpts.EnableDAEInSpirKernels)
1042-
MPM.addPass(DeadArgumentEliminationSYCLPass());
1043-
}
10441036

1045-
// Add SPIRITTAnnotations pass to the pass manager if
1046-
// -fsycl-instrument-device-code option was passed. This option can be used
1047-
// only with spir triple.
1048-
if (LangOpts.SYCLIsDevice && CodeGenOpts.SPIRITTAnnotations) {
1049-
assert(TargetTriple.isSPIR() &&
1050-
"ITT annotations can only be added to a module with spir target");
1051-
MPM.addPass(SPIRITTAnnotationsPass());
1052-
}
1037+
if (LangOpts.SYCLIsDevice) {
1038+
MPM.addPass(SYCLMutatePrintfAddrspacePass());
1039+
if (LangOpts.EnableDAEInSpirKernels)
1040+
MPM.addPass(DeadArgumentEliminationSYCLPass());
1041+
1042+
// Add SPIRITTAnnotations pass to the pass manager if
1043+
// -fsycl-instrument-device-code option was passed. This option can be
1044+
// used only with spir triple.
1045+
if (CodeGenOpts.SPIRITTAnnotations) {
1046+
assert(
1047+
TargetTriple.isSPIR() &&
1048+
"ITT annotations can only be added to a module with spir target");
1049+
MPM.addPass(SPIRITTAnnotationsPass());
1050+
}
10531051

1054-
// Allocate static local memory in SYCL kernel scope for each allocation
1055-
// call. It should be called after inlining pass.
1056-
if (LangOpts.SYCLIsDevice) {
1057-
// Group local memory pass depends on inlining. Turn it on even in case if
1058-
// all llvm passes or SYCL early optimizations are disabled.
1059-
// FIXME: Remove this workaround when dependency on inlining is eliminated.
1060-
if (CodeGenOpts.DisableLLVMPasses)
1061-
MPM.addPass(AlwaysInlinerPass(false));
1062-
MPM.addPass(SYCLLowerWGLocalMemoryPass());
1052+
// Allocate static local memory in SYCL kernel scope for each allocation
1053+
// call.
1054+
MPM.addPass(SYCLLowerWGLocalMemoryPass());
1055+
}
10631056
}
10641057

10651058
// Add a verifier pass if requested. We don't have to do this if the action

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,11 +1691,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
16911691
{std::string(Split.first), std::string(Split.second)});
16921692
}
16931693

1694-
Opts.DisableLLVMPasses =
1695-
Args.hasArg(OPT_disable_llvm_passes) ||
1696-
(Args.hasArg(OPT_fsycl_is_device) && T.isSPIR() &&
1697-
Args.hasArg(OPT_fno_sycl_early_optimizations));
1698-
16991694
const llvm::Triple::ArchType DebugEntryValueArchs[] = {
17001695
llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
17011696
llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,

clang/test/CodeGenSYCL/device_has.cpp

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,32 @@
11
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
22

3-
// Tests for IR of device_has(aspect, ...) attribute and
4-
// !sycl_used_aspects metadata
3+
// Tests for IR of device_has(aspect, ...) attribute
54
#include "sycl.hpp"
65

76
using namespace sycl;
87
queue q;
98

109
// CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]]
1110

12-
// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]]
13-
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
11+
// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
1412
[[sycl::device_has(sycl::aspect::cpu)]] void func1() {}
1513

16-
// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]]
17-
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS2]]
14+
// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
1815
[[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {}
1916

2017
// CHECK: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
2118
[[sycl::device_has()]] void func3() {}
2219

23-
// CHECK: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]]
24-
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS3]]
20+
// CHECK: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
2521
template <sycl::aspect Aspect>
2622
[[sycl::device_has(Aspect)]] void func4() {}
2723

28-
// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]]
29-
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
24+
// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
3025
[[sycl::device_has(sycl::aspect::cpu)]] void func5();
3126
void func5() {}
3227

3328
constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; }
34-
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]]
35-
// CHECK-SAME: !sycl_used_aspects ![[ASPECTS1]]
29+
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
3630
[[sycl::device_has(getAspect())]] void func6() {}
3731

3832
class KernelFunctor {

clang/test/CodeGenSYCL/group-local-memory.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,18 @@
1212
// Check that AlwaysInliner pass is always run for compilation of SYCL device
1313
// target code, even if all optimizations are disabled.
1414

15-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -disable-llvm-passes \
16-
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
17-
// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK
1815
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -fno-sycl-early-optimizations \
1916
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
2017
// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK
2118

2219
// CHECK-INL: Running pass: ModuleInlinerWrapperPass on [module]
2320
// CHECK-ALWINL: Running pass: AlwaysInlinerPass on [module]
2421
// CHECK: Running pass: SYCLLowerWGLocalMemoryPass on [module]
22+
23+
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -disable-llvm-passes \
24+
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
25+
// RUN: | FileCheck %s --check-prefixes=CHECK-NO-PASSES-ALWINL,CHECK-NO-PASSES,CHECK-NO-PASSES-INL
26+
27+
// CHECK-NO-PASSES-INL-NOT: Running pass: ModuleInlinerWrapperPass on [module]
28+
// CHECK-NO-PASSES-ALWINL-NOT: Running pass: AlwaysInlinerPass on [module]
29+
// CHECK-NO-PASSES-NOT: Running pass: SYCLLowerWGLocalMemoryPass on [module]

clang/test/CodeGenSYCL/sub-group-size.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=NONE,ALL
2-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=primary -sycl-std=2020 -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=PRIM_DEF,ALL
3-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=10 -sycl-std=2020 -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=TEN_DEF,ALL
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -triple spir64-unknown-unknown -fno-sycl-early-optimizations -emit-llvm -o - %s | FileCheck %s --check-prefixes=NONE,ALL
2+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=primary -sycl-std=2020 -triple spir64-unknown-unknown -fno-sycl-early-optimizations -emit-llvm -o - %s | FileCheck %s --check-prefixes=PRIM_DEF,ALL
3+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=10 -sycl-std=2020 -triple spir64-unknown-unknown -fno-sycl-early-optimizations -emit-llvm -o - %s | FileCheck %s --check-prefixes=TEN_DEF,ALL
44

55
// Ensure that both forms of the new sub_group_size properly emit their metadata
66
// on sycl-kernel and sycl-external functions.

clang/test/CodeGenSYCL/uses_aspects.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ template <sycl::aspect Aspect>
2828
void func5() {}
2929

3030
[[__sycl_detail__::__uses_aspects__(sycl::aspect::cpu)]] void func6();
31-
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_used_aspects ![[ASPECTS4:[0-9]+]] {
31+
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_used_aspects ![[ASPECTS1]] {
3232
void func6() {
3333
Type1WithAspect TestObj1;
3434
Type2WithAspect TestObj2;
@@ -58,10 +58,9 @@ void foo() {
5858
});
5959
}
6060
// CHECK: !sycl_types_that_use_aspects = !{![[TYPE1:[0-9]+]], ![[TYPE2:[0-9]+]]}
61-
// CHECK-DAG: [[TYPE1]] = !{!"class.Type1WithAspect", i32 1}
62-
// CHECK-DAG: [[TYPE2]] = !{!"class.Type2WithAspect", i32 5, i32 1}
63-
// CHECK-DAG: [[EMPTYASPECTS]] = !{}
64-
// CHECK-DAG: [[ASPECTS1]] = !{i32 1}
65-
// CHECK-DAG: [[ASPECTS2]] = !{i32 5, i32 2}
66-
// CHECK-DAG: [[ASPECTS3]] = !{i32 0}
67-
// CHECK-DAG: [[ASPECTS4]] = !{i32 1, i32 5}
61+
// CHECK: [[TYPE1]] = !{!"class.Type1WithAspect", i32 1}
62+
// CHECK: [[TYPE2]] = !{!"class.Type2WithAspect", i32 5, i32 1}
63+
// CHECK: [[EMPTYASPECTS]] = !{}
64+
// CHECK: [[ASPECTS1]] = !{i32 1}
65+
// CHECK: [[ASPECTS2]] = !{i32 5, i32 2}
66+
// CHECK: [[ASPECTS3]] = !{i32 0}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -ast-dump -o - %s | FileCheck %s --check-prefixes=NOINLINE,CHECK
2+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -ast-dump -o - %s | FileCheck %s --check-prefixes=INLINE,CHECK
3+
4+
// Tests that the appropriate inlining attributes are added to kernel lambda functions,
5+
// with no inline attribute being added when -fno-sycl-force-inline-kernel-lambda is set
6+
// and attribute not explicitly provided.
7+
8+
#include "sycl.hpp"
9+
10+
int main() {
11+
sycl::queue q;
12+
13+
q.submit([&](sycl::handler &h) {
14+
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:17
15+
// INLINE: AlwaysInlineAttr
16+
// NOINLINE-NOT: AlwaysInlineAttr
17+
h.parallel_for<class KernelName>([] {});
18+
});
19+
20+
q.submit([&](sycl::handler &h) {
21+
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:23
22+
// CHECK: AlwaysInlineAttr
23+
h.parallel_for<class KernelNameInline>([]() __attribute__((always_inline)) {});
24+
});
25+
26+
q.submit([&](sycl::handler &h) {
27+
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:30
28+
// CHECK: NoInlineAttr
29+
// CHECK-NOT: AlwaysInlineAttr
30+
h.parallel_for<class KernelNameNoInline>([]() __attribute__((noinline)) {});
31+
});
32+
33+
/// The flag is ignored for ESIMD kernels
34+
q.submit([&](sycl::handler &h) {
35+
// CHECK: LambdaExpr{{.*}}sycl-force-inline-kernel-lambda-ast.cpp:39
36+
// CHECK: SYCLSimdAttr
37+
// CHECK-NOT: AlwaysInlineAttr
38+
// CHECK-NOT: NoInlineAttr
39+
h.parallel_for<class KernelNameESIMD>([]() __attribute__((sycl_explicit_simd)) {});
40+
});
41+
42+
return 0;
43+
}

clang/test/SemaSYCL/sycl-force-inline-kernel-lambda.cpp

Lines changed: 0 additions & 30 deletions
This file was deleted.

0 commit comments

Comments
 (0)