Skip to content

Commit db5f72a

Browse files
authored
[SYCL] Allow _Bitint of size greater than 128 bits when -fintelfpga is used (#6295)
This revives the patch for #6152 (which was reverted in #6232). In this patch, the new change is that there is a new target for spir64_fpga and the maximum bitsize limit for that target is 2048. Unfortunately, for the host, there is now an explicit check in Sema for the same bitsize. When -fintefpga is specified with -fsycl, we allow a maximum bitsize of 2048.
1 parent acffd4b commit db5f72a

File tree

14 files changed

+176
-9
lines changed

14 files changed

+176
-9
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ LANGOPT(OffloadingNewDriver, 1, 0, "use the new driver for generating offloading
272272

273273
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
274274
LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
275+
LANGOPT(IntelFPGA , 1, 0, "Perform ahead-of-time compilation for FPGA")
275276
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
276277
LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
277278
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2789,7 +2789,8 @@ def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group<f_Group>;
27892789
def fdriver_only : Flag<["-"], "fdriver-only">, Flags<[NoXarchOption, CoreOption]>,
27902790
Group<Action_Group>, HelpText<"Only run the driver.">;
27912791
def fintelfpga : Flag<["-"], "fintelfpga">, Group<f_Group>,
2792-
Flags<[CC1Option, CoreOption]>, HelpText<"Perform ahead of time compilation for FPGA">;
2792+
Flags<[CC1Option, CoreOption]>, MarshallingInfoFlag<LangOpts<"IntelFPGA">>,
2793+
HelpText<"Perform ahead-of-time compilation for FPGA">;
27932794
def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
27942795
HelpText<"Compile SYCL kernels for device">;
27952796
defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-mem",

clang/lib/Basic/Targets.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,8 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
618618

619619
case llvm::Triple::spir64: {
620620
llvm::Triple HT(Opts.HostTriple);
621+
bool IsFPGASubArch = Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga;
622+
621623
switch (HT.getOS()) {
622624
case llvm::Triple::Win32:
623625
switch (HT.getEnvironment()) {
@@ -628,8 +630,12 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
628630
return new MicrosoftX86_64_SPIR64TargetInfo(Triple, Opts);
629631
}
630632
case llvm::Triple::Linux:
633+
if (IsFPGASubArch)
634+
return new LinuxTargetInfo<SPIR64FPGATargetInfo>(Triple, Opts);
631635
return new LinuxTargetInfo<SPIR64TargetInfo>(Triple, Opts);
632636
default:
637+
if (IsFPGASubArch)
638+
return new SPIR64FPGATargetInfo(Triple, Opts);
633639
return new SPIR64TargetInfo(Triple, Opts);
634640
}
635641
}

clang/lib/Basic/Targets/SPIR.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo {
225225
MacroBuilder &Builder) const override;
226226
};
227227

228+
// spir64_fpga target
229+
class LLVM_LIBRARY_VISIBILITY SPIR64FPGATargetInfo : public SPIR64TargetInfo {
230+
public:
231+
SPIR64FPGATargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
232+
: SPIR64TargetInfo(Triple, Opts) {}
233+
virtual size_t getMaxBitIntWidth() const override { return 2048; }
234+
};
235+
228236
// x86-32 SPIR Windows target
229237
class LLVM_LIBRARY_VISIBILITY WindowsX86_32SPIRTargetInfo
230238
: public WindowsTargetInfo<SPIR32TargetInfo> {

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4979,14 +4979,22 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
49794979
// Disable parallel for range-rounding for anything involving FPGA
49804980
auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
49814981
bool HasFPGA = false;
4982-
for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE; ++TI)
4983-
if (TI->second->getTriple().getSubArch() ==
4984-
llvm::Triple::SPIRSubArch_fpga) {
4982+
for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE; ++TI) {
4983+
llvm::Triple SYCLTriple = TI->second->getTriple();
4984+
if (SYCLTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) {
49854985
HasFPGA = true;
4986+
if (!IsSYCLOffloadDevice) {
4987+
CmdArgs.push_back("-aux-triple");
4988+
CmdArgs.push_back(Args.MakeArgString(SYCLTriple.getTriple()));
4989+
}
49864990
break;
49874991
}
4988-
if (HasFPGA)
4992+
}
4993+
if (HasFPGA) {
49894994
CmdArgs.push_back("-fsycl-disable-range-rounding");
4995+
// Pass -fintelfpga to both the host and device SYCL compilations if set.
4996+
CmdArgs.push_back("-fintelfpga");
4997+
}
49904998

49914999
// Add any options that are needed specific to SYCL offload while
49925000
// performing the host side compilation.

clang/lib/Frontend/CompilerInstance.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ bool CompilerInstance::createTarget() {
110110
// other side of CUDA/OpenMP/SYCL compilation.
111111
if (!getAuxTarget() &&
112112
(getLangOpts().CUDA || getLangOpts().OpenMPIsDevice ||
113-
getLangOpts().SYCLIsDevice) &&
113+
getLangOpts().isSYCL()) &&
114114
!getFrontendOpts().AuxTriple.empty()) {
115115
auto TO = std::make_shared<TargetOptions>();
116116
TO->Triple = llvm::Triple::normalize(getFrontendOpts().AuxTriple);

clang/lib/Sema/SemaType.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,10 +2320,15 @@ QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth,
23202320
return QualType();
23212321
}
23222322

2323-
const TargetInfo &TI = getASTContext().getTargetInfo();
2324-
if (NumBits > TI.getMaxBitIntWidth()) {
2323+
// If the number of bits exceed the maximum bit width supported on
2324+
// both host and device, issue an error diagnostic.
2325+
const TargetInfo *AuxTargetInfo = getASTContext().getAuxTargetInfo();
2326+
size_t MaxBitIntWidth = std::max(
2327+
(AuxTargetInfo == nullptr) ? 0 : AuxTargetInfo->getMaxBitIntWidth(),
2328+
getASTContext().getTargetInfo().getMaxBitIntWidth());
2329+
if (NumBits > MaxBitIntWidth) {
23252330
Diag(Loc, diag::err_bit_int_max_size)
2326-
<< IsUnsigned << static_cast<uint64_t>(TI.getMaxBitIntWidth());
2331+
<< IsUnsigned << static_cast<uint64_t>(MaxBitIntWidth);
23272332
return QualType();
23282333
}
23292334

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// RUN: %clang_cc1 -no-opaque-pointers -fsycl-is-host -fintelfpga -triple x86_64 -aux-triple spir64_fpga -emit-llvm %s -o - | FileCheck %s
2+
3+
// This test checks that we generate appropriate code for division
4+
// operations of _BitInts of size greater than 128 bits, since it
5+
// is allowed when -fintelfpga is enabled. The test uses a value of
6+
// 2048 for the bitsize as that is the maximum that is currently
7+
// supported.
8+
9+
// CHECK: define{{.*}} void @_Z3fooDB2048_S_(i2048* {{.*}} sret(i2048) align 8 %agg.result, i2048* {{.*}} byval(i2048) align 8 %[[ARG1:[0-9]+]], i2048* {{.*}} byval(i2048) align 8 %[[ARG2:[0-9]+]])
10+
signed _BitInt(2048) foo(signed _BitInt(2048) a, signed _BitInt(2048) b) {
11+
// CHECK: %[[VAR_A:a]].addr = alloca i2048, align 8
12+
// CHECK: %[[VAR_B:b]].addr = alloca i2048, align 8
13+
// CHECK: %[[VAR_A]] = load i2048, i2048* %[[ARG1]], align 8
14+
// CHECK: %[[VAR_B]] = load i2048, i2048* %[[ARG2]], align 8
15+
// CHECK: store i2048 %[[VAR_A]], i2048* %[[VAR_A]].addr, align 8
16+
// CHECK: store i2048 %[[VAR_B]], i2048* %[[VAR_B]].addr, align 8
17+
// CHECK: %[[TEMP1:[0-9]+]] = load i2048, i2048* %[[VAR_A]].addr, align 8
18+
// CHECK: %[[TEMP2:[0-9]+]] = load i2048, i2048* %[[VAR_B]].addr, align 8
19+
// CHECK: %div = sdiv i2048 %[[TEMP1]], %[[TEMP2]]
20+
// CHECK: store i2048 %div, i2048* %agg.result, align 8
21+
// CHECK: %[[RES:[0-9+]]] = load i2048, i2048* %agg.result, align 8
22+
// CHECK: store i2048 %[[RES]], i2048* %agg.result, align 8
23+
// CHECK: ret void
24+
return a / b;
25+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// RUN: %clang_cc1 -no-opaque-pointers -fsycl-is-device -fintelfpga -triple spir64_fpga -aux-triple x86_64-unknown-linux-gnu -IInputs -emit-llvm %s -o - | FileCheck %s
2+
3+
// This test checks that we generate appropriate code for division
4+
// operations of _BitInts of size greater than 128 bits, since it
5+
// is allowed when -fintelfpga is enabled. The test uses a value
6+
// of 2048 for the bitsize, the max that is currently supported.
7+
8+
#include "Inputs/sycl.hpp"
9+
10+
// CHECK: define{{.*}} void @_Z3fooDB2048_S_(i2048 addrspace(4)* {{.*}} sret(i2048) align 8 %agg.result, i2048* {{.*}} byval(i2048) align 8 %[[ARG1:[0-9]+]], i2048* {{.*}} byval(i2048) align 8 %[[ARG2:[0-9]+]])
11+
signed _BitInt(2048) foo(signed _BitInt(2048) a, signed _BitInt(2048) b) {
12+
// CHECK: %[[VAR_A:a]] = load i2048, i2048* %[[ARG1]], align 8
13+
// CHECK: %[[VAR_B:b]] = load i2048, i2048* %[[ARG2]], align 8
14+
// CHECK: %[[RES:div]] = sdiv i2048 %[[VAR_A]], %[[VAR_B]]
15+
// CHECK: store i2048 %[[RES]], i2048 addrspace(4)* %agg.result, align 8
16+
// CHECK: ret void
17+
return a / b;
18+
}
19+
20+
int main() {
21+
sycl::handler h;
22+
auto lambda = []() {
23+
_BitInt(2048) a, b = 3, c = 4;
24+
a = foo(b, c);
25+
};
26+
h.single_task(lambda);
27+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -opaque-pointers -fsycl-is-host -fintelfpga -triple x86_64 -aux-triple spir64_fpga -emit-llvm %s -o - | FileCheck %s
2+
3+
// This test checks that we generate appropriate code for division
4+
// operations of _BitInts of size greater than 128 bits, since it
5+
// is allowed when -fintelfpga is enabled. The test uses a value of
6+
// 2048, the maximum bitsize that is currently supported.
7+
8+
// CHECK: define{{.*}} void @_Z3fooDB2048_S_(ptr {{.*}} sret(i2048) align 8 %agg.result, ptr {{.*}} byval(i2048) align 8 %[[ARG1:[0-9]+]], ptr {{.*}} byval(i2048) align 8 %[[ARG2:[0-9]+]])
9+
signed _BitInt(2048) foo(signed _BitInt(2048) a, signed _BitInt(2048) b) {
10+
// CHECK: %[[VAR_A:a]].addr = alloca i2048, align 8
11+
// CHECK: %[[VAR_B:b]].addr = alloca i2048, align 8
12+
// CHECK: %[[VAR_A]] = load i2048, ptr %[[ARG1]], align 8
13+
// CHECK: %[[VAR_B]] = load i2048, ptr %[[ARG2]], align 8
14+
// CHECK: store i2048 %[[VAR_A]], ptr %[[VAR_A]].addr, align 8
15+
// CHECK: store i2048 %[[VAR_B]], ptr %[[VAR_B]].addr, align 8
16+
// CHECK: %[[TEMP1:[0-9]+]] = load i2048, ptr %[[VAR_A]].addr, align 8
17+
// CHECK: %[[TEMP2:[0-9]+]] = load i2048, ptr %[[VAR_B]].addr, align 8
18+
// CHECK: %div = sdiv i2048 %[[TEMP1]], %[[TEMP2]]
19+
// CHECK: store i2048 %div, ptr %agg.result, align 8
20+
// CHECK: %[[RES:[0-9+]]] = load i2048, ptr %agg.result, align 8
21+
// CHECK: store i2048 %[[RES]], ptr %agg.result, align 8
22+
// CHECK: ret void
23+
return a / b;
24+
}

0 commit comments

Comments
 (0)