[SYCL] Allow _BitInt of size greater than 128 bits when -fintelfpga is used (#6295)

premanandrao · web-flow · commit db5f72a8167d · 2022-06-23T10:57:48.000-07:00
This revives the patch for #6152 (which was reverted in #6232). The new change in this patch is a dedicated target for spir64_fpga whose maximum _BitInt bit size is 2048. Unfortunately, the host target keeps its own lower limit, so Sema now contains an explicit check that also takes the aux target's maximum bit size into account. When -fintelfpga is specified with -fsycl, we allow a maximum bit size of 2048.
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
@@ -272,6 +272,7 @@ LANGOPT(OffloadingNewDriver, 1, 0, "use the new driver for generating offloading
 
 LANGOPT(SYCLIsDevice      , 1, 0, "Generate code for SYCL device")
 LANGOPT(SYCLIsHost        , 1, 0, "SYCL host compilation")
+LANGOPT(IntelFPGA         , 1, 0, "Perform ahead-of-time compilation for FPGA")
 LANGOPT(SYCLAllowFuncPtr  , 1, 0, "Allow function pointers in SYCL device code")
 LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
 LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -2789,7 +2789,8 @@ def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group<f_Group>;
 def fdriver_only : Flag<["-"], "fdriver-only">, Flags<[NoXarchOption, CoreOption]>,
   Group<Action_Group>, HelpText<"Only run the driver.">;
 def fintelfpga : Flag<["-"], "fintelfpga">, Group<f_Group>,
-  Flags<[CC1Option, CoreOption]>, HelpText<"Perform ahead of time compilation for FPGA">;
+  Flags<[CC1Option, CoreOption]>, MarshallingInfoFlag<LangOpts<"IntelFPGA">>,
+  HelpText<"Perform ahead-of-time compilation for FPGA">;
 def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
   HelpText<"Compile SYCL kernels for device">;
 defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-mem",
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
@@ -618,6 +618,8 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
 
   case llvm::Triple::spir64: {
     llvm::Triple HT(Opts.HostTriple);
+    bool IsFPGASubArch = Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga;
+
     switch (HT.getOS()) {
     case llvm::Triple::Win32:
       switch (HT.getEnvironment()) {
@@ -628,8 +630,12 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
         return new MicrosoftX86_64_SPIR64TargetInfo(Triple, Opts);
       }
     case llvm::Triple::Linux:
+      if (IsFPGASubArch)
+        return new LinuxTargetInfo<SPIR64FPGATargetInfo>(Triple, Opts);
       return new LinuxTargetInfo<SPIR64TargetInfo>(Triple, Opts);
     default:
+      if (IsFPGASubArch)
+        return new SPIR64FPGATargetInfo(Triple, Opts);
       return new SPIR64TargetInfo(Triple, Opts);
     }
   }
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
@@ -225,6 +225,14 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo {
                         MacroBuilder &Builder) const override;
 };
 
+// spir64_fpga target: spir64 with a maximum _BitInt width of 2048 bits.
+class LLVM_LIBRARY_VISIBILITY SPIR64FPGATargetInfo : public SPIR64TargetInfo {
+public:
+  SPIR64FPGATargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : SPIR64TargetInfo(Triple, Opts) {}
+  size_t getMaxBitIntWidth() const override { return 2048; }
+};
+
 // x86-32 SPIR Windows target
 class LLVM_LIBRARY_VISIBILITY WindowsX86_32SPIRTargetInfo
     : public WindowsTargetInfo<SPIR32TargetInfo> {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4979,14 +4979,22 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     // Disable parallel for range-rounding for anything involving FPGA
     auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
     bool HasFPGA = false;
-    for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE; ++TI)
-      if (TI->second->getTriple().getSubArch() ==
-          llvm::Triple::SPIRSubArch_fpga) {
+    for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE; ++TI) {
+      llvm::Triple SYCLTriple = TI->second->getTriple();
+      if (SYCLTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) {
         HasFPGA = true;
+        if (!IsSYCLOffloadDevice) {
+          CmdArgs.push_back("-aux-triple");
+          CmdArgs.push_back(Args.MakeArgString(SYCLTriple.getTriple()));
+        }
         break;
       }
-    if (HasFPGA)
+    }
+    if (HasFPGA) {
       CmdArgs.push_back("-fsycl-disable-range-rounding");
+      // Pass -fintelfpga to both the host and device SYCL compilations if set.
+      CmdArgs.push_back("-fintelfpga");
+    }
 
     // Add any options that are needed specific to SYCL offload while
     // performing the host side compilation.
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
@@ -110,7 +110,7 @@ bool CompilerInstance::createTarget() {
   // other side of CUDA/OpenMP/SYCL compilation.
   if (!getAuxTarget() &&
       (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice ||
-       getLangOpts().SYCLIsDevice) &&
+       getLangOpts().isSYCL()) &&
       !getFrontendOpts().AuxTriple.empty()) {
     auto TO = std::make_shared<TargetOptions>();
     TO->Triple = llvm::Triple::normalize(getFrontendOpts().AuxTriple);
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
@@ -2320,10 +2320,15 @@ QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth,
     return QualType();
   }
 
-  const TargetInfo &TI = getASTContext().getTargetInfo();
-  if (NumBits > TI.getMaxBitIntWidth()) {
+  // If the number of bits exceeds the larger of the maximum bit widths
+  // supported by the target and the aux target, issue an error diagnostic.
+  const TargetInfo *AuxTargetInfo = getASTContext().getAuxTargetInfo();
+  size_t MaxBitIntWidth = std::max(
+      (AuxTargetInfo == nullptr) ? 0 : AuxTargetInfo->getMaxBitIntWidth(),
+      getASTContext().getTargetInfo().getMaxBitIntWidth());
+  if (NumBits > MaxBitIntWidth) {
     Diag(Loc, diag::err_bit_int_max_size)
-        << IsUnsigned << static_cast<uint64_t>(TI.getMaxBitIntWidth());
+        << IsUnsigned << static_cast<uint64_t>(MaxBitIntWidth);
     return QualType();
   }
 
diff --git a/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-host-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-host-intelfpga-bitint.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -no-opaque-pointers -fsycl-is-host -fintelfpga -triple x86_64 -aux-triple spir64_fpga -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.  The test uses a value of
+// 2048 for the bitsize as that is the maximum that is currently
+// supported.
+
+// CHECK: define{{.*}} void @_Z3fooDB2048_S_(i2048* {{.*}} sret(i2048) align 8 %agg.result, i2048* {{.*}} byval(i2048) align 8 %[[ARG1:[0-9]+]], i2048* {{.*}} byval(i2048) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(2048) foo(signed _BitInt(2048) a, signed _BitInt(2048) b) {
+  // CHECK: %[[VAR_A:a]].addr = alloca i2048, align 8
+  // CHECK: %[[VAR_B:b]].addr = alloca i2048, align 8
+  // CHECK: %[[VAR_A]] = load i2048, i2048* %[[ARG1]], align 8
+  // CHECK: %[[VAR_B]] = load i2048, i2048* %[[ARG2]], align 8
+  // CHECK: store i2048 %[[VAR_A]], i2048* %[[VAR_A]].addr, align 8
+  // CHECK: store i2048 %[[VAR_B]], i2048* %[[VAR_B]].addr, align 8
+  // CHECK: %[[TEMP1:[0-9]+]] = load i2048, i2048* %[[VAR_A]].addr, align 8
+  // CHECK: %[[TEMP2:[0-9]+]] = load i2048, i2048* %[[VAR_B]].addr, align 8
+  // CHECK: %div = sdiv i2048 %[[TEMP1]], %[[TEMP2]]
+  // CHECK: store i2048 %div, i2048* %agg.result, align 8
+  // CHECK: %[[RES:[0-9]+]] = load i2048, i2048* %agg.result, align 8
+  // CHECK: store i2048 %[[RES]], i2048* %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
diff --git a/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -no-opaque-pointers -fsycl-is-device -fintelfpga -triple spir64_fpga -aux-triple x86_64-unknown-linux-gnu -IInputs -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.  The test uses a value
+// of 2048 for the bitsize, the max that is currently supported.
+
+#include "Inputs/sycl.hpp"
+
+// CHECK: define{{.*}} void @_Z3fooDB2048_S_(i2048 addrspace(4)* {{.*}} sret(i2048) align 8 %agg.result, i2048* {{.*}} byval(i2048) align 8 %[[ARG1:[0-9]+]], i2048* {{.*}} byval(i2048) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(2048) foo(signed _BitInt(2048) a, signed _BitInt(2048) b) {
+  // CHECK: %[[VAR_A:a]] = load i2048, i2048* %[[ARG1]], align 8
+  // CHECK: %[[VAR_B:b]] = load i2048, i2048* %[[ARG2]], align 8
+  // CHECK: %[[RES:div]] = sdiv i2048 %[[VAR_A]], %[[VAR_B]]
+  // CHECK: store i2048 %[[RES]], i2048 addrspace(4)* %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
+
+int main() {
+  sycl::handler h;
+  auto lambda = []() {
+    _BitInt(2048) a, b = 3, c = 4;
+    a = foo(b, c);
+  };
+  h.single_task(lambda);
+}
diff --git a/clang/test/CodeGenSYCL/sycl-host-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/sycl-host-intelfpga-bitint.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -opaque-pointers -fsycl-is-host -fintelfpga -triple x86_64 -aux-triple spir64_fpga -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.  The test uses a value of
+// 2048, the maximum bitsize that is currently supported.
+
+// CHECK: define{{.*}} void @_Z3fooDB2048_S_(ptr {{.*}} sret(i2048) align 8 %agg.result, ptr {{.*}} byval(i2048) align 8 %[[ARG1:[0-9]+]], ptr {{.*}} byval(i2048) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(2048) foo(signed _BitInt(2048) a, signed _BitInt(2048) b) {
+  // CHECK: %[[VAR_A:a]].addr = alloca i2048, align 8
+  // CHECK: %[[VAR_B:b]].addr = alloca i2048, align 8
+  // CHECK: %[[VAR_A]] = load i2048, ptr %[[ARG1]], align 8
+  // CHECK: %[[VAR_B]] = load i2048, ptr %[[ARG2]], align 8
+  // CHECK: store i2048 %[[VAR_A]], ptr %[[VAR_A]].addr, align 8
+  // CHECK: store i2048 %[[VAR_B]], ptr %[[VAR_B]].addr, align 8
+  // CHECK: %[[TEMP1:[0-9]+]] = load i2048, ptr %[[VAR_A]].addr, align 8
+  // CHECK: %[[TEMP2:[0-9]+]] = load i2048, ptr %[[VAR_B]].addr, align 8
+  // CHECK: %div = sdiv i2048 %[[TEMP1]], %[[TEMP2]]
+  // CHECK: store i2048 %div, ptr %agg.result, align 8
+  // CHECK: %[[RES:[0-9]+]] = load i2048, ptr %agg.result, align 8
+  // CHECK: store i2048 %[[RES]], ptr %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
diff --git a/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -opaque-pointers -fsycl-is-device -fintelfpga -triple spir64_fpga -aux-triple x86_64-unknown-linux-gnu -IInputs -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.  The test uses a value of
+// 2048 for bitint size, the maximum that is currently supported.
+
+#include "Inputs/sycl.hpp"
+
+// CHECK: define{{.*}} void @_Z3fooDB2048_S_(ptr addrspace(4) {{.*}} sret(i2048) align 8 %agg.result, ptr {{.*}} byval(i2048) align 8 %[[ARG1:[0-9]+]], ptr {{.*}} byval(i2048) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(2048) foo(signed _BitInt(2048) a, signed _BitInt(2048) b) {
+  // CHECK: %[[VAR_A:a]] = load i2048, ptr %[[ARG1]], align 8
+  // CHECK: %[[VAR_B:b]] = load i2048, ptr %[[ARG2]], align 8
+  // CHECK: %[[RES:div]] = sdiv i2048 %[[VAR_A]], %[[VAR_B]]
+  // CHECK: store i2048 %[[RES]], ptr addrspace(4) %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
+
+int main() {
+  sycl::handler h;
+  auto lambda = []() {
+    _BitInt(2048) a, b = 3, c = 4;
+    a = foo(b, c);
+  };
+  h.single_task(lambda);
+}
diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp b/clang/test/Driver/sycl-offload-intelfpga.cpp
@@ -20,6 +20,12 @@
 // RUN:   | FileCheck -check-prefix=CHK-TOOLS-INTELFPGA-G0 %s
 // CHK-TOOLS-INTELFPGA-G0-NOT: clang{{.*}} "-debug-info-kind=constructor"
 
+/// -fintelfpga is passed to both the host and device cc1 compilations
+// RUN:   %clangxx -### -fsycl -fintelfpga %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-HOST-DEVICE %s
+// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-fintelfpga"
+// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fintelfpga"{{.*}} "-fsycl-is-host"
+
 /// FPGA target implies -fsycl-disable-range-rounding
 // RUN:   %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c
@@ -86,6 +86,12 @@
 
 /// ###########################################################################
 
+/// Check that -aux-triple is passed with -fsycl -fintelfpga
+// RUN:    %clang -### -fsycl -fintelfpga %s 2>&1 \
+// RUN:    | FileCheck -DARCH=x86_64 -DARCH2=spir64_fpga -check-prefix=CHK-SYCL-FPGA-AUX-TRIPLE %s
+// CHK-SYCL-FPGA-AUX-TRIPLE: clang{{.*}} "-cc1" "-triple" "[[ARCH]]-unknown-linux-gnu"{{.*}} "-aux-triple" "[[ARCH2]]-unknown-unknown"{{.*}} "-fsycl-is-host"
+/// ###########################################################################
+
 /// Validate SYCL option values
 // RUN:   %clang -### -fsycl-device-code-split=bad_value -fsycl  %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-SYCL-BAD-OPT-VALUE -Doption=-fsycl-device-code-split %s
diff --git a/clang/test/SemaSYCL/sycl-intelfpga.cpp b/clang/test/SemaSYCL/sycl-intelfpga.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -fsycl-is-device -fintelfpga -verify=device-intelfpga -fsyntax-only %s -triple spir64_fpga -aux-triple x86_64-unknown-linux-gnu
+// RUN: %clang_cc1 -fsycl-is-host -fintelfpga -verify=host-intelfpga -fsyntax-only %s -triple x86_64 -aux-triple spir64_fpga
+// RUN: %clang_cc1 -fsycl-is-device -verify=device -fsyntax-only %s
+// RUN: %clang_cc1 -fsycl-is-host -verify=host -fsyntax-only %s
+
+// Tests that we do not issue errors for _BitInts of size greater than 128
+// when -fintelfpga is enabled.  The backend is expected to be able to handle
+// this.  When -fintelfpga is not passed, we continue to diagnose.
+
+// device-intelfpga-error@+4 3{{signed _BitInt of bit sizes greater than 2048 not supported}}
+// host-intelfpga-error@+3 3{{signed _BitInt of bit sizes greater than 2048 not supported}}
+// device-error@+2 3{{signed _BitInt of bit sizes greater than 128 not supported}}
+// host-error@+1 3{{signed _BitInt of bit sizes greater than 128 not supported}}
+signed _BitInt(2049) foo(signed _BitInt(2049) a, signed _BitInt(2049) b) {
+  return a / b;
+}
+// device-error@+4 3{{signed _BitInt of bit sizes greater than 128 not supported}}
+// host-error@+3 3{{signed _BitInt of bit sizes greater than 128 not supported}}
+// device-intelfpga-no-diagnostic@+2
+// host-intelfpga-no-diagnostic@+1
+signed _BitInt(215) foo(signed _BitInt(215) a, signed _BitInt(215) b) {
+  return a + b;
+}