Skip to content

Commit e92eca9

Browse files
author
Pavel Chupin
authored
Merge branch 'sycl' into llvmspirv_pulldown
2 parents 129372d + c1b6835 commit e92eca9

File tree

84 files changed

+2684
-936
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+2684
-936
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ sycl/doc/extensions/ @intel/dpcpp-specification-reviewers
3535
sycl/plugins/level_zero/ @intel/dpcpp-l0-pi-reviewers
3636

3737
# ESIMD CPU emulator plug-in
38-
sycl/plugins/esimd_cpu/ @intel/dpcpp-esimd-reviewers
38+
sycl/plugins/esimd_emulator/ @intel/dpcpp-esimd-reviewers
3939

4040
# CUDA plugin
4141
sycl/plugins/cuda/ @intel/llvm-reviewers-cuda

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,8 @@ BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
347347
BUILTIN(__nvvm_rcp_rm_f, "ff", "")
348348
BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
349349
BUILTIN(__nvvm_rcp_rp_f, "ff", "")
350+
BUILTIN(__nvvm_rcp_approx_f, "ff", "")
351+
BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
350352

351353
BUILTIN(__nvvm_rcp_rn_d, "dd", "")
352354
BUILTIN(__nvvm_rcp_rz_d, "dd", "")

clang/include/clang/Driver/Options.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1553,7 +1553,7 @@ def ffp_exception_behavior_EQ : Joined<["-"], "ffp-exception-behavior=">, Group<
15531553
MarshallingInfoEnum<LangOpts<"FPExceptionMode">, "FPE_Ignore">;
15541554
defm fast_math : BoolFOption<"fast-math",
15551555
LangOpts<"FastMath">, DefaultFalse,
1556-
PosFlag<SetTrue, [CC1Option], "Allow aggressive, lossy floating-point optimizations",
1556+
PosFlag<SetTrue, [CC1Option, CoreOption], "Allow aggressive, lossy floating-point optimizations",
15571557
[cl_fast_relaxed_math.KeyPath]>,
15581558
NegFlag<SetFalse>>;
15591559
def menable_unsafe_fp_math : Flag<["-"], "menable-unsafe-fp-math">, Flags<[CC1Option]>,

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2059,10 +2059,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
20592059
llvm::Type *SrcTy = Src->getType();
20602060
llvm::Type *DstTy = ConvertType(DestTy);
20612061
if (SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() &&
2062-
SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) {
2063-
llvm_unreachable("wrong cast for pointers in different address spaces"
2064-
"(must be an address space cast)!");
2065-
}
2062+
SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace())
2063+
Src = Builder.CreateAddrSpaceCast(
2064+
Src, llvm::PointerType::get(SrcTy, DstTy->getPointerAddressSpace()));
20662065

20672066
if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast)) {
20682067
if (auto *PT = DestTy->getAs<PointerType>()) {

clang/lib/Driver/ToolChain.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1287,7 +1287,7 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs(
12871287
// improved upon
12881288
auto SingleTargetTripleCount = [&Args](OptSpecifier Opt) {
12891289
const Arg *TargetArg = Args.getLastArg(Opt);
1290-
if (TargetArg && TargetArg->getValues().size() == 1)
1290+
if (!TargetArg || TargetArg->getValues().size() == 1)
12911291
return true;
12921292
return false;
12931293
};

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,8 +1289,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
12891289

12901290
const llvm::Triple &DeviceTriple = TI.getTriple();
12911291
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
1292-
if (DeviceTriple.isSPIR() &&
1293-
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga)
1292+
if (DeviceTriple.isNVPTX() ||
1293+
(DeviceTriple.isSPIR() &&
1294+
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
12941295
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
12951296
// Enable generation of USM address spaces for FPGA.
12961297
if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) {

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3719,6 +3719,12 @@ static void CheckSYCL2020SubGroupSizes(Sema &S, FunctionDecl *SYCLKernel,
37193719
CalcEffectiveSubGroup(S.Context, S.getLangOpts(), FD))
37203720
return;
37213721

3722+
// No need to validate __spirv routines here since they
3723+
// are mapped to the equivalent SPIRV operations.
3724+
const IdentifierInfo *II = FD->getIdentifier();
3725+
if (II && II->getName().startswith("__spirv_"))
3726+
return;
3727+
37223728
// Else we need to figure out why they don't match.
37233729
SourceLocation FDAttrLoc = GetSubGroupLoc(FD);
37243730
SourceLocation KernelAttrLoc = GetSubGroupLoc(SYCLKernel);

clang/test/CodeGenSYCL/Inputs/sycl.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44

55
extern "C" int printf(const char* fmt, ...);
66

7+
#ifdef __SYCL_DEVICE_ONLY__
8+
__attribute__((convergent)) extern SYCL_EXTERNAL void
9+
__spirv_ControlBarrier(int, int, int) noexcept;
10+
#endif
11+
712
// Dummy runtime classes to model SYCL API.
813
inline namespace cl {
914
namespace sycl {
@@ -399,10 +404,19 @@ kernel_parallel_for(const KernelType &KernelFunc) {
399404
KernelFunc(id<Dims>());
400405
}
401406

407+
// Dummy parallel_for_work_item function to mimic calls from
408+
// parallel_for_work_group.
409+
void parallel_for_work_item() {
410+
#ifdef __SYCL_DEVICE_ONLY__
411+
__spirv_ControlBarrier(0, 0, 0);
412+
#endif
413+
}
414+
402415
template <typename KernelName, typename KernelType, int Dims>
403416
ATTR_SYCL_KERNEL void
404417
kernel_parallel_for_work_group(const KernelType &KernelFunc) {
405418
KernelFunc(group<Dims>());
419+
parallel_for_work_item();
406420
}
407421

408422
class handler {
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// RUN: %clang_cc1 -triple spir64-unknown-linux -fsycl-is-device -disable-llvm-passes -emit-llvm -x c++ %s -o - | FileCheck %s
2+
3+
// Test to verify that address space cast is generated correctly for __builtin_alloca
4+
5+
__attribute__((sycl_device)) void foo() {
6+
// CHECK: %TestVar = alloca i32 addrspace(4)*, align 8
7+
// CHECK: %TestVar.ascast = addrspacecast i32 addrspace(4)** %TestVar to i32 addrspace(4)* addrspace(4)*
8+
// CHECK: %[[ALLOCA:[0-9]+]] = alloca i8, i64 1, align 8
9+
// CHECK: %[[ADDRSPCAST:[0-9]+]] = addrspacecast i8* %[[ALLOCA]] to i8* addrspace(4)*
10+
// CHECK: %[[BITCAST:[0-9]+]] = bitcast i8* addrspace(4)* %[[ADDRSPCAST]] to i32 addrspace(4)*
11+
// CHECK: store i32 addrspace(4)* %[[BITCAST]], i32 addrspace(4)* addrspace(4)* %TestVar.ascast, align 8
12+
int *TestVar = (int *)__builtin_alloca(1);
13+
}

clang/test/CodeGenSYCL/dead-elim-sycl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66

77

88
// CHECK: Running pass: DeadArgumentEliminationSYCLPass on [module]
9-
// DISABLE-NOT: DeadArgumentEliminationSYCLPass
9+
// DISABLE-NOT: DeadArgumentEliminationSYCLPass
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -internal-isystem %S/Inputs -fdeclare-spirv-builtins %s -emit-llvm -o - | FileCheck %s
2+
3+
// Test that when __spirv intrinsics are invoked from kernel functions
4+
// that have a sub_group_size specified, that such invocations don't
5+
// trigger the error diagnostic that the intrinsic routines must also
6+
// marked with the same attribute.
7+
8+
#include "Inputs/sycl.hpp"
9+
10+
int main() {
11+
sycl::queue q;
12+
13+
q.submit([&](sycl::handler &cgh) {
14+
auto kernel_ = [=](sycl::group<1> item) [[intel::sub_group_size(8)]] {
15+
};
16+
17+
cgh.parallel_for_work_group<class kernel_class>(
18+
cl::sycl::range<1>(), cl::sycl::range<1>(), kernel_);
19+
});
20+
return 0;
21+
}
22+
23+
// CHECK: define dso_local spir_kernel void @{{.*}}main{{.*}}kernel_class() {{.*}} !intel_reqd_sub_group_size ![[SUBGROUPSIZE:[0-9]+]]
24+
// CHECK: tail call spir_func void @{{.*}}__spirv_ControlBarrier{{.*}}({{.*}})
25+
26+
// CHECK: declare spir_func void @{{.*}}__spirv_ControlBarrier{{.*}}({{.*}})
27+
28+
// CHECK: ![[SUBGROUPSIZE]] = !{i32 8}

clang/test/Driver/sycl-offload.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,11 @@
671671
// RUN: | FileCheck -check-prefix=CHK-FSYCL-TARGET-2X-ERROR %s
672672
// CHK-FSYCL-TARGET-2X-ERROR-NOT: clang{{.*}} error: cannot deduce implicit triple value for '-Xsycl-target{{.*}}', specify triple using '-Xsycl-target{{.*}}=<triple>'
673673

674+
/// Check -Xsycl-target-frontend does not trigger an error when no -fsycl-targets is specified
675+
// RUN: %clang -### -fsycl -Xsycl-target-frontend -DFOO %s 2>&1 \
676+
// RUN: | FileCheck -check-prefix=CHK-NO-FSYCL-TARGET-ERROR %s
677+
// CHK-NO-FSYCL-TARGET-ERROR-NOT: clang{{.*}} error: cannot deduce implicit triple value for '-Xsycl-target-frontend', specify triple using '-Xsycl-target-frontend=<triple>'
678+
674679
/// ###########################################################################
675680

676681
/// Ahead of Time compilation for fpga, gen, cpu

clang/test/Preprocessor/sycl-macro-target-specific.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
2121
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
2222
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
23-
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
23+
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
2424
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
2525
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS
2626

libclc/ptx-nvidiacl/libspirv/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,15 @@ math/log2.cl
4343
math/logb.cl
4444
math/modf.cl
4545
math/native_cos.cl
46+
math/native_divide.cl
4647
math/native_exp.cl
4748
math/native_exp10.cl
4849
math/native_exp2.cl
4950
math/native_log.cl
5051
math/native_log10.cl
5152
math/native_log2.cl
5253
math/native_powr.cl
54+
math/native_recip.cl
5355
math/native_rsqrt.cl
5456
math/native_sin.cl
5557
math/native_sqrt.cl

0 commit comments

Comments
 (0)