Skip to content

Commit 1e0ff5e

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents dde4dea + 7bc8447 commit 1e0ff5e

39 files changed

+733
-158
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,8 @@ BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
347347
BUILTIN(__nvvm_rcp_rm_f, "ff", "")
348348
BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
349349
BUILTIN(__nvvm_rcp_rp_f, "ff", "")
350+
BUILTIN(__nvvm_rcp_approx_f, "ff", "")
351+
BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
350352

351353
BUILTIN(__nvvm_rcp_rn_d, "dd", "")
352354
BUILTIN(__nvvm_rcp_rz_d, "dd", "")

clang/include/clang/Driver/Options.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1553,7 +1553,7 @@ def ffp_exception_behavior_EQ : Joined<["-"], "ffp-exception-behavior=">, Group<
15531553
MarshallingInfoEnum<LangOpts<"FPExceptionMode">, "FPE_Ignore">;
15541554
defm fast_math : BoolFOption<"fast-math",
15551555
LangOpts<"FastMath">, DefaultFalse,
1556-
PosFlag<SetTrue, [CC1Option], "Allow aggressive, lossy floating-point optimizations",
1556+
PosFlag<SetTrue, [CC1Option, CoreOption], "Allow aggressive, lossy floating-point optimizations",
15571557
[cl_fast_relaxed_math.KeyPath]>,
15581558
NegFlag<SetFalse>>;
15591559
def menable_unsafe_fp_math : Flag<["-"], "menable-unsafe-fp-math">, Flags<[CC1Option]>,

clang/lib/Driver/ToolChain.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1287,7 +1287,7 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs(
12871287
// improved upon
12881288
auto SingleTargetTripleCount = [&Args](OptSpecifier Opt) {
12891289
const Arg *TargetArg = Args.getLastArg(Opt);
1290-
if (TargetArg && TargetArg->getValues().size() == 1)
1290+
if (!TargetArg || TargetArg->getValues().size() == 1)
12911291
return true;
12921292
return false;
12931293
};

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,8 +1289,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
12891289

12901290
const llvm::Triple &DeviceTriple = TI.getTriple();
12911291
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
1292-
if (DeviceTriple.isSPIR() &&
1293-
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga)
1292+
if (DeviceTriple.isNVPTX() ||
1293+
(DeviceTriple.isSPIR() &&
1294+
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
12941295
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
12951296
// Enable generation of USM address spaces for FPGA.
12961297
if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) {

clang/test/Driver/sycl-offload.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,11 @@
671671
// RUN: | FileCheck -check-prefix=CHK-FSYCL-TARGET-2X-ERROR %s
672672
// CHK-FSYCL-TARGET-2X-ERROR-NOT: clang{{.*}} error: cannot deduce implicit triple value for '-Xsycl-target{{.*}}', specify triple using '-Xsycl-target{{.*}}=<triple>'
673673

674+
/// Check -Xsycl-target-frontend does not trigger an error when no -fsycl-targets is specified
675+
// RUN: %clang -### -fsycl -Xsycl-target-frontend -DFOO %s 2>&1 \
676+
// RUN: | FileCheck -check-prefix=CHK-NO-FSYCL-TARGET-ERROR %s
677+
// CHK-NO-FSYCL-TARGET-ERROR-NOT: clang{{.*}} error: cannot deduce implicit triple value for '-Xsycl-target-frontend', specify triple using '-Xsycl-target-frontend=<triple>'
678+
674679
/// ###########################################################################
675680

676681
/// Ahead of Time compilation for fpga, gen, cpu

clang/test/Preprocessor/sycl-macro-target-specific.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown -E -dM \
2121
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
2222
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl -E -dM \
23-
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s
23+
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
2424
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
2525
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS
2626

libclc/ptx-nvidiacl/libspirv/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,15 @@ math/log2.cl
4343
math/logb.cl
4444
math/modf.cl
4545
math/native_cos.cl
46+
math/native_divide.cl
4647
math/native_exp.cl
4748
math/native_exp10.cl
4849
math/native_exp2.cl
4950
math/native_log.cl
5051
math/native_log10.cl
5152
math/native_log2.cl
5253
math/native_powr.cl
54+
math/native_recip.cl
5355
math/native_rsqrt.cl
5456
math/native_sin.cl
5557
math/native_sqrt.cl

libclc/ptx-nvidiacl/libspirv/atomic/atomic_add.cl

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,98 @@ __CLC_NVVM_ATOMIC(ulong, m, long, l, add, _Z18__spirv_AtomicIAdd)
1717

1818
__CLC_NVVM_ATOMIC(float, f, float, f, add, _Z21__spirv_AtomicFAddEXT)
1919
#ifdef cl_khr_int64_base_atomics
20-
__CLC_NVVM_ATOMIC(double, d, double, d, add, _Z21__spirv_AtomicFAddEXT)
20+
21+
// Defines the double-precision overload of _Z21__spirv_AtomicFAddEXT for one
// address space, and forward-declares the __spirv_AtomicLoad and
// __spirv_AtomicCompareExchange overloads on `long` that its fallback path calls.
//
//   ADDR_SPACE         - address-space qualifier placed on the pointer types.
//   ADDR_SPACE_MANGLED - text pasted after `P` in the mangled function names.
//   ADDR_SPACE_NV      - forwarded unchanged to the __CLC_NVVM_ATOMIC_IMPL_*
//                        helper macros (defined outside this block).
//   SUBSTITUTION1/2    - pasted into the mangled name of the
//                        __spirv_AtomicCompareExchange overload.
//
// When __clc_nvvm_reflect_arch() >= 600 the add is dispatched on the memory
// order through the helper macros; otherwise it is emulated with an atomic
// load / compare-exchange loop on the value reinterpreted as `long`.
#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, ADDR_SPACE_MANGLED, \
22+
ADDR_SPACE_NV, SUBSTITUTION1, \
23+
SUBSTITUTION2) \
24+
long \
25+
_Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
26+
volatile ADDR_SPACE const long *, enum Scope, \
27+
enum MemorySemanticsMask); \
28+
long \
29+
_Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \
30+
volatile ADDR_SPACE long *, enum Scope, enum MemorySemanticsMask, \
31+
enum MemorySemanticsMask, long, long); \
32+
__attribute__((always_inline)) _CLC_DECL double \
33+
_Z21__spirv_AtomicFAddEXT##P##ADDR_SPACE_MANGLED##d##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##d( \
34+
volatile ADDR_SPACE double *pointer, enum Scope scope, \
35+
enum MemorySemanticsMask semantics, double value) { \
36+
/* Semantics mask may include memory order, storage class and other info. \
37+
Memory order is stored in the lowest 5 bits */ \
38+
unsigned int order = semantics & 0x1F; \
39+
/* First path: dispatch on the masked memory order to the helper macros. */ \
if (__clc_nvvm_reflect_arch() >= 600) { \
40+
switch (order) { \
41+
case None: \
42+
__CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
43+
ADDR_SPACE_NV, ) \
44+
break; \
45+
case Acquire: \
46+
if (__clc_nvvm_reflect_arch() >= 700) { \
47+
__CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
48+
ADDR_SPACE_NV, _acquire) \
49+
} else { \
50+
__CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \
51+
ADDR_SPACE, ADDR_SPACE_NV) \
52+
} \
53+
break; \
54+
case Release: \
55+
if (__clc_nvvm_reflect_arch() >= 700) { \
56+
__CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
57+
ADDR_SPACE_NV, _release) \
58+
} else { \
59+
__spirv_MemoryBarrier(scope, Release); \
60+
__CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
61+
ADDR_SPACE_NV, ) \
62+
} \
63+
break; \
64+
case AcquireRelease: \
65+
if (__clc_nvvm_reflect_arch() >= 700) { \
66+
__CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \
67+
ADDR_SPACE_NV, _acq_rel) \
68+
} else { \
69+
__spirv_MemoryBarrier(scope, Release); \
70+
__CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \
71+
ADDR_SPACE, ADDR_SPACE_NV) \
72+
} \
73+
break; \
74+
} \
75+
/* Reached whenever control leaves the switch above, including via `break`. \
   NOTE(review): presumably the helper macros return the result themselves, \
   so this only fires for an unhandled order - confirm against their \
   definitions. */ \
__builtin_trap(); \
76+
__builtin_unreachable(); \
77+
} else { \
78+
/* Fallback path: emulate the add with a load + compare-exchange loop. */ \
enum MemorySemanticsMask load_order; \
79+
/* NOTE(review): this switches on the full `semantics` mask rather than the \
   masked `order` computed above, so a mask with any extra bits set takes \
   the `default` branch (load_order = None) - confirm this is intended. */ \
switch (semantics) { \
80+
case SequentiallyConsistent: \
81+
load_order = SequentiallyConsistent; \
82+
break; \
83+
case Acquire: \
84+
case AcquireRelease: \
85+
load_order = Acquire; \
86+
break; \
87+
default: \
88+
load_order = None; \
89+
} \
90+
volatile ADDR_SPACE long *pointer_int = \
91+
(volatile ADDR_SPACE long *)pointer; \
92+
long old_int; \
93+
long new_val_int; \
94+
/* Retry until the compare-exchange returns the same bits that were loaded. */ \
do { \
95+
old_int = \
96+
_Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
97+
pointer_int, scope, load_order); \
98+
/* Reinterpret the loaded bits as a double, add, and reinterpret back. */ \
double new_val = *(double *)&old_int + *(double *)&value; \
99+
new_val_int = *(long *)&new_val; \
100+
} while ( \
101+
_Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \
102+
pointer_int, scope, semantics, semantics, new_val_int, \
103+
old_int) != old_int); \
104+
/* Return the value observed before the add, reinterpreted as double. */ \
return *(double *)&old_int; \
105+
} \
106+
}
107+
108+
// Instantiate the double atomic add for the unqualified, __global and __local
// address spaces. The last two arguments are pasted into the mangled name of
// the __spirv_AtomicCompareExchange overload; they are 0/4 for the unqualified
// pointer and 1/5 for the address-space-qualified ones.
__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(, , _gen_, 0, 4)
109+
__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__global, U3AS1, _global_, 1, 5)
110+
__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__local, U3AS3, _shared_, 1, 5)
111+
21112
#endif
22113

23114
#undef __CLC_NVVM_ATOMIC_TYPES

0 commit comments

Comments
 (0)