
Commit b8395de

[SYCL][libclc][E2E] atomic work_item scope fallback (#16172)
Add an Invocation case, which falls back to a coarser-grained scope, to the libclc atomic functions. This prevents hangs on AMD and crashes on NVIDIA when using atomic_ref functionality with work_item scope. Also add a test that simply checks that the kernel does not crash when using atomic_ref with work_item scope. See issue: #16037
1 parent 814290d commit b8395de
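
For context, the usage pattern this commit addresses is a sycl::atomic_ref declared with memory_scope::work_item, which the libclc fallbacks below now map to the next coarser scope (wavefront on AMD, CTA on NVIDIA). The following is a minimal sketch of that pattern, modeled on the E2E test added in this commit; the queue setup and variable names are illustrative and not part of the change:

// Minimal sketch of a work_item-scoped atomic_ref (names are illustrative).
#include <sycl/sycl.hpp>

int main() {
  sycl::queue q;
  int *data = sycl::malloc_device<int>(1, q);

  q.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) {
     // work_item scope previously hung on AMD and crashed on NVIDIA;
     // with this commit libclc treats it like the next coarser scope.
     sycl::atomic_ref<int, sycl::memory_order::relaxed,
                      sycl::memory_scope::work_item,
                      sycl::access::address_space::global_space>
         at(data[0]);
     at.store(1);
     at.fetch_add(2);
   }).wait();

  sycl::free(data, q);
  return 0;
}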

File tree

6 files changed: +72 additions, 0 deletions


libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ extern int __oclc_amdgpu_reflect(__constant char *);
                            OUT_ORDER) \
   { \
     switch (IN_SCOPE) { \
+    case Invocation: \
     case Subgroup: \
       OUT_SCOPE = __HIP_MEMORY_SCOPE_WAVEFRONT; \
       break; \

libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl

Lines changed: 2 additions & 0 deletions
@@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
 #define __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
                                          ADDR_SPACE, ADDR_SPACE_NV, ORDER) \
   switch (scope) { \
+  case Invocation: \
   case Subgroup: \
   case Workgroup: { \
     if (__clc_nvvm_reflect_arch() >= 600) { \
@@ -44,6 +45,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
 #define __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \
     TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
   switch (scope) { \
+  case Invocation: \
   case Subgroup: \
   case Workgroup: { \
     if (__clc_nvvm_reflect_arch() >= 600) { \

libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h

Lines changed: 2 additions & 0 deletions
@@ -18,6 +18,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
 #define __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
                                      ADDR_SPACE, ADDR_SPACE_NV, ORDER) \
   switch (scope) { \
+  case Invocation: \
   case Subgroup: \
   case Workgroup: { \
     if (__clc_nvvm_reflect_arch() >= 600) { \
@@ -46,6 +47,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
 #define __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
                                              OP, ADDR_SPACE, ADDR_SPACE_NV) \
   switch (scope) { \
+  case Invocation: \
   case Subgroup: \
   case Workgroup: { \
     if (__clc_nvvm_reflect_arch() >= 600) { \

libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
 #define __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
                                           ADDR_SPACE, ADDR_SPACE_NV, ORDER) \
   switch (scope) { \
+  case Invocation: \
   case Subgroup: \
   case Workgroup: { \
     TYPE_NV res = __nvvm##ORDER##_cta_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \

libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
 #define __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
                                            ADDR_SPACE, ADDR_SPACE_NV, ORDER) \
   switch (scope) { \
+  case Invocation: \
   case Subgroup: \
   case Workgroup: { \
     __nvvm##ORDER##_cta_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \
Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <iostream>
#include <sycl/atomic_ref.hpp>
#include <sycl/detail/core.hpp>
#include <sycl/usm.hpp>

int main() {

  sycl::device dev;
  sycl::queue q(dev);
  auto ctxt = q.get_context();

  // This test does not validate any output
  // Only that the work_item scope does not error
  try {

    // Allocate device memory
    int *data = sycl::malloc_device<int>(1, q);

    q.submit([&](sycl::handler &cgh) {
      cgh.parallel_for(10, [=](sycl::id<> id) {
        data[0] = 0;

        // Check atomic_ref functionality
        sycl::atomic_ref<int, sycl::memory_order::relaxed,
                         sycl::memory_scope::work_item,
                         sycl::access::address_space::generic_space>
            at(data[0]);

        auto lock = at.is_lock_free();
        at.store(1);
        auto load = at.load();
        auto xch = at.exchange(2);
        auto weak =
            at.compare_exchange_weak(data[0], 3, sycl::memory_order::relaxed,
                                     sycl::memory_order::relaxed);
        auto strong =
            at.compare_exchange_strong(data[0], 4, sycl::memory_order::relaxed,
                                       sycl::memory_order::relaxed);
        auto fetch_add = at.fetch_add(5);
        auto fetch_sub = at.fetch_sub(6);
        auto fetch_and = at.fetch_and(7);
        auto fetch_or = at.fetch_or(8);
        auto fetch_xor = at.fetch_xor(9);
        auto fetch_min = at.fetch_min(10);
        auto fetch_max = at.fetch_max(11);
      });
    });
    q.wait_and_throw();

    sycl::free(data, q);

  } catch (sycl::exception e) {
    std::cerr << "SYCL exception caught! : " << e.what() << "\n";
    return 1;
  } catch (...) {
    std::cerr << "Unknown exception caught!\n";
    return 2;
  }

  std::cout << "Test passed!" << std::endl;
  return 0;
}
