Skip to content

Commit 8a00958

Browse files
authored
[SYCLomatic][ASM] Support migration of instruction st.volatile.global.u32 and ld.volatile.global.u32 (#2873)
Signed-off-by: chenwei.sun <chenwei.sun@intel.com>
1 parent cb24e95 commit 8a00958

File tree

3 files changed

+39
-0
lines changed

3 files changed

+39
-0
lines changed

clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,7 @@ MODIFIER(cs, ".cs")
450450
MODIFIER(to, ".to")
451451
MODIFIER(aligned, ".aligned")
452452
MODIFIER(trans, ".trans")
453+
MODIFIER(ptx_volatile, ".volatile")
453454

454455
#undef LINKAGE
455456
#undef TARGET

clang/test/dpct/asm/ld.cu

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,27 @@ __device__ inline void load_global_short4(short4 &a, const short4 *addr) {
7070
a.w = w;
7171
}
7272

73+
// CHECK: __dpct_inline__ int ld_flag_volatile(int* flag_addr) {
74+
// CHECK-NEXT: int flag;
75+
// CHECK-NEXT: flag = *((uint32_t *)(uintptr_t)flag_addr);
76+
// CHECK-NEXT: sycl::atomic_fence(sycl::memory_order::seq_cst,sycl::memory_scope::device);
77+
// CHECK-NEXT: return flag;
78+
// CHECK-NEXT: }
79+
__device__ __forceinline__ int ld_flag_volatile(int* flag_addr) {
80+
int flag;
81+
asm volatile("ld.volatile.global.u32 %0, [%1]; membar.gl;" : "=r"(flag) : "l"(flag_addr));
82+
return flag;
83+
}
84+
85+
// CHECK: __dpct_inline__ int ld_flag_acquire(int* flag_addr) {
86+
// CHECK-NEXT: int flag;
87+
// CHECK-NEXT: flag = *((uint32_t *)(uintptr_t)flag_addr);
88+
// CHECK-NEXT: return flag;
89+
// CHECK-NEXT: }
90+
__device__ __forceinline__ int ld_flag_acquire(int* flag_addr) {
91+
int flag;
92+
asm volatile("ld.volatile.global.u32 %0, [%1];" : "=r"(flag) : "l"(flag_addr));
93+
return flag;
94+
}
95+
7396
// clang-format on

clang/test/dpct/asm/st.cu

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,19 @@ __device__ inline void store_streaming_short2(short2 *addr, short x, short y) {
6161
asm("st.cs.global.v2.s16 [%0+0], {%1, %2};" ::__PTR(addr), "h"(x), "h"(y));
6262
}
6363

64+
// CHECK: __dpct_inline__ void st_flag_release(int* flag_addr, int flag) {
65+
// CHECK-NEXT: sycl::atomic_fence(sycl::memory_order::seq_cst,sycl::memory_scope::system);
66+
// CHECK-NEXT: *((uint32_t *)(uintptr_t)flag_addr) = flag;
67+
// CHECK-NEXT: }
68+
__device__ __forceinline__ void st_flag_release(int* flag_addr, int flag) {
69+
asm volatile("membar.sys; st.volatile.global.u32 [%1], %0;" ::"r"(flag), "l"(flag_addr));
70+
}
71+
72+
// CHECK: __dpct_inline__ void st_flag_volatile(int* flag_addr, int flag) {
73+
// CHECK-NEXT: *((uint32_t *)(uintptr_t)flag_addr) = flag;
74+
// CHECK-NEXT: }
75+
__device__ __forceinline__ void st_flag_volatile(int* flag_addr, int flag) {
76+
asm volatile("st.volatile.global.u32 [%1], %0;" ::"r"(flag), "l"(flag_addr));
77+
}
78+
6479
// clang-format on

0 commit comments

Comments
 (0)