Skip to content

Commit 305705c

Browse files
authored
[SYCL] refactor and implement dynamic address space casts (#16604)
The patch provides an implementation of dynamic address space casts for NVPTX and AMDGPU. The patch also refactors the static cast to rely on a simple cast rather than the SPIR-V builtin. --------- Signed-off-by: Victor Lomuller <victor@codeplay.com>
1 parent ec8346e commit 305705c

File tree

10 files changed

+154
-57
lines changed

10 files changed

+154
-57
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Declares the four overloads of __spirv_GenericCastToPtrExplicit_To<NAME>.
// Each overload takes a generic pointer (unqualified, const, volatile or
// const volatile pointee) plus an int, and returns a pointer in ADDRSPACE
// carrying the same pointee qualifiers.
//   ADDRSPACE - address space qualifier of the returned pointer.
//   NAME      - suffix pasted onto the function name.
// The final declaration carries no trailing ';' inside the macro; it is
// supplied at each use below.
#define GenericCastToPtrExplicit_To(ADDRSPACE, NAME) \
10+
_CLC_DECL _CLC_OVERLOAD \
11+
ADDRSPACE void *__spirv_GenericCastToPtrExplicit_To##NAME( \
12+
generic void *, int); \
13+
_CLC_DECL _CLC_OVERLOAD \
14+
ADDRSPACE const void *__spirv_GenericCastToPtrExplicit_To##NAME( \
15+
generic const void *, int); \
16+
_CLC_DECL _CLC_OVERLOAD \
17+
ADDRSPACE volatile void *__spirv_GenericCastToPtrExplicit_To##NAME( \
18+
generic volatile void *, int); \
19+
_CLC_DECL _CLC_OVERLOAD ADDRSPACE const volatile void * \
20+
__spirv_GenericCastToPtrExplicit_To##NAME(generic const volatile void *, \
21+
int)
22+
23+
// Declare the ToGlobal, ToLocal and ToPrivate overload sets.
GenericCastToPtrExplicit_To(global, Global);
24+
GenericCastToPtrExplicit_To(local, Local);
25+
GenericCastToPtrExplicit_To(private, Private);
26+
27+
// The helper macro is only needed for the declarations above.
#undef GenericCastToPtrExplicit_To

libclc/libspirv/include/libspirv/spirv.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,7 @@
9292
#include <libspirv/image/image.h>
9393
#include <libspirv/image/image_defines.h>
9494

95+
/* Pointer Conversion */
96+
#include <libspirv/conversion/GenericCastToPtrExplicit.h>
97+
9598
#pragma OPENCL EXTENSION all : disable

libclc/libspirv/lib/amdgcn-amdhsa/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ atomic/atomic_min.cl
1414
atomic/atomic_max.cl
1515
atomic/atomic_sub.cl
1616
atomic/atomic_store.cl
17+
conversion/GenericCastToPtrExplicit.cl
1718
synchronization/barrier.cl
1819
math/acos.cl
1920
math/acosh.cl
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <libspirv/spirv.h>
10+
11+
12+
// Wraps __builtin_amdgcn_is_private under a name of the form
// __clc_amdgcn_is_<ADDRSPACE>, so the cast macro in this file can select it
// by pasting the address space token. Returns the builtin's result for `ptr`.
_CLC_DEF static bool __clc_amdgcn_is_private(generic void *ptr) {
13+
return __builtin_amdgcn_is_private(ptr);
14+
}
15+
// Wraps __builtin_amdgcn_is_shared under the __clc_amdgcn_is_<ADDRSPACE>
// naming scheme: the OpenCL `local` qualifier is mapped onto the AMDGCN
// "shared" query. Returns the builtin's result for `ptr`.
_CLC_DEF static bool __clc_amdgcn_is_local(generic void *ptr) {
16+
return __builtin_amdgcn_is_shared(ptr);
17+
}
18+
// Classifies `ptr` as global by elimination: no dedicated builtin is called
// here, so the result is true exactly when both the private and the local
// checks return false.
_CLC_DEF static bool __clc_amdgcn_is_global(generic void *ptr) {
19+
return !__clc_amdgcn_is_private(ptr) && !__clc_amdgcn_is_local(ptr);
20+
}
21+
22+
// Defines the four overloads of __spirv_GenericCastToPtrExplicit_To<NAME>
// for AMDGCN.
//   ADDRSPACE - target address space qualifier; also pasted into
//               __clc_amdgcn_is_<ADDRSPACE> to pick the run-time check.
//   NAME      - suffix pasted onto the function name.
// The overload taking an unqualified pointee does the work: it returns `ptr`
// cast to ADDRSPACE when the check succeeds and 0 otherwise. The const,
// volatile and const volatile overloads cast the qualifiers away and forward
// to it. The int argument is passed along between overloads but never
// inspected.
// NOTE(review): these are function definitions but are marked _CLC_DECL,
// whereas the helper functions in this file use _CLC_DEF - confirm this is
// intentional.
#define GenericCastToPtrExplicit_To(ADDRSPACE, NAME) \
23+
_CLC_DECL _CLC_OVERLOAD \
24+
ADDRSPACE void *__spirv_GenericCastToPtrExplicit_To##NAME( \
25+
generic void *ptr, int unused) { \
26+
if (__clc_amdgcn_is_##ADDRSPACE(ptr)) \
27+
return (ADDRSPACE void *)ptr; \
28+
return 0; \
29+
} \
30+
_CLC_DECL _CLC_OVERLOAD \
31+
ADDRSPACE const void *__spirv_GenericCastToPtrExplicit_To##NAME( \
32+
generic const void *ptr, int unused) { \
33+
return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \
34+
unused); \
35+
} \
36+
_CLC_DECL _CLC_OVERLOAD \
37+
ADDRSPACE volatile void *__spirv_GenericCastToPtrExplicit_To##NAME( \
38+
generic volatile void *ptr, int unused) { \
39+
return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \
40+
unused); \
41+
} \
42+
_CLC_DECL _CLC_OVERLOAD ADDRSPACE const volatile void \
43+
*__spirv_GenericCastToPtrExplicit_To##NAME( \
44+
generic const volatile void *ptr, int unused) { \
45+
return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \
46+
unused); \
47+
}
48+
49+
// Emit the ToGlobal, ToLocal and ToPrivate overload sets.
GenericCastToPtrExplicit_To(global, Global)
50+
GenericCastToPtrExplicit_To(local, Local)
51+
GenericCastToPtrExplicit_To(private, Private)

libclc/libspirv/lib/ptx-nvidiacl/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ atomic/loadstore_helpers_release.ll
33
atomic/loadstore_helpers_acquire.ll
44
atomic/loadstore_helpers_seq_cst.ll
55
cl_khr_int64_extended_atomics/minmax_helpers.ll
6+
conversion/GenericCastToPtrExplicit.cl
67
integer/mul24.cl
78
integer/mul_hi.cl
89
math/acos.cl
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <libspirv/spirv.h>
10+
11+
// Wraps __nvvm_isspacep_local under a name of the form
// __clc_nvvm_is_<ADDRSPACE>, so the cast macro in this file can select it by
// pasting the address space token. Note the naming shift: the OpenCL
// `private` qualifier is mapped onto the NVVM "local" query.
_CLC_DEF static bool __clc_nvvm_is_private(generic void *ptr) {
12+
return __nvvm_isspacep_local(ptr);
13+
}
14+
// Wraps __nvvm_isspacep_shared under the __clc_nvvm_is_<ADDRSPACE> naming
// scheme: the OpenCL `local` qualifier is mapped onto the NVVM "shared"
// query. Returns the builtin's result for `ptr`.
_CLC_DEF static bool __clc_nvvm_is_local(generic void *ptr) {
15+
return __nvvm_isspacep_shared(ptr);
16+
}
17+
// Wraps __nvvm_isspacep_global under the __clc_nvvm_is_<ADDRSPACE> naming
// scheme. Returns the builtin's result for `ptr`.
_CLC_DEF static bool __clc_nvvm_is_global(generic void *ptr) {
18+
return __nvvm_isspacep_global(ptr);
19+
}
20+
21+
// Defines the four overloads of __spirv_GenericCastToPtrExplicit_To<NAME>
// for NVPTX.
//   ADDRSPACE - target address space qualifier; also pasted into
//               __clc_nvvm_is_<ADDRSPACE> to pick the run-time check.
//   NAME      - suffix pasted onto the function name.
// The overload taking an unqualified pointee does the work: it returns `ptr`
// cast to ADDRSPACE when the check succeeds and 0 otherwise. The const,
// volatile and const volatile overloads cast the qualifiers away and forward
// to it. The int argument is passed along between overloads but never
// inspected.
// NOTE(review): these are function definitions but are marked _CLC_DECL,
// whereas the helper functions in this file use _CLC_DEF - confirm this is
// intentional.
#define GenericCastToPtrExplicit_To(ADDRSPACE, NAME) \
22+
_CLC_DECL _CLC_OVERLOAD \
23+
ADDRSPACE void *__spirv_GenericCastToPtrExplicit_To##NAME( \
24+
generic void *ptr, int unused) { \
25+
if (__clc_nvvm_is_##ADDRSPACE(ptr)) \
26+
return (ADDRSPACE void *)ptr; \
27+
return 0; \
28+
} \
29+
_CLC_DECL _CLC_OVERLOAD \
30+
ADDRSPACE const void *__spirv_GenericCastToPtrExplicit_To##NAME( \
31+
generic const void *ptr, int unused) { \
32+
return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \
33+
unused); \
34+
} \
35+
_CLC_DECL _CLC_OVERLOAD \
36+
ADDRSPACE volatile void *__spirv_GenericCastToPtrExplicit_To##NAME( \
37+
generic volatile void *ptr, int unused) { \
38+
return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \
39+
unused); \
40+
} \
41+
_CLC_DECL _CLC_OVERLOAD ADDRSPACE const volatile void \
42+
*__spirv_GenericCastToPtrExplicit_To##NAME( \
43+
generic const volatile void *ptr, int unused) { \
44+
return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \
45+
unused); \
46+
}
47+
48+
// Emit the ToGlobal, ToLocal and ToPrivate overload sets.
GenericCastToPtrExplicit_To(global, Global)
49+
GenericCastToPtrExplicit_To(local, Local)
50+
GenericCastToPtrExplicit_To(private, Private)

sycl/include/sycl/access/access.hpp

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -350,15 +350,6 @@ address_space_cast_is_possible(access::address_space Src,
350350

351351
template <access::address_space Space, typename ElementType>
352352
auto static_address_cast(ElementType *Ptr) {
353-
constexpr auto generic_space = access::address_space::generic_space;
354-
constexpr auto global_space = access::address_space::global_space;
355-
constexpr auto local_space = access::address_space::local_space;
356-
constexpr auto private_space = access::address_space::private_space;
357-
constexpr auto global_device =
358-
access::address_space::ext_intel_global_device_space;
359-
constexpr auto global_host =
360-
access::address_space::ext_intel_global_host_space;
361-
362353
constexpr auto SrcAS = deduce_AS<ElementType *>::value;
363354
static_assert(address_space_cast_is_possible(SrcAS, Space));
364355

@@ -367,31 +358,7 @@ auto static_address_cast(ElementType *Ptr) {
367358

368359
// Note: reinterpret_cast isn't enough for some of the casts between different
369360
// address spaces, use C-style cast instead.
370-
#if !defined(__SPIR__)
371361
return (dst_type)Ptr;
372-
#else
373-
if constexpr (SrcAS != generic_space) {
374-
return (dst_type)Ptr;
375-
} else if constexpr (Space == global_space) {
376-
return (dst_type)__spirv_GenericCastToPtr_ToGlobal(
377-
Ptr, __spv::StorageClass::CrossWorkgroup);
378-
} else if constexpr (Space == local_space) {
379-
return (dst_type)__spirv_GenericCastToPtr_ToLocal(
380-
Ptr, __spv::StorageClass::Workgroup);
381-
} else if constexpr (Space == private_space) {
382-
return (dst_type)__spirv_GenericCastToPtr_ToPrivate(
383-
Ptr, __spv::StorageClass::Function);
384-
#if !defined(__ENABLE_USM_ADDR_SPACE__)
385-
} else if constexpr (Space == global_device || Space == global_host) {
386-
// If __ENABLE_USM_ADDR_SPACE__ isn't defined then both
387-
// global_device/global_host are just aliases for global_space.
388-
return (dst_type)__spirv_GenericCastToPtr_ToGlobal(
389-
Ptr, __spv::StorageClass::CrossWorkgroup);
390-
#endif
391-
} else {
392-
return (dst_type)Ptr;
393-
}
394-
#endif
395362
}
396363

397364
// Previous implementation (`castAS`, used in `multi_ptr` ctors among other
@@ -427,14 +394,13 @@ auto dynamic_address_cast(ElementType *Ptr) {
427394
#if defined(__ENABLE_USM_ADDR_SPACE__)
428395
static_assert(SupressNotImplementedAssert || Space != Space,
429396
"Not supported yet!");
430-
return static_address_cast<Space>(Ptr);
397+
return detail::static_address_cast<Space>(Ptr);
431398
#else
432399
// If __ENABLE_USM_ADDR_SPACE__ isn't defined then both
433400
// global_device/global_host are just aliases for global_space.
434401
static_assert(std::is_same_v<dst_type, ElementType *>);
435402
return (dst_type)Ptr;
436403
#endif
437-
#if defined(__SPIR__)
438404
} else if constexpr (Space == global_space) {
439405
return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal(
440406
Ptr, __spv::StorageClass::CrossWorkgroup);
@@ -449,12 +415,11 @@ auto dynamic_address_cast(ElementType *Ptr) {
449415
(Space == global_device || Space == global_host)) {
450416
return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal(
451417
Ptr, __spv::StorageClass::CrossWorkgroup);
452-
#endif
453418
#endif
454419
} else {
455420
static_assert(SupressNotImplementedAssert || Space != Space,
456421
"Not supported yet!");
457-
return static_address_cast<Space>(Ptr);
422+
return detail::static_address_cast<Space>(Ptr);
458423
}
459424
}
460425
#else // __SYCL_DEVICE_ONLY__

sycl/test-e2e/AddressCast/dynamic_address_cast.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@
77
//===----------------------------------------------------------------------===//
88

99
// Issue with OpenCL CPU runtime implementation of OpGenericCastToPtrExplicit
10-
// OpGenericCastToPtr* intrinsics not implemented on AMD or NVIDIA
1110
// FPGA emulator affected by same issue as OpenCL CPU runtime
12-
// UNSUPPORTED: cpu, hip, cuda, accelerator
11+
// UNSUPPORTED: cpu, accelerator
1312
// RUN: %{build} -o %t.out
1413
// RUN: %{run} %t.out
1514

sycl/test/check_device_code/atomic_ref.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
// CHECK-LABEL: define dso_local spir_func noundef i32 @_Z17atomic_ref_globalRi(
77
// CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] {
88
// CHECK-NEXT: [[ENTRY:.*:]]
9-
// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z33__spirv_GenericCastToPtr_ToGlobalPvi(ptr addrspace(4) noundef align 4 dereferenceable(4) [[I]], i32 noundef 5) #[[ATTR3:[0-9]+]]
10-
// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[CALL_I_I_I_I_I_I]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]]
9+
// CHECK-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(4) [[I]] to ptr addrspace(1)
10+
// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]]
1111
// CHECK-NEXT: ret i32 [[CALL3_I_I]]
1212
//
1313
SYCL_EXTERNAL auto atomic_ref_global(int &i) {

0 commit comments

Comments
 (0)