Skip to content

Commit 695e2cf

Browse files
authored
[ESIMD] Fix inconsistencies in the ESIMD API signatures. (#4800)
This is an API-breaking patch. - Remove the 'esimd_' prefix from public API names. This prefix is redundant, because all APIs are already in the ...esimd:: namespace. API names follow C-for-Metal naming. - esimd_sat -> saturate Memory access API changes: - Un-deprecate block_load/store - Rename slm_atomic -> slm_atomic_update, flat_atomic -> atomic_update. The name 'atomic' is not used, to avoid a conflict with the C++ atomic class - Offset is now expected in bytes (was in elements). (in scatter, gather, scalar_load, scalar_store) - Make the 'offsets' argument precede the 'val' argument to be consistent with other memory APIs. (in scatter, scatter_rgba, slm_scatter) - Remove unused L1 / L3 CacheHint template parameters. (in scatter, gather, scalar_load, scalar_store, scatter_rgba, gather_rgba) - Rename enum EsimdFenceMask -> enum fence_mask and its elements (deprecate old ones) Old behavior is preserved in deprecated APIs: scatter->scatter1 gather->gather1 scalar_load->scalar_load1 scalar_store->scalar_store1 gather_rgba->gather4 scatter_rgba->scatter4 slm_scatter->slm_store Signed-off-by: Konstantin S Bobrovsky konstantin.s.bobrovsky@intel.com
1 parent 6b9b0eb commit 695e2cf

File tree

16 files changed

+898
-882
lines changed

16 files changed

+898
-882
lines changed

sycl/include/sycl/ext/intel/experimental/esimd/common.hpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,7 @@
1717
#ifdef __SYCL_DEVICE_ONLY__
1818
#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
1919
#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd))
20-
#else
21-
#define SYCL_ESIMD_KERNEL
22-
#define SYCL_ESIMD_FUNCTION
23-
#endif
24-
25-
__SYCL_INLINE_NAMESPACE(cl) {
26-
namespace sycl {
27-
namespace ext {
28-
namespace intel {
29-
namespace experimental {
30-
namespace esimd {
31-
32-
using uchar = unsigned char;
33-
using ushort = unsigned short;
34-
using uint = unsigned int;
3520

36-
#ifdef __SYCL_DEVICE_ONLY__
3721
// Mark a function as nodebug.
3822
#define ESIMD_NODEBUG __attribute__((nodebug))
3923
// Mark a "ESIMD global": accessible from all functions in current translation
@@ -43,14 +27,21 @@ using uint = unsigned int;
4327
__attribute__((opencl_private)) __attribute__((sycl_explicit_simd))
4428
// Bind an ESIMD global variable to a specific register.
4529
#define ESIMD_REGISTER(n) __attribute__((register_num(n)))
46-
#else
30+
31+
#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE
32+
#else // __SYCL_DEVICE_ONLY__
33+
#define SYCL_ESIMD_KERNEL
34+
#define SYCL_ESIMD_FUNCTION
35+
4736
// TODO ESIMD define what this means on Windows host
4837
#define ESIMD_NODEBUG
4938
// On host device ESIMD global is a thread local static var. This assumes that
5039
// each work-item is mapped to a separate OS thread on host device.
5140
#define ESIMD_PRIVATE thread_local
5241
#define ESIMD_REGISTER(n)
53-
#endif
42+
43+
#define __ESIMD_API ESIMD_INLINE
44+
#endif // __SYCL_DEVICE_ONLY__
5445

5546
// Mark a function as noinline.
5647
#define ESIMD_NOINLINE __attribute__((noinline))
@@ -70,6 +61,17 @@ using uint = unsigned int;
7061
#define __ESIMD_DEPR_ENUM_V(old, new, t) \
7162
old __ESIMD_DEPRECATED(new) = static_cast<t>(new)
7263

64+
__SYCL_INLINE_NAMESPACE(cl) {
65+
namespace sycl {
66+
namespace ext {
67+
namespace intel {
68+
namespace experimental {
69+
namespace esimd {
70+
71+
using uchar = unsigned char;
72+
using ushort = unsigned short;
73+
using uint = unsigned int;
74+
7375
/// Gen hardware supports applying saturation to results of some operation.
7476
/// This enum allows controlling this behavior.
7577
enum class saturation : uint8_t { off, on };
@@ -237,8 +239,6 @@ enum class split_barrier_action : uint8_t {
237239
// For backward compatibility:
238240
using EsimdSbarrierType = split_barrier_action;
239241

240-
#undef __ESIMD_DEPR_ENUM_V
241-
242242
// Since EsimdSbarrierType values are deprecated, these macros will generate
243243
// deprecation message.
244244
#define ESIMD_SBARRIER_WAIT EsimdSbarrierType::WAIT

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 11 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,7 @@ constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) {
7676
} // __SYCL_INLINE_NAMESPACE(cl)
7777

7878
// flat_read does flat-address gather
79-
template <typename Ty, int N, int NumBlk = 0,
80-
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
81-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
79+
template <typename Ty, int N, int NumBlk = 0>
8280
__ESIMD_INTRIN
8381
__SEIEED::vector_type_t<Ty, N * __SEIEED::ElemsPerAddrDecoding(NumBlk)>
8482
__esimd_svm_gather(__SEIEED::vector_type_t<uint64_t, N> addrs,
@@ -111,9 +109,7 @@ __ESIMD_INTRIN
111109
#endif // __SYCL_DEVICE_ONLY__
112110

113111
// flat_write does flat-address scatter
114-
template <typename Ty, int N, int NumBlk = 0,
115-
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
116-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
112+
template <typename Ty, int N, int NumBlk = 0>
117113
__ESIMD_INTRIN void __esimd_svm_scatter(
118114
__SEIEED::vector_type_t<uint64_t, N> addrs,
119115
__SEIEED::vector_type_t<Ty, N * __SEIEED::ElemsPerAddrDecoding(NumBlk)>
@@ -144,8 +140,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter(
144140
#endif // __SYCL_DEVICE_ONLY__
145141

146142
// flat_block_read reads a block of data from one flat address
147-
template <typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
148-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
143+
template <typename Ty, int N>
149144
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
150145
__esimd_svm_block_ld_unaligned(uint64_t addr)
151146
#ifdef __SYCL_DEVICE_ONLY__
@@ -181,8 +176,7 @@ __esimd_svm_block_ld(uint64_t addr)
181176
#endif // __SYCL_DEVICE_ONLY__
182177

183178
// flat_block_write writes a block of data using one flat address
184-
template <typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
185-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
179+
template <typename Ty, int N>
186180
__ESIMD_INTRIN void __esimd_svm_block_st(uint64_t addr,
187181
__SEIEED::vector_type_t<Ty, N> vals)
188182
#ifdef __SYCL_DEVICE_ONLY__
@@ -221,9 +215,7 @@ __ESIMD_INTRIN void __esimd_oword_st(SurfIndAliasTy surf_ind, uint32_t offset,
221215
#endif // __SYCL_DEVICE_ONLY__
222216

223217
// flat_read4 does flat-address gather4
224-
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask,
225-
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
226-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
218+
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask>
227219
__SEIEED::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> __ESIMD_INTRIN
228220
__esimd_svm_gather4_scaled(__SEIEED::vector_type_t<uint64_t, N> addrs,
229221
__SEIEED::simd_mask_storage_t<N> pred = 1)
@@ -276,9 +268,7 @@ __esimd_svm_gather4_scaled(__SEIEED::vector_type_t<uint64_t, N> addrs,
276268
#endif // __SYCL_DEVICE_ONLY__
277269

278270
// flat_write4 does flat-address scatter4
279-
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask,
280-
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
281-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
271+
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask>
282272
__ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
283273
__SEIEED::vector_type_t<uint64_t, N> addrs,
284274
__SEIEED::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals,
@@ -351,8 +341,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
351341
// @param elem_offsets - per-element offsets
352342
//
353343
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
354-
int16_t Scale = 0, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
355-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
344+
int16_t Scale = 0>
356345
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
357346
__esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
358347
__SEIEED::vector_type_t<uint32_t, N> elem_offsets)
@@ -392,8 +381,7 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
392381
// @param vals - values to write
393382
//
394383
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
395-
int16_t Scale = 0, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
396-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
384+
int16_t Scale = 0>
397385
__ESIMD_INTRIN void
398386
__esimd_scatter_scaled(__SEIEED::simd_mask_storage_t<N> pred,
399387
SurfIndAliasTy surf_ind, uint32_t global_offset,
@@ -411,9 +399,7 @@ __esimd_scatter_scaled(__SEIEED::simd_mask_storage_t<N> pred,
411399
#endif // __SYCL_DEVICE_ONLY__
412400

413401
// flat_atomic: flat-address atomic
414-
template <__SEIEE::atomic_op Op, typename Ty, int N,
415-
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
416-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
402+
template <__SEIEE::atomic_op Op, typename Ty, int N>
417403
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
418404
__esimd_svm_atomic0(__SEIEED::vector_type_t<uint64_t, N> addrs,
419405
__SEIEED::simd_mask_storage_t<N> pred)
@@ -425,9 +411,7 @@ __esimd_svm_atomic0(__SEIEED::vector_type_t<uint64_t, N> addrs,
425411
}
426412
#endif // __SYCL_DEVICE_ONLY__
427413

428-
template <__SEIEE::atomic_op Op, typename Ty, int N,
429-
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
430-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
414+
template <__SEIEE::atomic_op Op, typename Ty, int N>
431415
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
432416
__esimd_svm_atomic1(__SEIEED::vector_type_t<uint64_t, N> addrs,
433417
__SEIEED::vector_type_t<Ty, N> src0,
@@ -440,9 +424,7 @@ __esimd_svm_atomic1(__SEIEED::vector_type_t<uint64_t, N> addrs,
440424
}
441425
#endif // __SYCL_DEVICE_ONLY__
442426

443-
template <__SEIEE::atomic_op Op, typename Ty, int N,
444-
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
445-
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
427+
template <__SEIEE::atomic_op Op, typename Ty, int N>
446428
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
447429
__esimd_svm_atomic2(__SEIEED::vector_type_t<uint64_t, N> addrs,
448430
__SEIEED::vector_type_t<Ty, N> src0,

sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -648,9 +648,7 @@ void simd_obj_impl<T, N, T1, SFINAE>::copy_from(const T *Addr)
648648
"block size must be at most 8 owords");
649649

650650
uintptr_t AddrVal = reinterpret_cast<uintptr_t>(Addr);
651-
*this =
652-
__esimd_svm_block_ld_unaligned<T, N, CacheHint::None, CacheHint::None>(
653-
AddrVal);
651+
*this = __esimd_svm_block_ld_unaligned<T, N>(AddrVal);
654652
}
655653

656654
template <typename T, int N, class T1, class SFINAE>
@@ -691,7 +689,7 @@ void simd_obj_impl<T, N, T1, SFINAE>::copy_to(T *addr) const
691689
"block size must be at most 8 owords");
692690

693691
uintptr_t AddrVal = reinterpret_cast<uintptr_t>(addr);
694-
__esimd_svm_block_st<T, N, CacheHint::None, CacheHint::None>(AddrVal, data());
692+
__esimd_svm_block_st<T, N>(AddrVal, data());
695693
}
696694

697695
template <typename T, int N, class T1, class SFINAE>

0 commit comments

Comments
 (0)