@@ -86,7 +86,8 @@ using IsReduOptForFastAtomicFetch =
86
86
#ifdef SYCL_REDUCTION_DETERMINISTIC
87
87
bool_constant<false >;
88
88
#else
89
- bool_constant<sycl::detail::is_sgeninteger<T>::value &&
89
+ bool_constant<((sycl::detail::is_sgenfloat<T>::value && sizeof (T) == 4 ) ||
90
+ sycl::detail::is_sgeninteger<T>::value) &&
90
91
sycl::detail::IsValidAtomicType<T>::value &&
91
92
(sycl::detail::IsPlus<T, BinaryOperation>::value ||
92
93
sycl::detail::IsMinimum<T, BinaryOperation>::value ||
@@ -104,18 +105,15 @@ using IsReduOptForFastAtomicFetch =
104
105
// IsReduOptForFastReduce. The macro SYCL_REDUCTION_DETERMINISTIC prohibits
105
106
// using the reduce_over_group() algorithm to produce stable results across same
106
107
// type devices.
107
- // TODO 32 bit floating point atomics are eventually expected to be supported by
108
- // the has_fast_atomics specialization. Once the reducer class is updated to
109
- // replace the deprecated atomic class with atomic_ref, the (sizeof(T) == 4)
110
- // case should be removed here and replaced in IsReduOptForFastAtomicFetch.
111
108
template <typename T, class BinaryOperation >
112
- using IsReduOptForAtomic64Add =
109
+ using IsReduOptForAtomic64Op =
113
110
#ifdef SYCL_REDUCTION_DETERMINISTIC
114
111
bool_constant<false >;
115
112
#else
116
- bool_constant<sycl::detail::IsPlus<T, BinaryOperation>::value &&
117
- sycl::detail::is_sgenfloat<T>::value &&
118
- (sizeof (T) == 4 || sizeof (T) == 8 )>;
113
+ bool_constant<(sycl::detail::IsPlus<T, BinaryOperation>::value ||
114
+ sycl::detail::IsMinimum<T, BinaryOperation>::value ||
115
+ sycl::detail::IsMaximum<T, BinaryOperation>::value) &&
116
+ sycl::detail::is_sgenfloat<T>::value && sizeof (T) == 8 >;
119
117
#endif
120
118
121
119
// This type trait is used to detect if the group algorithm reduce() used with
@@ -278,7 +276,7 @@ template <class Reducer> class combiner {
278
276
typename _T = T, class _BinaryOperation = BinaryOperation>
279
277
enable_if_t <BasicCheck<_T, Space, _BinaryOperation> &&
280
278
(IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
281
- IsReduOptForAtomic64Add <T, _BinaryOperation>::value) &&
279
+ IsReduOptForAtomic64Op <T, _BinaryOperation>::value) &&
282
280
sycl::detail::IsPlus<T, _BinaryOperation>::value>
283
281
atomic_combine (_T *ReduVarPtr) const {
284
282
atomic_combine_impl<Space>(
@@ -324,7 +322,8 @@ template <class Reducer> class combiner {
324
322
template <access::address_space Space = access::address_space::global_space,
325
323
typename _T = T, class _BinaryOperation = BinaryOperation>
326
324
enable_if_t <BasicCheck<_T, Space, _BinaryOperation> &&
327
- IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
325
+ (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
326
+ IsReduOptForAtomic64Op<T, _BinaryOperation>::value) &&
328
327
sycl::detail::IsMinimum<T, _BinaryOperation>::value>
329
328
atomic_combine (_T *ReduVarPtr) const {
330
329
atomic_combine_impl<Space>(
@@ -335,7 +334,8 @@ template <class Reducer> class combiner {
335
334
template <access::address_space Space = access::address_space::global_space,
336
335
typename _T = T, class _BinaryOperation = BinaryOperation>
337
336
enable_if_t <BasicCheck<_T, Space, _BinaryOperation> &&
338
- IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
337
+ (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
338
+ IsReduOptForAtomic64Op<T, _BinaryOperation>::value) &&
339
339
sycl::detail::IsMaximum<T, _BinaryOperation>::value>
340
340
atomic_combine (_T *ReduVarPtr) const {
341
341
atomic_combine_impl<Space>(
@@ -591,8 +591,8 @@ class reduction_impl_algo : public reduction_impl_common<T, BinaryOperation> {
591
591
using rw_accessor_type = accessor<T, accessor_dim, access::mode::read_write,
592
592
access::target::device, is_placeholder,
593
593
ext::oneapi::accessor_property_list<>>;
594
- static constexpr bool has_atomic_add_float64 =
595
- IsReduOptForAtomic64Add <T, BinaryOperation>::value;
594
+ static constexpr bool has_float64_atomics =
595
+ IsReduOptForAtomic64Op <T, BinaryOperation>::value;
596
596
static constexpr bool has_fast_atomics =
597
597
IsReduOptForFastAtomicFetch<T, BinaryOperation>::value;
598
598
static constexpr bool has_fast_reduce =
@@ -678,7 +678,7 @@ class reduction_impl_algo : public reduction_impl_common<T, BinaryOperation> {
678
678
// / require initialization with identity value, then return user's read-write
679
679
// / accessor. Otherwise, create global buffer with 'num_elements' initialized
680
680
// / with identity value and return an accessor to that buffer.
681
- template <bool HasFastAtomics = (has_fast_atomics || has_atomic_add_float64 )>
681
+ template <bool HasFastAtomics = (has_fast_atomics || has_float64_atomics )>
682
682
std::enable_if_t <HasFastAtomics, rw_accessor_type>
683
683
getReadWriteAccessorToInitializedMem (handler &CGH) {
684
684
if constexpr (is_rw_acc) {
@@ -2093,18 +2093,15 @@ template <class KernelName> struct NDRangeAtomic64;
2093
2093
} // namespace main_krn
2094
2094
} // namespace reduction
2095
2095
2096
- // Specialization for devices with the atomic64 aspect, which guarantees 64 (and
2097
- // temporarily 32) bit floating point support for atomic add.
2098
- // TODO 32 bit floating point atomics are eventually expected to be supported by
2099
- // the has_fast_atomics specialization. Corresponding changes to
2100
- // IsReduOptForAtomic64Add, as prescribed in its documentation, should then also
2101
- // be made.
2096
+ // Specialization for devices with the atomic64 aspect, which guarantees 64 bit
2097
+ // floating point support for atomic reduction operation.
2102
2098
template <typename KernelName, typename KernelType, int Dims, class Reduction >
2103
2099
void reduCGFuncAtomic64 (handler &CGH, KernelType KernelFunc,
2104
2100
const nd_range<Dims> &Range, Reduction &Redu) {
2105
2101
auto Out = Redu.getReadWriteAccessorToInitializedMem (CGH);
2106
- static_assert (Reduction::has_atomic_add_float64,
2107
- " Only suitable for reductions that have FP64 atomic add." );
2102
+ static_assert (
2103
+ Reduction::has_float64_atomics,
2104
+ " Only suitable for reductions that have FP64 atomic operations." );
2108
2105
constexpr size_t NElements = Reduction::num_elements;
2109
2106
using Name =
2110
2107
__sycl_reduction_kernel<reduction::main_krn::NDRangeAtomic64, KernelName>;
0 commit comments