@@ -192,57 +192,52 @@ struct ReducerTraits<reducer<T, BinaryOperation, Dims, Extent, View, Subst>> {
 /// Also, for int32/64 types the atomic_combine() is lowered to
 /// sycl::atomic::fetch_add().
 template <class Reducer> class combiner {
-  using T = typename ReducerTraits<Reducer>::type;
-  using BinaryOperation = typename ReducerTraits<Reducer>::op;
+  using Ty = typename ReducerTraits<Reducer>::type;
+  using BinaryOp = typename ReducerTraits<Reducer>::op;
   static constexpr int Dims = ReducerTraits<Reducer>::dims;
   static constexpr size_t Extent = ReducerTraits<Reducer>::extent;

 public:
-  template <typename _T = T, int _Dims = Dims>
-  enable_if_t<(_Dims == 0) &&
-              sycl::detail::IsPlus<_T, BinaryOperation>::value &&
+  template <typename _T = Ty, int _Dims = Dims>
+  enable_if_t<(_Dims == 0) && sycl::detail::IsPlus<_T, BinaryOp>::value &&
               sycl::detail::is_geninteger<_T>::value>
   operator++() {
-    static_cast<Reducer *>(this)->combine(static_cast<T>(1));
+    static_cast<Reducer *>(this)->combine(static_cast<_T>(1));
   }

-  template <typename _T = T, int _Dims = Dims>
-  enable_if_t<(_Dims == 0) &&
-              sycl::detail::IsPlus<_T, BinaryOperation>::value &&
+  template <typename _T = Ty, int _Dims = Dims>
+  enable_if_t<(_Dims == 0) && sycl::detail::IsPlus<_T, BinaryOp>::value &&
               sycl::detail::is_geninteger<_T>::value>
   operator++(int) {
-    static_cast<Reducer *>(this)->combine(static_cast<T>(1));
+    static_cast<Reducer *>(this)->combine(static_cast<_T>(1));
   }

-  template <typename _T = T, int _Dims = Dims>
-  enable_if_t<(_Dims == 0) && sycl::detail::IsPlus<_T, BinaryOperation>::value>
+  template <typename _T = Ty, int _Dims = Dims>
+  enable_if_t<(_Dims == 0) && sycl::detail::IsPlus<_T, BinaryOp>::value>
   operator+=(const _T &Partial) {
     static_cast<Reducer *>(this)->combine(Partial);
   }

-  template <typename _T = T, int _Dims = Dims>
-  enable_if_t<(_Dims == 0) &&
-              sycl::detail::IsMultiplies<_T, BinaryOperation>::value>
+  template <typename _T = Ty, int _Dims = Dims>
+  enable_if_t<(_Dims == 0) && sycl::detail::IsMultiplies<_T, BinaryOp>::value>
   operator*=(const _T &Partial) {
     static_cast<Reducer *>(this)->combine(Partial);
   }

-  template <typename _T = T, int _Dims = Dims>
-  enable_if_t<(_Dims == 0) && sycl::detail::IsBitOR<_T, BinaryOperation>::value>
+  template <typename _T = Ty, int _Dims = Dims>
+  enable_if_t<(_Dims == 0) && sycl::detail::IsBitOR<_T, BinaryOp>::value>
   operator|=(const _T &Partial) {
     static_cast<Reducer *>(this)->combine(Partial);
   }

-  template <typename _T = T, int _Dims = Dims>
-  enable_if_t<(_Dims == 0) &&
-              sycl::detail::IsBitXOR<_T, BinaryOperation>::value>
+  template <typename _T = Ty, int _Dims = Dims>
+  enable_if_t<(_Dims == 0) && sycl::detail::IsBitXOR<_T, BinaryOp>::value>
   operator^=(const _T &Partial) {
     static_cast<Reducer *>(this)->combine(Partial);
   }

-  template <typename _T = T, int _Dims = Dims>
-  enable_if_t<(_Dims == 0) &&
-              sycl::detail::IsBitAND<_T, BinaryOperation>::value>
+  template <typename _T = Ty, int _Dims = Dims>
+  enable_if_t<(_Dims == 0) && sycl::detail::IsBitAND<_T, BinaryOp>::value>
   operator&=(const _T &Partial) {
     static_cast<Reducer *>(this)->combine(Partial);
   }
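None of the operators above touch reduction state themselves: each one downcasts `this` to the concrete `Reducer` and forwards to its `combine()`. A minimal standalone sketch of that CRTP dispatch (simplified names, not the actual sycl::detail code):

```cpp
#include <type_traits>

template <class Reducer> struct combiner {
  // operator+= forwards to the derived reducer's combine(), as in the patch.
  template <typename T>
  std::enable_if_t<std::is_arithmetic<T>::value> operator+=(const T &Partial) {
    static_cast<Reducer *>(this)->combine(Partial);
  }
};

struct SumReducer : combiner<SumReducer> {
  int MValue = 0;
  void combine(int Partial) { MValue += Partial; }
};

int main() {
  SumReducer R;
  R += 5; // dispatches through combiner into SumReducer::combine
  return R.MValue == 5 ? 0 : 1;
}
```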
@@ -266,53 +261,53 @@ template <class Reducer> class combiner {
     }
   }

-  template <class _T, access::address_space Space, class BinaryOperation>
+  template <class _T, access::address_space Space, class BinaryOp>
   static constexpr bool BasicCheck =
-      std::is_same<typename remove_AS<_T>::type, T>::value &&
+      std::is_same<typename remove_AS<_T>::type, Ty>::value &&
       (Space == access::address_space::global_space ||
       Space == access::address_space::local_space);

 public:
   /// Atomic ADD operation: *ReduVarPtr += MValue;
   template <access::address_space Space = access::address_space::global_space,
-            typename _T = T, class _BinaryOperation = BinaryOperation>
+            typename _T = Ty, class _BinaryOperation = BinaryOp>
   enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
-              (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
-               IsReduOptForAtomic64Op<T, _BinaryOperation>::value) &&
-              sycl::detail::IsPlus<T, _BinaryOperation>::value>
+              (IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value ||
+               IsReduOptForAtomic64Op<_T, _BinaryOperation>::value) &&
+              sycl::detail::IsPlus<_T, _BinaryOperation>::value>
   atomic_combine(_T *ReduVarPtr) const {
     atomic_combine_impl<Space>(
        ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_add(Val); });
   }

   /// Atomic BITWISE OR operation: *ReduVarPtr |= MValue;
   template <access::address_space Space = access::address_space::global_space,
-            typename _T = T, class _BinaryOperation = BinaryOperation>
+            typename _T = Ty, class _BinaryOperation = BinaryOp>
   enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
-              IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsBitOR<T, _BinaryOperation>::value>
+              IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value &&
+              sycl::detail::IsBitOR<_T, _BinaryOperation>::value>
   atomic_combine(_T *ReduVarPtr) const {
     atomic_combine_impl<Space>(
        ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_or(Val); });
   }

   /// Atomic BITWISE XOR operation: *ReduVarPtr ^= MValue;
   template <access::address_space Space = access::address_space::global_space,
-            typename _T = T, class _BinaryOperation = BinaryOperation>
+            typename _T = Ty, class _BinaryOperation = BinaryOp>
   enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
-              IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsBitXOR<T, _BinaryOperation>::value>
+              IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value &&
+              sycl::detail::IsBitXOR<_T, _BinaryOperation>::value>
   atomic_combine(_T *ReduVarPtr) const {
     atomic_combine_impl<Space>(
        ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_xor(Val); });
   }

   /// Atomic BITWISE AND operation: *ReduVarPtr &= MValue;
   template <access::address_space Space = access::address_space::global_space,
-            typename _T = T, class _BinaryOperation = BinaryOperation>
-  enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
-              IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsBitAND<T, _BinaryOperation>::value &&
+            typename _T = Ty, class _BinaryOperation = BinaryOp>
+  enable_if_t<std::is_same<typename remove_AS<_T>::type, _T>::value &&
+              IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value &&
+              sycl::detail::IsBitAND<_T, _BinaryOperation>::value &&
               (Space == access::address_space::global_space ||
               Space == access::address_space::local_space)>
   atomic_combine(_T *ReduVarPtr) const {
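Each `atomic_combine` overload lowers the combine to a hardware fetch operation through a small lambda over an atomic reference. A rough free-function analogue of the ADD case, written against SYCL 2020 `sycl::atomic_ref` (an assumption for illustration; the patch's `atomic_combine_impl` body is not shown in these hunks):

```cpp
#include <sycl/sycl.hpp>

// Hypothetical analogue of the fetch_add lowering for a global-memory
// reduction variable. For int32/int64 this maps to a native atomic;
// intended to be called from device code.
template <typename T>
void atomic_add_global(T *ReduVarPtr, T Val) {
  sycl::atomic_ref<T, sycl::memory_order::relaxed, sycl::memory_scope::device,
                   sycl::access::address_space::global_space>
      Ref(*ReduVarPtr);
  Ref.fetch_add(Val);
}
```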
@@ -322,23 +317,23 @@ template <class Reducer> class combiner {

   /// Atomic MIN operation: *ReduVarPtr = sycl::minimum(*ReduVarPtr, MValue);
   template <access::address_space Space = access::address_space::global_space,
-            typename _T = T, class _BinaryOperation = BinaryOperation>
+            typename _T = Ty, class _BinaryOperation = BinaryOp>
   enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
-              (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
-               IsReduOptForAtomic64Op<T, _BinaryOperation>::value) &&
-              sycl::detail::IsMinimum<T, _BinaryOperation>::value>
+              (IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value ||
+               IsReduOptForAtomic64Op<_T, _BinaryOperation>::value) &&
+              sycl::detail::IsMinimum<_T, _BinaryOperation>::value>
   atomic_combine(_T *ReduVarPtr) const {
     atomic_combine_impl<Space>(
        ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_min(Val); });
   }

   /// Atomic MAX operation: *ReduVarPtr = sycl::maximum(*ReduVarPtr, MValue);
   template <access::address_space Space = access::address_space::global_space,
-            typename _T = T, class _BinaryOperation = BinaryOperation>
+            typename _T = Ty, class _BinaryOperation = BinaryOp>
   enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
-              (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
-               IsReduOptForAtomic64Op<T, _BinaryOperation>::value) &&
-              sycl::detail::IsMaximum<T, _BinaryOperation>::value>
+              (IsReduOptForFastAtomicFetch<_T, _BinaryOperation>::value ||
+               IsReduOptForAtomic64Op<_T, _BinaryOperation>::value) &&
+              sycl::detail::IsMaximum<_T, _BinaryOperation>::value>
   atomic_combine(_T *ReduVarPtr) const {
     atomic_combine_impl<Space>(
        ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_max(Val); });
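A pattern running through all of these overloads: each member template re-exposes the class-level types as defaulted parameters (`typename _T = Ty, class _BinaryOperation = BinaryOp`) and, after this patch, writes its `enable_if_t` condition uniformly against those. Keeping the condition dependent on the member template's own parameters is what lets a failing check drop the overload by SFINAE at the call site instead of hard-erroring when the enclosing class is instantiated. A minimal illustration of the idiom (not from the patch):

```cpp
#include <type_traits>

template <class T> struct Good {
  // Condition depends on the member template's own parameter _T, so a
  // failing check is a substitution failure at the call site (SFINAE).
  template <typename _T = T>
  std::enable_if_t<std::is_integral<_T>::value, _T> half(_T V) {
    return V / 2;
  }
};

template <class T> struct Bad {
  // Condition depends only on the class parameter: merely instantiating
  // Bad<double> is ill-formed, whether or not half() is ever called.
  template <typename U>
  std::enable_if_t<std::is_integral<T>::value, T> half(U V) {
    return V / 2;
  }
};

int main() {
  Good<double> G; // fine: half() simply has no viable overload here
  (void)G;
  // Bad<double> B; // hard error if uncommented
  return Good<int>{}.half(42) == 21 ? 0 : 1;
}
```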
@@ -928,7 +923,7 @@ bool reduCGFuncForRangeFastAtomics(handler &CGH, KernelType KernelFunc,
                                    const range<Dims> &Range,
                                    const nd_range<1> &NDRange,
                                    Reduction &Redu) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   auto Out = Redu.getReadWriteAccessorToInitializedMem(CGH);
   auto GroupSum = Reduction::getReadWriteLocalAcc(NElements, CGH);
   using Name = __sycl_reduction_kernel<reduction::main_krn::RangeFastAtomics,
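From here down, every hunk makes the same one-line change: `NElements` drops its `constexpr`. The diff gives no rationale; a plausible one (an assumption, not stated in the commit) is lambda capture: a `constexpr` local that is only read as a constant expression is not odr-used and therefore not captured by the `[=]` kernel lambda, whereas an ordinary variable is captured by value and reliably travels with the kernel. Plain C++ shows the capture difference:

```cpp
// Hypothetical motivation sketch: the constexpr local is not captured,
// the ordinary local is.
#include <cstddef>
#include <cstdio>

int main() {
  constexpr std::size_t C = 8;
  std::size_t V = 8;
  auto OnlyConst = [=] { return C; }; // C not odr-used: nothing is captured
  auto CapturesV = [=] { return V; }; // V is captured by value
  // Typically prints "1 8": the first closure is empty.
  std::printf("%zu %zu\n", sizeof(OnlyConst), sizeof(CapturesV));
}
```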
@@ -976,7 +971,7 @@ template <typename KernelName, typename KernelType, int Dims, class Reduction>
 bool reduCGFuncForRangeFastReduce(handler &CGH, KernelType KernelFunc,
                                   const range<Dims> &Range,
                                   const nd_range<1> &NDRange, Reduction &Redu) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   size_t WGSize = NDRange.get_local_range().size();
   size_t NWorkGroups = NDRange.get_group_range().size();
@@ -1078,7 +1073,7 @@ template <typename KernelName, typename KernelType, int Dims, class Reduction>
 bool reduCGFuncForRangeBasic(handler &CGH, KernelType KernelFunc,
                              const range<Dims> &Range,
                              const nd_range<1> &NDRange, Reduction &Redu) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   size_t WGSize = NDRange.get_local_range().size();
   size_t NWorkGroups = NDRange.get_group_range().size();
@@ -1230,7 +1225,7 @@ template <typename KernelName, typename KernelType, int Dims, class Reduction>
 void reduCGFuncForNDRangeBothFastReduceAndAtomics(
     handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
     Reduction &, typename Reduction::rw_accessor_type Out) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   using Name = __sycl_reduction_kernel<
       reduction::main_krn::NDRangeBothFastReduceAndAtomics, KernelName>;
   CGH.parallel_for<Name>(Range, [=](nd_item<Dims> NDIt) {
@@ -1266,7 +1261,7 @@ void reduCGFuncForNDRangeFastAtomicsOnly(
     handler &CGH, bool IsPow2WG, KernelType KernelFunc,
     const nd_range<Dims> &Range, Reduction &,
     typename Reduction::rw_accessor_type Out) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   size_t WGSize = Range.get_local_range().size();

   // Use local memory to reduce elements in work-groups into zero-th element.
@@ -1345,7 +1340,7 @@ template <typename KernelName, typename KernelType, int Dims, class Reduction>
 void reduCGFuncForNDRangeFastReduceOnly(
     handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
     Reduction &Redu, typename Reduction::rw_accessor_type Out) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   size_t NWorkGroups = Range.get_group_range().size();
   bool IsUpdateOfUserVar =
       !Reduction::is_usm && !Redu.initializeToIdentity() && NWorkGroups == 1;
@@ -1392,7 +1387,7 @@ void reduCGFuncForNDRangeBasic(handler &CGH, bool IsPow2WG,
                               KernelType KernelFunc,
                               const nd_range<Dims> &Range, Reduction &Redu,
                               typename Reduction::rw_accessor_type Out) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   size_t WGSize = Range.get_local_range().size();
   size_t NWorkGroups = Range.get_group_range().size();
@@ -1477,7 +1472,7 @@ void reduAuxCGFuncFastReduceImpl(handler &CGH, bool UniformWG,
                                  size_t NWorkItems, size_t NWorkGroups,
                                  size_t WGSize, Reduction &Redu, InputT In,
                                  OutputT Out) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   using Name =
       __sycl_reduction_kernel<reduction::aux_krn::FastReduce, KernelName>;
   bool IsUpdateOfUserVar =
@@ -1523,7 +1518,7 @@ void reduAuxCGFuncNoFastReduceNorAtomicImpl(handler &CGH, bool UniformPow2WG,
                                             size_t NWorkGroups, size_t WGSize,
                                             Reduction &Redu, InputT In,
                                             OutputT Out) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   bool IsUpdateOfUserVar =
       !Reduction::is_usm && !Redu.initializeToIdentity() && NWorkGroups == 1;

@@ -1642,7 +1637,7 @@ reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu) {
 template <typename KernelName, class Reduction>
 std::enable_if_t<Reduction::is_usm>
 reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu) {
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   auto InAcc = Redu.getReadAccToPreviousPartialReds(CGH);
   auto UserVarPtr = Redu.getUserRedVar();
   bool IsUpdateOfUserVar = !Redu.initializeToIdentity();
@@ -2120,7 +2115,7 @@ void reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc,
   static_assert(
       Reduction::has_float64_atomics,
       "Only suitable for reductions that have FP64 atomic operations.");
-  constexpr size_t NElements = Reduction::num_elements;
+  size_t NElements = Reduction::num_elements;
   using Name =
       __sycl_reduction_kernel<reduction::main_krn::NDRangeAtomic64, KernelName>;
   CGH.parallel_for<Name>(Range, [=](nd_item<Dims> NDIt) {
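For context, these `reduCGFunc*` kernels are the backends behind the user-facing reduction API. A short end-to-end usage sketch in SYCL 2020 style (the exact spelling may be `sycl::ONEAPI::reduction` in toolchains of this vintage; names here are illustrative):

```cpp
#include <sycl/sycl.hpp>

int main() {
  sycl::queue Q;
  int Sum = 0;
  {
    sycl::buffer<int> SumBuf(&Sum, 1);
    Q.submit([&](sycl::handler &CGH) {
      // The runtime picks one of the kernels above based on the operation,
      // the element type, and the device's atomic capabilities.
      auto Red = sycl::reduction(SumBuf, CGH, sycl::plus<int>());
      CGH.parallel_for(sycl::range<1>(1024), Red,
                       [=](sycl::id<1> I, auto &Reducer) {
                         Reducer += static_cast<int>(I[0]); // combiner op
                       });
    });
  } // buffer destructor copies the result back to Sum
  return Sum == 1024 * 1023 / 2 ? 0 : 1;
}
```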