Skip to content

Commit bc1b306

Browse files
[NFC][SYCL][Reduction] Move "Out" accessor down to the implementation (#7216)
Before this changes we had regCGFunc implement the following dispatch logic: ```` auto Out = [](){if constexpr (Cond) ... else ...}(); // ... if constexpr (cond) impl1(Out) else impl2(Out) else if constexpr (cond) impl3(Out) else impl4(Out) ```` which means that the `Out` is very tightly coupled with particular "impl" used. As such, it makes sense to sink it down to its usage.
1 parent fc78218 commit bc1b306

File tree

1 file changed

+23
-42
lines changed

1 file changed

+23
-42
lines changed

sycl/include/sycl/reduction.hpp

Lines changed: 23 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,12 +1131,13 @@ template <class KernelName> struct NDRangeBothFastReduceAndAtomics;
11311131
/// Briefly: calls user's lambda, reduce() + atomic, INT +
11321132
/// ADD/MIN/MAX.
11331133
template <typename KernelName, typename KernelType, int Dims,
1134-
typename PropertiesT, class Reduction, class AccTy>
1134+
typename PropertiesT, class Reduction>
11351135
void reduCGFuncForNDRangeBothFastReduceAndAtomics(handler &CGH,
11361136
KernelType KernelFunc,
11371137
const nd_range<Dims> &Range,
11381138
PropertiesT Properties,
1139-
Reduction &, AccTy Out) {
1139+
Reduction &Redu) {
1140+
auto Out = Redu.getReadWriteAccessorToInitializedMem(CGH);
11401141
size_t NElements = Reduction::num_elements;
11411142
using Name = __sycl_reduction_kernel<
11421143
reduction::main_krn::NDRangeBothFastReduceAndAtomics, KernelName>;
@@ -1169,14 +1170,15 @@ template <class KernelName> struct NDRangeFastAtomicsOnly;
11691170
///
11701171
/// Briefly: calls user's lambda, tree-reduction + atomic, INT + AND/OR/XOR.
11711172
template <typename KernelName, typename KernelType, int Dims,
1172-
typename PropertiesT, class Reduction, class AccTy>
1173-
void reduCGFuncForNDRangeFastAtomicsOnly(handler &CGH, bool IsPow2WG,
1174-
KernelType KernelFunc,
1173+
typename PropertiesT, class Reduction>
1174+
void reduCGFuncForNDRangeFastAtomicsOnly(handler &CGH, KernelType KernelFunc,
11751175
const nd_range<Dims> &Range,
1176-
PropertiesT Properties, Reduction &,
1177-
AccTy Out) {
1176+
PropertiesT Properties,
1177+
Reduction &Redu) {
1178+
auto Out = Redu.getReadWriteAccessorToInitializedMem(CGH);
11781179
size_t NElements = Reduction::num_elements;
11791180
size_t WGSize = Range.get_local_range().size();
1181+
bool IsPow2WG = (WGSize & (WGSize - 1)) == 0;
11801182

11811183
// Use local memory to reduce elements in work-groups into zero-th element.
11821184
// If WGSize is not power of two, then WGSize+1 elements are allocated.
@@ -1252,13 +1254,15 @@ template <class KernelName> struct NDRangeFastReduceOnly;
12521254
///
12531255
/// Briefly: user's lambda, reduce(), FP + ADD/MIN/MAX.
12541256
template <typename KernelName, typename KernelType, int Dims,
1255-
typename PropertiesT, class Reduction, class AccTy>
1257+
typename PropertiesT, class Reduction>
12561258
void reduCGFuncForNDRangeFastReduceOnly(handler &CGH, KernelType KernelFunc,
12571259
const nd_range<Dims> &Range,
1258-
PropertiesT Properties, Reduction &Redu,
1259-
AccTy Out) {
1260+
PropertiesT Properties,
1261+
Reduction &Redu) {
12601262
size_t NElements = Reduction::num_elements;
12611263
size_t NWorkGroups = Range.get_group_range().size();
1264+
auto Out = Redu.getWriteAccForPartialReds(NWorkGroups * NElements, CGH);
1265+
12621266
bool IsUpdateOfUserVar =
12631267
!Reduction::is_usm && !Redu.initializeToIdentity() && NWorkGroups == 1;
12641268

@@ -1300,15 +1304,15 @@ template <class KernelName> struct NDRangeBasic;
13001304
///
13011305
/// Briefly: user's lambda, tree-reduction, CUSTOM types/ops.
13021306
template <typename KernelName, typename KernelType, int Dims,
1303-
typename PropertiesT, class Reduction, class AccTy>
1304-
void reduCGFuncForNDRangeBasic(handler &CGH, bool IsPow2WG,
1305-
KernelType KernelFunc,
1307+
typename PropertiesT, class Reduction>
1308+
void reduCGFuncForNDRangeBasic(handler &CGH, KernelType KernelFunc,
13061309
const nd_range<Dims> &Range,
1307-
PropertiesT Properties, Reduction &Redu,
1308-
AccTy Out) {
1310+
PropertiesT Properties, Reduction &Redu) {
13091311
size_t NElements = Reduction::num_elements;
13101312
size_t WGSize = Range.get_local_range().size();
1313+
bool IsPow2WG = (WGSize & (WGSize - 1)) == 0;
13111314
size_t NWorkGroups = Range.get_group_range().size();
1315+
auto Out = Redu.getWriteAccForPartialReds(NWorkGroups * NElements, CGH);
13121316

13131317
bool IsUpdateOfUserVar =
13141318
!Reduction::is_usm && !Redu.initializeToIdentity() && NWorkGroups == 1;
@@ -2204,44 +2208,21 @@ template <typename KernelName, typename KernelType, int Dims,
22042208
void reduCGFunc(handler &CGH, KernelType KernelFunc,
22052209
const nd_range<Dims> &Range, PropertiesT Properties,
22062210
Reduction &Redu) {
2207-
size_t WGSize = Range.get_local_range().size();
2208-
auto Out = [&]() {
2209-
if constexpr (Reduction::has_fast_atomics) {
2210-
2211-
// User's initialized read-write accessor is re-used here if
2212-
// initialize_to_identity is not set (i.e. if user's variable is
2213-
// initialized). Otherwise, a new buffer is initialized with identity
2214-
// value and a new read-write accessor to that buffer is created. That is
2215-
// done because atomic operations update some initialized memory. User's
2216-
// USM pointer is not re-used even when initialize_to_identity is not set
2217-
// because it does not worth the creation of an additional variant of a
2218-
// user's kernel for that case.
2219-
return Redu.getReadWriteAccessorToInitializedMem(CGH);
2220-
2221-
} else {
2222-
constexpr size_t NElements = Reduction::num_elements;
2223-
size_t NWorkGroups = Range.get_group_range().size();
2224-
2225-
return Redu.getWriteAccForPartialReds(NWorkGroups * NElements, CGH);
2226-
}
2227-
}();
2228-
22292211
if constexpr (Reduction::has_fast_reduce) {
22302212
if constexpr (Reduction::has_fast_atomics) {
22312213
reduCGFuncForNDRangeBothFastReduceAndAtomics<KernelName, KernelType>(
2232-
CGH, KernelFunc, Range, Properties, Redu, Out);
2214+
CGH, KernelFunc, Range, Properties, Redu);
22332215
} else {
22342216
reduCGFuncForNDRangeFastReduceOnly<KernelName, KernelType>(
2235-
CGH, KernelFunc, Range, Properties, Redu, Out);
2217+
CGH, KernelFunc, Range, Properties, Redu);
22362218
}
22372219
} else {
2238-
bool IsPow2WG = (WGSize & (WGSize - 1)) == 0;
22392220
if constexpr (Reduction::has_fast_atomics) {
22402221
reduCGFuncForNDRangeFastAtomicsOnly<KernelName, KernelType>(
2241-
CGH, IsPow2WG, KernelFunc, Range, Properties, Redu, Out);
2222+
CGH, KernelFunc, Range, Properties, Redu);
22422223
} else {
22432224
reduCGFuncForNDRangeBasic<KernelName, KernelType>(
2244-
CGH, IsPow2WG, KernelFunc, Range, Properties, Redu, Out);
2225+
CGH, KernelFunc, Range, Properties, Redu);
22452226
}
22462227
}
22472228
}

0 commit comments

Comments
 (0)