@@ -1759,34 +1759,9 @@ class __SYCL_EXPORT handler {
1759
1759
// TODO: this variant is currently enabled for 2+ reductions only as the
1760
1760
// versions handling 1 reduction variable are more efficient right now.
1761
1761
//
1762
- // Algorithm:
1763
- // 1) discard_write accessor (DWAcc), InitializeToIdentity = true:
1764
- // a) Create uninitialized buffer and read_write accessor (RWAcc).
1765
- // b) discard-write partial sums to RWAcc.
1766
- // c) Repeat the steps (a) and (b) to get one final sum.
1767
- // d) Copy RWAcc to DWAcc.
1768
- // 2) read_write accessor (RWAcc), InitializeToIdentity = false:
1769
- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1770
- // re-use user's RWAcc (if #work-groups is 1).
1771
- // b) discard-write to RWAcc (#WG > 1), or update-write (#WG == 1).
1772
- // c) Repeat the steps (a) and (b) to get one final sum.
1773
- // 3) read_write accessor (RWAcc), InitializeToIdentity = true:
1774
- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1775
- // re-use user's RWAcc (if #work-groups is 1).
1776
- // b) discard-write to RWAcc.
1777
- // c) Repeat the steps (a) and (b) to get one final sum.
1778
- // 4) USM pointer, InitializeToIdentity = false:
1779
- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1780
- // re-use user's USM pointer (if #work-groups is 1).
1781
- // b) discard-write to RWAcc (#WG > 1) or
1782
- // update-write to USM pointer (#WG == 1).
1783
- // c) Repeat the steps (a) and (b) to get one final sum.
1784
- // 5) USM pointer, InitializeToIdentity = true:
1785
- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1786
- // re-use user's USM pointer (if #work-groups is 1).
1787
- // b) discard-write to RWAcc (#WG > 1) or
1788
- // discard-write to USM pointer (#WG == 1).
1789
- // c) Repeat the steps (a) and (b) to get one final sum.
1762
+ // This is basically a tree reduction where we re-use the user's reduction
1763
+ // variable instead of creating temporary storage for the last iteration
1764
+ // (the one where the number of work-groups, #WG, is 1).
1790
1765
template <typename KernelName = detail::auto_name, int Dims,
1791
1766
typename ... RestT>
1792
1767
std::enable_if_t <(sizeof ...(RestT) >= 3 &&
@@ -1823,11 +1798,6 @@ class __SYCL_EXPORT handler {
1823
1798
AuxHandler, NWorkItems, MaxWGSize, ReduTuple, ReduIndices);
1824
1799
});
1825
1800
} // end while (NWorkItems > 1)
1826
-
1827
- auto CopyEvent = detail::reduSaveFinalResultToUserMem (
1828
- QueueCopy, MIsHost, ReduTuple, ReduIndices);
1829
- if (CopyEvent)
1830
- MLastEvent = *CopyEvent;
1831
1801
}
1832
1802
#endif // __cplusplus >= 201703L
1833
1803
0 commit comments