Skip to content

Commit 06b0d43

Browse files
authored
[SYCL] Revert "Optimize setNDRangeDescriptor functions (#18132)" (#18326)
The performance gain is not significant enough to justify the change.

Signed-off-by: Ptak, Slawomir <slawomir.ptak@intel.com>
1 parent d32060a commit 06b0d43

File tree

2 files changed

+31
-25
lines changed

2 files changed

+31
-25
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3684,13 +3684,11 @@ class __SYCL_EXPORT handler {
36843684
bool HasAssociatedAccessor(detail::AccessorImplHost *Req,
36853685
access::target AccessTarget) const;
36863686

3687-
template <int Dims>
3688-
static sycl::range<3> padRange(sycl::range<Dims> Range,
3689-
[[maybe_unused]] size_t DefaultValue = 0) {
3687+
template <int Dims> static sycl::range<3> padRange(sycl::range<Dims> Range) {
36903688
if constexpr (Dims == 3) {
36913689
return Range;
36923690
} else {
3693-
sycl::range<3> Res{DefaultValue, DefaultValue, DefaultValue};
3691+
sycl::range<3> Res{0, 0, 0};
36943692
for (int I = 0; I < Dims; ++I)
36953693
Res[I] = Range[I];
36963694
return Res;
@@ -3711,8 +3709,7 @@ class __SYCL_EXPORT handler {
37113709
template <int Dims>
37123710
void setNDRangeDescriptor(sycl::range<Dims> N,
37133711
bool SetNumWorkGroups = false) {
3714-
return setNDRangeDescriptorPadded(padRange(N, SetNumWorkGroups ? 0 : 1),
3715-
SetNumWorkGroups, Dims);
3712+
return setNDRangeDescriptorPadded(padRange(N), SetNumWorkGroups, Dims);
37163713
}
37173714
template <int Dims>
37183715
void setNDRangeDescriptor(sycl::range<Dims> NumWorkItems,
@@ -3722,10 +3719,9 @@ class __SYCL_EXPORT handler {
37223719
}
37233720
template <int Dims>
37243721
void setNDRangeDescriptor(sycl::nd_range<Dims> ExecutionRange) {
3725-
sycl::range<Dims> LocalRange = ExecutionRange.get_local_range();
37263722
return setNDRangeDescriptorPadded(
3727-
padRange(ExecutionRange.get_global_range(), 1),
3728-
padRange(LocalRange, LocalRange[0] ? 1 : 0),
3723+
padRange(ExecutionRange.get_global_range()),
3724+
padRange(ExecutionRange.get_local_range()),
37293725
padId(ExecutionRange.get_offset()), Dims);
37303726
}
37313727

sycl/source/detail/cg.hpp

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,22 @@ class ArgDesc {
6363
// The structure represents NDRange - global, local sizes, global offset and
6464
// number of dimensions.
6565
class NDRDescT {
66-
template <int Dims>
67-
static sycl::range<3> padRange(sycl::range<Dims> Range,
68-
[[maybe_unused]] size_t DefaultValue = 0) {
66+
// The method initializes all sizes for dimensions greater than the passed one
67+
// to the default values, so they will not affect execution.
68+
void setNDRangeLeftover() {
69+
for (int I = Dims; I < 3; ++I) {
70+
GlobalSize[I] = 1;
71+
LocalSize[I] = LocalSize[0] ? 1 : 0;
72+
GlobalOffset[I] = 0;
73+
NumWorkGroups[I] = 0;
74+
}
75+
}
76+
77+
template <int Dims> static sycl::range<3> padRange(sycl::range<Dims> Range) {
6978
if constexpr (Dims == 3) {
7079
return Range;
7180
} else {
72-
sycl::range<3> Res{DefaultValue, DefaultValue, DefaultValue};
81+
sycl::range<3> Res{0, 0, 0};
7382
for (int I = 0; I < Dims; ++I)
7483
Res[I] = Range[I];
7584
return Res;
@@ -93,36 +102,37 @@ class NDRDescT {
93102
NDRDescT(NDRDescT &&Desc) = default;
94103

95104
NDRDescT(sycl::range<3> N, bool SetNumWorkGroups, int DimsArg)
96-
: Dims{size_t(DimsArg)} {
97-
if (SetNumWorkGroups) {
98-
NumWorkGroups = N;
99-
} else {
100-
GlobalSize = N;
101-
}
105+
: GlobalSize{SetNumWorkGroups ? sycl::range<3>{0, 0, 0} : N},
106+
NumWorkGroups{SetNumWorkGroups ? N : sycl::range<3>{0, 0, 0}},
107+
Dims{size_t(DimsArg)} {
108+
setNDRangeLeftover();
102109
}
103110

104111
NDRDescT(sycl::range<3> NumWorkItems, sycl::range<3> LocalSize,
105112
sycl::id<3> Offset, int DimsArg)
106113
: GlobalSize{NumWorkItems}, LocalSize{LocalSize}, GlobalOffset{Offset},
107-
Dims{size_t(DimsArg)} {}
114+
Dims{size_t(DimsArg)} {
115+
setNDRangeLeftover();
116+
}
108117

109118
NDRDescT(sycl::range<3> NumWorkItems, sycl::id<3> Offset, int DimsArg)
110119
: GlobalSize{NumWorkItems}, GlobalOffset{Offset}, Dims{size_t(DimsArg)} {}
111120

112121
template <int Dims_>
113122
NDRDescT(sycl::nd_range<Dims_> ExecutionRange, int DimsArg)
114-
: NDRDescT(padRange(ExecutionRange.get_global_range(), 1),
115-
padRange(ExecutionRange.get_local_range(),
116-
ExecutionRange.get_local_range()[0] ? 1 : 0),
117-
padId(ExecutionRange.get_offset()), size_t(DimsArg)) {}
123+
: NDRDescT(padRange(ExecutionRange.get_global_range()),
124+
padRange(ExecutionRange.get_local_range()),
125+
padId(ExecutionRange.get_offset()), size_t(DimsArg)) {
126+
setNDRangeLeftover();
127+
}
118128

119129
template <int Dims_>
120130
NDRDescT(sycl::nd_range<Dims_> ExecutionRange)
121131
: NDRDescT(ExecutionRange, Dims_) {}
122132

123133
template <int Dims_>
124134
NDRDescT(sycl::range<Dims_> Range)
125-
: NDRDescT(padRange(Range, 1), /*SetNumWorkGroups=*/false, Dims_) {}
135+
: NDRDescT(padRange(Range), /*SetNumWorkGroups=*/false, Dims_) {}
126136

127137
void setClusterDimensions(sycl::range<3> N, int Dims) {
128138
if (this->Dims != size_t(Dims)) {

0 commit comments

Comments
 (0)