@@ -1120,9 +1120,12 @@ class __SYCL_EXPORT handler {
1120
1120
};
1121
1121
1122
1122
std::optional<std::array<size_t , 3 >> getMaxWorkGroups ();
1123
+ // We need to use this version to support gcc 7.5.0. Remove when the minimum
1124
+ // supported gcc version is bumped.
1125
+ std::tuple<std::array<size_t , 3 >, bool > getMaxWorkGroups_v2 ();
1123
1126
1124
1127
template <int Dims>
1125
- std::optional <range<Dims>> getRoundedRange (range<Dims> UserRange) {
1128
+ std::tuple <range<Dims>, bool > getRoundedRange (range<Dims> UserRange) {
1126
1129
range<Dims> RoundedRange = UserRange;
1127
1130
// Disable the rounding-up optimizations under these conditions:
1128
1131
// 1. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set.
@@ -1142,7 +1145,7 @@ class __SYCL_EXPORT handler {
1142
1145
1143
1146
// Perform range rounding if rounding-up is enabled.
1144
1147
if (this ->DisableRangeRounding ())
1145
- return {};
1148
+ return {range<Dims>{}, false };
1146
1149
1147
1150
// Range should be a multiple of this for reasonable performance.
1148
1151
size_t MinFactorX = 16 ;
@@ -1166,8 +1169,8 @@ class __SYCL_EXPORT handler {
1166
1169
// kernel in a 32-bit global range.
1167
1170
auto Dev = detail::getSyclObjImpl (detail::getDeviceFromHandler (*this ));
1168
1171
id<Dims> MaxNWGs = [&] {
1169
- auto PiResult = getMaxWorkGroups ();
1170
- if (!PiResult. has_value () ) {
1172
+ auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2 ();
1173
+ if (!HasMaxWGs ) {
1171
1174
id<Dims> Default;
1172
1175
for (int i = 0 ; i < Dims; ++i)
1173
1176
Default[i] = (std::numeric_limits<int32_t >::max)();
@@ -1177,7 +1180,7 @@ class __SYCL_EXPORT handler {
1177
1180
id<Dims> IdResult;
1178
1181
size_t Limit = (std::numeric_limits<int >::max)();
1179
1182
for (int i = 0 ; i < Dims; ++i)
1180
- IdResult[i] = (std::min)(Limit, (*PiResult) [Dims - i - 1 ]);
1183
+ IdResult[i] = (std::min)(Limit, MaxWGs [Dims - i - 1 ]);
1181
1184
return IdResult;
1182
1185
}();
1183
1186
auto M = (std::numeric_limits<uint32_t >::max)();
@@ -1213,8 +1216,8 @@ class __SYCL_EXPORT handler {
1213
1216
Adjust (i, MaxRange[i]);
1214
1217
1215
1218
if (!DidAdjust)
1216
- return {};
1217
- return RoundedRange;
1219
+ return {range<Dims>{}, false };
1220
+ return { RoundedRange, true } ;
1218
1221
}
1219
1222
1220
1223
/// Defines and invokes a SYCL kernel function for the specified range.
@@ -1282,7 +1285,8 @@ class __SYCL_EXPORT handler {
1282
1285
#if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \
1283
1286
!defined (DPCPP_HOST_DEVICE_OPENMP) && \
1284
1287
!defined (DPCPP_HOST_DEVICE_PERF_NATIVE) && SYCL_LANGUAGE_VERSION >= 202001
1285
- if (auto RoundedRange = getRoundedRange (UserRange)) {
1288
+ auto [RoundedRange, HasRoundedRange] = getRoundedRange (UserRange);
1289
+ if (HasRoundedRange) {
1286
1290
using NameWT = typename detail::get_kernel_wrapper_name_t <NameT>::name;
1287
1291
auto Wrapper =
1288
1292
getRangeRoundedKernelLambda<NameWT, TransformedArgType, Dims>(
@@ -1300,7 +1304,7 @@ class __SYCL_EXPORT handler {
1300
1304
// __SYCL_ASSUME_INT can still be violated. So check the bounds
1301
1305
// of the user range, instead of the rounded range.
1302
1306
detail::checkValueRange<Dims>(UserRange);
1303
- MNDRDesc.set (* RoundedRange);
1307
+ MNDRDesc.set (RoundedRange);
1304
1308
StoreLambda<KName, decltype (Wrapper), Dims, TransformedArgType>(
1305
1309
std::move (Wrapper));
1306
1310
setType (detail::CG::Kernel);
0 commit comments