Skip to content

Commit 1958715

Browse files
[SYCL] Add offset support to parallel_for for host device (#3135)
The host device is not correctly supporting parallel_for offset argument in runOnHost routines. Adding support and updating test coverage. Signed-off-by: Chris Perkins chris.perkins@intel.com
1 parent db9151f commit 1958715

File tree

3 files changed

+64
-22
lines changed

3 files changed

+64
-22
lines changed

sycl/include/CL/sycl/detail/cg_types.hpp

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -208,21 +208,28 @@ class HostKernel : public HostKernelBase {
208208

209209
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
210210
sycl::id<Dims> Offset;
211+
sycl::range<Dims> Stride(
212+
InitializedVal<Dims, range>::template get<1>()); // initialized to 1
213+
sycl::range<Dims> UpperBound(
214+
InitializedVal<Dims, range>::template get<0>());
211215
for (int I = 0; I < Dims; ++I) {
212216
Range[I] = NDRDesc.GlobalSize[I];
213217
Offset[I] = NDRDesc.GlobalOffset[I];
218+
UpperBound[I] = Range[I] + Offset[I];
214219
}
215220

216-
detail::NDLoop<Dims>::iterate(Range, [&](const sycl::id<Dims> &ID) {
217-
sycl::item<Dims, /*Offset=*/true> Item =
218-
IDBuilder::createItem<Dims, true>(Range, ID, Offset);
219-
220-
if (StoreLocation) {
221-
store_id(&ID);
222-
store_item(&Item);
223-
}
224-
MKernel(ID);
225-
});
221+
detail::NDLoop<Dims>::iterate(/*LowerBound=*/Offset, Stride, UpperBound,
222+
[&](const sycl::id<Dims> &ID) {
223+
sycl::item<Dims, /*Offset=*/true> Item =
224+
IDBuilder::createItem<Dims, true>(
225+
Range, ID, Offset);
226+
227+
if (StoreLocation) {
228+
store_id(&ID);
229+
store_item(&Item);
230+
}
231+
MKernel(ID);
232+
});
226233
}
227234

228235
template <class ArgT = KernelArgType>
@@ -259,22 +266,28 @@ class HostKernel : public HostKernelBase {
259266

260267
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
261268
sycl::id<Dims> Offset;
269+
sycl::range<Dims> Stride(
270+
InitializedVal<Dims, range>::template get<1>()); // initialized to 1
271+
sycl::range<Dims> UpperBound(
272+
InitializedVal<Dims, range>::template get<0>());
262273
for (int I = 0; I < Dims; ++I) {
263274
Range[I] = NDRDesc.GlobalSize[I];
264275
Offset[I] = NDRDesc.GlobalOffset[I];
276+
UpperBound[I] = Range[I] + Offset[I];
265277
}
266278

267-
detail::NDLoop<Dims>::iterate(Range, [&](const sycl::id<Dims> &ID) {
268-
sycl::id<Dims> OffsetID = ID + Offset;
269-
sycl::item<Dims, /*Offset=*/true> Item =
270-
IDBuilder::createItem<Dims, true>(Range, OffsetID, Offset);
271-
272-
if (StoreLocation) {
273-
store_id(&OffsetID);
274-
store_item(&Item);
275-
}
276-
MKernel(Item);
277-
});
279+
detail::NDLoop<Dims>::iterate(/*LowerBound=*/Offset, Stride, UpperBound,
280+
[&](const sycl::id<Dims> &ID) {
281+
sycl::item<Dims, /*Offset=*/true> Item =
282+
IDBuilder::createItem<Dims, true>(
283+
Range, ID, Offset);
284+
285+
if (StoreLocation) {
286+
store_id(&ID);
287+
store_item(&Item);
288+
}
289+
MKernel(Item);
290+
});
278291
}
279292

280293
template <class ArgT = KernelArgType>

sycl/include/CL/sycl/detail/common.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ template <int NDIMS> struct NDLoop {
277277
const LoopBoundTy<NDIMS> &Stride,
278278
const LoopBoundTy<NDIMS> &UpperBound,
279279
FuncTy f) {
280-
LoopIndexTy<NDIMS> Index; // initialized down the call stack
280+
LoopIndexTy<NDIMS> Index =
281+
InitializedVal<NDIMS, LoopIndexTy>::template get<0>();
281282
NDLoopIterateImpl<NDIMS, NDIMS - 1, LoopBoundTy, FuncTy, LoopIndexTy>{
282283
LowerBound, Stride, UpperBound, f, Index};
283284
}

sycl/test/basic_tests/parallel_for_range_host.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,34 @@ int main() {
1616

1717
queue Q(AsyncHandler);
1818

19+
// parallel_for, offset
20+
try {
21+
Q.submit([&](handler &CGH) {
22+
CGH.parallel_for<class offset>(range<1>(1), id<1>(16),
23+
[=](id<1> ID) { assert(ID == 16); });
24+
});
25+
Q.submit([&](handler &CGH) {
26+
CGH.parallel_for<class offset_2D>(range<2>(1, 1), id<2>(16, 17),
27+
[=](id<2> ID) {
28+
assert(ID[0] == 16);
29+
assert(ID[1] == 17);
30+
});
31+
});
32+
Q.submit([&](handler &CGH) {
33+
CGH.parallel_for<class offset_3D>(range<3>(1, 1, 1), id<3>(16, 17, 18),
34+
[=](id<3> ID) {
35+
assert(ID[0] == 16);
36+
assert(ID[1] == 17);
37+
assert(ID[2] == 18);
38+
});
39+
});
40+
Q.wait_and_throw();
41+
} catch (nd_range_error) {
42+
std::cerr << "Test case 'offset' failed: exception has been thrown"
43+
<< std::endl;
44+
return 1;
45+
}
46+
1947
// parallel_for, 100 global, 3 local -> fail.
2048
try {
2149
Q.submit([&](handler &CGH) {

0 commit comments

Comments
 (0)