Skip to content

Commit 100f89d

Browse files

File tree

1 file changed

+330
-0
lines changed

1 file changed

+330
-0
lines changed
Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
// This test verifies whether struct that contains either sycl::local_accesor or
5+
// sycl::accessor can be used with free function kernels extension.
6+
7+
// XFAIL: *
8+
// XFAIL-TRACKER: CMPLRLLVM-67737
9+
10+
#include <sycl/atomic_ref.hpp>
11+
#include <sycl/ext/oneapi/free_function_queries.hpp>
12+
#include <sycl/group_barrier.hpp>
13+
14+
#include "helpers.hpp"
15+
16+
namespace ns {
17+
// TODO: Need to remove explicit specified default template arguments for the
18+
// accessor when the relevant CMPLRLLVM-68249 issue is fixed.
19+
template <size_t Dims> struct StructWithAccessor {
20+
sycl::accessor<int, Dims, sycl::access::mode::read_write,
21+
sycl::access::target::device,
22+
sycl::access::placeholder::false_t>
23+
MAccessor;
24+
int MValue;
25+
};
26+
27+
template <size_t Dims> struct NestedStructWithAccessor {
28+
StructWithAccessor<Dims> NestedStruct;
29+
};
30+
31+
template <int Dims>
32+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<Dims>))
33+
void nsNdRangeFreeFunc(StructWithAccessor<Dims> Type) {
34+
auto Item = syclext::this_work_item::get_nd_item<Dims>().get_global_id();
35+
Type.MAccessor[Item] = Type.MValue;
36+
}
37+
38+
template <int Dims>
39+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<Dims>))
40+
void nsNdRangeFreeFuncWithNestedStruct(NestedStructWithAccessor<Dims> Type) {
41+
auto Item = syclext::this_work_item::get_nd_item<Dims>().get_global_id();
42+
Type.NestedStruct.MAccessor[Item] = Type.NestedStruct.MValue;
43+
}
44+
} // namespace ns
45+
46+
// TODO: Need to remove explicit specified default template arguments for the
47+
// accessor when the relevant CMPLRLLVM-68249 issue is fixed.
48+
template <size_t Dims> struct StructWithMultipleAccessors {
49+
sycl::accessor<int, Dims, sycl::access::mode::read,
50+
sycl::access::target::device,
51+
sycl::access::placeholder::false_t>
52+
MInputAAcc;
53+
sycl::accessor<int, Dims, sycl::access::mode::read,
54+
sycl::access::target::device,
55+
sycl::access::placeholder::false_t>
56+
MInputBAcc;
57+
sycl::accessor<int, Dims, sycl::access::mode::write,
58+
sycl::access::target::device,
59+
sycl::access::placeholder::false_t>
60+
MResultAcc;
61+
};
62+
63+
template <int Dims>
64+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
65+
void globalScopeSingleFreeFunc(ns::StructWithAccessor<Dims> Type) {
66+
for (auto &Elem : Type.MAccessor)
67+
Elem = Type.MValue;
68+
}
69+
70+
template <int Dims>
71+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<Dims>))
72+
void ndRangeFreeFuncMultipleParameters(StructWithMultipleAccessors<Dims> Type) {
73+
auto Item = syclext::this_work_item::get_nd_item<Dims>().get_global_id();
74+
Type.MResultAcc[Item] = Type.MInputAAcc[Item] + Type.MInputBAcc[Item];
75+
}
76+
77+
template <auto Func, size_t Dims, bool IsNestedStruct = false>
78+
int runNdRangeTest(sycl::queue &Queue, sycl::context &Context,
79+
sycl::nd_range<Dims> NdRange, std::string_view ErrorMessage,
80+
const int ExpectedResultValue) {
81+
sycl::kernel UsedKernel = getKernel<Func>(Context);
82+
std::vector<int> ResultData(NdRange.get_global_range().size(), 0);
83+
{
84+
sycl::buffer<int, Dims> Buffer(ResultData.data(),
85+
NdRange.get_global_range());
86+
Queue.submit([&](sycl::handler &Handler) {
87+
if constexpr (IsNestedStruct) {
88+
Handler.set_args(
89+
ns::NestedStructWithAccessor<Dims>{ns::StructWithAccessor<Dims>{
90+
sycl::accessor<int, Dims>{Buffer, Handler},
91+
ExpectedResultValue}});
92+
} else {
93+
Handler.set_args(ns::StructWithAccessor<Dims>{
94+
sycl::accessor<int, Dims>{Buffer, Handler}, ExpectedResultValue});
95+
}
96+
Handler.parallel_for(NdRange, UsedKernel);
97+
});
98+
}
99+
return performResultCheck(NdRange.get_global_range().size(),
100+
ResultData.data(), ErrorMessage,
101+
ExpectedResultValue);
102+
}
103+
104+
template <auto Func, size_t Dims>
105+
int runSingleTaskTest(sycl::queue &Queue, sycl::context &Context,
106+
sycl::range<Dims> NumOfElementsPerDim,
107+
std::string_view ErrorMessage,
108+
const int ExpectedResultValue) {
109+
sycl::kernel UsedKernel = getKernel<Func>(Context);
110+
std::vector<int> ResultData(NumOfElementsPerDim.size(), 0);
111+
{
112+
sycl::buffer<int, Dims> Buffer(ResultData.data(), NumOfElementsPerDim);
113+
Queue.submit([&](sycl::handler &Handler) {
114+
Handler.set_arg(0, ns::StructWithAccessor<Dims>{
115+
sycl::accessor<int, Dims>{Buffer, Handler},
116+
ExpectedResultValue});
117+
Handler.single_task(UsedKernel);
118+
});
119+
}
120+
return performResultCheck(NumOfElementsPerDim.size(), ResultData.data(),
121+
ErrorMessage, ExpectedResultValue);
122+
}
123+
124+
template <auto Func, size_t Dims>
125+
int runNdRangeTestMultipleParameters(sycl::queue &Queue, sycl::context &Context,
126+
sycl::nd_range<Dims> NdRange,
127+
std::string_view ErrorMessage,
128+
sycl::range<3> Values) {
129+
sycl::kernel UsedKernel = getKernel<Func>(Context);
130+
std::vector<int> InputAData(NdRange.get_global_range().size(), Values[0]);
131+
std::vector<int> InputBData(NdRange.get_global_range().size(), Values[1]);
132+
std::vector<int> ResultData(NdRange.get_global_range().size(), 0);
133+
134+
{
135+
sycl::buffer<int, Dims> InputABuffer(InputAData.data(),
136+
NdRange.get_global_range());
137+
sycl::buffer<int, Dims> InputBBuffer(InputBData.data(),
138+
NdRange.get_global_range());
139+
sycl::buffer<int, Dims> ResultBuffer(ResultData.data(),
140+
NdRange.get_global_range());
141+
Queue.submit([&](sycl::handler &Handler) {
142+
Handler.set_args(StructWithMultipleAccessors<Dims>{
143+
sycl::accessor<int, Dims, sycl::access::mode::read,
144+
sycl::access::target::device>{InputABuffer, Handler},
145+
sycl::accessor<int, Dims, sycl::access::mode::read,
146+
sycl::access::target::device>{InputBBuffer, Handler},
147+
sycl::accessor<int, Dims, sycl::access::mode::write>{ResultBuffer,
148+
Handler}});
149+
Handler.parallel_for(NdRange, UsedKernel);
150+
});
151+
}
152+
return performResultCheck(NdRange.get_global_range().size(),
153+
ResultData.data(), ErrorMessage, Values[2]);
154+
}
155+
156+
namespace local_acc {
157+
158+
constexpr size_t BIN_SIZE = 4;
159+
constexpr size_t NUM_BINS = 4;
160+
constexpr size_t INPUT_SIZE = 1024;
161+
162+
struct StructWithLocalAccessor {
163+
// TODO: Need to remove explicit specified default template arguments for the
164+
// accessor when the relevant CMPLRLLVM-68249 issue is fixed.
165+
sycl::accessor<int, 1, sycl::access::mode::read_write,
166+
sycl::access::target::device,
167+
sycl::access::placeholder::false_t>
168+
MInputAccessor;
169+
sycl::accessor<int, 1, sycl::access::mode::read_write,
170+
sycl::access::target::device,
171+
sycl::access::placeholder::false_t>
172+
MResultAccessor;
173+
sycl::local_accessor<int, 1> MLocalAccessor;
174+
};
175+
176+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
177+
void nsNdRangeFreeFunc(StructWithLocalAccessor Type) {
178+
179+
size_t LocalWorkItemId =
180+
syclext::this_work_item::get_nd_item<1>().get_local_id();
181+
size_t GlobalWorkItemId =
182+
syclext::this_work_item::get_nd_item<1>().get_global_id();
183+
sycl::group<1> WorkGroup = syclext::this_work_item::get_work_group<1>();
184+
185+
if (LocalWorkItemId < BIN_SIZE)
186+
Type.MLocalAccessor[LocalWorkItemId] = 0;
187+
188+
sycl::group_barrier(WorkGroup);
189+
190+
int Value = Type.MInputAccessor[GlobalWorkItemId];
191+
sycl::atomic_ref<int, sycl::memory_order::relaxed,
192+
sycl::memory_scope::work_group>
193+
AtomicRefLocal(Type.MLocalAccessor[Value]);
194+
AtomicRefLocal++;
195+
sycl::group_barrier(WorkGroup);
196+
197+
if (LocalWorkItemId < BIN_SIZE) {
198+
sycl::atomic_ref<int, sycl::memory_order::relaxed,
199+
sycl::memory_scope::device>
200+
AtomicRefGlobal(Type.MResultAccessor[LocalWorkItemId]);
201+
AtomicRefGlobal.fetch_add(Type.MLocalAccessor[LocalWorkItemId]);
202+
}
203+
}
204+
205+
void FillWithData(std::vector<int> &Data, std::vector<int> &Values) {
206+
constexpr size_t Offset = INPUT_SIZE / NUM_BINS;
207+
for (size_t i = 0; i < NUM_BINS; ++i) {
208+
std::fill(Data.begin() + i * Offset, Data.begin() + (i + 1) * Offset,
209+
Values[i]);
210+
}
211+
}
212+
213+
} // namespace local_acc
214+
215+
int main() {
216+
217+
int Failed = 0;
218+
sycl::queue Queue;
219+
sycl::context Context = Queue.get_context();
220+
221+
{
222+
// Check struct type that contains sycl::accessor is supported inside
223+
// single_task free function kernel
224+
Failed += runSingleTaskTest<globalScopeSingleFreeFunc<1>, 1>(
225+
Queue, Context, sycl::range<1>{10},
226+
"globalScopeSingleFreeFunc with sycl::accessor<1>", 1);
227+
Failed += runSingleTaskTest<globalScopeSingleFreeFunc<2>, 2>(
228+
Queue, Context, sycl::range<2>{10, 10},
229+
"globalScopeSingleFreeFunc with sycl::accessor<2>", 2);
230+
Failed += runSingleTaskTest<globalScopeSingleFreeFunc<3>, 3>(
231+
Queue, Context, sycl::range<3>{5, 5, 5},
232+
"globalScopeSingleFreeFunc with sycl::accessor<3>", 3);
233+
}
234+
235+
{
236+
// Check struct type that contains sycl::accessor is supported inside
237+
// nd_range free function kernel
238+
Failed += runNdRangeTest<ns::nsNdRangeFreeFunc<1>, 1>(
239+
Queue, Context, sycl::nd_range{sycl::range{10}, sycl::range{2}},
240+
"ns::nsNdRangeFreeFunc with struct that contains sycl::accessor<1>", 4);
241+
Failed += runNdRangeTest<ns::nsNdRangeFreeFunc<2>, 2>(
242+
Queue, Context, sycl::nd_range{sycl::range{16, 16}, sycl::range{4, 4}},
243+
"ns::nsNdRangeFreeFunc with struct that contains sycl::accessor<2>", 5);
244+
Failed += runNdRangeTest<ns::nsNdRangeFreeFunc<3>, 3>(
245+
Queue, Context,
246+
sycl::nd_range{sycl::range{10, 10, 10}, sycl::range{2, 2, 2}},
247+
"ns::nsNdRangeFreeFunc with struct that contains sycl::accessor<3>", 6);
248+
}
249+
250+
{
251+
// Check struct type that contains multiple sycl::accessor is supported
252+
// inside nd_range free function kernel
253+
Failed +=
254+
runNdRangeTestMultipleParameters<ndRangeFreeFuncMultipleParameters<1>,
255+
1>(
256+
Queue, Context, sycl::nd_range{sycl::range{10}, sycl::range{2}},
257+
"ndRangeFreeFuncMultipleParameters with struct type that contains "
258+
"multiple sycl::accessor<1>",
259+
sycl::range{111, 111, 222});
260+
Failed +=
261+
runNdRangeTestMultipleParameters<ndRangeFreeFuncMultipleParameters<2>,
262+
2>(
263+
Queue, Context,
264+
sycl::nd_range{sycl::range{16, 16}, sycl::range{4, 4}},
265+
"ndRangeFreeFuncMultipleParameters with struct type that contains "
266+
"multiple sycl::accessor<2>",
267+
sycl::range{222, 222, 444});
268+
Failed +=
269+
runNdRangeTestMultipleParameters<ndRangeFreeFuncMultipleParameters<3>,
270+
3>(
271+
Queue, Context,
272+
sycl::nd_range{sycl::range{10, 10, 10}, sycl::range{2, 2, 2}},
273+
"ndRangeFreeFuncMultipleParameters with struct type that contains "
274+
"multiple sycl::accessor<3>",
275+
sycl::range{444, 444, 888});
276+
}
277+
278+
{
279+
// Check struct type that nests another struct which contains sycl::accessor
280+
// is supported inside nd_range free function kernel
281+
Failed += runNdRangeTest<ns::nsNdRangeFreeFuncWithNestedStruct<1>, 1, true>(
282+
Queue, Context, sycl::nd_range{sycl::range{10}, sycl::range{2}},
283+
"ns::nsNdRangeFreeFuncWithNestedStruct with a struct nesting another "
284+
"struct that contains sycl::accessor<1>",
285+
7);
286+
Failed += runNdRangeTest<ns::nsNdRangeFreeFuncWithNestedStruct<2>, 2, true>(
287+
Queue, Context, sycl::nd_range{sycl::range{16, 16}, sycl::range{4, 4}},
288+
"ns::nsNdRangeFreeFuncWithNestedStruct with a struct nesting another "
289+
"struct that contains sycl::accessor<2>",
290+
8);
291+
Failed += runNdRangeTest<ns::nsNdRangeFreeFuncWithNestedStruct<3>, 3, true>(
292+
Queue, Context,
293+
sycl::nd_range{sycl::range{10, 10, 10}, sycl::range{2, 2, 2}},
294+
"ns::nsNdRangeFreeFuncWithNestedStruct with a struct nesting another "
295+
"struct that contains sycl::accessor<3>",
296+
9);
297+
}
298+
299+
{
300+
// Check struct type that contains sycl::local_accesor is supported inside
301+
// nd_range free function kernel.
302+
std::vector<int> ExpectedHistogramNumbers = {0, 1, 2, 3};
303+
std::vector<int> ResultData(local_acc::BIN_SIZE, 0);
304+
305+
std::vector<int> InputData(local_acc::INPUT_SIZE);
306+
local_acc::FillWithData(InputData, ExpectedHistogramNumbers);
307+
{
308+
sycl::buffer<int, 1> InputBuffer(InputData);
309+
sycl::buffer<int, 1> ResultBuffer(ResultData);
310+
sycl::kernel UsedKernel =
311+
getKernel<local_acc::nsNdRangeFreeFunc>(Context);
312+
Queue.submit([&](sycl::handler &Handler) {
313+
Handler.set_args(local_acc::StructWithLocalAccessor{
314+
sycl::accessor<int, 1>{InputBuffer, Handler},
315+
sycl::accessor<int, 1>{ResultBuffer, Handler},
316+
sycl::local_accessor<int>{sycl::range<1>(local_acc::BIN_SIZE),
317+
Handler}});
318+
sycl::nd_range<1> Ndr{local_acc::INPUT_SIZE,
319+
local_acc::INPUT_SIZE / local_acc::NUM_BINS};
320+
Handler.parallel_for(Ndr, UsedKernel);
321+
});
322+
}
323+
Failed += performResultCheck(local_acc::NUM_BINS, ResultData.data(),
324+
"sycl::nd_range_kernel with struct type that "
325+
"contains sycl::local_accesor",
326+
local_acc::INPUT_SIZE / local_acc::NUM_BINS);
327+
}
328+
329+
return Failed;
330+
}

0 commit comments

Comments
 (0)