@@ -99,6 +99,29 @@ struct urMultiQueueLaunchMemcpyTest : uur::urMultiDeviceContextTestTemplate<1>,
99
99
UUR_RETURN_ON_FATAL_FAILURE (
100
100
uur::urMultiDeviceContextTestTemplate<1 >::TearDown ());
101
101
}
102
+
103
+ void runBackgroundCheck (std::vector<uur::raii::Event> &Events) {
104
+ std::vector<std::thread> threads;
105
+ for (size_t i = 0 ; i < Events.size (); i++) {
106
+ threads.emplace_back ([&, i] {
107
+ ur_event_status_t status;
108
+ do {
109
+ ASSERT_SUCCESS (urEventGetInfo (
110
+ Events[i].get (), UR_EVENT_INFO_COMMAND_EXECUTION_STATUS,
111
+ sizeof (ur_event_status_t ), &status, nullptr ));
112
+ } while (status != UR_EVENT_STATUS_COMPLETE);
113
+
114
+ auto ExpectedValue = InitialValue + i + 1 ;
115
+ for (uint32_t j = 0 ; j < ArraySize; ++j) {
116
+ ASSERT_EQ (reinterpret_cast <uint32_t *>(SharedMem[i])[j],
117
+ ExpectedValue);
118
+ }
119
+ });
120
+ }
121
+ for (auto &thread : threads) {
122
+ thread.join ();
123
+ }
124
+ }
102
125
};
103
126
104
127
template <typename Param>
@@ -189,26 +212,24 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {
189
212
190
213
auto useEvents = std::get<1 >(GetParam ()).value ;
191
214
192
- std::vector<uur::raii::Event> Events (numOps * 2 - 1 );
193
- for (size_t i = 0 ; i < numOps; i++) {
194
- size_t waitNum = 0 ;
195
- ur_event_handle_t *lastEvent = nullptr ;
196
- ur_event_handle_t *kernelEvent = nullptr ;
197
- ur_event_handle_t *memcpyEvent = nullptr ;
215
+ std::vector<uur::raii::Event> kernelEvents (numOps);
216
+ std::vector<uur::raii::Event> memcpyEvents (numOps - 1 );
198
217
199
- if (useEvents) {
200
- // Events are: kernelEvent0, memcpyEvent0, kernelEvent1, ...
201
- waitNum = i > 0 ? 1 : 0 ;
202
- lastEvent = i > 0 ? Events[i * 2 - 1 ].ptr () : nullptr ;
218
+ ur_event_handle_t *lastMemcpyEvent = nullptr ;
219
+ ur_event_handle_t *kernelEvent = nullptr ;
220
+ ur_event_handle_t *memcpyEvent = nullptr ;
203
221
204
- kernelEvent = Events[i * 2 ].ptr ();
205
- memcpyEvent = i < numOps - 1 ? Events[i * 2 + 1 ].ptr () : nullptr ;
222
+ for (size_t i = 0 ; i < numOps; i++) {
223
+ if (useEvents) {
224
+ lastMemcpyEvent = memcpyEvent;
225
+ kernelEvent = kernelEvents[i].ptr ();
226
+ memcpyEvent = i < numOps - 1 ? memcpyEvents[i].ptr () : nullptr ;
206
227
}
207
228
208
229
// execute kernel that increments each element by 1
209
230
ASSERT_SUCCESS (urEnqueueKernelLaunch (
210
231
queue, kernels[i], n_dimensions, &global_offset, &ArraySize,
211
- nullptr , waitNum, lastEvent , kernelEvent));
232
+ nullptr , bool (lastMemcpyEvent), lastMemcpyEvent , kernelEvent));
212
233
213
234
// copy the memory (input for the next kernel)
214
235
if (i < numOps - 1 ) {
@@ -220,9 +241,9 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {
220
241
}
221
242
222
243
if (useEvents) {
223
- urEventWait (1 , Events .back ().ptr ());
244
+ ASSERT_SUCCESS ( urEventWait (1 , kernelEvents .back ().ptr () ));
224
245
} else {
225
- urQueueFinish (queue);
246
+ ASSERT_SUCCESS ( urQueueFinish (queue) );
226
247
}
227
248
228
249
size_t ExpectedValue = InitialValue;
@@ -237,23 +258,38 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {
237
258
238
259
template <typename T>
239
260
inline std::string
240
- printBoolParam (const testing::TestParamInfo<typename T::ParamType> &info) {
261
+ printParams (const testing::TestParamInfo<typename T::ParamType> &info) {
241
262
std::stringstream ss;
242
- ss << (info.param .value ? " " : " No" ) << info.param .name ;
263
+
264
+ auto param1 = std::get<0 >(info.param );
265
+ ss << (param1.value ? " " : " No" ) << param1.name ;
266
+
267
+ auto param2 = std::get<1 >(info.param );
268
+ ss << (param2.value ? " " : " No" ) << param2.name ;
269
+
270
+ if constexpr (std::tuple_size_v < typename T::ParamType >> 2 ) {
271
+ auto param3 = std::get<2 >(info.param );
272
+ }
273
+
243
274
return ss.str ();
244
275
}
245
276
246
277
using urEnqueueKernelLaunchIncrementMultiDeviceTest =
247
- urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<uur::BoolTestParam>;
278
+ urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
279
+ std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
248
280
249
281
INSTANTIATE_TEST_SUITE_P (
250
282
, urEnqueueKernelLaunchIncrementMultiDeviceTest,
251
- testing::ValuesIn (uur::BoolTestParam::makeBoolParam(" UseEventWait" )),
252
- printBoolParam<urEnqueueKernelLaunchIncrementMultiDeviceTest>);
283
+ testing::Combine (
284
+ testing::ValuesIn (uur::BoolTestParam::makeBoolParam(" UseEventWait" )),
285
+ testing::ValuesIn(
286
+ uur::BoolTestParam::makeBoolParam (" RunBackgroundCheck" ))),
287
+ printParams<urEnqueueKernelLaunchIncrementMultiDeviceTest>);
253
288
254
289
// Do a chain of kernelLaunch(dev0) -> memcpy(dev0, dev1) -> kernelLaunch(dev1) ... ops
255
290
TEST_P (urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
256
- auto waitOnEvent = GetParam ().value ;
291
+ auto waitOnEvent = std::get<0 >(GetParam ()).value ;
292
+ auto runBackgroundCheck = std::get<1 >(GetParam ()).value ;
257
293
258
294
size_t returned_size;
259
295
ASSERT_SUCCESS (urDeviceGetInfo (devices[0 ], UR_DEVICE_INFO_EXTENSIONS, 0 ,
@@ -277,20 +313,22 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
277
313
constexpr size_t global_offset = 0 ;
278
314
constexpr size_t n_dimensions = 1 ;
279
315
280
- std::vector<uur::raii::Event> Events (devices.size () * 2 - 1 );
316
+ std::vector<uur::raii::Event> kernelEvents (devices.size ());
317
+ std::vector<uur::raii::Event> memcpyEvents (devices.size () - 1 );
318
+
319
+ ur_event_handle_t *lastMemcpyEvent = nullptr ;
320
+ ur_event_handle_t *kernelEvent = nullptr ;
321
+ ur_event_handle_t *memcpyEvent = nullptr ;
322
+
281
323
for (size_t i = 0 ; i < devices.size (); i++) {
282
- // Events are: kernelEvent0, memcpyEvent0, kernelEvent1, ...
283
- size_t waitNum = i > 0 ? 1 : 0 ;
284
- ur_event_handle_t *lastEvent =
285
- i > 0 ? Events[i * 2 - 1 ].ptr () : nullptr ;
286
- ur_event_handle_t *kernelEvent = Events[i * 2 ].ptr ();
287
- ur_event_handle_t *memcpyEvent =
288
- i < devices.size () - 1 ? Events[i * 2 + 1 ].ptr () : nullptr ;
324
+ lastMemcpyEvent = memcpyEvent;
325
+ kernelEvent = kernelEvents[i].ptr ();
326
+ memcpyEvent = i < devices.size () - 1 ? memcpyEvents[i].ptr () : nullptr ;
289
327
290
328
// execute kernel that increments each element by 1
291
329
ASSERT_SUCCESS (urEnqueueKernelLaunch (
292
330
queues[i], kernels[i], n_dimensions, &global_offset, &ArraySize,
293
- nullptr , waitNum, lastEvent , kernelEvent));
331
+ nullptr , bool (lastMemcpyEvent), lastMemcpyEvent , kernelEvent));
294
332
295
333
// copy the memory to next device
296
334
if (i < devices.size () - 1 ) {
@@ -300,12 +338,17 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
300
338
}
301
339
}
302
340
341
+ // While the device(s) execute, poll each kernel event and, once it has completed, verify that kernel's results
342
+ if (runBackgroundCheck) {
343
+ this ->runBackgroundCheck (kernelEvents);
344
+ }
345
+
303
346
// synchronize on the last queue/event only, this has to ensure all the operations
304
347
// are completed
305
348
if (waitOnEvent) {
306
- urEventWait (1 , Events .back ().ptr ());
349
+ ASSERT_SUCCESS ( urEventWait (1 , kernelEvents .back ().ptr () ));
307
350
} else {
308
- urQueueFinish (queues.back ());
351
+ ASSERT_SUCCESS ( urQueueFinish (queues.back () ));
309
352
}
310
353
311
354
size_t ExpectedValue = InitialValue;
@@ -318,20 +361,6 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
318
361
}
319
362
}
320
363
321
- template <typename T>
322
- inline std::string
323
- printParams (const testing::TestParamInfo<typename T::ParamType> &info) {
324
- std::stringstream ss;
325
-
326
- auto param1 = std::get<0 >(info.param );
327
- auto param2 = std::get<1 >(info.param );
328
-
329
- ss << (param1.value ? " " : " No" ) << param1.name ;
330
- ss << (param2.value ? " " : " No" ) << param2.name ;
331
-
332
- return ss.str ();
333
- }
334
-
335
364
// Fixture alias for the multi-device, multi-thread increment test; each
// instance is parameterized by a tuple of two uur::BoolTestParam flags.
using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
336
365
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
337
366
std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
@@ -392,9 +421,9 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
392
421
lastEvent, signalEvent));
393
422
394
423
if (useEvents) {
395
- urEventWait (1 , Events.back ().ptr ());
424
+ ASSERT_SUCCESS ( urEventWait (1 , Events.back ().ptr () ));
396
425
} else {
397
- urQueueFinish (queue);
426
+ ASSERT_SUCCESS ( urQueueFinish (queue) );
398
427
}
399
428
400
429
size_t ExpectedValue = InitialValue;
0 commit comments