@@ -278,36 +278,9 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
278
278
int64_t *ArgSizes, int64_t *ArgTypes,
279
279
map_var_info_t *ArgNames, void **ArgMappers) {
280
280
TIMESCOPE_WITH_IDENT (Loc);
281
- DP (" Entering target region with entry point " DPxMOD " and device Id %" PRId64
282
- " \n " ,
283
- DPxPTR (HostPtr), DeviceId);
284
- if (checkDeviceAndCtors (DeviceId, Loc)) {
285
- DP (" Not offloading to device %" PRId64 " \n " , DeviceId);
286
- return OMP_TGT_FAIL;
287
- }
288
-
289
- if (getInfoLevel () & OMP_INFOTYPE_KERNEL_ARGS)
290
- printKernelArguments (Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
291
- " Entering OpenMP kernel" );
292
- #ifdef OMPTARGET_DEBUG
293
- for (int I = 0 ; I < ArgNum; ++I) {
294
- DP (" Entry %2d: Base=" DPxMOD " , Begin=" DPxMOD " , Size=%" PRId64
295
- " , Type=0x%" PRIx64 " , Name=%s\n " ,
296
- I, DPxPTR (ArgsBase[I]), DPxPTR (Args[I]), ArgSizes[I], ArgTypes[I],
297
- (ArgNames) ? getNameFromMapping (ArgNames[I]).c_str () : " unknown" );
298
- }
299
- #endif
300
-
301
- DeviceTy &Device = *PM->Devices [DeviceId];
302
- AsyncInfoTy AsyncInfo (Device);
303
- int Rc =
304
- target (Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes,
305
- ArgNames, ArgMappers, 0 , 0 , false /* team*/ , AsyncInfo);
306
- if (Rc == OFFLOAD_SUCCESS)
307
- Rc = AsyncInfo.synchronize ();
308
- handleTargetOutcome (Rc == OFFLOAD_SUCCESS, Loc);
309
- assert (Rc == OFFLOAD_SUCCESS && " __tgt_target_mapper unexpected failure!" );
310
- return OMP_TGT_SUCCESS;
281
+ __tgt_kernel_arguments KernelArgs{1 , ArgNum, ArgsBase, Args,
282
+ ArgSizes, ArgTypes, ArgNames, ArgMappers};
283
+ return __tgt_target_kernel (Loc, DeviceId, -1 , 0 , HostPtr, &KernelArgs);
311
284
}
312
285
313
286
EXTERN int __tgt_target_nowait_mapper (
@@ -352,50 +325,78 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
352
325
map_var_info_t *ArgNames,
353
326
void **ArgMappers, int32_t TeamNum,
354
327
int32_t ThreadLimit) {
328
+ TIMESCOPE_WITH_IDENT (Loc);
329
+ __tgt_kernel_arguments KernelArgs{1 , ArgNum, ArgsBase, Args,
330
+ ArgSizes, ArgTypes, ArgNames, ArgMappers};
331
+ return __tgt_target_kernel (Loc, DeviceId, TeamNum, ThreadLimit, HostPtr,
332
+ &KernelArgs);
333
+ }
334
+
335
+ // / Implements a kernel entry that executes the target region on the specified
336
+ // / device.
337
+ // /
338
+ // / \param Loc Source location associated with this target region.
339
+ // / \param DeviceId The device to execute this region, -1 indicated the default.
340
+ // / \param NumTeams Number of teams to launch the region with, -1 indicates a
341
+ // / non-teams region and 0 indicates it was unspecified.
342
+ // / \param ThreadLimit Limit to the number of threads to use in the kernel
343
+ // / launch, 0 indicates it was unspecified.
344
+ // / \param HostPtr The pointer to the host function registered with the kernel.
345
+ // / \param Args All arguments to this kernel launch (see struct definition).
346
+ EXTERN int __tgt_target_kernel (ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
347
+ int32_t ThreadLimit, void *HostPtr,
348
+ __tgt_kernel_arguments *Args) {
349
+ TIMESCOPE_WITH_IDENT (Loc);
355
350
DP (" Entering target region with entry point " DPxMOD " and device Id %" PRId64
356
351
" \n " ,
357
352
DPxPTR (HostPtr), DeviceId);
353
+ if (Args->Version != 1 ) {
354
+ DP (" Unexpected ABI version: %d\n " , Args->Version );
355
+ }
358
356
if (checkDeviceAndCtors (DeviceId, Loc)) {
359
357
DP (" Not offloading to device %" PRId64 " \n " , DeviceId);
360
358
return OMP_TGT_FAIL;
361
359
}
362
360
363
361
if (getInfoLevel () & OMP_INFOTYPE_KERNEL_ARGS)
364
- printKernelArguments (Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
362
+ printKernelArguments (Loc, DeviceId, Args->NumArgs , Args->ArgSizes ,
363
+ Args->ArgTypes , Args->ArgNames ,
365
364
" Entering OpenMP kernel" );
366
365
#ifdef OMPTARGET_DEBUG
367
- for (int I = 0 ; I < ArgNum ; ++I) {
366
+ for (int I = 0 ; I < Args-> NumArgs ; ++I) {
368
367
DP (" Entry %2d: Base=" DPxMOD " , Begin=" DPxMOD " , Size=%" PRId64
369
368
" , Type=0x%" PRIx64 " , Name=%s\n " ,
370
- I, DPxPTR (ArgsBase[I]), DPxPTR (Args[I]), ArgSizes[I], ArgTypes[I],
371
- (ArgNames) ? getNameFromMapping (ArgNames[I]).c_str () : " unknown" );
369
+ I, DPxPTR (Args->ArgBasePtrs [I]), DPxPTR (Args->ArgPtrs [I]),
370
+ Args->ArgSizes [I], Args->ArgTypes [I],
371
+ (Args->ArgNames ) ? getNameFromMapping (Args->ArgNames [I]).c_str ()
372
+ : " unknown" );
372
373
}
373
374
#endif
374
375
376
+ bool IsTeams = NumTeams != -1 ;
377
+ if (!IsTeams)
378
+ NumTeams = 0 ;
379
+
375
380
DeviceTy &Device = *PM->Devices [DeviceId];
376
381
AsyncInfoTy AsyncInfo (Device);
377
- int Rc = target (Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes ,
378
- ArgTypes, ArgNames, ArgMappers, TeamNum, ThreadLimit ,
379
- true /* team */ , AsyncInfo);
382
+ int Rc = target (Loc, Device, HostPtr, Args-> NumArgs , Args-> ArgBasePtrs ,
383
+ Args-> ArgPtrs , Args-> ArgSizes , Args-> ArgTypes , Args-> ArgNames ,
384
+ Args-> ArgMappers , NumTeams, ThreadLimit, IsTeams , AsyncInfo);
380
385
if (Rc == OFFLOAD_SUCCESS)
381
386
Rc = AsyncInfo.synchronize ();
382
387
handleTargetOutcome (Rc == OFFLOAD_SUCCESS, Loc);
383
- assert (Rc == OFFLOAD_SUCCESS &&
384
- " __tgt_target_teams_mapper unexpected failure!" );
388
+ assert (Rc == OFFLOAD_SUCCESS && " __tgt_target_kernel unexpected failure!" );
385
389
return OMP_TGT_SUCCESS;
386
390
}
387
391
388
- EXTERN int __tgt_target_teams_nowait_mapper (
389
- ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
390
- void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
391
- map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
392
- int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
393
- void *NoAliasDepList) {
392
+ EXTERN int __tgt_target_kernel_nowait (
393
+ ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit,
394
+ void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList,
395
+ int32_t NoAliasDepNum, void *NoAliasDepList) {
394
396
TIMESCOPE_WITH_IDENT (Loc);
395
397
396
- return __tgt_target_teams_mapper (Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
397
- Args, ArgSizes, ArgTypes, ArgNames,
398
- ArgMappers, TeamNum, ThreadLimit);
398
+ return __tgt_target_kernel (Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
399
+ Args);
399
400
}
400
401
401
402
// Get the current number of components for a user-defined mapper.
0 commit comments