Skip to content

Commit ad23e4d

Browse files
committed
[Libomptarget] Implement a unified kernel entry function
This patch implements a unified kernel entry function that will be targeted from both teams and non-teams clauses. We introduce a new interface and make the old functions call into the new one. A follow-up patch will include the necessary changes to Clang to call these new functions instead. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D128549
1 parent 46e984b commit ad23e4d

File tree

3 files changed

+76
-53
lines changed

3 files changed

+76
-53
lines changed

openmp/libomptarget/include/omptarget.h

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,18 @@ enum TargetAllocTy : int32_t {
109109
TARGET_ALLOC_DEFAULT
110110
};
111111

112+
/// This struct contains all of the arguments to a target kernel region launch.
113+
struct __tgt_kernel_arguments {
114+
int32_t Version; // Version of this struct for ABI compatibility.
115+
int32_t NumArgs; // Number of arguments in each input pointer.
116+
void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
117+
void **ArgPtrs; // Pointer to the argument data.
118+
int64_t *ArgSizes; // Size of the argument data in bytes.
119+
int64_t *ArgTypes; // Type of the data (e.g. to / from).
120+
void **ArgNames; // Name of the data for debugging, possibly null.
121+
void **ArgMappers; // User-defined mappers, possibly null.
122+
};
123+
112124
/// This struct is a record of an entry point or global. For a function
113125
/// entry point the size is expected to be zero
114126
struct __tgt_offload_entry {
@@ -345,11 +357,19 @@ int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
345357
map_var_info_t *ArgNames, void **ArgMappers,
346358
int32_t NumTeams, int32_t ThreadLimit);
347359
int __tgt_target_teams_nowait_mapper(
348-
ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
349-
void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
350-
map_var_info_t *ArgNames, void **ArgMappers, int32_t NumTeams,
351-
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
352-
void *NoAliasDepList);
360+
ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
361+
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
362+
map_var_info_t *arg_names, void **arg_mappers, int32_t num_teams,
363+
int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
364+
void *noAliasDepList);
365+
int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
366+
int32_t ThreadLimit, void *HostPtr,
367+
__tgt_kernel_arguments *Args);
368+
int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
369+
int32_t ThreadLimit, void *HostPtr,
370+
__tgt_kernel_arguments *Args, int32_t DepNum,
371+
void *DepList, int32_t NoAliasDepNum,
372+
void *NoAliasDepList);
353373

354374
void __kmpc_push_target_tripcount(int64_t DeviceId, uint64_t LoopTripcount);
355375

openmp/libomptarget/src/exports

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ VERS1.0 {
2424
__tgt_target_data_update_nowait_mapper;
2525
__tgt_target_nowait_mapper;
2626
__tgt_target_teams_nowait_mapper;
27+
__tgt_target_kernel;
28+
__tgt_target_kernel_nowait;
2729
__tgt_mapper_num_components;
2830
__tgt_push_mapper_component;
2931
__kmpc_push_target_tripcount;

openmp/libomptarget/src/interface.cpp

Lines changed: 49 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -278,36 +278,9 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
278278
int64_t *ArgSizes, int64_t *ArgTypes,
279279
map_var_info_t *ArgNames, void **ArgMappers) {
280280
TIMESCOPE_WITH_IDENT(Loc);
281-
DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
282-
"\n",
283-
DPxPTR(HostPtr), DeviceId);
284-
if (checkDeviceAndCtors(DeviceId, Loc)) {
285-
DP("Not offloading to device %" PRId64 "\n", DeviceId);
286-
return OMP_TGT_FAIL;
287-
}
288-
289-
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
290-
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
291-
"Entering OpenMP kernel");
292-
#ifdef OMPTARGET_DEBUG
293-
for (int I = 0; I < ArgNum; ++I) {
294-
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
295-
", Type=0x%" PRIx64 ", Name=%s\n",
296-
I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
297-
(ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
298-
}
299-
#endif
300-
301-
DeviceTy &Device = *PM->Devices[DeviceId];
302-
AsyncInfoTy AsyncInfo(Device);
303-
int Rc =
304-
target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes,
305-
ArgNames, ArgMappers, 0, 0, false /*team*/, AsyncInfo);
306-
if (Rc == OFFLOAD_SUCCESS)
307-
Rc = AsyncInfo.synchronize();
308-
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
309-
assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!");
310-
return OMP_TGT_SUCCESS;
281+
__tgt_kernel_arguments KernelArgs{1, ArgNum, ArgsBase, Args,
282+
ArgSizes, ArgTypes, ArgNames, ArgMappers};
283+
return __tgt_target_kernel(Loc, DeviceId, -1, 0, HostPtr, &KernelArgs);
311284
}
312285

313286
EXTERN int __tgt_target_nowait_mapper(
@@ -352,50 +325,78 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
352325
map_var_info_t *ArgNames,
353326
void **ArgMappers, int32_t TeamNum,
354327
int32_t ThreadLimit) {
328+
TIMESCOPE_WITH_IDENT(Loc);
329+
__tgt_kernel_arguments KernelArgs{1, ArgNum, ArgsBase, Args,
330+
ArgSizes, ArgTypes, ArgNames, ArgMappers};
331+
return __tgt_target_kernel(Loc, DeviceId, TeamNum, ThreadLimit, HostPtr,
332+
&KernelArgs);
333+
}
334+
335+
/// Implements a kernel entry that executes the target region on the specified
336+
/// device.
337+
///
338+
/// \param Loc Source location associated with this target region.
339+
/// \param DeviceId The device to execute this region, -1 indicates the default.
340+
/// \param NumTeams Number of teams to launch the region with, -1 indicates a
341+
/// non-teams region and 0 indicates it was unspecified.
342+
/// \param ThreadLimit Limit to the number of threads to use in the kernel
343+
/// launch, 0 indicates it was unspecified.
344+
/// \param HostPtr The pointer to the host function registered with the kernel.
345+
/// \param Args All arguments to this kernel launch (see struct definition).
346+
EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
347+
int32_t ThreadLimit, void *HostPtr,
348+
__tgt_kernel_arguments *Args) {
349+
TIMESCOPE_WITH_IDENT(Loc);
355350
DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
356351
"\n",
357352
DPxPTR(HostPtr), DeviceId);
353+
if (Args->Version != 1) {
354+
DP("Unexpected ABI version: %d\n", Args->Version);
355+
}
358356
if (checkDeviceAndCtors(DeviceId, Loc)) {
359357
DP("Not offloading to device %" PRId64 "\n", DeviceId);
360358
return OMP_TGT_FAIL;
361359
}
362360

363361
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
364-
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
362+
printKernelArguments(Loc, DeviceId, Args->NumArgs, Args->ArgSizes,
363+
Args->ArgTypes, Args->ArgNames,
365364
"Entering OpenMP kernel");
366365
#ifdef OMPTARGET_DEBUG
367-
for (int I = 0; I < ArgNum; ++I) {
366+
for (int I = 0; I < Args->NumArgs; ++I) {
368367
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
369368
", Type=0x%" PRIx64 ", Name=%s\n",
370-
I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
371-
(ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
369+
I, DPxPTR(Args->ArgBasePtrs[I]), DPxPTR(Args->ArgPtrs[I]),
370+
Args->ArgSizes[I], Args->ArgTypes[I],
371+
(Args->ArgNames) ? getNameFromMapping(Args->ArgNames[I]).c_str()
372+
: "unknown");
372373
}
373374
#endif
374375

376+
bool IsTeams = NumTeams != -1;
377+
if (!IsTeams)
378+
NumTeams = 0;
379+
375380
DeviceTy &Device = *PM->Devices[DeviceId];
376381
AsyncInfoTy AsyncInfo(Device);
377-
int Rc = target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes,
378-
ArgTypes, ArgNames, ArgMappers, TeamNum, ThreadLimit,
379-
true /*team*/, AsyncInfo);
382+
int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs,
383+
Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames,
384+
Args->ArgMappers, NumTeams, ThreadLimit, IsTeams, AsyncInfo);
380385
if (Rc == OFFLOAD_SUCCESS)
381386
Rc = AsyncInfo.synchronize();
382387
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
383-
assert(Rc == OFFLOAD_SUCCESS &&
384-
"__tgt_target_teams_mapper unexpected failure!");
388+
assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
385389
return OMP_TGT_SUCCESS;
386390
}
387391

388-
EXTERN int __tgt_target_teams_nowait_mapper(
389-
ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
390-
void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
391-
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
392-
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
393-
void *NoAliasDepList) {
392+
EXTERN int __tgt_target_kernel_nowait(
393+
ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit,
394+
void *HostPtr, __tgt_kernel_arguments *Args, int32_t DepNum, void *DepList,
395+
int32_t NoAliasDepNum, void *NoAliasDepList) {
394396
TIMESCOPE_WITH_IDENT(Loc);
395397

396-
return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
397-
Args, ArgSizes, ArgTypes, ArgNames,
398-
ArgMappers, TeamNum, ThreadLimit);
398+
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
399+
Args);
399400
}
400401

401402
// Get the current number of components for a user-defined mapper.

0 commit comments

Comments
 (0)