Skip to content

Commit c9353eb

Browse files
committed
[Libomptarget] Use new tripcount argument in the runtime.
The previous patch added an argument to the `__tgt_target_kernel` runtime function that includes the tripcount used for the loop clause. The tripcount was previously passed in via the `__kmpc_push_target_tripcount` function. Now we move this logic to the kernel launch itself, removing the need for the push function.
Depends on D128816.
Reviewed By: jdoerfert.
Differential Revision: https://reviews.llvm.org/D128817
1 parent 5300263 commit c9353eb

File tree

5 files changed

+22
-15
lines changed

5 files changed

+22
-15
lines changed

openmp/libomptarget/include/omptarget.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,10 @@ struct __tgt_kernel_arguments {
118118
int64_t *ArgSizes; // Size of the argument data in bytes.
119119
int64_t *ArgTypes; // Type of the data (e.g. to / from).
120120
void **ArgNames; // Name of the data for debugging, possibly null.
121-
void **ArgMappers; // User-defined mappers, possible null.
121+
void **ArgMappers; // User-defined mappers, possibly null.
122+
int64_t Tripcount; // Tripcount for the teams / distribute loop, 0 otherwise.
122123
};
124+
static_assert(sizeof(__tgt_kernel_arguments) == 64 && "Invalid struct size");
123125

124126
/// This struct is a record of an entry point or global. For a function
125127
/// entry point the size is expected to be zero

openmp/libomptarget/src/interface.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,8 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
278278
int64_t *ArgSizes, int64_t *ArgTypes,
279279
map_var_info_t *ArgNames, void **ArgMappers) {
280280
TIMESCOPE_WITH_IDENT(Loc);
281-
__tgt_kernel_arguments KernelArgs{1, ArgNum, ArgsBase, Args,
282-
ArgSizes, ArgTypes, ArgNames, ArgMappers};
281+
__tgt_kernel_arguments KernelArgs{
282+
1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, -1};
283283
return __tgt_target_kernel(Loc, DeviceId, -1, 0, HostPtr, &KernelArgs);
284284
}
285285

@@ -326,8 +326,8 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
326326
void **ArgMappers, int32_t TeamNum,
327327
int32_t ThreadLimit) {
328328
TIMESCOPE_WITH_IDENT(Loc);
329-
__tgt_kernel_arguments KernelArgs{1, ArgNum, ArgsBase, Args,
330-
ArgSizes, ArgTypes, ArgNames, ArgMappers};
329+
__tgt_kernel_arguments KernelArgs{
330+
1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, -1};
331331
return __tgt_target_kernel(Loc, DeviceId, TeamNum, ThreadLimit, HostPtr,
332332
&KernelArgs);
333333
}
@@ -381,7 +381,8 @@ EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
381381
AsyncInfoTy AsyncInfo(Device);
382382
int Rc = target(Loc, Device, HostPtr, Args->NumArgs, Args->ArgBasePtrs,
383383
Args->ArgPtrs, Args->ArgSizes, Args->ArgTypes, Args->ArgNames,
384-
Args->ArgMappers, NumTeams, ThreadLimit, IsTeams, AsyncInfo);
384+
Args->ArgMappers, NumTeams, ThreadLimit, Args->Tripcount,
385+
IsTeams, AsyncInfo);
385386
if (Rc == OFFLOAD_SUCCESS)
386387
Rc = AsyncInfo.synchronize();
387388
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);

openmp/libomptarget/src/omptarget.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,9 @@ static int initLibrary(DeviceTy &Device) {
190190
DP("Has pending ctors... call now\n");
191191
for (auto &Entry : Lib.second.PendingCtors) {
192192
void *Ctor = Entry;
193-
int Rc =
194-
target(nullptr, Device, Ctor, 0, nullptr, nullptr, nullptr,
195-
nullptr, nullptr, nullptr, 1, 1, true /*team*/, AsyncInfo);
193+
int Rc = target(nullptr, Device, Ctor, 0, nullptr, nullptr, nullptr,
194+
nullptr, nullptr, nullptr, 1, 1, 0, true /*team*/,
195+
AsyncInfo);
196196
if (Rc != OFFLOAD_SUCCESS) {
197197
REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(Ctor));
198198
return OFFLOAD_FAIL;
@@ -1140,7 +1140,6 @@ uint64_t getLoopTripCount(int64_t DeviceId) {
11401140
if (I != Device.LoopTripCnt.end()) {
11411141
LoopTripCount = I->second;
11421142
Device.LoopTripCnt.erase(I);
1143-
DP("loop trip count is %" PRIu64 ".\n", LoopTripCount);
11441143
}
11451144
}
11461145

@@ -1488,9 +1487,9 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
14881487
int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
14891488
void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
14901489
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
1491-
int32_t ThreadLimit, int IsTeamConstruct, AsyncInfoTy &AsyncInfo) {
1490+
int32_t ThreadLimit, uint64_t Tripcount, int IsTeamConstruct,
1491+
AsyncInfoTy &AsyncInfo) {
14921492
int32_t DeviceId = Device.DeviceID;
1493-
14941493
TableMap *TM = getTableMap(HostPtr);
14951494
// No map for this host pointer found!
14961495
if (!TM) {
@@ -1509,6 +1508,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
15091508
}
15101509
assert(TargetTable && "Global data has not been mapped\n");
15111510

1511+
// FIXME: Use legacy tripcount method if it is '-1'.
1512+
Tripcount = Tripcount == -1 ? getLoopTripCount(DeviceId) : Tripcount;
1513+
DP("loop trip count is %" PRIu64 ".\n", Tripcount);
1514+
15121515
// We need to keep bases and offsets separate. Sometimes (e.g. in OpenCL) we
15131516
// need to manifest base pointers prior to launching a kernel. Even if we have
15141517
// mapped an object only partially, e.g. A[N:M], although the kernel is
@@ -1546,7 +1549,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
15461549
if (IsTeamConstruct)
15471550
Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
15481551
TgtArgs.size(), TeamNum, ThreadLimit,
1549-
getLoopTripCount(DeviceId), AsyncInfo);
1552+
Tripcount, AsyncInfo);
15501553
else
15511554
Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
15521555
TgtArgs.size(), AsyncInfo);

openmp/libomptarget/src/private.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
4242
void **ArgBases, void **Args, int64_t *ArgSizes,
4343
int64_t *ArgTypes, map_var_info_t *ArgNames,
4444
void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit,
45-
int IsTeamConstruct, AsyncInfoTy &AsyncInfo);
45+
uint64_t Tripcount, int IsTeamConstruct,
46+
AsyncInfoTy &AsyncInfo);
4647

4748
extern void handleTargetOutcome(bool Success, ident_t *Loc);
4849
extern bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc);

openmp/libomptarget/src/rtl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) {
444444
AsyncInfoTy AsyncInfo(Device);
445445
for (auto &Dtor : Device.PendingCtorsDtors[Desc].PendingDtors) {
446446
int Rc = target(nullptr, Device, Dtor, 0, nullptr, nullptr, nullptr,
447-
nullptr, nullptr, nullptr, 1, 1, true /*team*/,
447+
nullptr, nullptr, nullptr, 1, 1, 0, true /*team*/,
448448
AsyncInfo);
449449
if (Rc != OFFLOAD_SUCCESS) {
450450
DP("Running destructor " DPxMOD " failed.\n", DPxPTR(Dtor));

0 commit comments

Comments
 (0)