Skip to content

Commit 5300263

Browse files
committed
[OpenMP] Add loop tripcount argument to kernel launch and remove push function
Previously we added the `push_target_tripcount` function to send the loop tripcount to the device runtime so we knew how to configure the teams / threads to execute the loop for a teams distribute construct. This was implemented as a separate function mostly to avoid changing the interface for backwards compatibility. Now that we've changed it anyway and the new interface can take an arbitrary number of arguments via the struct without changing the ABI, we can move this to the new interface. This will simplify the runtime by removing unnecessary state between calls. Depends on D128550. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D128816
1 parent 1fff116 commit 5300263

File tree

116 files changed

+30181
-28717
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+30181
-28717
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10187,9 +10187,8 @@ llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
1018710187
return UDMMap.lookup(D);
1018810188
}
1018910189

10190-
void CGOpenMPRuntime::emitTargetNumIterationsCall(
10190+
llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
1019110191
CodeGenFunction &CGF, const OMPExecutableDirective &D,
10192-
llvm::Value *DeviceID,
1019310192
llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
1019410193
const OMPLoopDirective &D)>
1019510194
SizeEmitter) {
@@ -10199,20 +10198,12 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
1019910198
if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
1020010199
TD = getNestedDistributeDirective(CGM.getContext(), D);
1020110200
if (!TD)
10202-
return;
10201+
return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10202+
1020310203
const auto *LD = cast<OMPLoopDirective>(TD);
10204-
auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10205-
PrePostActionTy &) {
10206-
if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10207-
llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10208-
llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10209-
CGF.EmitRuntimeCall(
10210-
OMPBuilder.getOrCreateRuntimeFunction(
10211-
CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10212-
Args);
10213-
}
10214-
};
10215-
emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10204+
if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10205+
return NumIterations;
10206+
return llvm::ConstantInt::get(CGF.Int64Ty, 0);
1021610207
}
1021710208

1021810209
void CGOpenMPRuntime::emitTargetCall(
@@ -10306,8 +10297,9 @@ void CGOpenMPRuntime::emitTargetCall(
1030610297
// Source location for the ident struct
1030710298
llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1030810299

10309-
// Emit tripcount for the target loop-based directive.
10310-
emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10300+
// Get tripcount for the target loop-based directive.
10301+
llvm::Value *NumIterations =
10302+
emitTargetNumIterationsCall(CGF, D, SizeEmitter);
1031110303

1031210304
// Arguments for the target kernel.
1031310305
SmallVector<llvm::Value *> KernelArgs{
@@ -10318,7 +10310,8 @@ void CGOpenMPRuntime::emitTargetCall(
1031810310
InputInfo.SizesArray.getPointer(),
1031910311
MapTypesArray,
1032010312
MapNamesArray,
10321-
InputInfo.MappersArray.getPointer()};
10313+
InputInfo.MappersArray.getPointer(),
10314+
NumIterations};
1032210315

1032310316
// Arguments passed to the 'nowait' variant.
1032410317
SmallVector<llvm::Value *> NoWaitKernelArgs{

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -884,13 +884,11 @@ class CGOpenMPRuntime {
884884
llvm::Function *TaskFunction, QualType SharedsTy,
885885
Address Shareds, const OMPTaskDataTy &Data);
886886

887-
/// Emit code that pushes the trip count of loops associated with constructs
888-
/// 'target teams distribute' and 'teams distribute parallel for'.
889-
/// \param SizeEmitter Emits the int64 value for the number of iterations of
890-
/// the associated loop.
891-
void emitTargetNumIterationsCall(
887+
/// Return the trip count of loops associated with constructs 'target teams
888+
/// distribute' and 'teams distribute parallel for'. \param SizeEmitter Emits
889+
/// the int64 value for the number of iterations of the associated loop.
890+
llvm::Value *emitTargetNumIterationsCall(
892891
CodeGenFunction &CGF, const OMPExecutableDirective &D,
893-
llvm::Value *DeviceID,
894892
llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
895893
const OMPLoopDirective &D)>
896894
SizeEmitter);

clang/test/OpenMP/distribute_codegen.cpp

Lines changed: 50 additions & 40 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_firstprivate_codegen.cpp

Lines changed: 36 additions & 32 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_lastprivate_codegen.cpp

Lines changed: 36 additions & 32 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_codegen.cpp

Lines changed: 1744 additions & 1716 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp

Lines changed: 36 additions & 32 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp

Lines changed: 139 additions & 131 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp

Lines changed: 36 additions & 32 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp

Lines changed: 368 additions & 344 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)