Skip to content

Commit 1fff116

Browse files
committed
[OpenMP] Change OpenMP code generation for target region entries
This patch changes the code we generate to enter a target region on the device. This is in-line with the new definition in the runtime that was added previously. Additionally we implement this in the OpenMPIRBuilder so that this code can be shared with Flang in the future. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D128550
1 parent ad23e4d commit 1fff116

File tree

190 files changed

+48690
-26623
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

190 files changed

+48690
-26623
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 31 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -6717,11 +6717,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
67176717
default:
67186718
break;
67196719
}
6720-
} else if (DefaultNT == -1) {
6721-
return nullptr;
67226720
}
67236721

6724-
return Bld.getInt32(DefaultNT);
6722+
return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
67256723
}
67266724

67276725
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
@@ -10311,23 +10309,29 @@ void CGOpenMPRuntime::emitTargetCall(
1031110309
// Emit tripcount for the target loop-based directive.
1031210310
emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
1031310311

10314-
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10312+
// Arguments for the target kernel.
10313+
SmallVector<llvm::Value *> KernelArgs{
10314+
CGF.Builder.getInt32(/* Version */ 1),
10315+
PointerNum,
10316+
InputInfo.BasePointersArray.getPointer(),
10317+
InputInfo.PointersArray.getPointer(),
10318+
InputInfo.SizesArray.getPointer(),
10319+
MapTypesArray,
10320+
MapNamesArray,
10321+
InputInfo.MappersArray.getPointer()};
10322+
10323+
// Arguments passed to the 'nowait' variant.
10324+
SmallVector<llvm::Value *> NoWaitKernelArgs{
10325+
CGF.Builder.getInt32(0),
10326+
llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
10327+
CGF.Builder.getInt32(0),
10328+
llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
10329+
};
10330+
10331+
bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10332+
1031510333
// The target region is an outlined function launched by the runtime
10316-
// via calls __tgt_target() or __tgt_target_teams().
10317-
//
10318-
// __tgt_target() launches a target region with one team and one thread,
10319-
// executing a serial region. This master thread may in turn launch
10320-
// more threads within its team upon encountering a parallel region,
10321-
// however, no additional teams can be launched on the device.
10322-
//
10323-
// __tgt_target_teams() launches a target region with one or more teams,
10324-
// each with one or more threads. This call is required for target
10325-
// constructs such as:
10326-
// 'target teams'
10327-
// 'target' / 'teams'
10328-
// 'target teams distribute parallel for'
10329-
// 'target parallel'
10330-
// and so on.
10334+
// via calls to __tgt_target_kernel().
1033110335
//
1033210336
// Note that on the host and CPU targets, the runtime implementation of
1033310337
// these calls simply call the outlined function without forking threads.
@@ -10338,70 +10342,15 @@ void CGOpenMPRuntime::emitTargetCall(
1033810342
// In contrast, on the NVPTX target, the implementation of
1033910343
// __tgt_target_teams() launches a GPU kernel with the requested number
1034010344
// of teams and threads so no additional calls to the runtime are required.
10341-
if (NumTeams) {
10342-
// If we have NumTeams defined this means that we have an enclosed teams
10343-
// region. Therefore we also expect to have NumThreads defined. These two
10344-
// values should be defined in the presence of a teams directive,
10345-
// regardless of having any clauses associated. If the user is using teams
10346-
// but no clauses, these two values will be the default that should be
10347-
// passed to the runtime library - a 32-bit integer with the value zero.
10348-
assert(NumThreads && "Thread limit expression should be available along "
10349-
"with number of teams.");
10350-
SmallVector<llvm::Value *> OffloadingArgs = {
10351-
RTLoc,
10352-
DeviceID,
10353-
OutlinedFnID,
10354-
PointerNum,
10355-
InputInfo.BasePointersArray.getPointer(),
10356-
InputInfo.PointersArray.getPointer(),
10357-
InputInfo.SizesArray.getPointer(),
10358-
MapTypesArray,
10359-
MapNamesArray,
10360-
InputInfo.MappersArray.getPointer(),
10361-
NumTeams,
10362-
NumThreads};
10363-
if (HasNowait) {
10364-
// Add int32_t depNum = 0, void *depList = nullptr, int32_t
10365-
// noAliasDepNum = 0, void *noAliasDepList = nullptr.
10366-
OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10367-
OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10368-
OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10369-
OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10370-
}
10371-
Return = CGF.EmitRuntimeCall(
10372-
OMPBuilder.getOrCreateRuntimeFunction(
10373-
CGM.getModule(), HasNowait
10374-
? OMPRTL___tgt_target_teams_nowait_mapper
10375-
: OMPRTL___tgt_target_teams_mapper),
10376-
OffloadingArgs);
10377-
} else {
10378-
SmallVector<llvm::Value *> OffloadingArgs = {
10379-
RTLoc,
10380-
DeviceID,
10381-
OutlinedFnID,
10382-
PointerNum,
10383-
InputInfo.BasePointersArray.getPointer(),
10384-
InputInfo.PointersArray.getPointer(),
10385-
InputInfo.SizesArray.getPointer(),
10386-
MapTypesArray,
10387-
MapNamesArray,
10388-
InputInfo.MappersArray.getPointer()};
10389-
if (HasNowait) {
10390-
// Add int32_t depNum = 0, void *depList = nullptr, int32_t
10391-
// noAliasDepNum = 0, void *noAliasDepList = nullptr.
10392-
OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10393-
OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10394-
OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10395-
OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10396-
}
10397-
Return = CGF.EmitRuntimeCall(
10398-
OMPBuilder.getOrCreateRuntimeFunction(
10399-
CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10400-
: OMPRTL___tgt_target_mapper),
10401-
OffloadingArgs);
10402-
}
10403-
1040410345
// Check the error code and execute the host version if required.
10346+
CGF.Builder.restoreIP(
10347+
HasNoWait ? OMPBuilder.emitTargetKernel(
10348+
CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
10349+
NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
10350+
: OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
10351+
DeviceID, NumTeams, NumThreads,
10352+
OutlinedFnID, KernelArgs));
10353+
1040510354
llvm::BasicBlock *OffloadFailedBlock =
1040610355
CGF.createBasicBlock("omp_offload.failed");
1040710356
llvm::BasicBlock *OffloadContBlock =

clang/test/OpenMP/capturing_in_templates.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pair<T1, T2> make_pair(T1 &&t1, T2 &&t2) {
2424

2525
// CHECK-LABEL: @main
2626
int main(int argc, char **argv) {
27-
// CHECK: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
27+
// CHECK: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @{{.+}}.region_id, %struct.__tgt_kernel_arguments* %{{.+}})
2828
#pragma omp target
2929
{
3030
for (int i = 0; i < 64; ++i) {

clang/test/OpenMP/declare_mapper_codegen.cpp

Lines changed: 90 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -243,20 +243,27 @@ void foo(int a){
243243
C c;
244244
c.a = a;
245245

246-
// CK0-DAG: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]])
247-
// CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
248-
// CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
249-
// CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
250-
// CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
251-
// CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
252-
// CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
253-
// CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
254-
// CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
255-
// CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
256-
// CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
257-
// CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
258-
// CK0: call void [[KERNEL_1:@.+]](%class.C* [[VAL]])
259-
#pragma omp target map(mapper(id),tofrom: c)
246+
// CK0-DAG: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
247+
// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
248+
// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
249+
// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
250+
// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
251+
// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
252+
// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
253+
// CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
254+
// CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
255+
// CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
256+
// CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
257+
// CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
258+
// CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
259+
// CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
260+
// CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
261+
// CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
262+
// CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
263+
// CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
264+
// CK0: call void [[KERNEL_1:@.+]](%class.C* [[VAL]])
265+
#pragma omp target map(mapper(id), tofrom \
266+
: c)
260267
{
261268
++c.a;
262269
}
@@ -282,20 +289,27 @@ void foo(int a){
282289
++c.a;
283290
}
284291

285-
// CK0-DAG: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[TEAMSIZES]]{{.+}}, {{.+}}[[TEAMTYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]], i32 0, i32 0)
286-
// CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
287-
// CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
288-
// CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
289-
// CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
290-
// CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
291-
// CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
292-
// CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
293-
// CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
294-
// CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
295-
// CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
296-
// CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
297-
// CK0: call void [[KERNEL_3:@.+]](%class.C* [[VAL]])
298-
#pragma omp target teams map(mapper(id),to: c)
292+
// CK0-DAG: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 0, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
293+
// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
294+
// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
295+
// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
296+
// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
297+
// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
298+
// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
299+
// CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
300+
// CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
301+
// CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
302+
// CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
303+
// CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
304+
// CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
305+
// CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
306+
// CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
307+
// CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
308+
// CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
309+
// CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
310+
// CK0: call void [[KERNEL_3:@.+]](%class.C* [[VAL]])
311+
#pragma omp target teams map(mapper(id), to \
312+
: c)
299313
{
300314
++c.a;
301315
}
@@ -493,7 +507,15 @@ void foo(int a){
493507
// CK0: }
494508

495509
// CK0: define internal void [[OUTLINED:@.+]](i32 {{.*}}{{[^,]+}}, [[ANON_T]]* noalias noundef [[CTXARG:%.+]])
496-
// CK0-DAG: call i32 @__tgt_target_nowait_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZEGEP:%[0-9]+]], {{.+}}[[NWTYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]], i32 0, i8* null, i32 0, i8* null)
510+
// CK0-DAG: call i32 @__tgt_target_kernel_nowait(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]], i32 0, i8* null, i32 0, i8* null)
511+
// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
512+
// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
513+
// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
514+
// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
515+
// CK0-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
516+
// CK0-DAG: store i64* [[SIZEGEP:%.+]], i64** [[SARG]]
517+
// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
518+
// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
497519
// CK0-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
498520
// CK0-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
499521
// CK0-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
@@ -525,7 +547,15 @@ void foo(int a){
525547
// CK0: }
526548

527549
// CK0: define internal void [[OUTLINE_1:@.+]](i32 {{.*}}%.global_tid.{{.+}}, [[ANON_T_0]]* noalias noundef [[CTXARG:%.+]])
528-
// CK0-DAG: call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZEGEP:%[0-9]+]], {{.+}}[[TEAMNWTYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]], i32 0, i32 0, i32 0, i8* null, i32 0, i8* null)
550+
// CK0-DAG: call i32 @__tgt_target_kernel_nowait(%struct.ident_t* @{{.+}}, i64 -1, i32 0, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]], i32 0, i8* null, i32 0, i8* null)
551+
// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
552+
// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
553+
// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
554+
// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
555+
// CK0-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
556+
// CK0-DAG: store i64* [[SIZEGEP:%.+]], i64** [[SARG]]
557+
// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
558+
// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
529559
// CK0-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
530560
// CK0-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
531561
// CK0-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
@@ -902,29 +932,36 @@ void foo(int a){
902932

903933
// CK3-DAG: [[BC:%.+]] = getelementptr inbounds %class.B, %class.B* [[BVAL]], i32 0, i32 0
904934

905-
// CK3-DAG: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]])
906-
// CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
907-
// CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
908-
// CK3-DAG: [[MPRGEP]] = bitcast [2 x i8*]* [[MPR:%[^,]+]] to i8**
909-
// CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
910-
// CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
911-
// CK3-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 0
912-
// CK3-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.B**
913-
// CK3-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
914-
// CK3-DAG: store %class.B* [[BVAL]], %class.B** [[CBP1]]
915-
// CK3-DAG: store %class.C* [[BC]], %class.C** [[CP1]]
916-
// CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
917-
// CK3-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
918-
// CK3-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
919-
// CK3-DAG: [[MPR2:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 1
920-
// CK3-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [10 x %class.C]**
921-
// CK3-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to %class.C**
922-
// CK3-DAG: store [10 x %class.C]* [[CVAL]], [10 x %class.C]** [[CBP2]]
923-
// CK3-DAG: [[CVALGEP:%.+]] = getelementptr inbounds {{.+}}[[CVAL]], i{{64|32}} 0, i{{64|32}} 0
924-
// CK3-DAG: store %class.C* [[CVALGEP]], %class.C** [[CP2]]
925-
// CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR2]]
926-
// CK3: call void [[KERNEL:@.+]](%class.B* [[BVAL]], [10 x %class.C]* [[CVAL]])
927-
#pragma omp target map(mapper(id),tofrom: c[0:10], b.c)
935+
// CK3-DAG: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
936+
// CK3-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
937+
// CK3-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
938+
// CK3-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
939+
// CK3-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
940+
// CK3-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
941+
// CK3-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
942+
// CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
943+
// CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
944+
// CK3-DAG: [[MPRGEP]] = bitcast [2 x i8*]* [[MPR:%[^,]+]] to i8**
945+
// CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
946+
// CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
947+
// CK3-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 0
948+
// CK3-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.B**
949+
// CK3-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
950+
// CK3-DAG: store %class.B* [[BVAL]], %class.B** [[CBP1]]
951+
// CK3-DAG: store %class.C* [[BC]], %class.C** [[CP1]]
952+
// CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
953+
// CK3-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
954+
// CK3-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
955+
// CK3-DAG: [[MPR2:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 1
956+
// CK3-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [10 x %class.C]**
957+
// CK3-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to %class.C**
958+
// CK3-DAG: store [10 x %class.C]* [[CVAL]], [10 x %class.C]** [[CBP2]]
959+
// CK3-DAG: [[CVALGEP:%.+]] = getelementptr inbounds {{.+}}[[CVAL]], i{{64|32}} 0, i{{64|32}} 0
960+
// CK3-DAG: store %class.C* [[CVALGEP]], %class.C** [[CP2]]
961+
// CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR2]]
962+
// CK3: call void [[KERNEL:@.+]](%class.B* [[BVAL]], [10 x %class.C]* [[CVAL]])
963+
#pragma omp target map(mapper(id), tofrom \
964+
: c [0:10], b.c)
928965
for (int i = 0; i < 10; i++) {
929966
b.c.a += ++c[i].a;
930967
}

0 commit comments

Comments
 (0)