Skip to content

Commit b09b1d6

Browse files
committed
[OpenACC][CIR] Implement lowering for 'no_create' clause for comp/comb
no_create has its own 'data-in', plus uses the 'delete' for the data-out operation. Additionally, like all data clauses it uses the 'async' functionality previous implemented. This patch implements no_create for combined/compute constructs completely, and ensures that the feature is tested.
1 parent b7ef3e7 commit b09b1d6

File tree

5 files changed

+126
-11
lines changed

5 files changed

+126
-11
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,19 @@ class OpenACCClauseCIREmitter final
305305
{
306306
mlir::OpBuilder::InsertionGuard guardCase(builder);
307307
builder.setInsertionPointAfter(operation);
308-
afterOp = builder.create<AfterOpTy>(opInfo.beginLoc, beforeOp.getResult(),
309-
opInfo.varValue, structured, implicit,
310-
opInfo.name, opInfo.bounds);
308+
309+
if constexpr (std::is_same_v<AfterOpTy, mlir::acc::DeleteOp> ||
310+
std::is_same_v<AfterOpTy, mlir::acc::DetachOp>) {
311+
// Detach/Delete ops don't have the variable reference here, so they
312+
// take 1 fewer argument to their build function.
313+
afterOp = builder.create<AfterOpTy>(
314+
opInfo.beginLoc, beforeOp.getResult(), structured, implicit,
315+
opInfo.name, opInfo.bounds);
316+
} else {
317+
afterOp = builder.create<AfterOpTy>(
318+
opInfo.beginLoc, beforeOp.getResult(), opInfo.varValue, structured,
319+
implicit, opInfo.name, opInfo.bounds);
320+
}
311321
}
312322

313323
// Set the 'rest' of the info for both operations.
@@ -845,6 +855,22 @@ class OpenACCClauseCIREmitter final
845855
return clauseNotImplemented(clause);
846856
}
847857
}
858+
859+
void VisitNoCreateClause(const OpenACCNoCreateClause &clause) {
860+
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
861+
mlir::acc::KernelsOp>) {
862+
for (auto var : clause.getVarList())
863+
addDataOperand<mlir::acc::NoCreateOp, mlir::acc::DeleteOp>(
864+
var, mlir::acc::DataClause::acc_no_create, /*structured=*/true,
865+
/*implicit=*/false);
866+
} else if constexpr (isCombinedType<OpTy>) {
867+
applyToComputeOp(clause);
868+
} else {
869+
// TODO: When we've implemented this for everything, switch this to an
870+
// unreachable. data remains.
871+
return clauseNotImplemented(clause);
872+
}
873+
}
848874
};
849875

850876
template <typename OpTy>

clang/test/CIR/CodeGenOpenACC/combined.cpp

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,8 +1011,8 @@ extern "C" void acc_combined(int N, int cond) {
10111011
// CHECK-NEXT: acc.terminator
10121012
// CHECK-NEXT: } loc
10131013
}
1014-
extern "C" void acc_combined_deviceptr(int *arg1, int *arg2) {
1015-
// CHECK: cir.func @acc_combined_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
1014+
extern "C" void acc_combined_data_clauses(int *arg1, int *arg2) {
1015+
// CHECK: cir.func @acc_combined_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
10161016
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
10171017
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
10181018
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -1079,4 +1079,39 @@ extern "C" void acc_combined_deviceptr(int *arg1, int *arg2) {
10791079
// CHECK-NEXT: } loc
10801080
// CHECK-NEXT: acc.terminator
10811081
// CHECK-NEXT: } loc
1082+
1083+
#pragma acc parallel loop no_create(arg1)
1084+
for(unsigned I = 0; I < 5; ++I);
1085+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
1086+
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
1087+
// CHECK-NEXT: acc.loop combined(parallel) {
1088+
// CHECK: acc.yield
1089+
// CHECK-NEXT: } loc
1090+
// CHECK-NEXT: acc.yield
1091+
// CHECK-NEXT: } loc
1092+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
1093+
1094+
#pragma acc serial loop no_create(arg2)
1095+
for(unsigned I = 0; I < 5; ++I);
1096+
// CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
1097+
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) {
1098+
// CHECK-NEXT: acc.loop combined(serial) {
1099+
// CHECK: acc.yield
1100+
// CHECK-NEXT: } loc
1101+
// CHECK-NEXT: acc.yield
1102+
// CHECK-NEXT: } loc
1103+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
1104+
1105+
#pragma acc kernels loop no_create(arg1, arg2) device_type(host) async
1106+
for(unsigned I = 0; I < 5; ++I);
1107+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
1108+
// CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
1109+
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {
1110+
// CHECK-NEXT: acc.loop combined(kernels) {
1111+
// CHECK: acc.yield
1112+
// CHECK-NEXT: } loc
1113+
// CHECK-NEXT: acc.terminator
1114+
// CHECK-NEXT: } loc
1115+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
1116+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
10821117
}

clang/test/CIR/CodeGenOpenACC/kernels.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,8 +417,8 @@ void acc_kernels(int cond) {
417417
// CHECK-NEXT: cir.return
418418
}
419419

420-
void acc_kernels_deviceptr(int *arg1, int *arg2) {
421-
// CHECK: cir.func @acc_kernels_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
420+
void acc_kernels_data_clauses(int *arg1, int *arg2) {
421+
// CHECK: cir.func @acc_kernels_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
422422
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
423423
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
424424
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -451,4 +451,22 @@ void acc_kernels_deviceptr(int *arg1, int *arg2) {
451451
// CHECK-NEXT: acc.kernels dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
452452
// CHECK-NEXT: acc.terminator
453453
// CHECK-NEXT: } loc
454+
455+
#pragma acc kernels no_create(arg1)
456+
;
457+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
458+
// CHECK-NEXT: acc.kernels dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
459+
// CHECK-NEXT: acc.terminator
460+
// CHECK-NEXT: } loc
461+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
462+
463+
#pragma acc kernels no_create(arg1, arg2) device_type(nvidia) async
464+
;
465+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
466+
// CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
467+
// CHECK-NEXT: acc.kernels dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {
468+
// CHECK-NEXT: acc.terminator
469+
// CHECK-NEXT: } loc
470+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
471+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
454472
}

clang/test/CIR/CodeGenOpenACC/parallel.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -444,8 +444,8 @@ void acc_parallel(int cond) {
444444
// CHECK-NEXT: cir.return
445445
}
446446

447-
void acc_parallel_deviceptr(int *arg1, int *arg2) {
448-
// CHECK: cir.func @acc_parallel_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
447+
void acc_parallel_data_clauses(int *arg1, int *arg2) {
448+
// CHECK: cir.func @acc_parallel_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
449449
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
450450
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
451451
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -478,4 +478,23 @@ void acc_parallel_deviceptr(int *arg1, int *arg2) {
478478
// CHECK-NEXT: acc.parallel dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
479479
// CHECK-NEXT: acc.yield
480480
// CHECK-NEXT: } loc
481+
482+
#pragma acc parallel no_create(arg1)
483+
;
484+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
485+
// CHECK-NEXT: acc.parallel dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
486+
// CHECK-NEXT: acc.yield
487+
// CHECK-NEXT: } loc
488+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
489+
490+
#pragma acc parallel no_create(arg1, arg2) device_type(radeon) async
491+
;
492+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
493+
// CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
494+
// CHECK-NEXT: acc.parallel dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
495+
// CHECK-NEXT: acc.yield
496+
// CHECK-NEXT: } loc
497+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
498+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
481499
}
500+

clang/test/CIR/CodeGenOpenACC/serial.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,8 @@ void acc_serial(int cond) {
267267
// CHECK-NEXT: cir.return
268268
}
269269

270-
void acc_serial_deviceptr(int *arg1, int *arg2) {
271-
// CHECK: cir.func @acc_serial_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
270+
void acc_serial_data_clauses(int *arg1, int *arg2) {
271+
// CHECK: cir.func @acc_serial_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
272272
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
273273
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
274274
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -302,4 +302,21 @@ void acc_serial_deviceptr(int *arg1, int *arg2) {
302302
// CHECK-NEXT: acc.serial dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
303303
// CHECK-NEXT: acc.yield
304304
// CHECK-NEXT: } loc
305+
306+
#pragma acc serial no_create(arg1)
307+
;
308+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
309+
// CHECK-NEXT: acc.serial dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
310+
// CHECK-NEXT: acc.yield
311+
// CHECK-NEXT: } loc
312+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
313+
#pragma acc serial no_create(arg1, arg2) device_type(nvidia) async
314+
;
315+
// CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
316+
// CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
317+
// CHECK-NEXT: acc.serial dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {
318+
// CHECK-NEXT: acc.yield
319+
// CHECK-NEXT: } loc
320+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
321+
// CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
305322
}

0 commit comments

Comments
 (0)