Skip to content

Commit 775ad3e

Browse files
[flang][acc] Ensure all acc.loop get a default parallelism determination mode (#143623)
This PR updates the flang lowering to explicitly implement the OpenACC rules:
- As per OpenACC 3.3 standard section 2.9.6 (independent clause): a loop construct with no auto or seq clause is treated as if it has the independent clause when it is an orphaned loop construct or its parent compute construct is a parallel construct.
- As per OpenACC 3.3 standard section 2.9.7 (auto clause): when the parent compute construct is a kernels construct, a loop construct with no independent or seq clause is treated as if it has the auto clause.
- Loops in serial regions are `seq` if they have no other parallelism marking such as gang, worker, or vector.

The `acc.loop` verifier has not yet been updated to enforce this.
1 parent fa9e1a1 commit 775ad3e

File tree

7 files changed

+152
-68
lines changed

7 files changed

+152
-68
lines changed

flang/lib/Lower/OpenACC.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2150,6 +2150,70 @@ privatizeIv(Fortran::lower::AbstractConverter &converter,
21502150
ivPrivate.push_back(privateValue);
21512151
}
21522152

2153+
/// Make sure the loop being lowered has a default (device_type `none`)
/// parallelism determination mode: one of `seq`, `independent`, or `auto`.
///
/// If any of the three lists already holds a device_type `none` entry, the
/// user specified the default mode explicitly and nothing is changed.
/// Otherwise exactly one device_type `none` entry is appended, chosen from the
/// compute construct enclosing the builder's current block:
///   - orphaned loop or `acc.parallel` parent: `independent`
///   - `acc.serial` parent: `seq`, unless the loop already carries gang,
///     worker, or vector parallelism, in which case `auto`
///   - any other parent (expected to be `acc.kernels`): `auto`
///
/// This function only appends to the lists; it does not set any attribute on
/// \p loopOp. \p loopOp is only queried for its gang/worker/vector state.
///
/// \param converter              lowering converter providing the builder.
/// \param loopOp                 loop op whose gang/worker/vector state is
///                               inspected in the serial case.
/// \param seqDeviceTypes         device types collected for `seq`.
/// \param independentDeviceTypes device types collected for `independent`.
/// \param autoDeviceTypes        device types collected for `auto`.
static void determineDefaultLoopParMode(
    Fortran::lower::AbstractConverter &converter, mlir::acc::LoopOp &loopOp,
    llvm::SmallVector<mlir::Attribute> &seqDeviceTypes,
    llvm::SmallVector<mlir::Attribute> &independentDeviceTypes,
    llvm::SmallVector<mlir::Attribute> &autoDeviceTypes) {
  // Every entry in these lists is expected to be a DeviceTypeAttr. Use `cast`
  // (which asserts on a mismatch) rather than an unchecked `dyn_cast`, whose
  // null result would be dereferenced by `getValue()`.
  auto hasDeviceNone = [](mlir::Attribute attr) -> bool {
    return mlir::cast<mlir::acc::DeviceTypeAttr>(attr).getValue() ==
           mlir::acc::DeviceType::None;
  };
  bool hasDefaultSeq = llvm::any_of(seqDeviceTypes, hasDeviceNone);
  bool hasDefaultIndependent =
      llvm::any_of(independentDeviceTypes, hasDeviceNone);
  bool hasDefaultAuto = llvm::any_of(autoDeviceTypes, hasDeviceNone);
  if (hasDefaultSeq || hasDefaultIndependent || hasDefaultAuto)
    return; // Default loop par mode is already specified.

  auto &builder = converter.getFirOpBuilder();
  mlir::Region *currentRegion = builder.getBlock()->getParent();
  mlir::Operation *parentOp = mlir::acc::getEnclosingComputeOp(*currentRegion);
  // The device_type `none` marker that gets appended to exactly one list.
  mlir::Attribute deviceNone = mlir::acc::DeviceTypeAttr::get(
      builder.getContext(), mlir::acc::DeviceType::None);

  // No enclosing compute op is treated as an orphaned loop.
  const bool isOrphanedLoop = !parentOp;
  if (isOrphanedLoop ||
      mlir::isa_and_present<mlir::acc::ParallelOp>(parentOp)) {
    // As per OpenACC 3.3 standard section 2.9.6 independent clause:
    // A loop construct with no auto or seq clause is treated as if it has the
    // independent clause when it is an orphaned loop construct or its parent
    // compute construct is a parallel construct.
    independentDeviceTypes.push_back(deviceNone);
  } else if (mlir::isa_and_present<mlir::acc::SerialOp>(parentOp)) {
    // Serial construct implies `seq` clause on loop. However, this
    // conflicts with parallelism assignment if already set. Therefore check
    // that first.
    bool hasDefaultGangWorkerOrVector =
        loopOp.hasVector() || loopOp.getVectorValue() || loopOp.hasWorker() ||
        loopOp.getWorkerValue() || loopOp.hasGang() ||
        loopOp.getGangValue(mlir::acc::GangArgType::Num) ||
        loopOp.getGangValue(mlir::acc::GangArgType::Dim) ||
        loopOp.getGangValue(mlir::acc::GangArgType::Static);
    if (!hasDefaultGangWorkerOrVector) {
      seqDeviceTypes.push_back(deviceNone);
    } else {
      // Since the loop has some parallelism assigned - we cannot assign `seq`.
      // However, the `acc.loop` verifier will check that one of seq,
      // independent, or auto is marked. Seems reasonable to mark as auto since
      // the OpenACC spec does say "If not, or if it is unable to make a
      // determination, it must treat the auto clause as if it is a seq clause,
      // and it must ignore any gang, worker, or vector clauses on the loop
      // construct"
      autoDeviceTypes.push_back(deviceNone);
    }
  } else {
    // As per OpenACC 3.3 standard section 2.9.7 auto clause:
    // When the parent compute construct is a kernels construct, a loop
    // construct with no independent or seq clause is treated as if it has the
    // auto clause.
    assert(mlir::isa_and_present<mlir::acc::KernelsOp>(parentOp) &&
           "Expected kernels construct");
    autoDeviceTypes.push_back(deviceNone);
  }
}
2216+
21532217
static mlir::acc::LoopOp createLoopOp(
21542218
Fortran::lower::AbstractConverter &converter,
21552219
mlir::Location currentLocation,
@@ -2482,6 +2546,9 @@ static mlir::acc::LoopOp createLoopOp(
24822546
loopOp.setTileOperandsSegmentsAttr(
24832547
builder.getDenseI32ArrayAttr(tileOperandsSegments));
24842548

2549+
// Determine the loop's default par mode - either seq, independent, or auto.
2550+
determineDefaultLoopParMode(converter, loopOp, seqDeviceTypes,
2551+
independentDeviceTypes, autoDeviceTypes);
24852552
if (!seqDeviceTypes.empty())
24862553
loopOp.setSeqAttr(builder.getArrayAttr(seqDeviceTypes));
24872554
if (!independentDeviceTypes.empty())

flang/test/Lower/OpenACC/acc-kernels-loop.f90

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ subroutine acc_kernels_loop
4747
! CHECK: acc.kernels {
4848
! CHECK: acc.loop private{{.*}} {
4949
! CHECK: acc.yield
50-
! CHECK-NEXT: }{{$}}
50+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
5151
! CHECK: acc.terminator
5252
! CHECK-NEXT: }{{$}}
5353

@@ -59,7 +59,7 @@ subroutine acc_kernels_loop
5959
! CHECK: acc.kernels combined(loop) {
6060
! CHECK: acc.loop combined(kernels) private{{.*}} {
6161
! CHECK: acc.yield
62-
! CHECK-NEXT: }{{$}}
62+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
6363
! CHECK: acc.terminator
6464
! CHECK-NEXT: }{{$}}
6565

@@ -490,7 +490,7 @@ subroutine acc_kernels_loop
490490
! CHECK: acc.kernels {{.*}} {
491491
! CHECK: acc.loop {{.*}} gang {{.*}} {
492492
! CHECK: acc.yield
493-
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}{{$}}
493+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
494494
! CHECK: acc.terminator
495495
! CHECK-NEXT: }{{$}}
496496

@@ -503,7 +503,7 @@ subroutine acc_kernels_loop
503503
! CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32
504504
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM1]] : i32}) {{.*}} {
505505
! CHECK: acc.yield
506-
! CHECK-NEXT: }{{$}}
506+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
507507
! CHECK: acc.terminator
508508
! CHECK-NEXT: }{{$}}
509509

@@ -516,7 +516,7 @@ subroutine acc_kernels_loop
516516
! CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
517517
! CHECK: acc.loop {{.*}} gang({num=[[GANGNUM2]] : i32}) {{.*}} {
518518
! CHECK: acc.yield
519-
! CHECK-NEXT: }{{$}}
519+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
520520
! CHECK: acc.terminator
521521
! CHECK-NEXT: }{{$}}
522522

@@ -528,7 +528,7 @@ subroutine acc_kernels_loop
528528
! CHECK: acc.kernels {{.*}} {
529529
! CHECK: acc.loop {{.*}} gang({num=%{{.*}} : i32, static=%{{.*}} : i32})
530530
! CHECK: acc.yield
531-
! CHECK-NEXT: }{{$}}
531+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
532532
! CHECK: acc.terminator
533533
! CHECK-NEXT: }{{$}}
534534

@@ -540,7 +540,7 @@ subroutine acc_kernels_loop
540540
! CHECK: acc.kernels {{.*}} {
541541
! CHECK: acc.loop {{.*}} vector {{.*}} {
542542
! CHECK: acc.yield
543-
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}{{$}}
543+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
544544
! CHECK: acc.terminator
545545
! CHECK-NEXT: }{{$}}
546546

@@ -553,7 +553,7 @@ subroutine acc_kernels_loop
553553
! CHECK: [[CONSTANT128:%.*]] = arith.constant 128 : i32
554554
! CHECK: acc.loop {{.*}} vector([[CONSTANT128]] : i32) {{.*}} {
555555
! CHECK: acc.yield
556-
! CHECK-NEXT: }{{$}}
556+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
557557
! CHECK: acc.terminator
558558
! CHECK-NEXT: }{{$}}
559559

@@ -566,7 +566,7 @@ subroutine acc_kernels_loop
566566
! CHECK: [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
567567
! CHECK: acc.loop {{.*}} vector([[VECTORLENGTH]] : i32) {{.*}} {
568568
! CHECK: acc.yield
569-
! CHECK-NEXT: }{{$}}
569+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
570570
! CHECK: acc.terminator
571571
! CHECK-NEXT: }{{$}}
572572

@@ -578,7 +578,7 @@ subroutine acc_kernels_loop
578578
! CHECK: acc.kernels {{.*}} {
579579
! CHECK: acc.loop {{.*}} worker {{.*}} {
580580
! CHECK: acc.yield
581-
! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}{{$}}
581+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
582582
! CHECK: acc.terminator
583583
! CHECK-NEXT: }{{$}}
584584

@@ -591,7 +591,7 @@ subroutine acc_kernels_loop
591591
! CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32
592592
! CHECK: acc.loop {{.*}} worker([[WORKER128]] : i32) {{.*}} {
593593
! CHECK: acc.yield
594-
! CHECK-NEXT: }{{$}}
594+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
595595
! CHECK: acc.terminator
596596
! CHECK-NEXT: }{{$}}
597597

@@ -605,7 +605,7 @@ subroutine acc_kernels_loop
605605
! CHECK: acc.kernels {{.*}} {
606606
! CHECK: acc.loop {{.*}} {
607607
! CHECK: acc.yield
608-
! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true>}
608+
! CHECK-NEXT: } attributes {{{.*}}collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
609609
! CHECK: acc.terminator
610610
! CHECK-NEXT: }{{$}}
611611

@@ -621,9 +621,9 @@ subroutine acc_kernels_loop
621621
! CHECK: acc.loop {{.*}} {
622622
! CHECK: acc.loop {{.*}} {
623623
! CHECK: acc.yield
624-
! CHECK-NEXT: }{{$}}
624+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
625625
! CHECK: acc.yield
626-
! CHECK-NEXT: }{{$}}
626+
! CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]{{.*}}}
627627
! CHECK: acc.terminator
628628
! CHECK-NEXT: }{{$}}
629629

0 commit comments

Comments
 (0)