Skip to content

Commit 5fa4ba3

Browse files
committed
[flang][OpenMP][DoConcurrent] Handle mapping non-reference values to device
Fixes a bug uncovered by Berkeley Lab's inference-engine project. This handles non-reference/non-box values (e.g. values that result from `fir.box_dims` ops) when we need to map those values to the target region.
1 parent 07c236d commit 5fa4ba3

File tree

2 files changed

+45
-7
lines changed

2 files changed

+45
-7
lines changed

flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
791791

792792
return Fortran::lower::omp::internal::createMapInfoOp(
793793
rewriter, liveIn.getLoc(), rawAddr,
794-
/*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps,
794+
/*varPtrPtr=*/{}, name.str(), boundsOps,
795795
/*members=*/{},
796796
/*membersIndex=*/mlir::ArrayAttr{},
797797
static_cast<
@@ -814,8 +814,8 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
814814
llvm::SmallVector<mlir::Type> regionArgTypes;
815815
llvm::SmallVector<mlir::Location> regionArgLocs;
816816

817-
for (auto var :
818-
llvm::concat<const mlir::Value>(clauseOps.hostEvalVars, mappedVars)) {
817+
for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars,
818+
clauseOps.mapVars)) {
819819
regionArgTypes.push_back(var.getType());
820820
regionArgLocs.push_back(var.getLoc());
821821
}
@@ -825,11 +825,19 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
825825
rewriter,
826826
fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>()));
827827

828-
for (auto [arg, mapInfoOp, mappedVar] : llvm::zip_equal(
829-
argIface.getMapBlockArgs(), clauseOps.mapVars, mappedVars)) {
828+
// Within the loop, it is possible that we discover other values that need to
829+
// be mapped to the target region (the shape info values for arrays, for
830+
// example). Therefore, the map block args might be extended and resized.
831+
// Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make
832+
// sure we access the proper vector of data.
833+
int idx = 0;
834+
for (auto [mapInfoOp, mappedVar] :
835+
llvm::zip_equal(clauseOps.mapVars, mappedVars)) {
830836
auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp());
831-
hlfir::DeclareOp liveInDeclare = genLiveInDeclare(
832-
builder, targetOp, arg, miOp, liveInShapeInfoMap.at(mappedVar));
837+
hlfir::DeclareOp liveInDeclare =
838+
genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx],
839+
miOp, liveInShapeInfoMap.at(mappedVar));
840+
++idx;
833841

834842
// TODO If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably
835843
// need to "unpack" the box by getting the defining op of its value.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
! Tests that we can map "unnamed" and non-reference/non-box values to device; for
2+
! example, values that result from `fir.box_dims` ops.
3+
4+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \
5+
! RUN: | FileCheck %s
6+
! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \
7+
! RUN: | FileCheck %s
8+
9+
subroutine test_non_refernece
10+
integer i
11+
real, allocatable :: arr(:)
12+
13+
associate(a => arr)
14+
do concurrent (i = 1:10)
15+
block
16+
real z(size(a,1))
17+
end block
18+
end do
19+
end associate
20+
end subroutine test_non_refernece
21+
22+
! CHECK: %[[DIM_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
23+
! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
24+
! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = ""}
25+
26+
27+
! CHECK: omp.target host_eval({{.*}} : index, index, index)
28+
! CHECK-SAME: map_entries(%{{.*}} -> %{{.*}}, %[[DIM_MAP]] -> %{{.*}} :
29+
! CHECK-SAME: !fir.ref<i32>, !fir.ref<index>)
30+

0 commit comments

Comments
 (0)