Commit 18ff8df

[flang][cuda] Register managed variables with double descriptor (#134444)
Allocatable or pointer module variables with the CUDA managed attribute are defined with a double descriptor: one on the host and one on the device. Only the data pointed to by the descriptor is allocated in managed memory. Allow any allocatable or pointer module variable to be registered, just like device or constant variables.
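
For context, the kind of declaration this change handles would look like the following CUDA Fortran sketch. This source is an illustration only, not part of the commit; the mangled name @_QMmEa00 in the test below is consistent with a module m containing a variable a00 of this shape.

    ! Hypothetical CUDA Fortran input: a managed allocatable module variable.
    ! Lowering produces a host-side descriptor global plus a device-side copy;
    ! only the array data itself ends up in managed memory.
    module m
      real(8), allocatable, managed :: a00(:,:,:,:,:)
    end module m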
1 parent 19e0233 commit 18ff8df

2 files changed: +24 −3 lines


flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp

Lines changed: 6 additions & 3 deletions
@@ -105,10 +105,15 @@ struct CUFAddConstructor
     if (!attr)
       continue;
 
+    if (attr.getValue() == cuf::DataAttribute::Managed &&
+        !mlir::isa<fir::BaseBoxType>(globalOp.getType()))
+      TODO(loc, "registration of non-allocatable managed variables");
+
     mlir::func::FuncOp func;
     switch (attr.getValue()) {
     case cuf::DataAttribute::Device:
-    case cuf::DataAttribute::Constant: {
+    case cuf::DataAttribute::Constant:
+    case cuf::DataAttribute::Managed: {
       func = fir::runtime::getRuntimeFunc<mkRTKey(CUFRegisterVariable)>(
           loc, builder);
       auto fTy = func.getFunctionType();
@@ -141,8 +146,6 @@ struct CUFAddConstructor
           builder, loc, fTy, registeredMod, addr, gblName, sizeVal)};
       builder.create<fir::CallOp>(loc, func, args);
     } break;
-    case cuf::DataAttribute::Managed:
-      TODO(loc, "registration of managed variables");
     default:
       break;
     }
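
At the call site above, the registration call passes the registered module handle, the variable's address, its mangled global name, and a size value. As a reading aid only, the shape of that interface is roughly the sketch below; the argument names and exact types are assumptions inferred from the call site (registeredMod, addr, gblName, sizeVal), not flang's actual runtime declaration.

    #include <cstdint>

    // Hedged sketch inferred from the call site above; the real declaration
    // lives in flang's CUDA runtime headers and may differ.
    extern "C" void _FortranACUFRegisterVariable(
        void *registeredModule,    // handle from the earlier module registration
        void *hostAddress,         // host address of the global (here, the host descriptor)
        const char *name,          // mangled variable name, e.g. "_QMmEa00"
        std::int64_t sizeInBytes); // size of the registered entity in bytes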

flang/test/Fir/CUDA/cuda-constructor-2.f90

Lines changed: 18 additions & 0 deletions
@@ -60,3 +60,21 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<i8 = dense<8> : vector<2xi64>, i
 }
 }
 }
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git 3372303188df0f7f8ac26e7ab610cf8b0f716d42)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  fir.global @_QMmEa00 {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<!fir.array<?x?x?x?x?xf64>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?x?x?x?x?xf64>>
+    %1 = fir.shape %c0, %c0, %c0, %c0, %c0 : (index, index, index, index, index) -> !fir.shape<5>
+    %2 = fir.embox %0(%1) {allocator_idx = 3 : i32} : (!fir.heap<!fir.array<?x?x?x?x?xf64>>, !fir.shape<5>) -> !fir.box<!fir.heap<!fir.array<?x?x?x?x?xf64>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?x?x?x?x?xf64>>>
+  }
+  gpu.module @cuda_device_mod {
+  }
+}
+
+// CHECK: llvm.func internal @__cudaFortranConstructor()
+// CHECK: fir.address_of(@_QMmEa00)
+// CHECK: fir.call @_FortranACUFRegisterVariable
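
The // ----- separator indicates the test file runs in split-input-file mode; a plausible RUN line for this kind of FIR test, with the pass name taken from CUFAddConstructor above, is sketched below. The exact pipeline is an assumption, not shown in the diff (the CHECK lines expect llvm.func output, so additional lowering flags may follow the constructor pass in the real test).

    // RUN: fir-opt --split-input-file --cuf-add-constructor %s | FileCheck %s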
