Skip to content

Commit 16b75c8

Browse files
authored
[mlir][OpenMP] implement SIMD reduction (#146671)
This replicates clang's implementation. Basically:

- A private copy of the reduction variable is created, initialized to the reduction neutral value (using regions from the reduction declaration op).
- The body of the loop is lowered as usual, with accesses to the reduction variable mapped to the private copy.
- After the loop, we inline the reduction region from the declaration op to combine the privatized variable into the original variable.
- As usual with the SIMD construct, attributes are added to encourage vectorization of the loop and to assert that memory accesses in the loop don't alias across iterations.

I have verified that simple scalar examples do vectorize at -O3 and the tests I could find in the Fujitsu test suite produce correct results. I tested on top of #146097 and this seemed to work for composite constructs as well.

Fixes #144290
1 parent 7931a8f commit 16b75c8

File tree

4 files changed

+266
-31
lines changed

4 files changed

+266
-31
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
370370
}
371371
};
372372
auto checkReduction = [&todo](auto op, LogicalResult &result) {
373-
if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op))
373+
if (isa<omp::TeamsOp>(op))
374374
if (!op.getReductionVars().empty() || op.getReductionByref() ||
375375
op.getReductionSyms())
376376
result = todo("reduction");
@@ -2864,6 +2864,17 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
28642864

28652865
PrivateVarsInfo privateVarsInfo(simdOp);
28662866

2867+
MutableArrayRef<BlockArgument> reductionArgs =
2868+
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2869+
DenseMap<Value, llvm::Value *> reductionVariableMap;
2870+
SmallVector<llvm::Value *> privateReductionVariables(
2871+
simdOp.getNumReductionVars());
2872+
SmallVector<DeferredStore> deferredStores;
2873+
SmallVector<omp::DeclareReductionOp> reductionDecls;
2874+
collectReductionDecls(simdOp, reductionDecls);
2875+
llvm::ArrayRef<bool> isByRef = getIsByRef(simdOp.getReductionByref());
2876+
assert(isByRef.size() == simdOp.getNumReductionVars());
2877+
28672878
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
28682879
findAllocaInsertPoint(builder, moduleTranslation);
28692880

@@ -2872,11 +2883,27 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
28722883
if (handleError(afterAllocas, opInst).failed())
28732884
return failure();
28742885

2886+
if (failed(allocReductionVars(simdOp, reductionArgs, builder,
2887+
moduleTranslation, allocaIP, reductionDecls,
2888+
privateReductionVariables, reductionVariableMap,
2889+
deferredStores, isByRef)))
2890+
return failure();
2891+
28752892
if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
28762893
opInst)
28772894
.failed())
28782895
return failure();
28792896

2897+
// TODO: no call to copyFirstPrivateVars?
2898+
2899+
assert(afterAllocas.get()->getSinglePredecessor());
2900+
if (failed(initReductionVars(simdOp, reductionArgs, builder,
2901+
moduleTranslation,
2902+
afterAllocas.get()->getSinglePredecessor(),
2903+
reductionDecls, privateReductionVariables,
2904+
reductionVariableMap, isByRef, deferredStores)))
2905+
return failure();
2906+
28802907
llvm::ConstantInt *simdlen = nullptr;
28812908
if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
28822909
simdlen = builder.getInt64(simdlenVar.value());
@@ -2921,6 +2948,50 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
29212948
: nullptr,
29222949
order, simdlen, safelen);
29232950

2951+
// We now need to reduce the per-simd-lane reduction variable into the
2952+
// original variable. This works a bit differently to other reductions (e.g.
2953+
// wsloop) because we don't need to call into the OpenMP runtime to handle
2954+
// threads: everything happened in this one thread.
2955+
for (auto [i, tuple] : llvm::enumerate(
2956+
llvm::zip(reductionDecls, isByRef, simdOp.getReductionVars(),
2957+
privateReductionVariables))) {
2958+
auto [decl, byRef, reductionVar, privateReductionVar] = tuple;
2959+
2960+
OwningReductionGen gen = makeReductionGen(decl, builder, moduleTranslation);
2961+
llvm::Value *originalVariable = moduleTranslation.lookupValue(reductionVar);
2962+
llvm::Type *reductionType = moduleTranslation.convertType(decl.getType());
2963+
2964+
// We have one less load for by-ref case because that load is now inside of
2965+
// the reduction region.
2966+
llvm::Value *redValue = originalVariable;
2967+
if (!byRef)
2968+
redValue =
2969+
builder.CreateLoad(reductionType, redValue, "red.value." + Twine(i));
2970+
llvm::Value *privateRedValue = builder.CreateLoad(
2971+
reductionType, privateReductionVar, "red.private.value." + Twine(i));
2972+
llvm::Value *reduced;
2973+
2974+
auto res = gen(builder.saveIP(), redValue, privateRedValue, reduced);
2975+
if (failed(handleError(res, opInst)))
2976+
return failure();
2977+
builder.restoreIP(res.get());
2978+
2979+
// For by-ref case, the store is inside of the reduction region.
2980+
if (!byRef)
2981+
builder.CreateStore(reduced, originalVariable);
2982+
}
2983+
2984+
// After the construct, deallocate private reduction variables.
2985+
SmallVector<Region *> reductionRegions;
2986+
llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
2987+
[](omp::DeclareReductionOp reductionDecl) {
2988+
return &reductionDecl.getCleanupRegion();
2989+
});
2990+
if (failed(inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
2991+
moduleTranslation, builder,
2992+
"omp.reduction.cleanup")))
2993+
return failure();
2994+
29242995
return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
29252996
privateVarsInfo.llvmVars,
29262997
privateVarsInfo.privatizers);
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
2+
3+
llvm.func @init(%arg0: !llvm.ptr {llvm.nocapture}, %arg1: !llvm.ptr {llvm.nocapture}) {
4+
llvm.return
5+
}
6+
llvm.func @combine(%arg0: !llvm.ptr {llvm.nocapture}, %arg1: !llvm.ptr {llvm.nocapture}) {
7+
llvm.return
8+
}
9+
llvm.func @cleanup(%arg0: !llvm.ptr {llvm.nocapture}) {
10+
llvm.return
11+
}
12+
omp.private {type = private} @_QFsimd_reductionEi_private_i32 : i32
13+
omp.declare_reduction @add_reduction_byref_box_2xf32 : !llvm.ptr alloc {
14+
%0 = llvm.mlir.constant(1 : i64) : i64
15+
%1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
16+
omp.yield(%1 : !llvm.ptr)
17+
} init {
18+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
19+
llvm.call @init(%arg0, %arg1) : (!llvm.ptr, !llvm.ptr) -> ()
20+
omp.yield(%arg1 : !llvm.ptr)
21+
} combiner {
22+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
23+
llvm.call @combine(%arg0, %arg1) : (!llvm.ptr, !llvm.ptr) -> ()
24+
omp.yield(%arg0 : !llvm.ptr)
25+
} cleanup {
26+
^bb0(%arg0: !llvm.ptr):
27+
llvm.call @cleanup(%arg0) : (!llvm.ptr) -> ()
28+
omp.yield
29+
}
30+
llvm.func @_QPsimd_reduction(%arg0: !llvm.ptr {fir.bindc_name = "a", llvm.nocapture}, %arg1: !llvm.ptr {fir.bindc_name = "sum", llvm.nocapture}) {
31+
%0 = llvm.mlir.constant(1024 : i32) : i32
32+
%1 = llvm.mlir.constant(1 : i32) : i32
33+
%2 = llvm.mlir.constant(1 : i64) : i64
34+
%3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
35+
%4 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
36+
omp.simd private(@_QFsimd_reductionEi_private_i32 %4 -> %arg2 : !llvm.ptr) reduction(byref @add_reduction_byref_box_2xf32 %3 -> %arg3 : !llvm.ptr) {
37+
omp.loop_nest (%arg4) : i32 = (%1) to (%0) inclusive step (%1) {
38+
llvm.store %arg4, %arg2 : i32, !llvm.ptr
39+
omp.yield
40+
}
41+
}
42+
llvm.return
43+
}
44+
45+
// CHECK-LABEL: define void @_QPsimd_reduction
46+
// CHECK: %[[MOLD:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
47+
// CHECK: %[[ORIG_I:.*]] = alloca i32, i64 1, align 4
48+
// CHECK: %[[PRIV_I:.*]] = alloca i32, align 4
49+
// CHECK: %[[RED_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
50+
// CHECK: %[[PTR_RED_VAR:.*]] = alloca ptr, align 8
51+
// CHECK: br label %[[VAL_5:.*]]
52+
// CHECK: omp.region.after_alloca: ; preds = %[[VAL_6:.*]]
53+
// CHECK: br label %[[VAL_7:.*]]
54+
// CHECK: entry: ; preds = %[[VAL_5]]
55+
// CHECK: br label %[[VAL_8:.*]]
56+
// CHECK: omp.private.init: ; preds = %[[VAL_7]]
57+
// CHECK: br label %[[VAL_9:.*]]
58+
// CHECK: omp.reduction.init: ; preds = %[[VAL_8]]
59+
// CHECK: store ptr %[[RED_VAR]], ptr %[[PTR_RED_VAR]], align 8
60+
// CHECK: call void @init(ptr %[[MOLD]], ptr %[[RED_VAR]])
61+
// CHECK: br label %[[VAL_10:.*]]
62+
// CHECK: omp.simd.region: ; preds = %[[VAL_9]]
63+
// CHECK: br label %[[VAL_11:.*]]
64+
// CHECK: omp_loop.preheader: ; preds = %[[VAL_10]]
65+
// CHECK: br label %[[VAL_12:.*]]
66+
// CHECK: omp_loop.header: ; preds = %[[VAL_13:.*]], %[[VAL_11]]
67+
// CHECK: %[[VAL_14:.*]] = phi i32 [ 0, %[[VAL_11]] ], [ %[[VAL_15:.*]], %[[VAL_13]] ]
68+
// CHECK: br label %[[VAL_16:.*]]
69+
// CHECK: omp_loop.cond: ; preds = %[[VAL_12]]
70+
// CHECK: %[[VAL_17:.*]] = icmp ult i32 %[[VAL_14]], 1024
71+
// CHECK: br i1 %[[VAL_17]], label %[[VAL_18:.*]], label %[[VAL_19:.*]]
72+
// CHECK: omp_loop.body: ; preds = %[[VAL_16]]
73+
// CHECK: %[[VAL_20:.*]] = mul i32 %[[VAL_14]], 1
74+
// CHECK: %[[VAL_21:.*]] = add i32 %[[VAL_20]], 1
75+
// CHECK: br label %[[VAL_22:.*]]
76+
// CHECK: omp.loop_nest.region: ; preds = %[[VAL_18]]
77+
// CHECK: store i32 %[[VAL_21]], ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP:.*]]
78+
// CHECK: br label %[[VAL_23:.*]]
79+
// CHECK: omp.region.cont1: ; preds = %[[VAL_22]]
80+
// CHECK: br label %[[VAL_13]]
81+
// CHECK: omp_loop.inc: ; preds = %[[VAL_23]]
82+
// CHECK: %[[VAL_15]] = add nuw i32 %[[VAL_14]], 1
83+
// CHECK: br label %[[VAL_12]], !llvm.loop ![[LOOP:.*]]
84+
// CHECK: omp_loop.exit: ; preds = %[[VAL_16]]
85+
// CHECK: br label %[[VAL_24:.*]]
86+
// CHECK: omp_loop.after: ; preds = %[[VAL_19]]
87+
// CHECK: br label %[[VAL_25:.*]]
88+
// CHECK: omp.region.cont: ; preds = %[[VAL_24]]
89+
// CHECK: %[[RED_VAR2:.*]] = load ptr, ptr %[[PTR_RED_VAR]], align 8
90+
// CHECK: call void @combine(ptr %[[MOLD]], ptr %[[RED_VAR2]])
91+
// CHECK: %[[RED_VAR3:.*]] = load ptr, ptr %[[PTR_RED_VAR]], align 8
92+
// CHECK: call void @cleanup(ptr %[[RED_VAR3]])
93+
// CHECK: ret void
94+
95+
// CHECK: ![[ACCESS_GROUP]] = distinct !{}
96+
// CHECK: ![[LOOP]] = distinct !{![[LOOP]], ![[PARALLEL_ACCESS:.*]], ![[VECTORIZE:.*]]}
97+
// CHECK: ![[PARALLEL_ACCESS]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP]]}
98+
// CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
2+
3+
omp.private {type = private} @_QFsimd_reductionEi_private_i32 : i32
4+
omp.declare_reduction @add_reduction_f32 : f32 init {
5+
^bb0(%arg0: f32):
6+
%0 = llvm.mlir.constant(0.000000e+00 : f32) : f32
7+
omp.yield(%0 : f32)
8+
} combiner {
9+
^bb0(%arg0: f32, %arg1: f32):
10+
%0 = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath<contract>} : f32
11+
omp.yield(%0 : f32)
12+
}
13+
llvm.func @_QPsimd_reduction(%arg0: !llvm.ptr {fir.bindc_name = "a", llvm.nocapture}, %arg1: !llvm.ptr {fir.bindc_name = "sum", llvm.nocapture}) {
14+
%0 = llvm.mlir.constant(0.000000e+00 : f32) : f32
15+
%1 = llvm.mlir.constant(1 : i32) : i32
16+
%2 = llvm.mlir.constant(1024 : i32) : i32
17+
%3 = llvm.mlir.constant(1 : i64) : i64
18+
%4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
19+
llvm.store %0, %arg1 : f32, !llvm.ptr
20+
omp.simd private(@_QFsimd_reductionEi_private_i32 %4 -> %arg2 : !llvm.ptr) reduction(@add_reduction_f32 %arg1 -> %arg3 : !llvm.ptr) {
21+
omp.loop_nest (%arg4) : i32 = (%1) to (%2) inclusive step (%1) {
22+
llvm.store %arg4, %arg2 : i32, !llvm.ptr
23+
%5 = llvm.load %arg3 : !llvm.ptr -> f32
24+
%6 = llvm.load %arg2 : !llvm.ptr -> i32
25+
%7 = llvm.sext %6 : i32 to i64
26+
%8 = llvm.sub %7, %3 overflow<nsw> : i64
27+
%9 = llvm.getelementptr %arg0[%8] : (!llvm.ptr, i64) -> !llvm.ptr, f32
28+
%10 = llvm.load %9 : !llvm.ptr -> f32
29+
%11 = llvm.fadd %5, %10 {fastmathFlags = #llvm.fastmath<contract>} : f32
30+
llvm.store %11, %arg3 : f32, !llvm.ptr
31+
omp.yield
32+
}
33+
}
34+
llvm.return
35+
}
36+
37+
// CHECK-LABEL: define void @_QPsimd_reduction(
38+
// CHECK: %[[ORIG_I:.*]] = alloca i32, i64 1, align 4
39+
// CHECK: store float 0.000000e+00, ptr %[[ORIG_SUM:.*]], align 4
40+
// CHECK: %[[PRIV_I:.*]] = alloca i32, align 4
41+
// CHECK: %[[RED_VAR:.*]] = alloca float, align 4
42+
// CHECK: br label %[[VAL_4:.*]]
43+
// CHECK: omp.region.after_alloca: ; preds = %[[VAL_5:.*]]
44+
// CHECK: br label %[[VAL_6:.*]]
45+
// CHECK: entry: ; preds = %[[VAL_4]]
46+
// CHECK: br label %[[VAL_7:.*]]
47+
// CHECK: omp.private.init: ; preds = %[[VAL_6]]
48+
// CHECK: br label %[[VAL_8:.*]]
49+
// CHECK: omp.reduction.init: ; preds = %[[VAL_7]]
50+
// CHECK: store float 0.000000e+00, ptr %[[RED_VAR]], align 4
51+
// CHECK: br label %[[VAL_9:.*]]
52+
// CHECK: omp.simd.region: ; preds = %[[VAL_8]]
53+
// CHECK: br label %[[VAL_10:.*]]
54+
// CHECK: omp_loop.preheader: ; preds = %[[VAL_9]]
55+
// CHECK: br label %[[VAL_11:.*]]
56+
// CHECK: omp_loop.header: ; preds = %[[VAL_12:.*]], %[[VAL_10]]
57+
// CHECK: %[[VAL_13:.*]] = phi i32 [ 0, %[[VAL_10]] ], [ %[[VAL_14:.*]], %[[VAL_12]] ]
58+
// CHECK: br label %[[VAL_15:.*]]
59+
// CHECK: omp_loop.cond: ; preds = %[[VAL_11]]
60+
// CHECK: %[[VAL_16:.*]] = icmp ult i32 %[[VAL_13]], 1024
61+
// CHECK: br i1 %[[VAL_16]], label %[[VAL_17:.*]], label %[[VAL_18:.*]]
62+
// CHECK: omp_loop.body: ; preds = %[[VAL_15]]
63+
// CHECK: %[[VAL_19:.*]] = mul i32 %[[VAL_13]], 1
64+
// CHECK: %[[VAL_20:.*]] = add i32 %[[VAL_19]], 1
65+
// CHECK: br label %[[VAL_21:.*]]
66+
// CHECK: omp.loop_nest.region: ; preds = %[[VAL_17]]
67+
// CHECK: store i32 %[[VAL_20]], ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP:.*]]
68+
// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
69+
// CHECK: %[[VAL_23:.*]] = load i32, ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
70+
// CHECK: %[[VAL_24:.*]] = sext i32 %[[VAL_23]] to i64
71+
// CHECK: %[[VAL_25:.*]] = sub nsw i64 %[[VAL_24]], 1
72+
// CHECK: %[[VAL_26:.*]] = getelementptr float, ptr %[[VAL_27:.*]], i64 %[[VAL_25]]
73+
// CHECK: %[[VAL_28:.*]] = load float, ptr %[[VAL_26]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
74+
// CHECK: %[[VAL_29:.*]] = fadd contract float %[[RED_VAL]], %[[VAL_28]]
75+
// CHECK: store float %[[VAL_29]], ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
76+
// CHECK: br label %[[VAL_30:.*]]
77+
// CHECK: omp.region.cont1: ; preds = %[[VAL_21]]
78+
// CHECK: br label %[[VAL_12]]
79+
// CHECK: omp_loop.inc: ; preds = %[[VAL_30]]
80+
// CHECK: %[[VAL_14]] = add nuw i32 %[[VAL_13]], 1
81+
// CHECK: br label %[[VAL_11]], !llvm.loop ![[LOOP:.*]]
82+
// CHECK: omp_loop.exit: ; preds = %[[VAL_15]]
83+
// CHECK: br label %[[VAL_31:.*]]
84+
// CHECK: omp_loop.after: ; preds = %[[VAL_18]]
85+
// CHECK: br label %[[VAL_32:.*]]
86+
// CHECK: omp.region.cont: ; preds = %[[VAL_31]]
87+
// CHECK: %[[SUM_VAL:.*]] = load float, ptr %[[ORIG_SUM]], align 4
88+
// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4
89+
// CHECK: %[[COMBINED_VAL:.*]] = fadd contract float %[[SUM_VAL]], %[[RED_VAL]]
90+
// CHECK: store float %[[COMBINED_VAL]], ptr %[[ORIG_SUM]], align 4
91+
// CHECK: ret void
92+
93+
// CHECK: ![[ACCESS_GROUP]] = distinct !{}
94+
// CHECK: ![[LOOP]] = distinct !{![[LOOP]], ![[PARALLEL_ACCESS:.*]], ![[VECTORIZE:.*]]}
95+
// CHECK: ![[PARALLEL_ACCESS]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP]]}
96+
// CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true}

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -143,36 +143,6 @@ llvm.func @simd_linear(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
143143

144144
// -----
145145

146-
omp.declare_reduction @add_f32 : f32
147-
init {
148-
^bb0(%arg: f32):
149-
%0 = llvm.mlir.constant(0.0 : f32) : f32
150-
omp.yield (%0 : f32)
151-
}
152-
combiner {
153-
^bb1(%arg0: f32, %arg1: f32):
154-
%1 = llvm.fadd %arg0, %arg1 : f32
155-
omp.yield (%1 : f32)
156-
}
157-
atomic {
158-
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
159-
%2 = llvm.load %arg3 : !llvm.ptr -> f32
160-
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
161-
omp.yield
162-
}
163-
llvm.func @simd_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
164-
// expected-error@below {{not yet implemented: Unhandled clause reduction in omp.simd operation}}
165-
// expected-error@below {{LLVM Translation failed for operation: omp.simd}}
166-
omp.simd reduction(@add_f32 %x -> %prv : !llvm.ptr) {
167-
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
168-
omp.yield
169-
}
170-
}
171-
llvm.return
172-
}
173-
174-
// -----
175-
176146
omp.declare_reduction @add_f32 : f32
177147
init {
178148
^bb0(%arg: f32):

0 commit comments

Comments
 (0)