Skip to content

Commit d17d21e

Browse files
authored
Amd/dev/rlieberm/land simd (llvm#2966)
2 parents 33d9ab2 + f05fd92 commit d17d21e

File tree

4 files changed

+266
-45
lines changed

4 files changed

+266
-45
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
234234
}
235235
};
236236
auto checkReduction = [&todo](auto op, LogicalResult &result) {
237-
if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op))
237+
if (isa<omp::TeamsOp>(op))
238238
if (!op.getReductionVars().empty() || op.getReductionByref() ||
239239
op.getReductionSyms())
240240
result = todo("reduction");
@@ -313,10 +313,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
313313
if (!op.getNontemporalVars().empty())
314314
op.emitWarning()
315315
<< "ignored clause: nontemporal in omp.simd operation";
316-
317-
if (!op.getReductionVars().empty() || op.getReductionByref() ||
318-
op.getReductionSyms())
319-
op.emitWarning() << "ignored clause: reduction in omp.simd operation";
320316
})
321317
.Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
322318
omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
@@ -2693,17 +2689,19 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
26932689
if (failed(checkImplementationStatus(opInst)))
26942690
return failure();
26952691

2696-
// This is needed to make sure that uses of entry block arguments for the
2697-
// reduction clause, which is not yet being translated, are mapped to the
2698-
// outside values. This has the effect of ignoring the clause without causing
2699-
// a compiler crash.
2700-
auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(*simdOp);
2701-
for (auto [arg, var] : llvm::zip_equal(blockArgIface.getReductionBlockArgs(),
2702-
simdOp.getReductionVars()))
2703-
moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
2704-
27052692
PrivateVarsInfo privateVarsInfo(simdOp);
27062693

2694+
MutableArrayRef<BlockArgument> reductionArgs =
2695+
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2696+
DenseMap<Value, llvm::Value *> reductionVariableMap;
2697+
SmallVector<llvm::Value *> privateReductionVariables(
2698+
simdOp.getNumReductionVars());
2699+
SmallVector<DeferredStore> deferredStores;
2700+
SmallVector<omp::DeclareReductionOp> reductionDecls;
2701+
collectReductionDecls(simdOp, reductionDecls);
2702+
llvm::ArrayRef<bool> isByRef = getIsByRef(simdOp.getReductionByref());
2703+
assert(isByRef.size() == simdOp.getNumReductionVars());
2704+
27072705
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
27082706
findAllocaInsertPoint(builder, moduleTranslation);
27092707

@@ -2712,11 +2710,27 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
27122710
if (handleError(afterAllocas, opInst).failed())
27132711
return failure();
27142712

2713+
if (failed(allocReductionVars(simdOp, reductionArgs, builder,
2714+
moduleTranslation, allocaIP, reductionDecls,
2715+
privateReductionVariables, reductionVariableMap,
2716+
deferredStores, isByRef)))
2717+
return failure();
2718+
27152719
if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
27162720
opInst)
27172721
.failed())
27182722
return failure();
27192723

2724+
// TODO: copyFirstPrivateVars is not called on this path; confirm whether it should be.
2725+
2726+
assert(afterAllocas.get()->getSinglePredecessor());
2727+
if (failed(initReductionVars(simdOp, reductionArgs, builder,
2728+
moduleTranslation,
2729+
afterAllocas.get()->getSinglePredecessor(),
2730+
reductionDecls, privateReductionVariables,
2731+
reductionVariableMap, isByRef, deferredStores)))
2732+
return failure();
2733+
27202734
llvm::ConstantInt *simdlen = nullptr;
27212735
if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
27222736
simdlen = builder.getInt64(simdlenVar.value());
@@ -2761,6 +2775,50 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
27612775
: nullptr,
27622776
order, simdlen, safelen);
27632777

2778+
// We now need to reduce the per-simd-lane reduction variable into the
2779+
// original variable. This works a bit differently from other reductions (e.g.
2780+
// wsloop) because we don't need to call into the OpenMP runtime to handle
2781+
// threads: everything happens in this one thread.
2782+
for (auto [i, tuple] : llvm::enumerate(
2783+
llvm::zip(reductionDecls, isByRef, simdOp.getReductionVars(),
2784+
privateReductionVariables))) {
2785+
auto [decl, byRef, reductionVar, privateReductionVar] = tuple;
2786+
2787+
OwningReductionGen gen = makeReductionGen(decl, builder, moduleTranslation);
2788+
llvm::Value *originalVariable = moduleTranslation.lookupValue(reductionVar);
2789+
llvm::Type *reductionType = moduleTranslation.convertType(decl.getType());
2790+
2791+
// The by-ref case needs one fewer load here because that load is now inside
2792+
// the reduction region.
2793+
llvm::Value *redValue = originalVariable;
2794+
if (!byRef)
2795+
redValue =
2796+
builder.CreateLoad(reductionType, redValue, "red.value." + Twine(i));
2797+
llvm::Value *privateRedValue = builder.CreateLoad(
2798+
reductionType, privateReductionVar, "red.private.value." + Twine(i));
2799+
llvm::Value *reduced;
2800+
2801+
auto res = gen(builder.saveIP(), redValue, privateRedValue, reduced);
2802+
if (failed(handleError(res, opInst)))
2803+
return failure();
2804+
builder.restoreIP(res.get());
2805+
2806+
// For the by-ref case, the store is inside the reduction region.
2807+
if (!byRef)
2808+
builder.CreateStore(reduced, originalVariable);
2809+
}
2810+
2811+
// After the construct, deallocate private reduction variables.
2812+
SmallVector<Region *> reductionRegions;
2813+
llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
2814+
[](omp::DeclareReductionOp reductionDecl) {
2815+
return &reductionDecl.getCleanupRegion();
2816+
});
2817+
if (failed(inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
2818+
moduleTranslation, builder,
2819+
"omp.reduction.cleanup")))
2820+
return failure();
2821+
27642822
return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
27652823
privateVarsInfo.llvmVars,
27662824
privateVarsInfo.privatizers);
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
2+
3+
llvm.func @init(%arg0: !llvm.ptr {llvm.nocapture}, %arg1: !llvm.ptr {llvm.nocapture}) {
4+
llvm.return
5+
}
6+
llvm.func @combine(%arg0: !llvm.ptr {llvm.nocapture}, %arg1: !llvm.ptr {llvm.nocapture}) {
7+
llvm.return
8+
}
9+
llvm.func @cleanup(%arg0: !llvm.ptr {llvm.nocapture}) {
10+
llvm.return
11+
}
12+
omp.private {type = private} @_QFsimd_reductionEi_private_i32 : i32
13+
omp.declare_reduction @add_reduction_byref_box_2xf32 : !llvm.ptr alloc {
14+
%0 = llvm.mlir.constant(1 : i64) : i64
15+
%1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
16+
omp.yield(%1 : !llvm.ptr)
17+
} init {
18+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
19+
llvm.call @init(%arg0, %arg1) : (!llvm.ptr, !llvm.ptr) -> ()
20+
omp.yield(%arg1 : !llvm.ptr)
21+
} combiner {
22+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
23+
llvm.call @combine(%arg0, %arg1) : (!llvm.ptr, !llvm.ptr) -> ()
24+
omp.yield(%arg0 : !llvm.ptr)
25+
} cleanup {
26+
^bb0(%arg0: !llvm.ptr):
27+
llvm.call @cleanup(%arg0) : (!llvm.ptr) -> ()
28+
omp.yield
29+
}
30+
llvm.func @_QPsimd_reduction(%arg0: !llvm.ptr {fir.bindc_name = "a", llvm.nocapture}, %arg1: !llvm.ptr {fir.bindc_name = "sum", llvm.nocapture}) {
31+
%0 = llvm.mlir.constant(1024 : i32) : i32
32+
%1 = llvm.mlir.constant(1 : i32) : i32
33+
%2 = llvm.mlir.constant(1 : i64) : i64
34+
%3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
35+
%4 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
36+
omp.simd private(@_QFsimd_reductionEi_private_i32 %4 -> %arg2 : !llvm.ptr) reduction(byref @add_reduction_byref_box_2xf32 %3 -> %arg3 : !llvm.ptr) {
37+
omp.loop_nest (%arg4) : i32 = (%1) to (%0) inclusive step (%1) {
38+
llvm.store %arg4, %arg2 : i32, !llvm.ptr
39+
omp.yield
40+
}
41+
}
42+
llvm.return
43+
}
44+
45+
// CHECK-LABEL: define void @_QPsimd_reduction
46+
// CHECK: %[[MOLD:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
47+
// CHECK: %[[ORIG_I:.*]] = alloca i32, i64 1, align 4
48+
// CHECK: %[[PRIV_I:.*]] = alloca i32, align 4
49+
// CHECK: %[[RED_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
50+
// CHECK: %[[PTR_RED_VAR:.*]] = alloca ptr, align 8
51+
// CHECK: br label %[[VAL_5:.*]]
52+
// CHECK: omp.region.after_alloca: ; preds = %[[VAL_6:.*]]
53+
// CHECK: br label %[[VAL_7:.*]]
54+
// CHECK: entry: ; preds = %[[VAL_5]]
55+
// CHECK: br label %[[VAL_8:.*]]
56+
// CHECK: omp.private.init: ; preds = %[[VAL_7]]
57+
// CHECK: br label %[[VAL_9:.*]]
58+
// CHECK: omp.reduction.init: ; preds = %[[VAL_8]]
59+
// CHECK: store ptr %[[RED_VAR]], ptr %[[PTR_RED_VAR]], align 8
60+
// CHECK: call void @init(ptr %[[MOLD]], ptr %[[RED_VAR]])
61+
// CHECK: br label %[[VAL_10:.*]]
62+
// CHECK: omp.simd.region: ; preds = %[[VAL_9]]
63+
// CHECK: br label %[[VAL_11:.*]]
64+
// CHECK: omp_loop.preheader: ; preds = %[[VAL_10]]
65+
// CHECK: br label %[[VAL_12:.*]]
66+
// CHECK: omp_loop.header: ; preds = %[[VAL_13:.*]], %[[VAL_11]]
67+
// CHECK: %[[VAL_14:.*]] = phi i32 [ 0, %[[VAL_11]] ], [ %[[VAL_15:.*]], %[[VAL_13]] ]
68+
// CHECK: br label %[[VAL_16:.*]]
69+
// CHECK: omp_loop.cond: ; preds = %[[VAL_12]]
70+
// CHECK: %[[VAL_17:.*]] = icmp ult i32 %[[VAL_14]], 1024
71+
// CHECK: br i1 %[[VAL_17]], label %[[VAL_18:.*]], label %[[VAL_19:.*]]
72+
// CHECK: omp_loop.body: ; preds = %[[VAL_16]]
73+
// CHECK: %[[VAL_20:.*]] = mul i32 %[[VAL_14]], 1
74+
// CHECK: %[[VAL_21:.*]] = add i32 %[[VAL_20]], 1
75+
// CHECK: br label %[[VAL_22:.*]]
76+
// CHECK: omp.loop_nest.region: ; preds = %[[VAL_18]]
77+
// CHECK: store i32 %[[VAL_21]], ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP:.*]]
78+
// CHECK: br label %[[VAL_23:.*]]
79+
// CHECK: omp.region.cont1: ; preds = %[[VAL_22]]
80+
// CHECK: br label %[[VAL_13]]
81+
// CHECK: omp_loop.inc: ; preds = %[[VAL_23]]
82+
// CHECK: %[[VAL_15]] = add nuw i32 %[[VAL_14]], 1
83+
// CHECK: br label %[[VAL_12]], !llvm.loop ![[LOOP:.*]]
84+
// CHECK: omp_loop.exit: ; preds = %[[VAL_16]]
85+
// CHECK: br label %[[VAL_24:.*]]
86+
// CHECK: omp_loop.after: ; preds = %[[VAL_19]]
87+
// CHECK: br label %[[VAL_25:.*]]
88+
// CHECK: omp.region.cont: ; preds = %[[VAL_24]]
89+
// CHECK: %[[RED_VAR2:.*]] = load ptr, ptr %[[PTR_RED_VAR]], align 8
90+
// CHECK: call void @combine(ptr %[[MOLD]], ptr %[[RED_VAR2]])
91+
// CHECK: %[[RED_VAR3:.*]] = load ptr, ptr %[[PTR_RED_VAR]], align 8
92+
// CHECK: call void @cleanup(ptr %[[RED_VAR3]])
93+
// CHECK: ret void
94+
95+
// CHECK: ![[ACCESS_GROUP]] = distinct !{}
96+
// CHECK: ![[LOOP]] = distinct !{![[LOOP]], ![[PARALLEL_ACCESS:.*]], ![[VECTORIZE:.*]]}
97+
// CHECK: ![[PARALLEL_ACCESS]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP]]}
98+
// CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
2+
3+
omp.private {type = private} @_QFsimd_reductionEi_private_i32 : i32
4+
omp.declare_reduction @add_reduction_f32 : f32 init {
5+
^bb0(%arg0: f32):
6+
%0 = llvm.mlir.constant(0.000000e+00 : f32) : f32
7+
omp.yield(%0 : f32)
8+
} combiner {
9+
^bb0(%arg0: f32, %arg1: f32):
10+
%0 = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath<contract>} : f32
11+
omp.yield(%0 : f32)
12+
}
13+
llvm.func @_QPsimd_reduction(%arg0: !llvm.ptr {fir.bindc_name = "a", llvm.nocapture}, %arg1: !llvm.ptr {fir.bindc_name = "sum", llvm.nocapture}) {
14+
%0 = llvm.mlir.constant(0.000000e+00 : f32) : f32
15+
%1 = llvm.mlir.constant(1 : i32) : i32
16+
%2 = llvm.mlir.constant(1024 : i32) : i32
17+
%3 = llvm.mlir.constant(1 : i64) : i64
18+
%4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
19+
llvm.store %0, %arg1 : f32, !llvm.ptr
20+
omp.simd private(@_QFsimd_reductionEi_private_i32 %4 -> %arg2 : !llvm.ptr) reduction(@add_reduction_f32 %arg1 -> %arg3 : !llvm.ptr) {
21+
omp.loop_nest (%arg4) : i32 = (%1) to (%2) inclusive step (%1) {
22+
llvm.store %arg4, %arg2 : i32, !llvm.ptr
23+
%5 = llvm.load %arg3 : !llvm.ptr -> f32
24+
%6 = llvm.load %arg2 : !llvm.ptr -> i32
25+
%7 = llvm.sext %6 : i32 to i64
26+
%8 = llvm.sub %7, %3 overflow<nsw> : i64
27+
%9 = llvm.getelementptr %arg0[%8] : (!llvm.ptr, i64) -> !llvm.ptr, f32
28+
%10 = llvm.load %9 : !llvm.ptr -> f32
29+
%11 = llvm.fadd %5, %10 {fastmathFlags = #llvm.fastmath<contract>} : f32
30+
llvm.store %11, %arg3 : f32, !llvm.ptr
31+
omp.yield
32+
}
33+
}
34+
llvm.return
35+
}
36+
37+
// CHECK-LABEL: define void @_QPsimd_reduction(
38+
// CHECK: %[[ORIG_I:.*]] = alloca i32, i64 1, align 4
39+
// CHECK: store float 0.000000e+00, ptr %[[ORIG_SUM:.*]], align 4
40+
// CHECK: %[[PRIV_I:.*]] = alloca i32, align 4
41+
// CHECK: %[[RED_VAR:.*]] = alloca float, align 4
42+
// CHECK: br label %[[VAL_4:.*]]
43+
// CHECK: omp.region.after_alloca: ; preds = %[[VAL_5:.*]]
44+
// CHECK: br label %[[VAL_6:.*]]
45+
// CHECK: entry: ; preds = %[[VAL_4]]
46+
// CHECK: br label %[[VAL_7:.*]]
47+
// CHECK: omp.private.init: ; preds = %[[VAL_6]]
48+
// CHECK: br label %[[VAL_8:.*]]
49+
// CHECK: omp.reduction.init: ; preds = %[[VAL_7]]
50+
// CHECK: store float 0.000000e+00, ptr %[[RED_VAR]], align 4
51+
// CHECK: br label %[[VAL_9:.*]]
52+
// CHECK: omp.simd.region: ; preds = %[[VAL_8]]
53+
// CHECK: br label %[[VAL_10:.*]]
54+
// CHECK: omp_loop.preheader: ; preds = %[[VAL_9]]
55+
// CHECK: br label %[[VAL_11:.*]]
56+
// CHECK: omp_loop.header: ; preds = %[[VAL_12:.*]], %[[VAL_10]]
57+
// CHECK: %[[VAL_13:.*]] = phi i32 [ 0, %[[VAL_10]] ], [ %[[VAL_14:.*]], %[[VAL_12]] ]
58+
// CHECK: br label %[[VAL_15:.*]]
59+
// CHECK: omp_loop.cond: ; preds = %[[VAL_11]]
60+
// CHECK: %[[VAL_16:.*]] = icmp ult i32 %[[VAL_13]], 1024
61+
// CHECK: br i1 %[[VAL_16]], label %[[VAL_17:.*]], label %[[VAL_18:.*]]
62+
// CHECK: omp_loop.body: ; preds = %[[VAL_15]]
63+
// CHECK: %[[VAL_19:.*]] = mul i32 %[[VAL_13]], 1
64+
// CHECK: %[[VAL_20:.*]] = add i32 %[[VAL_19]], 1
65+
// CHECK: br label %[[VAL_21:.*]]
66+
// CHECK: omp.loop_nest.region: ; preds = %[[VAL_17]]
67+
// CHECK: store i32 %[[VAL_20]], ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP:.*]]
68+
// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
69+
// CHECK: %[[VAL_23:.*]] = load i32, ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
70+
// CHECK: %[[VAL_24:.*]] = sext i32 %[[VAL_23]] to i64
71+
// CHECK: %[[VAL_25:.*]] = sub nsw i64 %[[VAL_24]], 1
72+
// CHECK: %[[VAL_26:.*]] = getelementptr float, ptr %[[VAL_27:.*]], i64 %[[VAL_25]]
73+
// CHECK: %[[VAL_28:.*]] = load float, ptr %[[VAL_26]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
74+
// CHECK: %[[VAL_29:.*]] = fadd contract float %[[RED_VAL]], %[[VAL_28]]
75+
// CHECK: store float %[[VAL_29]], ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
76+
// CHECK: br label %[[VAL_30:.*]]
77+
// CHECK: omp.region.cont1: ; preds = %[[VAL_21]]
78+
// CHECK: br label %[[VAL_12]]
79+
// CHECK: omp_loop.inc: ; preds = %[[VAL_30]]
80+
// CHECK: %[[VAL_14]] = add nuw i32 %[[VAL_13]], 1
81+
// CHECK: br label %[[VAL_11]], !llvm.loop ![[LOOP:.*]]
82+
// CHECK: omp_loop.exit: ; preds = %[[VAL_15]]
83+
// CHECK: br label %[[VAL_31:.*]]
84+
// CHECK: omp_loop.after: ; preds = %[[VAL_18]]
85+
// CHECK: br label %[[VAL_32:.*]]
86+
// CHECK: omp.region.cont: ; preds = %[[VAL_31]]
87+
// CHECK: %[[SUM_VAL:.*]] = load float, ptr %[[ORIG_SUM]], align 4
88+
// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4
89+
// CHECK: %[[COMBINED_VAL:.*]] = fadd contract float %[[SUM_VAL]], %[[RED_VAL]]
90+
// CHECK: store float %[[COMBINED_VAL]], ptr %[[ORIG_SUM]], align 4
91+
// CHECK: ret void
92+
93+
// CHECK: ![[ACCESS_GROUP]] = distinct !{}
94+
// CHECK: ![[LOOP]] = distinct !{![[LOOP]], ![[PARALLEL_ACCESS:.*]], ![[VECTORIZE:.*]]}
95+
// CHECK: ![[PARALLEL_ACCESS]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP]]}
96+
// CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true}

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s
2-
// XFAIL: *
32

43
llvm.func @atomic_hint(%v : !llvm.ptr, %x : !llvm.ptr, %expr : i32) {
54
// expected-warning@below {{hint clause discarded}}
@@ -141,36 +140,6 @@ llvm.func @simd_linear(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
141140

142141
// -----
143142

144-
omp.declare_reduction @add_f32 : f32
145-
init {
146-
^bb0(%arg: f32):
147-
%0 = llvm.mlir.constant(0.0 : f32) : f32
148-
omp.yield (%0 : f32)
149-
}
150-
combiner {
151-
^bb1(%arg0: f32, %arg1: f32):
152-
%1 = llvm.fadd %arg0, %arg1 : f32
153-
omp.yield (%1 : f32)
154-
}
155-
atomic {
156-
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
157-
%2 = llvm.load %arg3 : !llvm.ptr -> f32
158-
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
159-
omp.yield
160-
}
161-
llvm.func @simd_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
162-
// expected-error@below {{not yet implemented: Unhandled clause reduction in omp.simd operation}}
163-
// expected-error@below {{LLVM Translation failed for operation: omp.simd}}
164-
omp.simd reduction(@add_f32 %x -> %prv : !llvm.ptr) {
165-
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
166-
omp.yield
167-
}
168-
}
169-
llvm.return
170-
}
171-
172-
// -----
173-
174143
omp.declare_reduction @add_f32 : f32
175144
init {
176145
^bb0(%arg: f32):

0 commit comments

Comments
 (0)