diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 0a56e888ac44b..ef4c592709d4c 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1816,8 +1816,7 @@ static void genSimdClauses( cp.processReduction(loc, clauseOps, reductionSyms); cp.processSafelen(clauseOps); cp.processSimdlen(clauseOps); - - cp.processTODO(loc, llvm::omp::Directive::OMPD_simd); + cp.processLinear(clauseOps); } static void genSingleClauses(lower::AbstractConverter &converter, @@ -2007,9 +2006,9 @@ static void genWsloopClauses( cp.processOrdered(clauseOps); cp.processReduction(loc, clauseOps, reductionSyms); cp.processSchedule(stmtCtx, clauseOps); + cp.processLinear(clauseOps); - cp.processTODO( - loc, llvm::omp::Directive::OMPD_do); + cp.processTODO(loc, llvm::omp::Directive::OMPD_do); } //===----------------------------------------------------------------------===// diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 deleted file mode 100644 index 4caf12a0169c4..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 +++ /dev/null @@ -1,14 +0,0 @@ -! This test checks lowering of OpenMP do simd linear() pragma - -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -subroutine testDoSimdLinear(int_array) - integer :: int_array(*) -!CHECK: not yet implemented: Unhandled clause LINEAR in SIMD construct -!$omp do simd linear(int_array) - do index_ = 1, 10 - end do -!$omp end do simd - -end subroutine testDoSimdLinear - diff --git a/flang/test/Lower/OpenMP/simd-linear.f90 b/flang/test/Lower/OpenMP/simd-linear.f90 new file mode 100644 index 0000000000000..ccb5fbb8b0a24 --- /dev/null +++ b/flang/test/Lower/OpenMP/simd-linear.f90 @@ -0,0 +1,54 @@ +! This test checks lowering of OpenMP SIMD Directive +! with linear clause + +! 
RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s + +!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"} +!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[const:.*]] = arith.constant 1 : i32 +subroutine simple_linear + implicit none + integer :: x, y, i + !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref) {{.*}} + !$omp simd linear(x) + !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref + !CHECK: %[[const:.*]] = arith.constant 2 : i32 + !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32 + do i = 1, 10 + y = x + 2 + end do +end subroutine + + +!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"} +!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +subroutine linear_step + implicit none + integer :: x, y, i + !CHECK: %[[const:.*]] = arith.constant 4 : i32 + !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref) {{.*}} + !$omp simd linear(x:4) + !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref + !CHECK: %[[const:.*]] = arith.constant 2 : i32 + !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32 + do i = 1, 10 + y = x + 2 + end do +end subroutine + +!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"} +!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"} +!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +subroutine linear_expr + implicit none + integer :: x, y, i, a + !CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref + !CHECK: %[[const:.*]] = arith.constant 4 : i32 + !CHECK: %[[LINEAR_EXPR:.*]] = arith.addi 
%[[LOAD_A]], %[[const]] : i32 + !CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref) {{.*}} + !$omp simd linear(x:a+4) + do i = 1, 10 + y = x + 2 + end do +end subroutine diff --git a/flang/test/Lower/OpenMP/wsloop-linear.f90 b/flang/test/Lower/OpenMP/wsloop-linear.f90 new file mode 100644 index 0000000000000..b99677108be2f --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-linear.f90 @@ -0,0 +1,57 @@ +! This test checks lowering of OpenMP DO Directive (Worksharing) +! with linear clause + +! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s + +!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"} +!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[const:.*]] = arith.constant 1 : i32 +subroutine simple_linear + implicit none + integer :: x, y, i + !CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref) {{.*}} + !$omp do linear(x) + !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref + !CHECK: %[[const:.*]] = arith.constant 2 : i32 + !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32 + do i = 1, 10 + y = x + 2 + end do + !$omp end do +end subroutine + + +!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"} +!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +subroutine linear_step + implicit none + integer :: x, y, i + !CHECK: %[[const:.*]] = arith.constant 4 : i32 + !CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref) {{.*}} + !$omp do linear(x:4) + !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref + !CHECK: %[[const:.*]] = arith.constant 2 : i32 + !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32 + do i = 1, 10 + y = x + 2 + end do + !$omp end do +end subroutine + +!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"} +!CHECK: 
%[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"} +!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +subroutine linear_expr + implicit none + integer :: x, y, i, a + !CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref + !CHECK: %[[const:.*]] = arith.constant 4 : i32 + !CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32 + !CHECK: omp.wsloop linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref) {{.*}} + !$omp do linear(x:a+4) + do i = 1, 10 + y = x + 2 + end do + !$omp end do +end subroutine diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index ffc84781f77ff..c4b91348c9853 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2608,7 +2608,7 @@ void SimdOp::build(OpBuilder &builder, OperationState &state, // TODO Store clauses in op: linearVars, linearStepVars SimdOp::build(builder, state, clauses.alignedVars, makeArrayAttr(ctx, clauses.alignments), clauses.ifExpr, - /*linear_vars=*/{}, /*linear_step_vars=*/{}, + clauses.linearVars, clauses.linearStepVars, clauses.nontemporalVars, clauses.order, clauses.orderMod, clauses.privateVars, makeArrayAttr(ctx, clauses.privateSyms), clauses.privateNeedsBarrier, clauses.reductionMod, diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 336f71ccd06a3..897357472f4a4 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -147,9 +147,9 @@ class LinearClauseProcessor { public: // Allocate space for linear variabes - void createLinearVar(llvm::IRBuilderBase &builder, - 
LLVM::ModuleTranslation &moduleTranslation, - mlir::Value &linearVar) { + LogicalResult createLinearVar(llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + mlir::Value &linearVar, Operation &op) { if (llvm::AllocaInst *linearVarAlloca = dyn_cast( moduleTranslation.lookupValue(linearVar))) { linearPreconditionVars.push_back(builder.CreateAlloca( @@ -159,7 +159,12 @@ class LinearClauseProcessor { linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar)); linearLoopBodyTemps.push_back(linearLoopBodyTemp); linearOrigVars.push_back(linearVarAlloca); + return success(); } + + else + return op.emitError() << "not yet implemented: linear clause support" + << " for non alloca linear variables"; } // Initialize linear step @@ -169,20 +174,15 @@ class LinearClauseProcessor { } // Emit IR for initialization of linear variables - llvm::OpenMPIRBuilder::InsertPointOrErrorTy - initLinearVar(llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - llvm::BasicBlock *loopPreHeader) { + void initLinearVar(llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::BasicBlock *loopPreHeader) { builder.SetInsertPoint(loopPreHeader->getTerminator()); for (size_t index = 0; index < linearOrigVars.size(); index++) { llvm::LoadInst *linearVarLoad = builder.CreateLoad( linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]); builder.CreateStore(linearVarLoad, linearPreconditionVars[index]); } - llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP = - moduleTranslation.getOpenMPBuilder()->createBarrier( - builder.saveIP(), llvm::omp::OMPD_barrier); - return afterBarrierIP; } // Emit IR for updating Linear variables @@ -193,18 +193,27 @@ class LinearClauseProcessor { // Emit increments for linear vars llvm::LoadInst *linearVarStart = builder.CreateLoad(linearOrigVars[index]->getAllocatedType(), - linearPreconditionVars[index]); + auto mulInst = builder.CreateMul(loopInductionVar, 
linearSteps[index]); - auto addInst = builder.CreateAdd(linearVarStart, mulInst); - builder.CreateStore(addInst, linearLoopBodyTemps[index]); + if (linearOrigVars[index]->getAllocatedType()->isIntegerTy()) { + auto addInst = builder.CreateAdd(linearVarStart, mulInst); + builder.CreateStore(addInst, linearLoopBodyTemps[index]); + } else if (linearOrigVars[index] + ->getAllocatedType() + ->isFloatingPointTy()) { + auto cvt = builder.CreateSIToFP( + mulInst, linearOrigVars[index]->getAllocatedType()); + auto addInst = builder.CreateFAdd(linearVarStart, cvt); + builder.CreateStore(addInst, linearLoopBodyTemps[index]); + } } } // Linear variable finalization is conditional on the last logical iteration. // Create BB splits to manage the same. - void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder, - llvm::BasicBlock *loopExit) { + void splitLinearFiniBB(llvm::IRBuilderBase &builder, + llvm::BasicBlock *loopExit) { linearFinalizationBB = loopExit->splitBasicBlock( loopExit->getTerminator(), "omp_loop.linear_finalization"); linearExitBB = linearFinalizationBB->splitBasicBlock( @@ -339,10 +348,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { if (!op.getIsDevicePtrVars().empty()) result = todo("is_device_ptr"); }; - auto checkLinear = [&todo](auto op, LogicalResult &result) { - if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty()) - result = todo("linear"); - }; auto checkNowait = [&todo](auto op, LogicalResult &result) { if (op.getNowait()) result = todo("nowait"); @@ -432,7 +437,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { }) .Case([&](omp::WsloopOp op) { checkAllocate(op, result); - checkLinear(op, result); checkOrder(op, result); checkReduction(op, result); }) @@ -440,10 +444,7 @@ static LogicalResult checkImplementationStatus(Operation &op) { checkAllocate(op, result); checkReduction(op, result); }) - .Case([&](omp::SimdOp op) { - checkLinear(op, result); - checkReduction(op, result); - }) + 
.Case([&](omp::SimdOp op) { checkReduction(op, result); }) .Case([&](auto op) { checkHint(op, result); }) .Case( @@ -2587,13 +2588,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, // Initialize linear variables and linear step LinearClauseProcessor linearClauseProcessor; - if (wsloopOp.getLinearVars().size()) { - for (mlir::Value linearVar : wsloopOp.getLinearVars()) - linearClauseProcessor.createLinearVar(builder, moduleTranslation, - linearVar); - for (mlir::Value linearStep : wsloopOp.getLinearStepVars()) - linearClauseProcessor.initLinearStep(moduleTranslation, linearStep); + for (mlir::Value linearVar : wsloopOp.getLinearVars()) { + if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation, + linearVar, opInst))) + return failure(); } + for (mlir::Value linearStep : wsloopOp.getLinearStepVars()) + linearClauseProcessor.initLinearStep(moduleTranslation, linearStep); llvm::Expected regionBlock = convertOmpOpRegions( wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); @@ -2605,16 +2606,17 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, // Emit Initialization and Update IR for linear variables if (wsloopOp.getLinearVars().size()) { + linearClauseProcessor.initLinearVar(builder, moduleTranslation, + loopInfo->getPreheader()); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP = - linearClauseProcessor.initLinearVar(builder, moduleTranslation, - loopInfo->getPreheader()); + moduleTranslation.getOpenMPBuilder()->createBarrier( + builder.saveIP(), llvm::omp::OMPD_barrier); if (failed(handleError(afterBarrierIP, *loopOp))) return failure(); builder.restoreIP(*afterBarrierIP); linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(), loopInfo->getIndVar()); - linearClauseProcessor.outlineLinearFinalizationBB(builder, - loopInfo->getExit()); + linearClauseProcessor.splitLinearFiniBB(builder, loopInfo->getExit()); } builder.SetInsertPoint(*regionBlock, 
(*regionBlock)->begin()); @@ -2882,6 +2884,17 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + // Create linear variables and initialize linear step + LinearClauseProcessor linearClauseProcessor; + + for (mlir::Value linearVar : simdOp.getLinearVars()) { + if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation, + linearVar, opInst))) + return failure(); + } + for (mlir::Value linearStep : simdOp.getLinearStepVars()) + linearClauseProcessor.initLinearStep(moduleTranslation, linearStep); + llvm::Expected afterAllocas = allocatePrivateVars( builder, moduleTranslation, privateVarsInfo, allocaIP); if (handleError(afterAllocas, opInst).failed()) @@ -2945,14 +2958,27 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(handleError(regionBlock, opInst))) return failure(); - builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation); + + // Emit Initialization for linear variables + if (simdOp.getLinearVars().size()) { + linearClauseProcessor.initLinearVar(builder, moduleTranslation, + loopInfo->getPreheader()); + linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(), + loopInfo->getIndVar()); + } + builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); + ompBuilder->applySimd(loopInfo, alignedVars, simdOp.getIfExpr() ? moduleTranslation.lookupValue(simdOp.getIfExpr()) : nullptr, order, simdlen, safelen); + for (size_t index = 0; index < simdOp.getLinearVars().size(); index++) + linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region", + index); + // We now need to reduce the per-simd-lane reduction variable into the // original variable. This works a bit differently to other reductions (e.g. 
// wsloop) because we don't need to call into the OpenMP runtime to handle diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 32f0ba5b105ff..583356e81a978 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -358,6 +358,97 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) { // ----- +// CHECK-LABEL: wsloop_linear + +// CHECK: {{.*}} = alloca i32, i64 1, align 4 +// CHECK: %[[Y:.*]] = alloca i32, i64 1, align 4 +// CHECK: %[[X:.*]] = alloca i32, i64 1, align 4 + +// CHECK: entry: +// CHECK: %[[LINEAR_VAR:.*]] = alloca i32, align 4 +// CHECK: %[[LINEAR_RESULT:.*]] = alloca i32, align 4 +// CHECK: br label %omp_loop.preheader + + +// CHECK: omp_loop.preheader: +// CHECK: %[[LOAD:.*]] = load i32, ptr %[[X]], align 4 +// CHECK: store i32 %[[LOAD]], ptr %[[LINEAR_VAR]], align 4 +// CHECK: %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr @2) +// CHECK: call void @__kmpc_barrier(ptr @1, i32 %omp_global_thread_num) + +// CHECK: omp_loop.body: +// CHECK: %[[LOOP_IV:.*]] = add i32 %omp_loop.iv, {{.*}} +// CHECK: %[[LINEAR_LOAD:.*]] = load i32, ptr %[[LINEAR_VAR]], align 4 +// CHECK: %[[MUL:.*]] = mul i32 %[[LOOP_IV]], 1 +// CHECK: %[[ADD:.*]] = add i32 %[[LINEAR_LOAD]], %[[MUL]] +// CHECK: store i32 %[[ADD]], ptr %[[LINEAR_RESULT]], align 4 +// CHECK: br label %omp.loop_nest.region + +// CHECK: omp.loop_nest.region: +// CHECK: %[[LINEAR_LOAD:.*]] = load i32, ptr %[[LINEAR_RESULT]], align 4 +// CHECK: %[[ADD:.*]] = add i32 %[[LINEAR_LOAD]], 2 +// CHECK: store i32 %[[ADD]], ptr %[[Y]], align 4 + +// CHECK: omp_loop.exit: +// CHECK: call void @__kmpc_for_static_fini(ptr @2, i32 %omp_global_thread_num4) +// CHECK: %omp_global_thread_num5 = call i32 @__kmpc_global_thread_num(ptr @2) +// CHECK: call void @__kmpc_barrier(ptr @3, i32 %omp_global_thread_num5) +// CHECK: br label %omp_loop.linear_finalization + + +// CHECK: omp_loop.linear_finalization: +// CHECK: 
%[[LAST_ITER:.*]] = load i32, ptr %p.lastiter, align 4 +// CHECK: %[[CMP:.*]] = icmp ne i32 %[[LAST_ITER]], 0 +// CHECK: br i1 %[[CMP]], label %omp_loop.linear_lastiter_exit, label %omp_loop.linear_exit + +// CHECK: omp_loop.linear_lastiter_exit: +// CHECK: %[[LINEAR_RESULT_LOAD:.*]] = load i32, ptr %[[LINEAR_RESULT]], align 4 +// CHECK: store i32 %[[LINEAR_RESULT_LOAD]], ptr %[[X]], align 4 +// CHECK: br label %omp_loop.linear_exit + + +// CHECK: omp_loop.linear_exit: +// CHECK: %omp_global_thread_num6 = call i32 @__kmpc_global_thread_num(ptr @2) +// CHECK: call void @__kmpc_barrier(ptr @1, i32 %omp_global_thread_num6) +// CHECK: br label %omp_loop.after + +llvm.func @wsloop_linear() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "y"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + %8 = llvm.mlir.constant(2 : i32) : i32 + %9 = llvm.mlir.constant(10 : i32) : i32 + %10 = llvm.mlir.constant(1 : i32) : i32 + %11 = llvm.mlir.constant(1 : i64) : i64 + %12 = llvm.mlir.constant(1 : i64) : i64 + %13 = llvm.mlir.constant(1 : i64) : i64 + %14 = llvm.mlir.constant(1 : i64) : i64 + omp.wsloop linear(%5 = %10 : !llvm.ptr) { + omp.loop_nest (%arg0) : i32 = (%10) to (%9) inclusive step (%10) { + llvm.store %arg0, %1 : i32, !llvm.ptr + %15 = llvm.load %5 : !llvm.ptr -> i32 + %16 = llvm.add %15, %8 : i32 + llvm.store %16, %3 : i32, !llvm.ptr + %17 = llvm.add %arg0, %10 : i32 + %18 = llvm.icmp "sgt" %17, %9 : i32 + llvm.cond_br %18, ^bb1, ^bb2 + ^bb1: // pred: ^bb0 + llvm.store %17, %1 : i32, !llvm.ptr + llvm.br ^bb2 + ^bb2: // 2 preds: ^bb0, ^bb1 + omp.yield + } + } + llvm.return +} + +// ----- + // CHECK-LABEL: @wsloop_inclusive_1 
llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) { %0 = llvm.mlir.constant(42 : index) : i64 @@ -695,6 +786,48 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) { // ----- +// CHECK-LABEL: @simd_linear + +// CHECK: entry: +// CHECK: %[[LINEAR_VAR:.*]] = alloca i32, align 4 +// CHECK: %[[LINEAR_RESULT:.*]] = alloca i32, align 4 +// CHECK: br label %omp.simd.region + +// CHECK: omp.simd.region: +// CHECK: br label %omp_loop.preheader + +// CHECK: omp_loop.preheader: +// CHECK: %[[ORIG_VAL:.*]] = load i32, ptr {{.*}}, align 4 +// CHECK: store i32 %[[ORIG_VAL]], ptr %[[LINEAR_VAR]], align 4 +// CHECK: br label %omp_loop.header + +// CHECK: omp_loop.body: +// CHECK: %[[LINEAR_LOAD:.*]] = load i32, ptr %[[LINEAR_VAR]], align 4 +// CHECK: %[[MUL:.*]] = mul i32 %omp_loop.iv, 2 +// CHECK: %[[ADD:.*]] = add i32 %[[LINEAR_LOAD]], %[[MUL]] +// CHECK: store i32 %[[ADD]], ptr %[[LINEAR_RESULT]], align 4 +// CHECK: br label %omp.loop_nest.region + +llvm.func @simd_linear() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(10 : i32) : i32 + %5 = llvm.mlir.constant(1 : i32) : i32 + %6 = llvm.mlir.constant(2 : i32) : i32 + %7 = llvm.mlir.constant(1 : i64) : i64 + %8 = llvm.mlir.constant(1 : i64) : i64 + omp.simd linear(%1 = %6 : !llvm.ptr) { + omp.loop_nest (%arg1) : i32 = (%5) to (%4) inclusive step (%5) { + omp.yield + } + } + llvm.return +} + +// ----- + // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir index e2c32b254c200..f3a61695b7401 100644 --- a/mlir/test/Target/LLVMIR/openmp-todo.mlir +++ 
b/mlir/test/Target/LLVMIR/openmp-todo.mlir @@ -117,7 +117,7 @@ llvm.func @sections_private(%x : !llvm.ptr) { // ----- llvm.func @simd_linear(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { - // expected-error@below {{not yet implemented: Unhandled clause linear in omp.simd operation}} + // expected-error@below {{not yet implemented: linear clause support for non alloca linear variables}} // expected-error@below {{LLVM Translation failed for operation: omp.simd}} omp.simd linear(%x = %step : !llvm.ptr) { omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { @@ -467,18 +467,6 @@ llvm.func @wsloop_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { // ----- -llvm.func @wsloop_linear(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { - // expected-error@below {{not yet implemented: Unhandled clause linear in omp.wsloop operation}} - // expected-error@below {{LLVM Translation failed for operation: omp.wsloop}} - omp.wsloop linear(%x = %step : !llvm.ptr) { - omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { - omp.yield - } - } - llvm.return -} - -// ----- llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) { // expected-error@below {{not yet implemented: Unhandled clause order in omp.wsloop operation}} // expected-error@below {{LLVM Translation failed for operation: omp.wsloop}}