Skip to content

[llvm][mlir][OpenMP] Support translation for linear clause in omp.wsloop and omp.simd #139386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1816,8 +1816,7 @@ static void genSimdClauses(
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSafelen(clauseOps);
cp.processSimdlen(clauseOps);

cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
cp.processLinear(clauseOps);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current implementation of processLinear does not seem to guarantee that the variable comes from an alloca. For example what if it is a function argument?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran some tests, and the semantic checks were able to catch those cases. For instance, if the linear variable is anything other than an integer type, we get a semantic error noting that a linear variable with the REF modifier needs to be of integer type.

Do you think we should nevertheless add a check during FIR gen? I added a check during the translation but skipped adding it during FIR gen because of this reason.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

subroutine test(lin, lb, ub, step)
  integer :: lin, lb, ub, step, i

  !$omp do linear(lin)
  do i = lb,ub,step
    l = l + 2
  enddo
endsubroutine

}

static void genSingleClauses(lower::AbstractConverter &converter,
Expand Down Expand Up @@ -2007,9 +2006,9 @@ static void genWsloopClauses(
cp.processOrdered(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSchedule(stmtCtx, clauseOps);
cp.processLinear(clauseOps);

cp.processTODO<clause::Allocate, clause::Linear>(
loc, llvm::omp::Directive::OMPD_do);
cp.processTODO<clause::Allocate>(loc, llvm::omp::Directive::OMPD_do);
}

//===----------------------------------------------------------------------===//
Expand Down
14 changes: 0 additions & 14 deletions flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90

This file was deleted.

54 changes: 54 additions & 0 deletions flang/test/Lower/OpenMP/simd-linear.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
! This test checks lowering of OpenMP DO Directive (Worksharing)
! with linear clause

! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s

!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[const:.*]] = arith.constant 1 : i32
subroutine simple_linear
implicit none
integer :: x, y, i
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!$omp simd linear(x)
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 2 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
do i = 1, 10
y = x + 2
end do
end subroutine


!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
subroutine linear_step
implicit none
integer :: x, y, i
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!$omp simd linear(x:4)
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 2 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
do i = 1, 10
y = x + 2
end do
end subroutine

!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
subroutine linear_expr
implicit none
integer :: x, y, i, a
!CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
!$omp simd linear(x:a+4)
do i = 1, 10
y = x + 2
end do
end subroutine
57 changes: 57 additions & 0 deletions flang/test/Lower/OpenMP/wsloop-linear.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
! This test checks lowering of OpenMP DO Directive (Worksharing)
! with linear clause

! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s

!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[const:.*]] = arith.constant 1 : i32
subroutine simple_linear
implicit none
integer :: x, y, i
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!$omp do linear(x)
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 2 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
do i = 1, 10
y = x + 2
end do
!$omp end do
end subroutine


!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
subroutine linear_step
implicit none
integer :: x, y, i
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!$omp do linear(x:4)
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 2 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
do i = 1, 10
y = x + 2
end do
!$omp end do
end subroutine

!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
subroutine linear_expr
implicit none
integer :: x, y, i, a
!CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
!$omp do linear(x:a+4)
do i = 1, 10
y = x + 2
end do
!$omp end do
end subroutine
2 changes: 1 addition & 1 deletion mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2608,7 +2608,7 @@ void SimdOp::build(OpBuilder &builder, OperationState &state,
// TODO Store clauses in op: linearVars, linearStepVars
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO Store clauses in op: linearVars, linearStepVars

SimdOp::build(builder, state, clauses.alignedVars,
makeArrayAttr(ctx, clauses.alignments), clauses.ifExpr,
/*linear_vars=*/{}, /*linear_step_vars=*/{},
clauses.linearVars, clauses.linearStepVars,
clauses.nontemporalVars, clauses.order, clauses.orderMod,
clauses.privateVars, makeArrayAttr(ctx, clauses.privateSyms),
clauses.privateNeedsBarrier, clauses.reductionMod,
Expand Down
98 changes: 62 additions & 36 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,9 @@ class LinearClauseProcessor {

public:
// Allocate space for linear variabes
void createLinearVar(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
mlir::Value &linearVar) {
LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
mlir::Value &linearVar, Operation &op) {
if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
moduleTranslation.lookupValue(linearVar))) {
linearPreconditionVars.push_back(builder.CreateAlloca(
Expand All @@ -159,7 +159,12 @@ class LinearClauseProcessor {
linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
linearLoopBodyTemps.push_back(linearLoopBodyTemp);
linearOrigVars.push_back(linearVarAlloca);
return success();
}

else
return op.emitError() << "not yet implemented: linear clause support"
<< " for non alloca linear variables";
}

// Initialize linear step
Expand All @@ -169,20 +174,15 @@ class LinearClauseProcessor {
}

// Emit IR for initialization of linear variables
llvm::OpenMPIRBuilder::InsertPointOrErrorTy
initLinearVar(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
llvm::BasicBlock *loopPreHeader) {
void initLinearVar(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
llvm::BasicBlock *loopPreHeader) {
builder.SetInsertPoint(loopPreHeader->getTerminator());
for (size_t index = 0; index < linearOrigVars.size(); index++) {
llvm::LoadInst *linearVarLoad = builder.CreateLoad(
linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
}
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
moduleTranslation.getOpenMPBuilder()->createBarrier(
builder.saveIP(), llvm::omp::OMPD_barrier);
return afterBarrierIP;
}

// Emit IR for updating Linear variables
Expand All @@ -193,18 +193,27 @@ class LinearClauseProcessor {
// Emit increments for linear vars
llvm::LoadInst *linearVarStart =
builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),

linearPreconditionVars[index]);

auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
auto addInst = builder.CreateAdd(linearVarStart, mulInst);
builder.CreateStore(addInst, linearLoopBodyTemps[index]);
if (linearOrigVars[index]->getAllocatedType()->isIntegerTy()) {
auto addInst = builder.CreateAdd(linearVarStart, mulInst);
builder.CreateStore(addInst, linearLoopBodyTemps[index]);
} else if (linearOrigVars[index]
->getAllocatedType()
->isFloatingPointTy()) {
auto cvt = builder.CreateSIToFP(
mulInst, linearOrigVars[index]->getAllocatedType());
auto addInst = builder.CreateFAdd(linearVarStart, cvt);
builder.CreateStore(addInst, linearLoopBodyTemps[index]);
}
}
}

// Linear variable finalization is conditional on the last logical iteration.
// Create BB splits to manage the same.
void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
llvm::BasicBlock *loopExit) {
void splitLinearFiniBB(llvm::IRBuilderBase &builder,
llvm::BasicBlock *loopExit) {
linearFinalizationBB = loopExit->splitBasicBlock(
loopExit->getTerminator(), "omp_loop.linear_finalization");
linearExitBB = linearFinalizationBB->splitBasicBlock(
Expand Down Expand Up @@ -339,10 +348,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (!op.getIsDevicePtrVars().empty())
result = todo("is_device_ptr");
};
auto checkLinear = [&todo](auto op, LogicalResult &result) {
if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
result = todo("linear");
};
auto checkNowait = [&todo](auto op, LogicalResult &result) {
if (op.getNowait())
result = todo("nowait");
Expand Down Expand Up @@ -432,18 +437,14 @@ static LogicalResult checkImplementationStatus(Operation &op) {
})
.Case([&](omp::WsloopOp op) {
checkAllocate(op, result);
checkLinear(op, result);
checkOrder(op, result);
checkReduction(op, result);
})
.Case([&](omp::ParallelOp op) {
checkAllocate(op, result);
checkReduction(op, result);
})
.Case([&](omp::SimdOp op) {
checkLinear(op, result);
checkReduction(op, result);
})
.Case([&](omp::SimdOp op) { checkReduction(op, result); })
.Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
.Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
Expand Down Expand Up @@ -2587,13 +2588,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,

// Initialize linear variables and linear step
LinearClauseProcessor linearClauseProcessor;
if (wsloopOp.getLinearVars().size()) {
for (mlir::Value linearVar : wsloopOp.getLinearVars())
linearClauseProcessor.createLinearVar(builder, moduleTranslation,
linearVar);
for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
for (mlir::Value linearVar : wsloopOp.getLinearVars()) {
if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation,
linearVar, opInst)))
return failure();
}
for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);

llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions(
wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
Expand All @@ -2605,16 +2606,17 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,

// Emit Initialization and Update IR for linear variables
if (wsloopOp.getLinearVars().size()) {
linearClauseProcessor.initLinearVar(builder, moduleTranslation,
loopInfo->getPreheader());
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
linearClauseProcessor.initLinearVar(builder, moduleTranslation,
loopInfo->getPreheader());
moduleTranslation.getOpenMPBuilder()->createBarrier(
builder.saveIP(), llvm::omp::OMPD_barrier);
if (failed(handleError(afterBarrierIP, *loopOp)))
return failure();
builder.restoreIP(*afterBarrierIP);
linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
loopInfo->getIndVar());
linearClauseProcessor.outlineLinearFinalizationBB(builder,
loopInfo->getExit());
linearClauseProcessor.splitLinearFiniBB(builder, loopInfo->getExit());
}

builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
Expand Down Expand Up @@ -2882,6 +2884,17 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);

// Create linear variables and initialize linear step
LinearClauseProcessor linearClauseProcessor;

for (mlir::Value linearVar : simdOp.getLinearVars()) {
if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation,
linearVar, opInst)))
return failure();
}
for (mlir::Value linearStep : simdOp.getLinearStepVars())
linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);

llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
builder, moduleTranslation, privateVarsInfo, allocaIP);
if (handleError(afterAllocas, opInst).failed())
Expand Down Expand Up @@ -2945,14 +2958,27 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(handleError(regionBlock, opInst)))
return failure();

builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);

// Emit Initialization for linear variables
if (simdOp.getLinearVars().size()) {
linearClauseProcessor.initLinearVar(builder, moduleTranslation,
loopInfo->getPreheader());
linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
loopInfo->getIndVar());
}
builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());

ompBuilder->applySimd(loopInfo, alignedVars,
simdOp.getIfExpr()
? moduleTranslation.lookupValue(simdOp.getIfExpr())
: nullptr,
order, simdlen, safelen);

for (size_t index = 0; index < simdOp.getLinearVars().size(); index++)
linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
index);

// We now need to reduce the per-simd-lane reduction variable into the
// original variable. This works a bit differently to other reductions (e.g.
// wsloop) because we don't need to call into the OpenMP runtime to handle
Expand Down
Loading