Skip to content

[flang] Optimize redundant array repacking. #147881

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions flang/include/flang/Optimizer/Builder/HLFIRTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,14 @@ Entity gen1DSection(mlir::Location loc, fir::FirOpBuilder &builder,
/// contiguous.
bool designatePreservesContinuity(hlfir::DesignateOp op);

/// Return true iff the given \p base describes an object
/// that is contiguous. If \p checkWhole is true, then
/// the object must be contiguous in all dimensions,
/// otherwise, it must be contiguous in the innermost dimension.
/// This function is an extension of hlfir::Entity::isSimplyContiguous(),
/// and it can be used on pure FIR representation as well as on HLFIR.
bool isSimplyContiguous(mlir::Value base, bool checkWhole = true);

} // namespace hlfir

#endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H
14 changes: 13 additions & 1 deletion flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,19 @@ std::optional<int64_t> getAllocaByteSize(fir::AllocaOp alloca,
/// When \p checkWhole is false, then the checking is only done
/// for continuity in the innermost dimension, otherwise,
/// the checking is done for continuity of the whole result of rebox.
bool reboxPreservesContinuity(fir::ReboxOp rebox, bool checkWhole = true);
/// The caller may specify \p mayHaveNonDefaultLowerBounds, if it is known,
/// to allow better handling of the rebox operations representing
/// full array slices.
bool reboxPreservesContinuity(fir::ReboxOp rebox,
bool mayHaveNonDefaultLowerBounds = true,
bool checkWhole = true);

/// Return true, if \p embox operation produces a contiguous
/// entity.
/// When \p checkWhole is false, then the checking is only done
/// for continuity in the innermost dimension, otherwise,
/// the checking is done for continuity of the whole result of embox.
bool isContiguousEmbox(fir::EmboxOp embox, bool checkWhole = true);

} // namespace fir

Expand Down
34 changes: 1 addition & 33 deletions flang/include/flang/Optimizer/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,39 +31,7 @@ namespace fir {
// Passes defined in Passes.td
//===----------------------------------------------------------------------===//

#define GEN_PASS_DECL_ABSTRACTRESULTOPT
#define GEN_PASS_DECL_AFFINEDIALECTPROMOTION
#define GEN_PASS_DECL_AFFINEDIALECTDEMOTION
#define GEN_PASS_DECL_ANNOTATECONSTANTOPERANDS
#define GEN_PASS_DECL_ARRAYVALUECOPY
#define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
#define GEN_PASS_DECL_CHARACTERCONVERSION
#define GEN_PASS_DECL_CFGCONVERSION
#define GEN_PASS_DECL_CUFADDCONSTRUCTOR
#define GEN_PASS_DECL_CUFDEVICEGLOBAL
#define GEN_PASS_DECL_CUFGPUTOLLVMCONVERSION
#define GEN_PASS_DECL_CUFOPCONVERSION
#define GEN_PASS_DECL_CUFCOMPUTESHAREDMEMORYOFFSETSANDSIZE
#define GEN_PASS_DECL_EXTERNALNAMECONVERSION
#define GEN_PASS_DECL_MEMREFDATAFLOWOPT
#define GEN_PASS_DECL_SIMPLIFYINTRINSICS
#define GEN_PASS_DECL_MEMORYALLOCATIONOPT
#define GEN_PASS_DECL_SIMPLIFYREGIONLITE
#define GEN_PASS_DECL_ALGEBRAICSIMPLIFICATION
#define GEN_PASS_DECL_POLYMORPHICOPCONVERSION
#define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION
#define GEN_PASS_DECL_ADDDEBUGINFO
#define GEN_PASS_DECL_STACKARRAYS
#define GEN_PASS_DECL_STACKRECLAIM
#define GEN_PASS_DECL_LOOPVERSIONING
#define GEN_PASS_DECL_ADDALIASTAGS
#define GEN_PASS_DECL_VSCALEATTR
#define GEN_PASS_DECL_FUNCTIONATTR
#define GEN_PASS_DECL_CONSTANTARGUMENTGLOBALISATIONOPT
#define GEN_PASS_DECL_COMPILERGENERATEDNAMESCONVERSION
#define GEN_PASS_DECL_SETRUNTIMECALLATTRIBUTES
#define GEN_PASS_DECL_GENRUNTIMECALLSFORTEST
#define GEN_PASS_DECL_SIMPLIFYFIROPERATIONS
#define GEN_PASS_DECL

#include "flang/Optimizer/Transforms/Passes.h.inc"

Expand Down
10 changes: 10 additions & 0 deletions flang/include/flang/Optimizer/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -551,4 +551,14 @@ def SimplifyFIROperations : Pass<"simplify-fir-operations", "mlir::ModuleOp"> {
"Prefer expanding without using Fortran runtime calls.">];
}

// Pass definition for the "optimize-array-repacking" command-line name.
// The pass is anchored on mlir::func::FuncOp operations.
def OptimizeArrayRepacking
: Pass<"optimize-array-repacking", "mlir::func::FuncOp"> {
let summary = "Optimizes redundant array repacking operations";
let description = [{
If the source of fir.pack_array is known to be contiguous,
then this pass erases such operations. The corresponding
fir.unpack_array operations are also removed.
}];
}

#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
40 changes: 37 additions & 3 deletions flang/lib/Optimizer/Builder/HLFIRTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,17 @@ bool hlfir::Entity::mayHaveNonDefaultLowerBounds() const {
if (auto varIface = getIfVariableInterface())
return isShapeWithLowerBounds(varIface.getShape());
// Go through chain of fir.box converts.
if (auto convert = getDefiningOp<fir::ConvertOp>())
if (auto convert = getDefiningOp<fir::ConvertOp>()) {
return hlfir::Entity{convert.getValue()}.mayHaveNonDefaultLowerBounds();
// TODO: Embox and Rebox do not have hlfir variable interface, but are
// easy to reason about.
} else if (auto rebox = getDefiningOp<fir::ReboxOp>()) {
// If slicing is involved, then the resulting box has
// default lower bounds. If there is no slicing,
// then the result depends on the shape operand
// (whether it has non default lower bounds or not).
return !rebox.getSlice() && isShapeWithLowerBounds(rebox.getShape());
} else if (auto embox = getDefiningOp<fir::EmboxOp>()) {
return !embox.getSlice() && isShapeWithLowerBounds(embox.getShape());
}
return true;
}

Expand Down Expand Up @@ -1646,3 +1653,30 @@ bool hlfir::designatePreservesContinuity(hlfir::DesignateOp op) {
}
return true;
}

/// Return true iff \p base can be proven to describe a contiguous object.
/// If \p checkWhole is true, the proof must cover all dimensions,
/// otherwise only the innermost dimension is required to be contiguous.
///
/// The check first defers to hlfir::Entity::isSimplyContiguous(). If that
/// fails, the defining operation of \p base is inspected:
///   * fir.embox   - decided by fir::isContiguousEmbox;
///   * fir.rebox   - the rebox must preserve continuity and its input box
///                   must itself be contiguous;
///   * fir.declare / hlfir.declare - decided by the declared memref;
///   * fir.convert - decided by the converted value.
/// Any other producer (or a value without a defining operation) yields
/// the conservative answer 'false'.
bool hlfir::isSimplyContiguous(mlir::Value base, bool checkWhole) {
  hlfir::Entity entity{base};
  if (entity.isSimplyContiguous())
    return true;

  // Look at the definition.
  mlir::Operation *def = base.getDefiningOp();
  if (!def)
    return false;

  return mlir::TypeSwitch<mlir::Operation *, bool>(def)
      .Case<fir::EmboxOp>(
          [&](auto op) { return fir::isContiguousEmbox(op, checkWhole); })
      .Case<fir::ReboxOp>([&](auto op) {
        hlfir::Entity box{op.getBox()};
        return fir::reboxPreservesContinuity(
                   op, box.mayHaveNonDefaultLowerBounds(), checkWhole) &&
               isSimplyContiguous(box, checkWhole);
      })
      .Case<fir::DeclareOp, hlfir::DeclareOp>([&](auto op) {
        return isSimplyContiguous(op.getMemref(), checkWhole);
      })
      .Case<fir::ConvertOp>([&](auto op) {
        // Forward checkWhole: relying on the default (true) here would
        // turn an innermost-only query into a whole-array query.
        return isSimplyContiguous(op.getValue(), checkWhole);
      })
      .Default([](auto &&) { return false; });
}
157 changes: 129 additions & 28 deletions flang/lib/Optimizer/Dialect/FIROps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1944,6 +1944,128 @@ llvm::LogicalResult fir::EmboxOp::verify() {
return mlir::success();
}

/// Tells whether \p extent is the extent result of a fir::BoxDimsOp
/// that queries \p box for the constant dimension \p dim.
/// Anything that cannot be matched to this pattern yields 'false'.
static bool isBoxExtent(mlir::Value box, std::int64_t dim, mlir::Value extent) {
  auto boxDims = extent.getDefiningOp<fir::BoxDimsOp>();
  if (!boxDims)
    return false;
  // The query must be on the same box, and the value must be
  // the extent result (not the lower bound or the stride).
  if (boxDims.getVal() != box || boxDims.getExtent() != extent)
    return false;
  auto constantDim = fir::getIntIfConstant(boxDims.getDim());
  return constantDim && *constantDim == dim;
}

/// Tells whether \p lb is the lower bound of dimension \p dim of \p box.
/// The value is accepted when it is the lower bound result of
/// a fir::BoxDimsOp that queries \p box for the constant dimension \p dim.
/// When \p mayHaveNonDefaultLowerBounds is false, a value that is not
/// produced by fir::BoxDimsOp is also accepted if it is the integer
/// constant 1.
static bool isBoxLb(mlir::Value box, std::int64_t dim, mlir::Value lb,
                    bool mayHaveNonDefaultLowerBounds = true) {
  if (auto boxDims = lb.getDefiningOp<fir::BoxDimsOp>()) {
    if (boxDims.getVal() != box || boxDims.getLowerBound() != lb)
      return false;
    auto constantDim = fir::getIntIfConstant(boxDims.getDim());
    return constantDim && *constantDim == dim;
  }

  // Not a box_dims result: only a constant 1 can be accepted,
  // and only if the lower bounds are known to be default.
  if (mayHaveNonDefaultLowerBounds)
    return false;
  auto constantLb = fir::getIntIfConstant(lb);
  return constantLb && *constantLb == 1;
}

/// Returns true if \p ub matches the upper bound of the \p box's
/// dimension \p dim. If \p mayHaveNonDefaultLowerBounds is false,
/// then the dimension's lower bound may be an integer constant 1.
/// Note that the upper bound is usually a result of computation
/// involving the lower bound and the extent, and the function
/// tries its best to recognize the computation pattern.
/// The recognized patterns are:
///   * (lb + extent) - 1, with the addition operands in either order;
///   * extent, when \p mayHaveNonDefaultLowerBounds is false.
/// The conservative result 'false' does not necessarily mean
/// that \p ub is not an actual upper bound value.
static bool isBoxUb(mlir::Value box, std::int64_t dim, mlir::Value ub,
                    bool mayHaveNonDefaultLowerBounds = true) {
  if (auto sub1 = ub.getDefiningOp<mlir::arith::SubIOp>()) {
    auto one = fir::getIntIfConstant(sub1.getOperand(1));
    if (!one || *one != 1)
      return false;
    auto add = sub1.getOperand(0).getDefiningOp<mlir::arith::AddIOp>();
    if (!add)
      return false;
    // Forward mayHaveNonDefaultLowerBounds, so that the lower bound
    // operand may be the constant 1 when the bounds are known to be
    // default, e.g. (1 + extent) - 1.
    auto isLbPlusExtent = [&](mlir::Value lb, mlir::Value extent) {
      return isBoxLb(box, dim, lb, mayHaveNonDefaultLowerBounds) &&
             isBoxExtent(box, dim, extent);
    };
    return isLbPlusExtent(add.getOperand(0), add.getOperand(1)) ||
           isLbPlusExtent(add.getOperand(1), add.getOperand(0));
  }
  // With default lower bounds the upper bound equals the extent.
  if (!mayHaveNonDefaultLowerBounds)
    return isBoxExtent(box, dim, ub);
  return false;
}

/// Checks if the given \p sliceOp specifies a contiguous
/// array slice. If \p checkWhole is true, then the check
/// is done for all dimensions, otherwise, only for the innermost
/// dimension.
/// The simplest way to prove that this is a contiguous slice
/// is to check whether the slice stride(s) is 1.
/// For more complex cases, extra information must be provided
/// by the caller:
/// * \p origBox - if not null, then the source array is represented
/// with this !fir.box value. The box is used to recognize
/// the full dimension slices, which are specified by the triplets
/// computed from the dimensions' lower bounds and extents.
/// * \p mayHaveNonDefaultLowerBounds may be set to false to indicate
/// that the source entity has default lower bounds, so the full
/// dimension slices computations may use 1 for the lower bound.
/// The result is conservatively 'false' when the slice has field
/// or substring components, or when a checked triple with distinct
/// bound values has a step that is not a known constant 1.
static bool isContiguousArraySlice(fir::SliceOp sliceOp, bool checkWhole = true,
mlir::Value origBox = nullptr,
bool mayHaveNonDefaultLowerBounds = true) {
if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) {
// TODO: generalize code for the triples analysis with
// hlfir::designatePreservesContinuity, especially when
// recognition of the whole dimension slices is added.
auto triples = sliceOp.getTriples();
assert((triples.size() % 3) == 0 && "invalid triples size");

// A slice with step=1 in the innermost dimension preserves
// the continuity of the array in the innermost dimension.
// If checkWhole is false, then check only the innermost slice triples.
// The triples are laid out as (lower, upper, step) per dimension,
// so the loop below advances by 3 and clamps to the available triples.
std::size_t checkUpTo = checkWhole ? triples.size() : 3;
checkUpTo = std::min(checkUpTo, triples.size());
for (std::size_t i = 0; i < checkUpTo; i += 3) {
// Triples whose lower and upper bounds are the very same value
// are skipped without looking at the step.
if (triples[i] != triples[i + 1]) {
// This is a section of the dimension. Only allow it
// to be the first triple, if the source of the slice
// is a boxed array. If it is a raw pointer, then
// the result will still be contiguous, as long as
// the strides are all ones.
// When origBox is not null, we must prove that the triple
// covers the whole dimension and the stride is one,
// before claiming contiguity for this dimension.
// NOTE(review): with checkWhole == true, a partial step-1 section
// in dimension 0 followed by a multi-element higher dimension
// (e.g. a(2:5, :)) appears to be accepted by this loop, as the
// fullness of dimension 0 is never checked. The same applies to
// the origBox == nullptr path. Such sections are not contiguous
// as a whole - confirm whether this is intended.
if (i != 0 && origBox) {
std::int64_t dim = i / 3;
if (!isBoxLb(origBox, dim, triples[i],
mayHaveNonDefaultLowerBounds) ||
!isBoxUb(origBox, dim, triples[i + 1],
mayHaveNonDefaultLowerBounds))
return false;
}
auto constantStep = fir::getIntIfConstant(triples[i + 2]);
if (!constantStep || *constantStep != 1)
return false;
}
}
return true;
}
// Slices with field or substring components are not analyzed.
return false;
}

/// Tells whether the entity produced by \p embox is contiguous.
/// An embox without a slice operand is reported as contiguous.
/// With a slice operand, the answer is delegated to the analysis of
/// the defining fir::SliceOp (in all dimensions when \p checkWhole
/// is true, otherwise in the innermost dimension only). If the slice
/// is not produced by a fir::SliceOp, the result is 'false'.
bool fir::isContiguousEmbox(fir::EmboxOp embox, bool checkWhole) {
  auto slice = embox.getSlice();
  if (!slice)
    return true;

  auto sliceOp = slice.getDefiningOp<fir::SliceOp>();
  if (!sliceOp)
    return false;
  return isContiguousArraySlice(sliceOp, checkWhole);
}

//===----------------------------------------------------------------------===//
// EmboxCharOp
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -4794,41 +4916,20 @@ mlir::Type fir::applyPathToType(mlir::Type eleTy, mlir::ValueRange path) {
return eleTy;
}

bool fir::reboxPreservesContinuity(fir::ReboxOp rebox, bool checkWhole) {
bool fir::reboxPreservesContinuity(fir::ReboxOp rebox,
bool mayHaveNonDefaultLowerBounds,
bool checkWhole) {
// If slicing is not involved, then the rebox does not affect
// the continuity of the array.
auto sliceArg = rebox.getSlice();
if (!sliceArg)
return true;

if (auto sliceOp =
mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp())) {
if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) {
// TODO: generalize code for the triples analysis with
// hlfir::designatePreservesContinuity, especially when
// recognition of the whole dimension slices is added.
auto triples = sliceOp.getTriples();
assert((triples.size() % 3) == 0 && "invalid triples size");

// A slice with step=1 in the innermost dimension preserves
// the continuity of the array in the innermost dimension.
// If checkWhole is false, then check only the innermost slice triples.
std::size_t checkUpTo = checkWhole ? triples.size() : 3;
checkUpTo = std::min(checkUpTo, triples.size());
for (std::size_t i = 0; i < checkUpTo; i += 3) {
if (triples[i] != triples[i + 1]) {
// This is a section of the dimension. Only allow it
// to be the first triple.
if (i != 0)
return false;
auto constantStep = fir::getIntIfConstant(triples[i + 2]);
if (!constantStep || *constantStep != 1)
return false;
}
}
return true;
}
}
mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp()))
return isContiguousArraySlice(sliceOp, checkWhole, rebox.getBox(),
mayHaveNonDefaultLowerBounds);

return false;
}

Expand Down
4 changes: 4 additions & 0 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createPolymorphicOpConversion());
pm.addPass(fir::createAssumedRankOpConversion());

// Optimize redundant array repacking operations,
// if the source is known to be contiguous.
if (pc.OptLevel.isOptimizingForSpeed())
pm.addPass(fir::createOptimizeArrayRepacking());
pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Optimizer/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ add_flang_library(FIRTransforms
SetRuntimeCallAttributes.cpp
GenRuntimeCallsForTest.cpp
SimplifyFIROperations.cpp
OptimizeArrayRepacking.cpp

DEPENDS
CUFAttrs
Expand Down
4 changes: 3 additions & 1 deletion flang/lib/Optimizer/Transforms/LoopVersioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,9 @@ static mlir::Value unwrapPassThroughOps(mlir::Value val) {
/// of the value, otherwise return the value
static mlir::Value unwrapReboxOp(mlir::Value val) {
while (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>()) {
if (!fir::reboxPreservesContinuity(rebox, /*checkWhole=*/false)) {
if (!fir::reboxPreservesContinuity(rebox,
/*mayHaveNonDefaultLowerBounds=*/true,
/*checkWhole=*/false)) {
LLVM_DEBUG(llvm::dbgs() << "REBOX may produce non-contiguous array: "
<< rebox << '\n');
break;
Expand Down
Loading