Skip to content

[mlir][Affine] Align affine fusion code in pass and utilities #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion mlir/include/mlir/Analysis/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ struct ComputationSliceState {

// Clears all bounds and operands in slice state.
void clearBounds();

/// Returns true if the computation slice is empty, i.e. if 'ivs' holds no
/// entries. Only 'ivs' is inspected; bounds and operands are not consulted.
bool isEmpty() const { return ivs.empty(); }

void dump() const;
};

/// Computes the computation slice loop bounds for one loop nest as affine maps
Expand Down Expand Up @@ -212,7 +217,7 @@ struct MemRefRegion {
/// The last field is a 2-d FlatAffineConstraints symbolic in %i.
///
LogicalResult compute(Operation *op, unsigned loopDepth,
ComputationSliceState *sliceState = nullptr,
const ComputationSliceState *sliceState = nullptr,
bool addMemRefDimBounds = true);

FlatAffineConstraints *getConstraints() { return &cst; }
Expand Down Expand Up @@ -309,6 +314,11 @@ bool isLoopParallel(AffineForOp forOp);
/// number of constraints.
IntegerSet simplifyIntegerSet(IntegerSet set);

/// Returns the innermost common loop depth for the set of operations in 'ops'.
/// 'ops' must contain at least one operation (this is asserted). If
/// 'surroundingLoops' is non-null, each loop found to be common to the
/// operations in 'ops' is appended to it; passing nullptr (the default) skips
/// collecting them and only the depth is returned.
unsigned getInnermostCommonLoopDepth(
ArrayRef<Operation *> ops,
SmallVectorImpl<AffineForOp> *surroundingLoops = nullptr);

} // end namespace mlir

#endif // MLIR_ANALYSIS_UTILS_H
31 changes: 26 additions & 5 deletions mlir/include/mlir/Transforms/LoopFusionUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#ifndef MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
#define MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H

#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
Expand All @@ -38,6 +39,24 @@ struct FusionResult {
FusionResult(ResultEnum v) : value(v) {}
};

/// Temporary struct wrapping an enum that distinguishes between the different
/// fusion strategies implemented in Affine. It is used to specialize the loop
/// fusion utilities with the assumptions made in the AffineLoopFusion pass
/// while sharing a single implementation.
// TODO: Remove this enum once the producer-consumer and sibling loop fusion
// strategies in AffineLoopFusion pass are generic enough.
struct FusionStrategy {
/// The fusion strategy selected for a given fusion query.
enum StrategyEnum {
None, // Generic fusion. No assumptions are made.
ProducerConsumer, // Producer-consumer fusion from AffineLoopFusion pass.
Sibling // Sibling fusion from AffineLoopFusion pass.
} strategy;

/// Memref associated with the strategy. Callers that use the generic strategy
/// pass a null Value (see the default argument of canFuseLoops).
// NOTE(review): presumably the memref targeted by the producer-consumer or
// sibling fusion -- confirm against the AffineLoopFusion pass.
Value memref;

/// Builds a strategy descriptor from a strategy kind and its memref.
FusionStrategy(StrategyEnum strategy, Value memref)
: strategy(strategy), memref(memref) {}
};

/// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the
/// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult
/// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are
Expand All @@ -46,14 +65,15 @@ struct FusionResult {
/// NOTE: This function is not feature complete and should only be used in
/// testing.
/// TODO: Update comments when this function is fully implemented.
FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
unsigned dstLoopDepth,
ComputationSliceState *srcSlice);
FusionResult
canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth,
ComputationSliceState *srcSlice,
FusionStrategy fusionStrategy = {FusionStrategy::None, Value()});

/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point
/// and source slice loop bounds specified in 'srcSlice'.
void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
ComputationSliceState *srcSlice);
const ComputationSliceState &srcSlice);

/// LoopNestStats aggregates various per-loop statistics (e.g. loop trip count
/// and operation count) for a loop nest up until (and including) the innermost
Expand Down Expand Up @@ -89,7 +109,8 @@ int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats);
// TODO: Improve this cost model.
bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats,
AffineForOp dstForOp, LoopNestStats &dstStats,
ComputationSliceState *slice, int64_t *computeCost);
const ComputationSliceState &slice,
int64_t *computeCost);

} // end namespace mlir

Expand Down
38 changes: 30 additions & 8 deletions mlir/lib/Analysis/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,28 @@ void ComputationSliceState::clearBounds() {
ubOperands.clear();
}

void ComputationSliceState::dump() const {
llvm::errs() << "\tIVs:\n";
for (Value iv : ivs)
llvm::errs() << "\t\t" << iv << "\n";

llvm::errs() << "\tLBs:\n";
for (auto &en : llvm::enumerate(lbs)) {
llvm::errs() << "\t\t" << en.value() << "\n";
llvm::errs() << "\t\tOperands:\n";
for (Value lbOp : lbOperands[en.index()])
llvm::errs() << "\t\t\t" << lbOp << "\n";
}

llvm::errs() << "\tUBs:\n";
for (auto &en : llvm::enumerate(ubs)) {
llvm::errs() << "\t\t" << en.value() << "\n";
llvm::errs() << "\t\tOperands:\n";
for (Value ubOp : ubOperands[en.index()])
llvm::errs() << "\t\t\t" << ubOp << "\n";
}
}

unsigned MemRefRegion::getRank() const {
return memref.getType().cast<MemRefType>().getRank();
}
Expand Down Expand Up @@ -211,7 +233,7 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
// TODO: extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
ComputationSliceState *sliceState,
const ComputationSliceState *sliceState,
bool addMemRefDimBounds) {
assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
"affine read/write op expected");
Expand Down Expand Up @@ -541,13 +563,12 @@ static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs,
return success();
}

// Returns the innermost common loop depth for the set of operations in 'ops'.
/// Returns the innermost common loop depth for the set of operations in 'ops'.
// TODO: Move this to LoopUtils.
static unsigned
getInnermostCommonLoopDepth(ArrayRef<Operation *> ops,
SmallVectorImpl<AffineForOp> &surroundingLoops) {
unsigned mlir::getInnermostCommonLoopDepth(
ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
unsigned numOps = ops.size();
assert(numOps > 0);
assert(numOps > 0 && "Expected at least one operation");

std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
Expand All @@ -564,7 +585,8 @@ getInnermostCommonLoopDepth(ArrayRef<Operation *> ops,
if (loops[i - 1][d] != loops[i][d])
return loopDepth;
}
surroundingLoops.push_back(loops[i - 1][d]);
if (surroundingLoops)
surroundingLoops->push_back(loops[i - 1][d]);
++loopDepth;
}
return loopDepth;
Expand Down Expand Up @@ -684,7 +706,7 @@ LogicalResult mlir::computeSliceUnion(ArrayRef<Operation *> opsA,
}
SmallVector<AffineForOp, 4> surroundingLoops;
unsigned innermostCommonLoopDepth =
getInnermostCommonLoopDepth(ops, surroundingLoops);
getInnermostCommonLoopDepth(ops, &surroundingLoops);
if (loopDepth > innermostCommonLoopDepth) {
LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
return failure();
Expand Down
Loading