Skip to content

Commit 80766ec

Browse files
committed
[mlir] Add an option to control the number of loops in affine parallelizer
Add a pass option to control the number of nested parallel loops produced by the parallelization passes. This is useful to build end-to-end passes targeting systems that don't need multiple parallel dimensions (e.g., CPUs typically need only one).

Reviewed By: wsmoses, chelini

Differential Revision: https://reviews.llvm.org/D92765
1 parent 2fe30a3 commit 80766ec

File tree

3 files changed

+40
-4
lines changed

3 files changed

+40
-4
lines changed

mlir/include/mlir/Dialect/Affine/Passes.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,11 @@ def AffineVectorize : FunctionPass<"affine-super-vectorize"> {
118118
def AffineParallelize : FunctionPass<"affine-parallelize"> {
119119
let summary = "Convert affine.for ops into 1-D affine.parallel";
120120
let constructor = "mlir::createAffineParallelizePass()";
121+
let options = [
122+
Option<"maxNested", "max-nested", "unsigned", /*default=*/"-1u",
123+
"Maximum number of nested parallel loops to produce. "
124+
"Defaults to unlimited (UINT_MAX).">,
125+
];
121126
}
122127

123128
def AffineLoopNormalize : FunctionPass<"affine-loop-normalize"> {

mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,28 @@ struct AffineParallelize : public AffineParallelizeBase<AffineParallelize> {
3636

3737
void AffineParallelize::runOnFunction() {
3838
FuncOp f = getFunction();
39-
SmallVector<AffineForOp, 8> parallelizableLoops;
39+
40+
// The walker proceeds in post-order, but we need to process outer loops first
41+
// to control the number of outer parallel loops, so push candidate loops to
42+
// the front of a deque.
43+
std::deque<AffineForOp> parallelizableLoops;
4044
f.walk([&](AffineForOp loop) {
4145
if (isLoopParallel(loop))
42-
parallelizableLoops.push_back(loop);
46+
parallelizableLoops.push_front(loop);
4347
});
44-
for (AffineForOp loop : parallelizableLoops)
45-
affineParallelize(loop);
48+
49+
for (AffineForOp loop : parallelizableLoops) {
50+
unsigned numParentParallelOps = 0;
51+
for (Operation *op = loop->getParentOp();
52+
op != nullptr && !op->hasTrait<OpTrait::AffineScope>();
53+
op = op->getParentOp()) {
54+
if (isa<AffineParallelOp>(op))
55+
++numParentParallelOps;
56+
}
57+
58+
if (numParentParallelOps < maxNested)
59+
affineParallelize(loop);
60+
}
4661
}
4762

4863
std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineParallelizePass() {

mlir/test/Dialect/Affine/parallelize.mlir

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize| FileCheck %s
2+
// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize='max-nested=1' | FileCheck --check-prefix=MAX-NESTED %s
23

34
// CHECK-LABEL: func @reduce_window_max() {
45
func @reduce_window_max() {
@@ -144,3 +145,18 @@ func @nested_for_with_minmax(%m: memref<?xf32>, %lb0: index,
144145
}
145146
return
146147
}
148+
149+
// MAX-NESTED-LABEL: @max_nested
150+
func @max_nested(%m: memref<?x?xf32>, %lb0: index, %lb1: index,
151+
%ub0: index, %ub1: index) {
152+
// MAX-NESTED: affine.parallel
153+
affine.for %i = affine_map<(d0) -> (d0)>(%lb0) to affine_map<(d0) -> (d0)>(%ub0) {
154+
// MAX-NESTED: affine.for
155+
affine.for %j = affine_map<(d0) -> (d0)>(%lb1) to affine_map<(d0) -> (d0)>(%ub1) {
156+
affine.load %m[%i, %j] : memref<?x?xf32>
157+
}
158+
}
159+
return
160+
}
161+
162+

0 commit comments

Comments
 (0)