Skip to content

Commit 2fe30a3

Browse files
committed
[mlir] properly support min/max in affine parallelization
The existing implementation of the affine parallelization silently copies over the lower and upper bound maps from affine.for to affine.parallel. However, the semantics of these maps differ between these two ops: in affine.for, a max(min) of results is taken for the lower(upper) bound; in affine.parallel, multiple induction variables can be defined and each result corresponds to one induction variable. Thus the existing implementation could generate invalid IR or IR that passes the verifier but has different semantics than the original code. Fix the parallelization utility to emit dedicated min/max operations before the affine.parallel in such cases. Disallow parallelization if min/max would have been in an operation without the AffineScope trait, e.g., in another loop, since the result of these operations is not considered a valid affine dimension identifier and may not be properly handled by the affine analyses. Reviewed By: wsmoses Differential Revision: https://reviews.llvm.org/D92763
1 parent 3af354e commit 2fe30a3

File tree

2 files changed

+65
-3
lines changed

2 files changed

+65
-3
lines changed

mlir/lib/Dialect/Affine/Utils/Utils.cpp

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,43 @@ static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
134134
void mlir::affineParallelize(AffineForOp forOp) {
135135
Location loc = forOp.getLoc();
136136
OpBuilder outsideBuilder(forOp);
137+
138+
// If a loop has a 'max' in the lower bound, emit it outside the parallel loop
139+
// as it does not have implicit 'max' behavior.
140+
AffineMap lowerBoundMap = forOp.getLowerBoundMap();
141+
ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
142+
AffineMap upperBoundMap = forOp.getUpperBoundMap();
143+
ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
144+
145+
bool needsMax = lowerBoundMap.getNumResults() > 1;
146+
bool needsMin = upperBoundMap.getNumResults() > 1;
147+
AffineMap identityMap;
148+
if (needsMax || needsMin) {
149+
if (forOp->getParentOp() &&
150+
!forOp->getParentOp()->hasTrait<OpTrait::AffineScope>())
151+
return;
152+
153+
identityMap = AffineMap::getMultiDimIdentityMap(1, loc->getContext());
154+
}
155+
if (needsMax) {
156+
auto maxOp = outsideBuilder.create<AffineMaxOp>(loc, lowerBoundMap,
157+
lowerBoundOperands);
158+
lowerBoundMap = identityMap;
159+
lowerBoundOperands = maxOp->getResults();
160+
}
161+
162+
// Same for the upper bound.
163+
if (needsMin) {
164+
auto minOp = outsideBuilder.create<AffineMinOp>(loc, upperBoundMap,
165+
upperBoundOperands);
166+
upperBoundMap = identityMap;
167+
upperBoundOperands = minOp->getResults();
168+
}
169+
137170
// Creating empty 1-D affine.parallel op.
138171
AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
139-
loc, llvm::None, llvm::None, forOp.getLowerBoundMap(),
140-
forOp.getLowerBoundOperands(), forOp.getUpperBoundMap(),
141-
forOp.getUpperBoundOperands());
172+
loc, llvm::None, llvm::None, lowerBoundMap, lowerBoundOperands,
173+
upperBoundMap, upperBoundOperands);
142174
// Steal the body of the old affine for op and erase it.
143175
newPloop.region().takeBody(forOp.region());
144176
forOp.erase();

mlir/test/Dialect/Affine/parallelize.mlir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,33 @@ func @non_affine_load() {
114114
}
115115
return
116116
}
117+
118+
// CHECK-LABEL: for_with_minmax
119+
func @for_with_minmax(%m: memref<?xf32>, %lb0: index, %lb1: index,
120+
%ub0: index, %ub1: index) {
121+
// CHECK: %[[lb:.*]] = affine.max
122+
// CHECK: %[[ub:.*]] = affine.min
123+
// CHECK: affine.parallel (%{{.*}}) = (%[[lb]]) to (%[[ub]])
124+
affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %lb1)
125+
to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) {
126+
affine.load %m[%i] : memref<?xf32>
127+
}
128+
return
129+
}
130+
131+
// CHECK-LABEL: nested_for_with_minmax
132+
func @nested_for_with_minmax(%m: memref<?xf32>, %lb0: index,
133+
%ub0: index, %ub1: index) {
134+
// CHECK: affine.parallel
135+
affine.for %j = 0 to 10 {
136+
// Cannot parallelize the inner loop because we would need to compute
137+
// affine.max for its lower bound inside the loop, and that is not (yet)
138+
// considered as a valid affine dimension.
139+
// CHECK: affine.for
140+
affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %j)
141+
to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) {
142+
affine.load %m[%i] : memref<?xf32>
143+
}
144+
}
145+
return
146+
}

0 commit comments

Comments
 (0)