Skip to content

Commit aa1d9a4

Browse files
authored
[MLIR][Affine] Enhance simplifyAdd for AffineExpr mod (#146492)
Currently, the AffineExpr Add simplification is able to optimize `"s1 + (s1 // c * -c)" to "s1 % c"`, but it cannot optimize `"(s0 + s1) + (s1 // c * -c)"`. This patch adds that simplification by also trying to combine the last operand of the left-hand side with the right-hand side, so that the expression is simplified to `"s0 + s1 % c"`.
1 parent eb07f0d commit aa1d9a4

File tree

2 files changed

+15
-12
lines changed

2 files changed

+15
-12
lines changed

mlir/lib/IR/AffineExpr.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,15 @@ static AffineExpr simplifyAdd(AffineExpr lhs, AffineExpr rhs) {
781781
if (isPositiveRhs && lhs == llrhs && rlrhs == -rrhs) {
782782
return lhs % rlrhs;
783783
}
784+
785+
// Try simplify lhs's last operand with rhs. e.g:
786+
// (s0 * 64 + s1) + (s1 // c * -c) --->
787+
// s0 * 64 + (s1 + s1 // c * -c) -->
788+
// s0 * 64 + s1 % c
789+
if (lBinOpExpr && lBinOpExpr.getKind() == AffineExprKind::Add) {
790+
if (auto simplified = simplifyAdd(lBinOpExpr.getRHS(), rhs))
791+
return lBinOpExpr.getLHS() + simplified;
792+
}
784793
return nullptr;
785794
}
786795

mlir/test/Dialect/Affine/decompose-affine-ops.mlir

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ func.func @simple_test_1(%0: index, %1: index, %2: index, %lb: index, %ub: index
2727
// CHECK-DAG: #[[$c42:.*]] = affine_map<() -> (42)>
2828
// CHECK-DAG: #[[$id:.*]] = affine_map<()[s0] -> (s0)>
2929
// CHECK-DAG: #[[$add:.*]] = affine_map<()[s0, s1] -> (s0 + s1)>
30-
// CHECK-DAG: #[[$div32div4timesm4:.*]] = affine_map<()[s0] -> (((s0 floordiv 32) floordiv 4) * -4)>
31-
// CHECK-DAG: #[[$div32:.*]] = affine_map<()[s0] -> (s0 floordiv 32)>
30+
// CHECK-DAG: #[[$div32mod4:.*]] = affine_map<()[s0] -> ((s0 floordiv 32) mod 4)>
3231

3332
// CHECK-LABEL: func.func @simple_test_2
3433
// CHECK-SAME: %[[I0:[0-9a-zA-Z]+]]: index,
@@ -45,10 +44,8 @@ func.func @simple_test_2(%0: index, %1: index, %2: index, %lb: index, %ub: index
4544
// CHECK: %[[R2:.*]] = affine.apply #[[$add]]()[%[[c42]], %[[R1]]]
4645
// CHECK: scf.for %[[j:.*]] =
4746
scf.for %j = %lb to %ub step %step {
48-
// CHECK: %[[R3:.*]] = affine.apply #[[$div32div4timesm4]]()[%[[j]]]
49-
// CHECK: %[[R4:.*]] = affine.apply #[[$add]]()[%[[R2]], %[[R3]]]
50-
// CHECK: %[[R5:.*]] = affine.apply #[[$div32]]()[%[[j]]]
51-
// CHECK: %[[a:.*]] = affine.apply #[[$add]]()[%[[R4]], %[[R5]]]
47+
// CHECK: %[[R3:.*]] = affine.apply #[[$div32mod4]]()[%[[j]]]
48+
// CHECK: %[[a:.*]] = affine.apply #[[$add]]()[%[[R2]], %[[R3]]]
5249
%a = affine.apply affine_map<(d0)[s0] -> ((d0 floordiv 32) mod 4 + s0 + 42)>(%j)[%i]
5350

5451
// CHECK: "some_side_effecting_consumer"(%[[a]]) : (index) -> ()
@@ -67,8 +64,7 @@ func.func @simple_test_2(%0: index, %1: index, %2: index, %lb: index, %ub: index
6764
// CHECK-DAG: #[[$div4timesm32:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * -32)>
6865
// CHECK-DAG: #[[$times8:.*]] = affine_map<()[s0] -> (s0 * 8)>
6966
// CHECK-DAG: #[[$id:.*]] = affine_map<()[s0] -> (s0)>
70-
// CHECK-DAG: #[[$div32div4timesm4:.*]] = affine_map<()[s0] -> (((s0 floordiv 32) floordiv 4) * -4)>
71-
// CHECK-DAG: #[[$div32:.*]] = affine_map<()[s0] -> (s0 floordiv 32)>
67+
// CHECK-DAG: #[[$div32mod4:.*]] = affine_map<()[s0] -> ((s0 floordiv 32) mod 4)>
7268

7369
// CHECK-LABEL: func.func @larger_test
7470
// CHECK-SAME: %[[I0:[0-9a-zA-Z]+]]: index,
@@ -126,10 +122,8 @@ func.func @larger_test(%0: index, %1: index, %2: index, %lb: index, %ub: index,
126122
// CHECK-NEXT: %[[e:.*]] = affine.apply #[[$add]]()[%[[c]], %[[idk]]]
127123
%e = affine.apply affine_map<()[s0, s1] -> (s0 + s1 * 8 - (s1 floordiv 4) * 32)>()[%k, %0]
128124

129-
// CHECK-NEXT: %[[R15:.*]] = affine.apply #[[$div32div4timesm4]]()[%[[k]]]
130-
// CHECK-NEXT: %[[R16:.*]] = affine.apply #[[$add]]()[%[[idj]], %[[R15]]]
131-
// CHECK-NEXT: %[[R17:.*]] = affine.apply #[[$div32]]()[%[[k]]]
132-
// CHECK-NEXT: %[[f:.*]] = affine.apply #[[$add]]()[%[[R16]], %[[R17]]]
125+
// CHECK-NEXT: %[[R15:.*]] = affine.apply #[[$div32mod4]]()[%[[k]]]
126+
// CHECK-NEXT: %[[f:.*]] = affine.apply #[[$add]]()[%[[idj]], %[[R15]]]
133127
%f = affine.apply affine_map<(d0)[s0] -> ((d0 floordiv 32) mod 4 + s0)>(%k)[%j]
134128

135129
// CHECK-NEXT: %[[g:.*]] = affine.apply #[[$add]]()[%[[b]], %[[idk]]]

0 commit comments

Comments
 (0)