@@ -21,12 +21,13 @@ func.func @fold_one_loop(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
21
21
// CHECK: %[[C4:.*]] = arith.constant 4 : index
22
22
// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
23
23
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
24
- // CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
25
- // CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
26
- // CHECK: scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
27
- // CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
28
- // CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
29
- // CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
24
+ // CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
25
+ // CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
26
+ // CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
27
+ // CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
28
+ // CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
29
+ // CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
30
+ // CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
30
31
31
32
func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
32
33
%c0 = arith.constant 0 : index
@@ -54,12 +55,13 @@ func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
54
55
// CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
55
56
// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
56
57
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
57
- // CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
58
- // CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
59
- // CHECK: scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
60
- // CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
61
- // CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
62
- // CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
58
+ // CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
59
+ // CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
60
+ // CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
61
+ // CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
62
+ // CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
63
+ // CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
64
+ // CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
63
65
64
66
func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
65
67
%c0 = arith.constant 0 : index
@@ -86,14 +88,17 @@ func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
86
88
// CHECK: %[[C10:.*]] = arith.constant 10 : index
87
89
// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
88
90
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[C10]] : index
89
- // CHECK: scf.for %[[J:.*]] = %[[I0]] to %[[I1]] step %[[C1]] {
90
- // CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
91
- // CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
92
- // CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
93
- // CHECK: scf.for %[[I:.*]] = %[[J]] to %[[I2]] step %[[I3]] {
94
- // CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
95
- // CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
96
- // CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
91
+ // CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
92
+ // CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
93
+ // CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
94
+ // CHECK: scf.for %[[J:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
95
+ // CHECK: %[[I5:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
96
+ // CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[C4]] : index
97
+ // CHECK: %[[I7:.*]] = arith.muli %[[C1]], %[[C4]] : index
98
+ // CHECK: scf.for %[[I:.*]] = %[[J]] to %[[I6]] step %[[I7]] {
99
+ // CHECK: %[[I8:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
100
+ // CHECK: %[[I9:.*]] = arith.muli %[[I8]], %[[I8]] : i32
101
+ // CHECK: memref.store %[[I9]], %[[ARG0]]{{\[}}%[[I]]
97
102
98
103
// If an instruction's operands are not defined outside the loop, we cannot
99
104
// perform the optimization, as is the case with the arith.muli below. (If
0 commit comments