
Commit 44861c7

[mlir] [linalg] Check for dim shape to decide unit dim for each operand in dropUnitDims pass. (#93317)
Running the `mlir-opt --linalg-fold-unit-extent-dims` pass on the following IR

```
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
module {
  func.func @main(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
    %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
    %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %2 = arith.mulf %in, %in_0 : f32
      %3 = arith.addf %out, %2 : f32
      linalg.yield %3 : f32
    } -> tensor<?x1x61x1xf32>
    return %1 : tensor<?x1x61x1xf32>
  }
}
```

produces an invalid `tensor.expand_shape` operation:

```
error: 'tensor.expand_shape' op expected dimension 0 of collapsed type to be dynamic since one or more of the corresponding dimensions in the expanded type is dynamic
  %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
  ^
/mathworks/devel/sandbox/sayans/geckWorks/g3294570/repro.mlir:8:10: note: see current operation: %5 = "tensor.expand_shape"(%4) <{reassociation = [[0, 1, 2, 3]]}> : (tensor<61xf32>) -> tensor<?x1x61x1xf32>
// -----// IR Dump After LinalgFoldUnitExtentDimsPass Failed (linalg-fold-unit-extent-dims) //----- //
#map = affine_map<(d0) -> (0, d0)>
#map1 = affine_map<(d0) -> ()>
#map2 = affine_map<(d0) -> (d0)>
"builtin.module"() ({
  "func.func"() <{function_type = (tensor<1x?x?x1xf32>, index) -> tensor<?x1x61x1xf32>, sym_name = "main"}> ({
  ^bb0(%arg0: tensor<1x?x?x1xf32>, %arg1: index):
    %0 = "arith.constant"() <{value = dense<1.000000e+00> : tensor<f32>}> : () -> tensor<f32>
    %1 = "tensor.collapse_shape"(%arg0) <{reassociation = [[0, 1], [2, 3]]}> : (tensor<1x?x?x1xf32>) -> tensor<?x?xf32>
    %2 = "tensor.empty"() : () -> tensor<61xf32>
    %3 = "tensor.empty"() : () -> tensor<61xf32>
    %4 = "linalg.generic"(%1, %0, %2, %3) <{indexing_maps = [#map, #map1, #map2, #map2], iterator_types = [#linalg.iterator_type<parallel>], operandSegmentSizes = array<i32: 3, 1>}> ({
    ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32):
      %6 = "arith.mulf"(%arg2, %arg3) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
      %7 = "arith.addf"(%arg4, %6) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
      "linalg.yield"(%7) : (f32) -> ()
    }) : (tensor<?x?xf32>, tensor<f32>, tensor<61xf32>, tensor<61xf32>) -> tensor<61xf32>
    %5 = "tensor.expand_shape"(%4) <{reassociation = [[0, 1, 2, 3]]}> : (tensor<61xf32>) -> tensor<?x1x61x1xf32>
    "func.return"(%5) : (tensor<?x1x61x1xf32>) -> ()
  }) : () -> ()
}) : () -> ()
```

This happens because the loop dimension `d0` is classified as a unit dim that can be dropped based on the shape of operand `arg0` of the `linalg.generic`. Later, when iterating over the `outs` operand, `d0` is again treated as a unit dim even though the shape entry corresponding to it is `ShapedType::kDynamic`.
For the `linalg.generic` to be valid, `d0` of `outs` does need to be `1`, but the current implementation does not verify this; the dimension is dropped anyway, turning the `outs` operand into `tensor<61xf32>` in the example. The fix is to also check that the dimension's static extent is actually `1` before dropping it. The IR after the fix is:

```
#map = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0) -> (0, d0)>
#map2 = affine_map<(d0) -> ()>
module {
  func.func @main(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %cst = arith.constant dense<1.000000e+00> : tensor<f32>
    %collapsed = tensor.collapse_shape %arg0 [[0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor<?x?xf32>
    %0 = tensor.empty(%arg1) : tensor<?x61xf32>
    %1 = affine.apply #map()[%arg1, %c1]
    %2 = tensor.empty(%1) : tensor<?x61xf32>
    %3 = linalg.generic {indexing_maps = [#map1, #map2, #map1, #map1], iterator_types = ["parallel"]} ins(%collapsed, %cst, %0 : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%2 : tensor<?x61xf32>) {
    ^bb0(%in: f32, %in_0: f32, %in_1: f32, %out: f32):
      %4 = arith.mulf %in, %in_0 : f32
      %5 = arith.addf %in_1, %4 : f32
      linalg.yield %5 : f32
    } -> tensor<?x61xf32>
    %expanded = tensor.expand_shape %3 [[0, 1], [2, 3]] output_shape [%c0, 1, 61, 1] : tensor<?x61xf32> into tensor<?x1x61x1xf32>
    return %expanded : tensor<?x1x61x1xf32>
  }
}
```
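To make the per-operand folding decision concrete, the following is a minimal, self-contained C++ sketch (an editorial illustration, not the pass's actual code; `kDynamic`, `keptShape`, and `loopIsUnit` are made-up names) that applies the old and the new rule to the `outs` operand of the example and prints the folded type each rule produces.

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Illustrative sentinel standing in for mlir::ShapedType::kDynamic.
constexpr int64_t kDynamic = -1;

// Old rule: an operand dimension is folded away whenever the loop it indexes
// was already classified as a unit loop (based on some other operand).
// New rule: additionally require this operand's extent to be statically 1.
std::vector<int64_t> keptShape(const std::vector<int64_t> &shape,
                               const std::vector<bool> &loopIsUnit,
                               bool requireStaticOne) {
  std::vector<int64_t> kept;
  for (size_t d = 0; d < shape.size(); ++d) {
    bool drop = loopIsUnit[d] && (!requireStaticOne || shape[d] == 1);
    if (!drop)
      kept.push_back(shape[d]);
  }
  return kept;
}

std::string asTensorType(const std::vector<int64_t> &shape) {
  std::string s = "tensor<";
  for (int64_t e : shape)
    s += (e == kDynamic ? std::string("?") : std::to_string(e)) + "x";
  return s + "f32>";
}

int main() {
  // outs operand tensor<?x1x61x1xf32>; its map is (d0, d1, d2, d3), and the
  // loops d0, d1, d3 were marked as droppable unit loops while scanning the
  // other operands.
  std::vector<int64_t> outs = {kDynamic, 1, 61, 1};
  std::vector<bool> loopIsUnit = {true, true, false, true};

  std::cout << "old rule: " << asTensorType(keptShape(outs, loopIsUnit, false))
            << "\n"; // tensor<61xf32>, inconsistent with tensor<?x1x61x1xf32>
  std::cout << "new rule: " << asTensorType(keptShape(outs, loopIsUnit, true))
            << "\n"; // tensor<?x61xf32>, the dynamic dimension is kept
}
```

Under the old rule the dynamic leading dimension is silently folded away, which is what produced the `tensor<61xf32>` operand and the invalid `tensor.expand_shape`; under the new rule only the two statically known unit extents are dropped.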
1 parent de32786 commit 44861c7

File tree

2 files changed (+45 lines, -1 line)


mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp

Lines changed: 2 additions & 1 deletion
@@ -351,7 +351,8 @@ static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata(
   auto isUnitDim = [&](unsigned dim) {
     if (auto dimExpr = dyn_cast<AffineDimExpr>(exprs[dim])) {
       unsigned oldPosition = dimExpr.getPosition();
-      return !oldDimsToNewDimsMap.count(oldPosition);
+      return !oldDimsToNewDimsMap.count(oldPosition) &&
+             (operandShape[dim] == 1);
     }
     // Handle the other case where the shape is 1, and is accessed using a
     // constant 0.
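A note on why the added comparison is enough (general MLIR behavior, not something introduced by this patch): dynamic extents in a shaped type are stored as the sentinel `ShapedType::kDynamic`, which never compares equal to 1, so a dynamically sized dimension can no longer be folded as a unit dim. A minimal sketch, assuming an MLIR development tree with headers on the include path (the function name is illustrative):

```cpp
#include "mlir/IR/BuiltinTypes.h"

#include <cassert>
#include <cstdint>

// Sketch: the sentinel used for dynamic extents never passes an `== 1` test,
// which is exactly what the patched predicate relies on.
void dynamicExtentIsNeverUnit() {
  int64_t dynamicExtent = mlir::ShapedType::kDynamic;
  assert(mlir::ShapedType::isDynamic(dynamicExtent));
  assert(dynamicExtent != 1 && "a dynamic extent must not be folded as a unit dim");

  int64_t staticUnitExtent = 1;
  assert(staticUnitExtent == 1); // only a statically known extent of 1 qualifies
}
```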

mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir

Lines changed: 43 additions & 0 deletions
@@ -1087,3 +1087,46 @@ func.func @drop_known_unit_constant_low_high(%arg0: tensor<1x383x128xf32>) -> te
 // CHECK: } : tensor<383x128xf32> to tensor<384x128xf32>
 // CHECK: tensor.expand_shape %[[PADDED]]
 // CHECK-SAME: {{\[}}[0, 1], [2]] output_shape [1, 384, 128] : tensor<384x128xf32> into tensor<1x384x128xf32>
+
+// -----
+
+// CHECK: #[[$MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (0, d0)>
+// CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> ()>
+
+// CHECK-LABEL: func @drop_unit_dim_corresponding_to_dynamic_dim
+// CHECK-SAME: %[[ARG0:.*]]: tensor<1x?x?x1xf32>,
+// CHECK-SAME: %[[ARG1:.*]]: index) -> tensor<?x1x61x1xf32> {
+// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant dense<1.000000e+00> : tensor<f32>
+// CHECK: %[[VAL_3:.*]] = tensor.collapse_shape %[[ARG0]] {{\[\[}}0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor<?x?xf32>
+// CHECK: %[[VAL_4:.*]] = tensor.empty(%[[ARG1]]) : tensor<?x61xf32>
+// CHECK: %[[VAL_5:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[ARG1]], %[[VAL_1]]]
+// CHECK: %[[VAL_6:.*]] = tensor.empty(%[[VAL_5]]) : tensor<?x61xf32>
+// CHECK: %[[VAL_7:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[VAL_3]], %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%[[VAL_6]] : tensor<?x61xf32>) {
+// CHECK: ^bb0(%[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32, %[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32):
+// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_8]], %[[VAL_9]] : f32
+// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_10]], %[[VAL_12]] : f32
+// CHECK: linalg.yield %[[VAL_13]] : f32
+// CHECK: } -> tensor<?x61xf32>
+// CHECK: %[[VAL_14:.*]] = tensor.expand_shape %[[VAL_7]] {{\[\[}}0, 1], [2, 3]] output_shape {{\[}}%[[VAL_0]], 1, 61, 1] : tensor<?x61xf32> into tensor<?x1x61x1xf32>
+// CHECK: return %[[VAL_14]] : tensor<?x1x61x1xf32>
+// CHECK: }
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
+module {
+  func.func @drop_unit_dim_corresponding_to_dynamic_dim(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
+    %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
+    %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
+    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
+    ^bb0(%in: f32, %in_0: f32, %out: f32):
+      %2 = arith.mulf %in, %in_0 : f32
+      %3 = arith.addf %out, %2 : f32
+      linalg.yield %3 : f32
+    } -> tensor<?x1x61x1xf32>
+    return %1 : tensor<?x1x61x1xf32>
+  }
+}
