Commit 58d0da8

Author: gysit
[mlir][linalg] Use arrays to pass padding options.
Pass the padding options using arrays instead of lambdas. In particular, pass the padding value as a string and use the argument parser to create the padding value. Arrays are a more natural choice that matches the current use cases and avoids converting arrays to lambdas.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D122309
1 parent: 4df69c1

File tree: 7 files changed, +110 -161 lines changed
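For context, a minimal sketch of configuring the new array-based options in C++, mirroring the padding-values=0.:f32,0.:f32,0.:f32, pack-paddings=1,1,0, and hoist-paddings=3,3,0 flags exercised by the updated test below. The helper name and the three-operand matmul layout are assumptions, not part of this commit:

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Pad all three matmul operands with 0.0 : f32, mark the two input pads
// as nofold so they can be packed, and hoist them out of three loops.
static linalg::LinalgPaddingOptions makePaddingOptions(OpBuilder &b) {
  SmallVector<Attribute> zeros(3, b.getF32FloatAttr(0.f));
  linalg::LinalgPaddingOptions options;
  options.setPaddingValues(zeros)
      .setPackPaddings({true, true, false})
      .setHoistPaddings({3, 3, 0});
  return options;
}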

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
23 additions, 61 deletions

@@ -553,69 +553,32 @@ void transformIndexOps(RewriterBase &b, LinalgOp op,
                        SmallVectorImpl<Value> &ivs,
                        const LoopIndexToRangeIndexMap &loopIndexToRangeIndex);
 
-/// Callback returning the padding value to use for a given OpOperand or failure
-/// for no padding. This should be a function of both the operation and the
-/// operand type.
-using PaddingValueComputationFunction =
-    std::function<FailureOr<Value>(OpBuilder &, OpOperand &)>;
-
-/// Callback returning true if the PadOp defining the given OpOperand shall be
-/// marked as nofold to enable packing.
-using PaddingNoFoldComputationFunction = std::function<bool(OpOperand &)>;
-
-/// Callback returning the number of loops to hoist the PadOp defining the given
-/// OpOperand.
-using PaddingHoistComputationFunction = std::function<int64_t(OpOperand &)>;
-
-/// Callback returning the transpose vector used to permute the result tensor
-/// dimensions of the PadOp defining the given OpOperand.
-using PaddingTransposeComputationFunction =
-    std::function<SmallVector<int64_t>(OpOperand &)>;
-
 struct LinalgPaddingOptions {
-  /// Callback returning the padding value to use for a given OpOperand or
-  /// failure for no padding. Padding operations are introduced if
-  /// `paddingValueComputationFunction` is set and does not return failure.
-  /// Padding all operands guarantees the operation is statically shaped and
-  /// thus can be vectorized.
-  PaddingValueComputationFunction paddingValueComputationFunction = nullptr;
-
-  LinalgPaddingOptions &
-  setPaddingValueComputationFunction(PaddingValueComputationFunction fun) {
-    paddingValueComputationFunction = std::move(fun);
+  /// A padding value for every operand.
+  SmallVector<Attribute> paddingValues;
+  LinalgPaddingOptions &setPaddingValues(ArrayRef<Attribute> pv) {
+    paddingValues.assign(pv.begin(), pv.end());
     return *this;
   }
-
-  /// Callback returning true if the PadOp defining the given OpOperand shall be
-  /// marked as nofold to enable packing. A padding operation is only marked
-  /// nofold if `paddingNoFoldComputationFunction` is set and returns true.
-  /// Otherwise, the nofold attribute is set to false.
-  PaddingNoFoldComputationFunction paddingNoFoldComputationFunction = nullptr;
-
-  LinalgPaddingOptions &
-  setPaddingNoFoldComputationFunction(PaddingNoFoldComputationFunction fun) {
-    paddingNoFoldComputationFunction = std::move(fun);
+  /// A flag for every operand to mark the PadOp as nofold which enables packing
+  /// for statically shaped operands.
+  SmallVector<bool> packPaddings;
+  LinalgPaddingOptions &setPackPaddings(ArrayRef<bool> pp) {
+    packPaddings.assign(pp.begin(), pp.end());
     return *this;
   }
-
-  /// Callback returning the number of loops to hoist the PadOp defining the
-  /// given OpOperand.
-  PaddingHoistComputationFunction paddingHoistComputationFunction = nullptr;
-
-  LinalgPaddingOptions &
-  setPaddingHoistComputationFunction(PaddingHoistComputationFunction fun) {
-    paddingHoistComputationFunction = std::move(fun);
+  /// A number of loops to hoist the PadOp out for every operand.
+  SmallVector<int64_t> hoistPaddings;
+  LinalgPaddingOptions &setHoistPaddings(ArrayRef<int64_t> hp) {
+    hoistPaddings.assign(hp.begin(), hp.end());
     return *this;
   }
-
-  /// Callback returning the transpose vector used to permute the result tensor
-  /// dimensions of the PadOp defining the given OpOperand.
-  PaddingTransposeComputationFunction paddingTransposeComputationFunction =
-      nullptr;
-
-  LinalgPaddingOptions &setPaddingTransposeComputationFunction(
-      PaddingTransposeComputationFunction fun) {
-    paddingTransposeComputationFunction = std::move(fun);
+  /// A permutation vector for every operand used to transpose the packed PadOp
+  /// results.
+  SmallVector<SmallVector<int64_t>> transposePaddings;
+  LinalgPaddingOptions &
+  setTransposePaddings(ArrayRef<SmallVector<int64_t>> tp) {
+    transposePaddings.assign(tp.begin(), tp.end());
     return *this;
   }
 };

@@ -1254,16 +1217,15 @@ struct PadOpTransformationPattern : public OpRewritePattern<tensor::PadOp> {
                                  PatternRewriter &rewriter) const override;
 };
 
-/// Pad the operands of `opToPad` to a static bounding box. Use `paddingFunc`
-/// and `nofoldFunc` to set the padding value and the nofold attribute of the
+/// Pad the operands of `opToPad` to a static bounding box. Use `paddingValues`
+/// and `packPaddings` to set the padding value and the nofold attribute of the
 /// introduced tensor::PadOps, respectively. Update `paddedOp` to the cloned
 /// statically shaped operation and return the extracted dynamically shaped
 /// results. If padding fails, return failure.
 FailureOr<SmallVector<Value>>
 rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
-                  const PaddingValueComputationFunction &paddingFunc,
-                  const PaddingNoFoldComputationFunction &nofoldFunc,
-                  LinalgOp &paddedOp);
+                  ArrayRef<Attribute> paddingValues,
+                  ArrayRef<bool> packPaddings, LinalgOp &paddedOp);
 
 using OptimizeCopyFn =
     std::function<LogicalResult(PatternRewriter &, tensor::PadOp, Value)>;
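The transpose option follows the same per-operand convention. Continuing the hypothetical sketch above, one could transpose the packed pad of the first operand and keep the others in their original layout; the permutations chosen here are illustrative assumptions:

// Permutation per operand: transpose the packed PadOp result of operand 0
// with (1, 0); (0, 1) is the identity for the two remaining operands.
SmallVector<SmallVector<int64_t>> transposes = {{1, 0}, {0, 1}, {0, 1}};
options.setTransposePaddings(transposes);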

mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
30 additions, 30 deletions

@@ -158,16 +158,17 @@ LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() {
   return *this;
 }
 
-/// Helper function that tries to pad `opOperand`. Exit early for scalar
-/// operands, if `paddingFunc` returns failure, or if `opOperand` is not defined
-/// by an ExtractSliceOp. Otherwise, try to pad the operand even if it already
-/// has a static shape. Set `result` to the result of the created tensor::PadOp
-/// or and return success if the operand either has been padded to a static
-/// shape or already had a static shape and failure otherwise.
+/// Pad `opOperand` using the provided `paddingValues`. Exit early for scalar
+/// operands, if `paddingValues` contains no value for the `opOperand`, or if
+/// `opOperand` is not defined by an ExtractSliceOp. Otherwise, try to pad the
+/// operand even if it already has a static shape. Set `result` to the result of
+/// the created tensor::PadOp or and return success if the operand either has
+/// been padded to a static shape or already had a static shape and failure
+/// otherwise.
 static LogicalResult padOperandToSmallestStaticBoundingBox(
     OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand,
-    const PaddingValueComputationFunction &paddingFunc,
-    const PaddingNoFoldComputationFunction &nofoldFunc, Value &result) {
+    ArrayRef<Attribute> paddingValues, ArrayRef<bool> packPaddings,
+    Value &result) {
   // Get the shape of the operand and check if it has a dynamic shape. Only
   // return failure if the operand is not a scalar and has a dynamic shape.
   ArrayRef<int64_t> shape = opToPad.getShape(opOperand);

@@ -178,9 +179,11 @@ static LogicalResult padOperandToSmallestStaticBoundingBox(
     return success();
 
   // Cannot pad if the padding value is unknown.
-  FailureOr<Value> paddingValue = paddingFunc(b, *opOperand);
-  if (failed(paddingValue))
+  if (opOperand->getOperandNumber() >= paddingValues.size())
     return failure(hasDynamicShape);
+  Attribute paddingAttr = paddingValues[opOperand->getOperandNumber()];
+  Value paddingValue = b.create<arith::ConstantOp>(
+      opToPad.getLoc(), paddingAttr.getType(), paddingAttr);
 
   // Follow the use-def chain if `currOpOperand` is defined by a LinalgOp.
   OpOperand *currOpOperand = opOperand;

@@ -227,18 +230,18 @@ static LogicalResult padOperandToSmallestStaticBoundingBox(
   // Pad the operand to the bounding box defined by `staticSizes`.
   auto staticTensorType = RankedTensorType::get(
       staticSizes, getElementTypeOrSelf(opOperand->get()));
-  bool nofold = nofoldFunc ? nofoldFunc(*opOperand) : false;
-  result =
-      makeComposedPadHighOp(b, opToPad->getLoc(), staticTensorType,
-                            opOperand->get(), paddingValue.getValue(), nofold);
+  bool nofold = opOperand->getOperandNumber() < packPaddings.size()
+                    ? packPaddings[opOperand->getOperandNumber()]
+                    : false;
+  result = makeComposedPadHighOp(b, opToPad->getLoc(), staticTensorType,
+                                 opOperand->get(), paddingValue, nofold);
   return success();
 }
 
 FailureOr<SmallVector<Value>>
 linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
-                          const PaddingValueComputationFunction &paddingFunc,
-                          const PaddingNoFoldComputationFunction &nofoldFunc,
-                          LinalgOp &paddedOp) {
+                          ArrayRef<Attribute> paddingValues,
+                          ArrayRef<bool> packPaddings, LinalgOp &paddedOp) {
   Location loc = opToPad->getLoc();
 
   // TODO: there are cases where we may still want to pad to larger sizes.

@@ -256,7 +259,7 @@ linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
     // If padding was requested but the shape cannot be bounded statically then
     // the pattern fails to apply.
     if (failed(padOperandToSmallestStaticBoundingBox(
-            b, opToPad, opOperand, paddingFunc, nofoldFunc, paddedOperand)))
+            b, opToPad, opOperand, paddingValues, packPaddings, paddedOperand)))
       return failure();
     newOperands.push_back(paddedOperand ? paddedOperand : opOperand->get());
   }

@@ -498,29 +501,26 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
 
   // Pad the operation.
   LinalgOp paddedOp;
-  FailureOr<SmallVector<Value>> newResults = rewriteAsPaddedOp(
-      rewriter, linalgOp, options.paddingValueComputationFunction,
-      options.paddingNoFoldComputationFunction, paddedOp);
+  FailureOr<SmallVector<Value>> newResults =
+      rewriteAsPaddedOp(rewriter, linalgOp, options.paddingValues,
+                        options.packPaddings, paddedOp);
   if (failed(newResults))
     return failure();
 
-  // Compute the desired hoisting depths.
-  SmallVector<int64_t> depths;
-  if (options.paddingHoistComputationFunction) {
-    for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands())
-      depths.push_back(options.paddingHoistComputationFunction(*opOperand));
-  }
-
   // Hoist the padding.
-  for (const auto &en : enumerate(depths)) {
+  for (const auto &en : enumerate(options.hoistPaddings)) {
+    if (static_cast<int64_t>(en.index()) >= paddedOp.getNumInputsAndOutputs())
+      break;
     OpOperand &opOperand = paddedOp->getOpOperand(en.index());
     auto padOp = opOperand.get().getDefiningOp<tensor::PadOp>();
     if (!padOp || en.value() == 0)
       continue;
     tensor::PadOp hoistedOp;
     SmallVector<GenericOp> transposeOps;
     SmallVector<int64_t> transposeVector =
-        options.paddingTransposeComputationFunction(opOperand);
+        en.index() < options.transposePaddings.size()
+            ? options.transposePaddings[en.index()]
+            : SmallVector<int64_t>{};
 
     FailureOr<Value> newResult = hoistPaddingOnTensors(
         padOp, en.value(), transposeVector, hoistedOp, transposeOps);
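The commit message notes that the padding value now travels as a string (for example "0.:f32") and is materialized through the argument parser; the pass plumbing that does this is among the seven changed files but not shown in the diffs above. A minimal sketch of the idea, assuming mlir::parseAttribute (whose header location varies across MLIR versions) and a hypothetical helper name:

#include <string>

#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Parser/Parser.h" // provides parseAttribute in this MLIR era

using namespace mlir;

// Turn pass-option strings such as "0.:f32" into typed attributes suitable
// for LinalgPaddingOptions::paddingValues.
static SmallVector<Attribute> parsePaddingValues(ArrayRef<std::string> strs,
                                                 MLIRContext *context) {
  SmallVector<Attribute> attributes;
  for (const std::string &str : strs)
    // parseAttribute returns a null Attribute on syntax errors; a real
    // implementation would diagnose that instead of storing it.
    attributes.push_back(parseAttribute(str, context));
  return attributes;
}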

mlir/test/Dialect/Linalg/codegen-strategy.mlir
13 additions, 13 deletions

@@ -1,13 +1,13 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=2,4,8 vectorize vectorize-contraction-to=matrixintrinsics unroll-vector-transfers=true" -split-input-file | FileCheck %s --check-prefix=CHECK-INTRINSIC
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 promote promote-full-tile-pad register-tile-sizes=2,4,8 vectorize vectorize-contraction-to=outerproduct split-transfers=true unroll-vector-transfers=false" -split-input-file | FileCheck %s --check-prefix=CHECK-OUTER
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 tile-interchange=1,2,0 generalize iterator-interchange=0,2,1" -split-input-file | FileCheck %s --check-prefix=CHECK-INTERCHANGE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad padding-values=0.:f32,0.:f32,0.:f32 pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad padding-values=0.:f32,0.:f32,0.:f32 vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad padding-values=0.:f32,0.:f32,0.:f32 decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
 
 // CHECK-INTRINSIC: func @matmul(
 // CHECK-OUTER: func @matmul(
-func @matmul(%arg0: memref<72x72xf32>, %arg1: memref<72x72xf32>, %arg2: memref<72x72xf32>) {
+func.func @matmul(%arg0: memref<72x72xf32>, %arg1: memref<72x72xf32>, %arg2: memref<72x72xf32>) {
 
   // Check the matrix intrinsic lowering is triggered.
   // CHECK-INTRINSIC: vector.matrix_multiply

@@ -17,13 +17,13 @@ func @matmul(%arg0: memref<72x72xf32>, %arg1: memref<72x72xf32>, %arg2: memref<7
   // Check the outer product lowering is triggered.
   // CHECK-OUTER: vector.outerproduct {{.*}} : vector<2xf32>, vector<4xf32>
   linalg.matmul ins(%arg0, %arg1: memref<72x72xf32>, memref<72x72xf32>) outs(%arg2: memref<72x72xf32>)
-  return
+  func.return
 }
 
 // -----
 
 // CHECK-INTERCHANGE: func @matmul(
-func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
+func.func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
   // CHECK-INTERCHANGE-DAG: %[[C16:.*]] = arith.constant 16
   // CHECK-INTERCHANGE-DAG: %[[C32:.*]] = arith.constant 32
   // CHECK-INTERCHANGE-DAG: %[[C64:.*]] = arith.constant 64

@@ -37,15 +37,15 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7
   // CHECK-INTERCHANGE: linalg.generic
   // CHECK-INTERCHANGE-SAME: iterator_types = ["parallel", "reduction", "parallel"]
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%arg2: tensor<72x72xf32>) -> tensor<72x72xf32>
-  return %0 : tensor<72x72xf32>
+  func.return %0 : tensor<72x72xf32>
 }
 
 // -----
 
 // CHECK-PAD-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 72, 16)>
 
 // CHECK-PAD: func @matmul(
-func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
+func.func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
 
   // Check the padding of the input operands has been hoisted out of the tile loop nest.
   // CHECK-PAD-COUNT=2: tensor.pad %{{.*}} nofold

@@ -56,13 +56,13 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7
   // CHECK-PAD-COUNT=2: scf.for
   // CHECK-PAD: linalg.matmul
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%arg2: tensor<72x72xf32>) -> tensor<72x72xf32>
-  return %0 : tensor<72x72xf32>
+  func.return %0 : tensor<72x72xf32>
 }
 
 // -----
 
 // CHECK-FUSE: func @matmul(
-func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
+func.func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
 
   // Check the padding and vectorization applies to the fill operation due to the empty anchor op string.
   // CHECK-FUSE: %[[CST:.*]] = arith.constant dense<0.000000e+00>

@@ -73,13 +73,13 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7
   // Check the matmul is padded and vectorized despite the empty anchor op string.
   // CHECK-FUSE: vector.outerproduct
   %1 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%0: tensor<72x72xf32>) -> tensor<72x72xf32>
-  return %1 : tensor<72x72xf32>
+  func.return %1 : tensor<72x72xf32>
 }
 
 // -----
 
 // CHECK-DECOMP: func @conv(
-func @conv(%arg0: tensor<8x18x17x32xf32>, %arg1: tensor<3x3x32x64xf32>, %arg2: tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> {
+func.func @conv(%arg0: tensor<8x18x17x32xf32>, %arg1: tensor<3x3x32x64xf32>, %arg2: tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32>

@@ -88,5 +88,5 @@ func @conv(%arg0: tensor<8x18x17x32xf32>, %arg1: tensor<3x3x32x64xf32>, %arg2: t
   // CHECK-DECOMP: vector.outerproduct
   // CHECK-DECOMP: vector.transfer_write {{.*}}: vector<1x8x32xf32>, tensor<1x1x?x32xf32>
   %1 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<8x18x17x32xf32>, tensor<3x3x32x64xf32>) outs(%0 : tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32>
-  return %1 : tensor<8x16x15x64xf32>
+  func.return %1 : tensor<8x16x15x64xf32>
 }
