Commit 45998aa (parent: 0f796b1)

Lower from tm_tensor to linalg in tosa_linalg pipeline.

6 files changed: +106 −26 lines
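
For reference, the new pipeline test added at the end of this commit exercises the change end to end; the same pipeline can be run by hand, mirroring that test's RUN line (input.mlir here is a placeholder file name):

    torch-mlir-opt -pass-pipeline='builtin.module(torch-backend-to-tosa-linalg-backend-pipeline)' input.mlir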

lib/Conversion/TorchToTosaLinalg/TorchToTosaLinalg.cpp

Lines changed: 7 additions & 3 deletions
@@ -60,8 +60,6 @@ class ConvertTorchToTosaLinalg
                            tosa::TosaDialect, tensor::TensorDialect,
                            arith::ArithDialect, complex::ComplexDialect>();

-    target.addIllegalDialect<Torch::TorchDialect>();
-
     target.addLegalOp<TorchConversion::GetNextSeedOp>();
     torch::populateTorchToTosaConversionLegalOps(target);

@@ -71,7 +69,13 @@ class ConvertTorchToTosaLinalg

     RewritePatternSet patterns(context);

-    torch::populateTorchToTosaConversionPatterns(typeConverter, patterns);
+    auto illegalOps = populateTorchToTosaConversionPatternsAndIllegalOps(
+        typeConverter, patterns);
+
+    // for (auto op : illegalOps) {
+    //   target.addIllegalOp(OperationName(op, context));
+    // }
+
     torch::populateTorchToLinalgOnTensorsPatternsAndLegality(typeConverter,
                                                              patterns, target);

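The new helper both registers the TorchToTosa patterns and reports which Torch ops they cover, presumably so legality can be driven per-op by the pattern sets rather than by the blanket addIllegalDialect<Torch::TorchDialect>() call dropped above. A sketch of what the commented-out loop would do once enabled, using only the calls already visible at this call site (the helper's exact return type is an assumption; the authoritative declaration lives in the TorchToTosa conversion header):

    // Assumed: the helper returns an iterable of op-name strings for the
    // Torch ops it registered TOSA patterns for.
    auto illegalOps = populateTorchToTosaConversionPatternsAndIllegalOps(
        typeConverter, patterns);
    for (auto opName : illegalOps)
      target.addIllegalOp(OperationName(opName, context)); // mark covered ops illegal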
lib/Dialect/TorchConversion/Transforms/Passes.cpp

Lines changed: 11 additions & 0 deletions
@@ -12,6 +12,7 @@
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/Passes.h"
+#include "torch-mlir-dialects/Dialect/TMTensor/Transforms/Passes.h"
 #include "torch-mlir/Conversion/TorchConversionToMLProgram/TorchConversionToMLProgram.h"
 #include "torch-mlir/Conversion/TorchToArith/TorchToArith.h"
 #include "torch-mlir/Conversion/TorchToLinalg/TorchToLinalg.h"
@@ -192,6 +193,16 @@ void TorchConversion::createTorchBackendToTosaLinalgBackendPipeline(
   // The resolution of `dim` ops tends to create identical ops. CSE them.
   pm.addNestedPass<func::FuncOp>(createCSEPass());

+  // The `tm-tensor-to-loops` pass can convert TMTensor ops to Linalg and
+  // other MLIR core dialects only when the operands of the TMTensor ops have
+  // `memref` type, so run the `tm-tensor-bufferize` pass before running
+  // `tm-tensor-to-loops`. Unfortunately, we have to lower to `memref` types
+  // this early because of this `tm-tensor-to-loops` limitation. Ideally, we
+  // would keep `tensor` types at this stage and defer lowering to `memref`
+  // types as late as possible.
+  pm.addNestedPass<func::FuncOp>(TMTensor::createTMTensorBufferizePass());
+  pm.addNestedPass<func::FuncOp>(TMTensor::createTMTensorToLoopsPass());
+
   // Finish the type conversion from `torch` types to the types of the
   // TOSA backend contract.
   pm.addPass(TorchConversion::createFuncBackendTypeConversionPass());
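
The net effect on the pipeline, expressed in MLIR's textual pass-pipeline syntax (a sketch assembled from the pass names the comment above mentions; the real pipeline wraps many more passes around this fragment):

    builtin.module(func.func(tm-tensor-bufferize,tm-tensor-to-loops))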

lib/Dialect/TorchConversion/Transforms/VerifyTosaLinalgBackendContract.cpp

Lines changed: 14 additions & 6 deletions
@@ -10,18 +10,19 @@
 #include "PassDetail.h"

 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/MLProgram/IR/MLProgram.h"
 #include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "torch-mlir-dialects/Dialect/TMTensor/IR/TMTensorDialect.h"
 #include "torch-mlir/Dialect/TorchConversion/IR/TorchConversionOps.h"
 #include "torch-mlir/Dialect/TorchConversion/Transforms/Passes.h"

@@ -30,7 +31,6 @@
 using namespace mlir;
 using namespace mlir::torch;
 using namespace mlir::torch::TorchConversion;
-using namespace TMTensor;

 namespace {
 class VerifyTosaLinalgBackendContractPass
@@ -40,8 +40,13 @@ class VerifyTosaLinalgBackendContractPass
     MLIRContext *context = &getContext();
     auto module = getOperation();
     TypeConverter converter;
-    converter.addConversion([](RankedTensorType type) -> Type {
-      if (BaseMemRefType::isValidElementType(type.getElementType()))
+    converter.addConversion([](Type type) -> Type {
+      auto elemTy = type;
+      if (isa<TensorType>(type))
+        elemTy = cast<TensorType>(type).getElementType();
+      if (isa<MemRefType>(type))
+        elemTy = cast<MemRefType>(type).getElementType();
+      if (BaseMemRefType::isValidElementType(elemTy))
         return type;
       return nullptr;
     });
@@ -72,6 +77,8 @@ class VerifyTosaLinalgBackendContractPass
     target.addDynamicallyLegalDialect<func::FuncDialect>(isLegalScalarOp);
     target.addDynamicallyLegalDialect<math::MathDialect>(isLegalScalarOp);
     target.addDynamicallyLegalDialect<arith::ArithDialect>(isLegalScalarOp);
+    target.addDynamicallyLegalDialect<bufferization::BufferizationDialect>(
+        opHasLegalTypes);
     target.addDynamicallyLegalDialect<complex::ComplexDialect>(isLegalScalarOp);

     // Tensor operations should go through linalg and the tensor dialect.
@@ -83,8 +90,8 @@ class VerifyTosaLinalgBackendContractPass
     target.addDynamicallyLegalDialect<tosa::TosaDialect>(opHasLegalTypes);
     target.addDynamicallyLegalDialect<affine::AffineDialect>(opHasLegalTypes);
     target.addDynamicallyLegalDialect<cf::ControlFlowDialect>(opHasLegalTypes);
-    target.addDynamicallyLegalDialect<TMTensorDialect>(opHasLegalTypes);
     target.addDynamicallyLegalDialect<scf::SCFDialect>(opHasLegalTypes);
+    target.addDynamicallyLegalDialect<memref::MemRefDialect>(opHasLegalTypes);
     target.addDynamicallyLegalDialect<ml_program::MLProgramDialect>(
         opHasLegalTypes);

@@ -97,7 +104,8 @@ class VerifyTosaLinalgBackendContractPass
       // We avoid `module.emitError()` so that mlir-print-op-on-diagnostics
       // doesn't unnecessarily spew out the entire module.
      emitError(module.getLoc())
-          << "Module does not conform to the linalg-on-tensors backend "
+          << "Module does not conform to the "
+             "torch-backend-to-tosa-linalg-backend "
             "contract. "
             "See dialect conversion legality information above.";
      return signalPassFailure();
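
Not part of this commit, but a possible tightening: TensorType and MemRefType both implement the ShapedType interface, so the two isa/cast pairs in the converter above could collapse into one dyn_cast. A sketch with the same accept/reject behavior for the types this pass sees:

    converter.addConversion([](Type type) -> Type {
      // ShapedType covers tensors and memrefs (and vectors), so a single
      // dyn_cast replaces the two isa/cast pairs.
      Type elemTy = type;
      if (auto shapedTy = dyn_cast<ShapedType>(type))
        elemTy = shapedTy.getElementType();
      if (BaseMemRefType::isValidElementType(elemTy))
        return type;
      return nullptr;
    });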

test/Conversion/TorchToTosa/torch-backend-to-tosa-linalg-backend-pipeline.mlir

Lines changed: 0 additions & 17 deletions
This file was deleted.

New file

Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
+// RUN: torch-mlir-opt <%s -convert-torch-to-tosa-linalg -split-input-file -verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: func.func @torch.aten.avg_pool2d.divisor_override
+// CHECK: linalg.pooling_nchw_sum
+// CHECK-NOT: torch.aten.avg_pool2d
+func.func @torch.aten.avg_pool2d.divisor_override(%arg0: !torch.vtensor<[1,192,35,35],f32>) -> !torch.vtensor<[1,192,35,35],f32> {
+  %int0 = torch.constant.int 0
+  %int1 = torch.constant.int 1
+  %int3 = torch.constant.int 3
+  %false = torch.constant.bool false
+  %count_include_pad = torch.constant.bool false
+  %divisor_override = torch.constant.int 9
+
+  %0 = torch.prim.ListConstruct %int3, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
+  %1 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
+  %2 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
+  %3 = torch.aten.avg_pool2d %arg0, %0, %1, %2, %false, %count_include_pad, %divisor_override : !torch.vtensor<[1,192,35,35],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.bool, !torch.int -> !torch.vtensor<[1,192,35,35],f32>
+  return %3 : !torch.vtensor<[1,192,35,35],f32>
+}
New file

Lines changed: 55 additions & 0 deletions

@@ -0,0 +1,55 @@
+// RUN: torch-mlir-opt -pass-pipeline='builtin.module(torch-backend-to-tosa-linalg-backend-pipeline)' -split-input-file -verify-diagnostics %s | FileCheck %s
+
+// -----
+
+// CHECK-LABEL: func.func @torch.aten.size.int(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x2xf32>) -> i64 {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 2 : index
+// CHECK: cf.assert %[[VAL_0]], "dim must be smaller than inputRank"
+// CHECK: %[[VAL_2:.*]] = tensor.dim %[[ARG0]], %[[VAL_1]] : tensor<4x2xf32>
+// CHECK: %[[VAL_3:.*]] = arith.index_cast %[[VAL_2]] : index to i64
+// CHECK: return %[[VAL_3]] : i64
+func.func @torch.aten.size.int(%arg0: !torch.vtensor<[4,2],f32>) -> !torch.int {
+  %c2 = torch.constant.int 2
+  %0 = torch.aten.size.int %arg0, %c2 : !torch.vtensor<[4,2],f32>, !torch.int -> !torch.int
+  return %0 : !torch.int
+}
+
+// -----
+
+// CHECK-LABEL: func.func @tm_scan(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<1x512xi64>) -> (tensor<1x512xi64>, tensor<1xi64>) {
+// CHECK: %[[VAL_1:.*]] = arith.constant 512 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_4:.*]] = memref.alloc() : memref<1x512xi64>
+// CHECK: %[[VAL_5:.*]] = bufferization.to_tensor %[[VAL_4]] : memref<1x512xi64>
+// CHECK: %[[VAL_6:.*]] = memref.alloc() : memref<1xi64>
+// CHECK: %[[VAL_7:.*]] = bufferization.to_tensor %[[VAL_6]] : memref<1xi64>
+// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_1]] step %[[VAL_2]] {
+// CHECK: %[[VAL_9:.*]] = arith.cmpi eq, %[[VAL_8]], %[[VAL_3]] : index
+// CHECK: scf.if %[[VAL_9]] {
+// CHECK: %[[VAL_10:.*]] = tensor.extract %[[ARG0]]{{\[}}%[[VAL_3]], %[[VAL_8]]] : tensor<1x512xi64>
+// CHECK: memref.store %[[VAL_10]], %[[VAL_4]]{{\[}}%[[VAL_3]], %[[VAL_8]]] : memref<1x512xi64>
+// CHECK: } else {
+// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_8]], %[[VAL_2]] : index
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]], %[[VAL_11]]] : memref<1x512xi64>
+// CHECK: %[[VAL_13:.*]] = tensor.extract %[[ARG0]]{{\[}}%[[VAL_3]], %[[VAL_8]]] : tensor<1x512xi64>
+// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_13]] : i64
+// CHECK: memref.store %[[VAL_14]], %[[VAL_4]]{{\[}}%[[VAL_3]], %[[VAL_8]]] : memref<1x512xi64>
+// CHECK: memref.store %[[VAL_14]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<1xi64>
+// CHECK: }
+// CHECK: }
+// CHECK: return %[[VAL_5]], %[[VAL_7]] : tensor<1x512xi64>, tensor<1xi64>
+// CHECK: }
+func.func @tm_scan(%arg0: tensor<1x512xi64>) -> (tensor<1x512xi64>, tensor<1xi64>) {
+  %0 = tensor.empty() : tensor<1x512xi64>
+  %1 = tensor.empty() : tensor<1xi64>
+  %2:2 = tm_tensor.scan dimension(1) inclusive(true) ins(%arg0 : tensor<1x512xi64>) outs(%0, %1 : tensor<1x512xi64>, tensor<1xi64>) {
+  ^bb0(%arg1: i64, %arg2: i64):
+    %3 = arith.addi %arg1, %arg2 : i64
+    tm_tensor.yield %3 : i64
+  } -> tensor<1x512xi64>, tensor<1xi64>
+  return %2#0, %2#1 : tensor<1x512xi64>, tensor<1xi64>
+}
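
Both new tests run under the project's lit suite; assuming a standard torch-mlir CMake setup (the target name is the usual one, not stated in this commit), something like the following picks them up:

    cmake --build build --target check-torch-mlir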
