
Commit c51c87f

submit the llvm#3902 to local repo (llvm#5)
* Decompose lstm and gru.
* Add tests and update xfail_sets.py
* Rebase main
* Fix casting for arith.cmpi operands to be of same type.
1 parent 95674e2 commit c51c87f


5 files changed: +227 −32 lines changed


lib/Conversion/TorchToLinalg/IndirectDataMovement.cpp

Lines changed: 19 additions & 3 deletions
@@ -417,6 +417,21 @@ class ConvertAtenEmbeddingBagPaddingIdxOp
 };
 } // namespace

+static Value wrapIndicesAroundMax(OpBuilder &b, Location loc, Value index,
+                                  Value input, int64_t dim) {
+  // Performs the operation index = index % maxIndex to wrap index around
+  // maxIndex.
+  Value maxIndexValue = getDimOp(b, loc, input, dim);
+  maxIndexValue =
+      b.createOrFold<arith::IndexCastOp>(loc, index.getType(), maxIndexValue);
+  Value isBeyondMaxIndices = b.createOrFold<arith::CmpIOp>(
+      loc, arith::CmpIPredicate::sge, index, maxIndexValue);
+  Value wrappedIndices =
+      b.createOrFold<arith::RemSIOp>(loc, index, maxIndexValue);
+  return b.createOrFold<arith::SelectOp>(loc, isBeyondMaxIndices,
+                                         wrappedIndices, index);
+}
+
 namespace {
 // Let's say we have an input tensor: initialized with some random values of
 // size [4, 5, 6]. An index tensor (always 1-d): [0, 2] of size [2], and an

@@ -478,16 +493,17 @@ class ConvertAtenIndexSelectOp : public OpConversionPattern<AtenIndexSelectOp> {

     auto indexingMaps = AffineMap::inferFromExprList({indicesExpr, resultExpr},
                                                      rewriter.getContext());
-
     Value finalRes =
         rewriter
            .create<linalg::GenericOp>(
                loc, initTensor.getType(), ValueRange{indices}, initTensor,
                /*indexingMaps=*/indexingMaps,
                /*iteratorTypes=*/iteratorTypes,
                [&](OpBuilder &b, Location loc, ValueRange args) {
-                 Value index = rewriter.create<arith::IndexCastOp>(
-                     loc, rewriter.getIndexType(), args[0]);
+                 Value index =
+                     wrapIndicesAroundMax(b, loc, args[0], input, dimInt);
+                 index = rewriter.create<arith::IndexCastOp>(
+                     loc, rewriter.getIndexType(), index);
                  SmallVector<Value> indexTarget;
                  for (unsigned i = 0; i < inputRank; i++)
                    indexTarget.push_back(b.create<linalg::IndexOp>(loc, i));
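For intuition, the new linalg-path helper corresponds to the following Python sketch (illustrative only, not part of the commit; plain ints stand in for the MLIR Values). The select is the important detail: the remainder replaces the index only when it is at or beyond the dimension size, so in-range and negative indices pass through unchanged, and on the selected branch Python's % agrees with arith.remsi.

# Python sketch of wrapIndicesAroundMax (linalg path); each line names
# the arith op it stands in for.
def wrap_indices_around_max(index: int, max_index: int) -> int:
    is_beyond_max = index >= max_index           # arith.cmpi sge
    wrapped = index % max_index                  # arith.remsi
    return wrapped if is_beyond_max else index   # arith.select

assert wrap_indices_around_max(7, 5) == 2  # out-of-range index wraps
assert wrap_indices_around_max(3, 5) == 3  # in-range index is untouched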

lib/Conversion/TorchToTosa/TorchToTosa.cpp

Lines changed: 42 additions & 0 deletions
@@ -4223,6 +4223,42 @@ LogicalResult ConvertAtenOp<AtenGatherOp>::matchAndRewrite(
   return success();
 }

+Value wrapIndicesAroundMax(Value index, int maxIndex, Operation *op,
+                           ConversionPatternRewriter &rewriter) {
+  // Performs index = index % maxIndex to wrap index around maxIndex; TOSA
+  // has no remainder op, so compute index - (index / maxIndex) * maxIndex.
+
+  auto maxIndexValue =
+      tosa::getConstTensor<int32_t>(rewriter, op, maxIndex, {}).value();
+  auto maxIndexValueMinusOne =
+      tosa::getConstTensor<int32_t>(rewriter, op, maxIndex - 1, {}).value();
+
+  auto indexType = dyn_cast<RankedTensorType>(index.getType());
+  auto boolType = indexType.clone(rewriter.getIntegerType(1));
+
+  auto isBeyondMaxIndices = tosa::CreateOpAndInfer<tosa::GreaterOp>(
+      rewriter, op->getLoc(), boolType, index, maxIndexValueMinusOne);
+  auto wrappedBeyondMaxIndicesQuotient =
+      tosa::CreateOpAndInfer<tosa::IntDivOp>(rewriter, op->getLoc(), indexType,
+                                             index, maxIndexValue)
+          .getResult();
+  auto wrappedBeyondMaxIndicesQuotientTimesIndices =
+      tosa::createMulOpAndCast(rewriter, op, indexType,
+                               wrappedBeyondMaxIndicesQuotient,
+                               maxIndexValue,
+                               /*shift=*/0)
+          .getResult();
+  auto wrappedBeyondMaxIndices =
+      tosa::CreateOpAndInfer<tosa::SubOp>(
+          rewriter, op->getLoc(), indexType, index,
+          wrappedBeyondMaxIndicesQuotientTimesIndices)
+          .getResult();
+
+  return tosa::CreateOpAndInfer<tosa::SelectOp>(rewriter, op->getLoc(),
+                                                indexType, isBeyondMaxIndices,
+                                                wrappedBeyondMaxIndices, index);
+}
+
 template <>
 LogicalResult ConvertAtenOp<AtenIndexSelectOp>::matchAndRewrite(
     AtenIndexSelectOp op, OpAdaptor adaptor,
@@ -4271,6 +4307,10 @@ LogicalResult ConvertAtenOp<AtenIndexSelectOp>::matchAndRewrite(
           .value();
   }

+  int64_t selfNumElems = std::accumulate(inputShape.begin(), inputShape.end(),
+                                         1, std::multiplies<int64_t>());
+  index = wrapIndicesAroundMax(index, selfNumElems, op, rewriter);
+
   // Get positive dim
   int64_t dim;
   if (!matchPattern(op.getDim(), m_TorchConstantInt(&dim)))

@@ -7704,10 +7744,12 @@ LogicalResult ConvertAtenOp<AtenAsStridedOp>::matchAndRewrite(
     // coord_i_n * stride[n]
     int32_t index = offset;
     int64_t coordFinder = i;
+
     for (int64_t dim = 0; dim < outputRank; dim++) {
       int64_t indexCoord = coordFinder % outputSize[outputRank - dim - 1];
       index += indexCoord * stride[outputRank - dim - 1];
       coordFinder /= outputSize[outputRank - dim - 1];
+      index = (index % selfNumElems);
     }
     targetIndicesVec.push_back(index);
   }
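TOSA offers integer division but no remainder op, so the helper above rebuilds index % maxIndex from the identity i mod m = i - (i / m) * m, guarded by a select so that only indices past maxIndex - 1 are rewritten. A minimal NumPy sketch of the same element-wise computation (illustrative only; the function name mirrors the C++ helper):

import numpy as np

# NumPy sketch of the TOSA helper; each line names the TOSA op it
# stands in for.
def wrap_indices_around_max(index: np.ndarray, max_index: int) -> np.ndarray:
    is_beyond_max = index > max_index - 1             # tosa.greater
    quotient = index // max_index                     # tosa.int_div
    wrapped = index - quotient * max_index            # tosa.mul, tosa.sub
    return np.where(is_beyond_max, wrapped, index)    # tosa.select

print(wrap_indices_around_max(np.array([3, 360, 719]), 360))  # [  3   0 359]

The AtenAsStridedOp change applies the same wrap incrementally: because (a + b) mod m = ((a mod m) + b) mod m, taking the modulo inside the loop after each stride contribution yields the same indices as a single final modulo while keeping the int32 accumulator bounded.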

projects/pt1/e2e_testing/xfail_sets.py

Lines changed: 3 additions & 0 deletions
@@ -497,6 +497,7 @@
     "SplitTensorNegativeDimModule_basic",
     "SplitWithSizesListUnpackModule_basic",
     "SplitWithSizes_Module_basic",
+    "AsStridedWithOffsetModule_basic",
     "AdaptiveAvgPool1dGeneralDynamic_basic",
     "AdaptiveAvgPool1dStaticEvenMultiple_basic",
     "AdaptiveAvgPool1dStaticLargerOutput_basic",

@@ -930,6 +931,7 @@
     "SplitTensorNegativeDimModule_basic",
     "SplitWithSizesListUnpackModule_basic",
     "SplitWithSizes_Module_basic",
+    "AsStridedWithOffsetModule_basic",
     "Unfold_Module_basic",
     "Unfold_Module_Rank_4",
     "Unfold_Module_Rank_Zero_basic",

@@ -1846,6 +1848,7 @@
     "AdaptiveAvgPool2dNonUnitOutputSizeStaticModule_basic",
     "AdaptiveAvgPool2dOutputSizeDivisibleByInputStaticModule_basic",
     "AdaptiveAvgPool2dUnitOutputSizeStaticModule_basic",
+    "AsStridedWithOffsetModule_basic",
     "ElementwiseAtenLogicalNotOpPromoteModule_basic",
     "ElementwiseCosIntModule_basic",
     "ElementwiseReciprocalIntModule_basic",

projects/pt1/python/torch_mlir_e2e_test/test_suite/slice_like.py

Lines changed: 29 additions & 0 deletions
@@ -1144,3 +1144,32 @@ def forward(self, x):
 @register_test_case(module_factory=lambda: TensorSplitSections_ListUnpackModule())
 def TensorSplitSections_ListUnpackModule_basic(module, tu: TestUtils):
     module.forward(tu.rand(2, 5))
+
+
+# ==============================================================================
+
+
+class AsStridedWithOffsetModule(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args(
+        [
+            None,
+            ([2, 6, 60], torch.float32, True),
+        ]
+    )
+    def forward(self, x):
+        output_size = [6, 20]
+        stride = [60, 1]
+        slice = torch.ops.aten.slice.Tensor(x, 0, 1, 2)
+        squeeze = torch.ops.aten.squeeze.dim(slice, 0)
+        return torch.ops.aten.as_strided(
+            squeeze, size=output_size, stride=stride, storage_offset=360
+        )
+
+
+@register_test_case(module_factory=lambda: AsStridedWithOffsetModule())
+def AsStridedWithOffsetModule_basic(module, tu: TestUtils):
+    module.forward(torch.rand(2, 6, 60))
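The new test pins down the offset-wrapping path: after the slice and squeeze the view holds 6 * 60 = 360 elements, so storage_offset=360 points exactly one full tensor past the view's start. A small sketch of the index arithmetic the TOSA AtenAsStridedOp lowering performs for these shapes (illustrative only, mirroring the C++ loop above):

# Sketch of the target indices computed for AsStridedWithOffsetModule.
output_size = [6, 20]
stride = [60, 1]
offset = 360
self_num_elems = 6 * 60  # elements in the squeezed [6, 60] view

target_indices = []
for i in range(output_size[0] * output_size[1]):
    index, coord_finder = offset, i
    for dim in range(len(output_size) - 1, -1, -1):  # innermost dim first
        index += (coord_finder % output_size[dim]) * stride[dim]
        coord_finder //= output_size[dim]
        index %= self_num_elems  # the new in-loop wrap
    target_indices.append(index)

print(target_indices[:5])  # [0, 1, 2, 3, 4]
print(target_indices[-1])  # 319, i.e. element (5, 19) of the view

Every index wraps back by exactly one multiple of self_num_elems, so output element (r, c) reads element 60*r + c of the squeezed view. That appears to match eager PyTorch, where storage_offset indexes the underlying storage and the squeezed view itself begins at offset 360 of x.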
