Commit 479ce0e

[TOSA] TOSA updates for LLVM hash b3b0070
1: [TOSA] Update rescale input_/output_zp and double_round attribute
   * Update tosa.rescale input_/output_zp as inputs according to TOSA 1.0
   * Update the double_round bool attribute to rounding_mode, in alignment with TOSA 1.0. rounding_mode supports "SINGLE_ROUND", "INEXACT_ROUND", and "DOUBLE_ROUND". Existing double_round behaviours are mapped as follows:
     - double_round = true  -> rounding_mode = "DOUBLE_ROUND"
     - double_round = false -> rounding_mode = "SINGLE_ROUND"

2: [TOSA] Update tosa.negate's zero-points to inputs

Update LIT tests and XFAIL sets
1 parent e4a2f86 commit 479ce0e
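As a quick orientation for downstream users of buildRescale, here is a minimal sketch (not part of this commit; the helper name toRoundingMode is hypothetical) of how the legacy double_round flag maps onto the new TOSA 1.0 rounding_mode strings:

```cpp
#include "llvm/ADT/StringRef.h"

// Hypothetical helper illustrating the mapping described in the commit
// message: the old double_round bool becomes a rounding_mode string.
inline llvm::StringRef toRoundingMode(bool double_round) {
  return double_round ? "DOUBLE_ROUND" : "SINGLE_ROUND";
}
```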

File tree

5 files changed: +66 −35 lines changed


include/torch-mlir/Conversion/TorchToTosa/TosaLegalizeUtils.h (+1 −1)

@@ -26,7 +26,7 @@ namespace tosa {
 // rounding mode
 Value buildRescale(PatternRewriter &rewriter, Operation *op,
                    ShapedType output_type, Value input_val, double scale,
-                   int64_t input_zp, int64_t output_zp, bool double_round,
+                   int64_t input_zp, int64_t output_zp, StringRef rounding_mode,
                    bool scale32);

 // Creates TOSA rescale op with int32 output
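For context, a sketch of a call site using the updated signature (variable names are illustrative; the actual call sites appear in the hunks below). The slot that used to take the double_round bool now takes a rounding_mode string:

```cpp
// Illustrative call; the last two arguments are rounding_mode and scale32.
Value rescaled = buildRescale(rewriter, op, output_type, input_val,
                              /*scale=*/output_scale,
                              /*input_zp=*/0, /*output_zp=*/output_zp,
                              /*rounding_mode=*/"SINGLE_ROUND",
                              /*scale32=*/true);
```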

lib/Conversion/TorchToTosa/TosaLegalizeCommon.cpp (+1 −1)

@@ -777,7 +777,7 @@ std::optional<Value> convertReduceOpCommon(
       RankedTensorType output_rescale_type =
           RankedTensorType::get(shape_vec, output_type.getElementType());
       val = buildRescale(rewriter, op, output_rescale_type, val, output_scale,
-                         0, output_zp, false, true);
+                         0, output_zp, "SINGLE_ROUND", true);
     }

     // Optionally squeeze out the reduced axes.

lib/Conversion/TorchToTosa/TosaLegalizeUtils.cpp (+44 −20)

@@ -34,14 +34,15 @@ Value buildRescaleMultiplier(bool scale32, PatternRewriter &rewriter,
 // rounding mode
 Value buildRescale(PatternRewriter &rewriter, Operation *op,
                    ShapedType output_type, Value input_val, double scale,
-                   int64_t input_zp, int64_t output_zp, bool double_round,
+                   int64_t input_zp, int64_t output_zp, StringRef rounding_mode,
                    bool scale32) {
   int32_t multiplier;
   int32_t shift;

   int32_t scale_width = scale32 ? 32 : 16;

-  computeMultiplierAndShift(scale, multiplier, shift, scale_width);
+  if (!computeMultiplierAndShift(scale, multiplier, shift, scale_width))
+    op->emitError("buildRescale: shift must be in the range 2 <= shift <= 62");

   Value multiplier_val =
       buildRescaleMultiplier(scale32, rewriter, op, {multiplier});

@@ -52,11 +53,23 @@ Value buildRescale(PatternRewriter &rewriter, Operation *op,
   bool input_unsigned = input_val.getType().isUnsignedInteger();
   bool output_unsigned = output_type.isUnsignedInteger();

+  // Create input_zp matches the input type and output_zp matches the output
+  // type of RescaleOp
+  const auto input_zp_val = tosa::createZeroPointTensor(
+      rewriter, op->getLoc(), dyn_cast<TensorType>(input_val.getType()),
+      input_zp);
+  if (!input_zp_val.has_value())
+    op->emitError("Failed to create input zero-point tensor for RescaleOp.");
+
+  const auto output_zp_val = tosa::createZeroPointTensor(
+      rewriter, op->getLoc(), output_type, output_zp);
+  if (!output_zp_val.has_value())
+    op->emitError("Failed to create output zero-point tensor for RescaleOp.");
+
   auto rescale_op = CreateOpAndInfer<tosa::RescaleOp>(
       rewriter, op->getLoc(), output_type, input_val, multiplier_val, shift_val,
-      rewriter.getI32IntegerAttr(static_cast<int32_t>(input_zp)),
-      rewriter.getI32IntegerAttr(static_cast<int32_t>(output_zp)),
-      rewriter.getBoolAttr(scale32), rewriter.getBoolAttr(double_round),
+      input_zp_val.value(), output_zp_val.value(),
+      rewriter.getBoolAttr(scale32), rewriter.getStringAttr(rounding_mode),
       rewriter.getBoolAttr(false), rewriter.getBoolAttr(input_unsigned),
       rewriter.getBoolAttr(output_unsigned));

@@ -73,7 +86,7 @@ Value buildRescaleToInt32(PatternRewriter &rewriter, Operation *op,
   auto output_type = input_type.clone(rewriter.getI32Type());

   return buildRescale(rewriter, op, output_type, input_val, input_scale,
-                      input_zp, 0, false, true);
+                      input_zp, 0, "SINGLE_ROUND", true);
 }

 // Creates a TOSA rescale op based on conv2d parameters.

@@ -96,6 +109,16 @@ Value buildRescaleOpConvOutput(PatternRewriter &rewriter, Operation *op,
   bool input_unsigned = input_qtype.isUnsignedInteger();
   bool output_unsigned = output_qtype.isUnsignedInteger();

+  const auto input_zp_val = tosa::createZeroPointTensor(
+      rewriter, op->getLoc(), input_type, static_cast<int64_t>(0));
+  if (!input_zp_val.has_value())
+    op->emitError("Failed to create input zero-point tensor for RescaleOp.");
+
+  const auto output_zp_val = tosa::createZeroPointTensor(
+      rewriter, op->getLoc(), output_type, output_zp);
+  if (!output_zp_val.has_value())
+    op->emitError("Failed to create output zero-point tensor for RescaleOp.");
+
   if (auto weight_per_tensor_qtype =
           dyn_cast<mlir::quant::UniformQuantizedType>(
               weight_type.getElementType())) {

@@ -107,7 +130,11 @@ Value buildRescaleOpConvOutput(PatternRewriter &rewriter, Operation *op,

     double op_tensor_scale = (input_scale * weight_scale) / output_scale;

-    computeMultiplierAndShift(op_tensor_scale, multiplier, shift, scale_width);
+    if (!computeMultiplierAndShift(op_tensor_scale, multiplier, shift,
+                                   scale_width))
+      op->emitError(
+          "buildRescaleOpConvOutput: shift must be in the range 2 <= shift <= "
+          "62");

     Value multiplier_val =
         buildRescaleMultiplier(scale32, rewriter, op, {multiplier});

@@ -117,10 +144,9 @@ Value buildRescaleOpConvOutput(PatternRewriter &rewriter, Operation *op,

     auto rescale_op = CreateOpAndInfer<tosa::RescaleOp>(
         rewriter, op->getLoc(), output_type, conv_val, multiplier_val,
-        shift_val, rewriter.getI32IntegerAttr(0),
-        rewriter.getI32IntegerAttr(output_zp), rewriter.getBoolAttr(scale32),
-        rewriter.getBoolAttr(true), rewriter.getBoolAttr(false),
-        rewriter.getBoolAttr(input_unsigned),
+        shift_val, input_zp_val.value(), output_zp_val.value(),
+        rewriter.getBoolAttr(scale32), rewriter.getStringAttr("DOUBLE_ROUND"),
+        rewriter.getBoolAttr(false), rewriter.getBoolAttr(input_unsigned),
         rewriter.getBoolAttr(output_unsigned));

     return rescale_op.getResult();

@@ -136,17 +162,16 @@ Value buildRescaleOpConvOutput(PatternRewriter &rewriter, Operation *op,
         weight_per_channel_qtype.getScales().begin(),
         weight_per_channel_qtype.getScales().end());

-    int64_t output_zp = output_qtype.getZeroPoint();
-    double output_scale = output_qtype.getScale();
-
     for (double weight_scale : weight_scale_arr) {
       int32_t multiplier;
       int32_t shift;

       double op_channel_scale = (input_scale * weight_scale) / output_scale;

-      computeMultiplierAndShift(op_channel_scale, multiplier, shift,
-                                scale_width);
+      if (!computeMultiplierAndShift(op_channel_scale, multiplier, shift, 32))
+        op->emitError(
+            "buildRescaleOpConvOutput: shift must be in the range 2 <= shift "
+            "<= 62");

       multiplier_arr.push_back(multiplier);
       shift_arr.push_back(static_cast<int8_t>(shift));

@@ -161,10 +186,9 @@ Value buildRescaleOpConvOutput(PatternRewriter &rewriter, Operation *op,

     auto rescale_op = CreateOpAndInfer<tosa::RescaleOp>(
         rewriter, op->getLoc(), output_type, conv_val, multiplier_val,
-        shift_val, rewriter.getI32IntegerAttr(0),
-        rewriter.getI32IntegerAttr(output_zp), rewriter.getBoolAttr(scale32),
-        rewriter.getBoolAttr(true), rewriter.getBoolAttr(true),
-        rewriter.getBoolAttr(input_unsigned),
+        shift_val, input_zp_val.value(), output_zp_val.value(),
+        rewriter.getBoolAttr(scale32), rewriter.getStringAttr("DOUBLE_ROUND"),
+        rewriter.getBoolAttr(true), rewriter.getBoolAttr(input_unsigned),
         rewriter.getBoolAttr(output_unsigned));

     return rescale_op.getResult();
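The zero-point handling added throughout this file follows one pattern; here is a condensed sketch (variable names are illustrative; the createZeroPointTensor signature is taken from the hunks above):

```cpp
// Materialize the zero points as small constant tensors and pass them to
// tosa::RescaleOp as operands instead of I32 attributes.
const auto input_zp_val = tosa::createZeroPointTensor(
    rewriter, op->getLoc(), dyn_cast<TensorType>(input_val.getType()), input_zp);
const auto output_zp_val = tosa::createZeroPointTensor(
    rewriter, op->getLoc(), output_type, output_zp);
if (!input_zp_val.has_value() || !output_zp_val.has_value())
  op->emitError("Failed to create zero-point tensor for RescaleOp.");
// input_zp_val.value() / output_zp_val.value() then replace the former
// rewriter.getI32IntegerAttr(...) arguments passed to the RescaleOp builder.
```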

projects/pt1/e2e_testing/xfail_sets.py (+2 −0)

@@ -1717,6 +1717,8 @@
     "ScatterSrcModule_basic",
     "ScatterSrcStaticModule_basic",
     "HBC_basic",
+    # 1D inputs cause generated tosa.negate ops to crash downstream
+    "NllLossModule_1D_basic",
 }

 # Write the TOSA set as a "passing" set as it is very early in development

test/Conversion/TorchToTosa/basic.mlir (+18 −13)

@@ -94,11 +94,14 @@ func.func @torch.aten.exp$basic(%arg0: !torch.vtensor<[?,?],f32>) -> !torch.vten
 // -----

 // CHECK-LABEL: func.func @torch.aten.neg$basic(
-// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor<[?,?],f32>) -> !torch.vtensor<[?,?],f32> {
-// CHECK: %[[ARG_BUILTIN:.*]] = torch_c.to_builtin_tensor %[[ARG]] : !torch.vtensor<[?,?],f32> -> tensor<?x?xf32>
-// CHECK: %[[RESULT_BUILTIN:.*]] = tosa.negate %[[ARG_BUILTIN]] : (tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK: %[[RESULT:.*]] = torch_c.from_builtin_tensor %[[RESULT_BUILTIN]] : tensor<?x?xf32> -> !torch.vtensor<[?,?],f32>
-// CHECK: return %[[RESULT]] : !torch.vtensor<[?,?],f32>
+// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !torch.vtensor<[?,?],f32>) -> !torch.vtensor<[?,?],f32> {
+// CHECK: %[[VAL_1:.*]] = torch_c.to_builtin_tensor %[[VAL_0]] : !torch.vtensor<[?,?],f32> -> tensor<?x?xf32>
+// CHECK: %[[VAL_2:.*]] = "tosa.const"() <{values = dense<0.000000e+00> : tensor<1xf32>}> : () -> tensor<1xf32>
+// CHECK: %[[VAL_3:.*]] = "tosa.const"() <{values = dense<0.000000e+00> : tensor<1xf32>}> : () -> tensor<1xf32>
+// CHECK: %[[VAL_4:.*]] = tosa.negate %[[VAL_1]], %[[VAL_2]], %[[VAL_3]] : (tensor<?x?xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<?x?xf32>
+// CHECK: %[[VAL_5:.*]] = torch_c.from_builtin_tensor %[[VAL_4]] : tensor<?x?xf32> -> !torch.vtensor<[?,?],f32>
+// CHECK: return %[[VAL_5]] : !torch.vtensor<[?,?],f32>
+// CHECK: }
 func.func @torch.aten.neg$basic(%arg0: !torch.vtensor<[?,?],f32>) -> !torch.vtensor<[?,?],f32> {
   %0 = torch.aten.neg %arg0 : !torch.vtensor<[?,?],f32> -> !torch.vtensor<[?,?],f32>
   return %0 : !torch.vtensor<[?,?],f32>

@@ -1555,20 +1558,22 @@ func.func @torch.aten.tril$basic(%arg0: !torch.vtensor<[2,4], si32>) -> !torch.v
 // -----

 // CHECK-LABEL: func.func @torch.aten.min.dim$basic(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<3x2x3xf32>) -> tensor<3x2x1xf32> {
+// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: tensor<3x2x3xf32>) -> tensor<3x2x1xf32> {
 // CHECK: %[[VAL_1:.*]] = torch_c.from_builtin_tensor %[[VAL_0]] : tensor<3x2x3xf32> -> !torch.vtensor<[3,2,3],f32>
 // CHECK: %[[VAL_2:.*]] = torch_c.to_builtin_tensor %[[VAL_1]] : !torch.vtensor<[3,2,3],f32> -> tensor<3x2x3xf32>
 // CHECK: %[[VAL_3:.*]] = torch.constant.bool true
 // CHECK: %[[VAL_4:.*]] = torch.constant.int 2
-// CHECK: %[[VAL_5:.*]] = tosa.const_shape {values = dense<[3, 2]> : tensor<2xindex>} : () -> !tosa.shape<2>
+// CHECK-DAG: %[[VAL_5:.*]] = tosa.const_shape {values = dense<[3, 2]> : tensor<2xindex>} : () -> !tosa.shape<2>
 // CHECK: %[[VAL_6:.*]] = tosa.reduce_min %[[VAL_2]] {axis = 2 : i32} : (tensor<3x2x3xf32>) -> tensor<3x2x1xf32>
 // CHECK: %[[VAL_7:.*]] = torch_c.from_builtin_tensor %[[VAL_6]] : tensor<3x2x1xf32> -> !torch.vtensor<[3,2,1],f32>
-// CHECK: %[[VAL_8:.*]] = tosa.negate %[[VAL_2]] : (tensor<3x2x3xf32>) -> tensor<3x2x3xf32>
-// CHECK: %[[VAL_9:.*]] = tosa.argmax %[[VAL_8]] {axis = 2 : i32} : (tensor<3x2x3xf32>) -> tensor<3x2xi64>
-// CHECK: %[[VAL_10:.*]] = tosa.const_shape {values = dense<[3, 2, 1]> : tensor<3xindex>} : () -> !tosa.shape<3>
-// CHECK: %[[VAL_11:.*]] = tosa.reshape %[[VAL_9]], %[[VAL_10]] : (tensor<3x2xi64>, !tosa.shape<3>) -> tensor<3x2x1xi64>
-// CHECK: %[[VAL_12:.*]] = torch_c.to_builtin_tensor %[[VAL_7]] : !torch.vtensor<[3,2,1],f32> -> tensor<3x2x1xf32>
-// CHECK: return %[[VAL_12]] : tensor<3x2x1xf32>
+// CHECK: %[[VAL_8:.*]] = "tosa.const"() <{values = dense<0.000000e+00> : tensor<1xf32>}> : () -> tensor<1xf32>
+// CHECK: %[[VAL_9:.*]] = "tosa.const"() <{values = dense<0.000000e+00> : tensor<1xf32>}> : () -> tensor<1xf32>
+// CHECK: %[[VAL_10:.*]] = tosa.negate %[[VAL_2]], %[[VAL_8]], %[[VAL_9]] : (tensor<3x2x3xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<3x2x3xf32>
+// CHECK: %[[VAL_11:.*]] = tosa.argmax %[[VAL_10]] {axis = 2 : i32} : (tensor<3x2x3xf32>) -> tensor<3x2xi64>
+// CHECK: %[[VAL_12:.*]] = tosa.const_shape {values = dense<[3, 2, 1]> : tensor<3xindex>} : () -> !tosa.shape<3>
+// CHECK: %[[VAL_13:.*]] = tosa.reshape %[[VAL_11]], %[[VAL_12]] : (tensor<3x2xi64>, !tosa.shape<3>) -> tensor<3x2x1xi64>
+// CHECK: %[[VAL_14:.*]] = torch_c.to_builtin_tensor %[[VAL_7]] : !torch.vtensor<[3,2,1],f32> -> tensor<3x2x1xf32>
+// CHECK: return %[[VAL_14]] : tensor<3x2x1xf32>
 // CHECK: }
 func.func @torch.aten.min.dim$basic(%arg0: tensor<3x2x3xf32>) -> tensor<3x2x1xf32> {
   %0 = torch_c.from_builtin_tensor %arg0 : tensor<3x2x3xf32> -> !torch.vtensor<[3,2,3],f32>
