Skip to content

Commit 8ccd0c6

Browse files
authored
Add quantize_and_export_to_edge and quantize_and_export_to_executorch
Differential Revision: D73397438 Pull Request resolved: #10379
1 parent fa89efa commit 8ccd0c6

File tree

3 files changed

+43
-8
lines changed

3 files changed

+43
-8
lines changed

backends/cadence/aot/compiler.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@
4747
from .utils import print_ops_info
4848

4949

50+
default_quantizer = CadenceDefaultQuantizer()
51+
52+
53+
# Note: this is not meant as a primary API since it can create inconsistencies
# if the quantizer here is different from the quantizer used to convert. It is
# however useful for unit tests to separate the converted model from the fused
# model, to be able to get reference numerics.
# If this does not apply, please use quantize_and_fuse_pt2 instead.
5058
def prepare_and_convert_pt2(
5159
model: torch.nn.Module,
5260
inputs: tuple[object, ...],
@@ -245,6 +253,28 @@ def export_to_edge(
245253
return edge_prog_manager
246254

247255

256+
def quantize_and_export_to_edge(
    model: torch.nn.Module,
    inputs: tuple[object, ...],
    quantizer: Optional[CadenceQuantizer] = None,
    dump_graphs: bool = False,
    constant_methods: Optional[dict[str, object]] = None,
) -> EdgeProgramManager:
    """Quantize *model* and lower it to an edge dialect program in one step.

    Convenience wrapper that runs the PT2 quantization flow
    (``quantize_pt2``) and then lowers the quantized module with
    ``export_to_edge``, so callers do not have to thread the intermediate
    quantized module through themselves.

    Args:
        model: The eager-mode module to quantize and export.
        inputs: Example inputs used for both quantization calibration/tracing
            and export.
        quantizer: Quantizer to use; ``quantize_pt2`` picks its default when
            ``None`` is passed.
        dump_graphs: Forwarded to both ``quantize_pt2`` and
            ``export_to_edge`` to print intermediate graphs for debugging.
        constant_methods: Forwarded to ``export_to_edge``.

    Returns:
        The ``EdgeProgramManager`` for the quantized, edge-lowered program.
    """
    # Feed the quantized module straight into export; no other callers need
    # the intermediate, so it is not kept in a local.
    return export_to_edge(
        quantize_pt2(model, inputs, quantizer=quantizer, dump_graphs=dump_graphs),
        inputs,
        dump_graphs=dump_graphs,
        constant_methods=constant_methods,
    )
276+
277+
248278
def export_to_cadence(
249279
model: torch.nn.Module,
250280
inputs: tuple[object, ...],

backends/cadence/aot/tests/test_fusion_ops_passes.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
import executorch.backends.cadence.aot.ops_registrations # noqa
1313
import torch
1414
from executorch.backends.cadence.aot import compiler
15-
from executorch.backends.cadence.aot.compiler import export_to_edge, quantize_pt2
15+
from executorch.backends.cadence.aot.compiler import (
16+
export_to_edge,
17+
quantize_and_export_to_edge,
18+
)
1619
from executorch.backends.cadence.aot.fuse_ops import (
1720
FuseFullThenReshapePass,
1821
FuseMulIntoDequantPass,
@@ -414,9 +417,10 @@ def forward(self, x):
414417

415418
inputs = torch.randn(2, 12, 1, 6)
416419
model = M()
417-
quantized_model = quantize_pt2(model, (inputs,))
418420
graph_module = (
419-
export_to_edge(quantized_model, (inputs,)).exported_program().graph_module
421+
quantize_and_export_to_edge(model, (inputs,))
422+
.exported_program()
423+
.graph_module
420424
)
421425
graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
422426
self.check_op_counts(

backends/cadence/aot/tests/test_replace_ops_passes.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
import torch
1414
import torch.nn.functional as F
1515
from executorch.backends.cadence.aot import compiler
16-
from executorch.backends.cadence.aot.compiler import export_to_edge, quantize_pt2
16+
from executorch.backends.cadence.aot.compiler import (
17+
export_to_edge,
18+
quantize_and_export_to_edge,
19+
)
1720
from executorch.backends.cadence.aot.graph_builder import (
1821
GraphBuilder,
1922
single_op_builder,
@@ -851,9 +854,8 @@ def test_replace_single_element_tensor_arguments_from_full_op_with_scalar(
851854

852855
inputs = (x,)
853856
model = torch.nn.Linear(in_features=in_features, out_features=out_features)
854-
quantized_model = quantize_pt2(model, inputs)
855857

856-
exported_program = export_to_edge(quantized_model, inputs).exported_program()
858+
exported_program = quantize_and_export_to_edge(model, inputs).exported_program()
857859

858860
# By default, the quantized linear op should have constant scalar attributes.
859861
self.assertTargetCountsEqual(
@@ -898,9 +900,8 @@ def test_replace_single_element_tensor_arguments_from_full_op_with_scalar_tuple_
898900

899901
inputs = (x,)
900902
model = torch.nn.Linear(in_features=in_features, out_features=out_features)
901-
quantized_model = quantize_pt2(model, inputs)
902903

903-
exported_program = export_to_edge(quantized_model, inputs).exported_program()
904+
exported_program = quantize_and_export_to_edge(model, inputs).exported_program()
904905

905906
# By default, the quantized linear op should have constant scalar attributes.
906907
self.assertTargetCountsEqual(

0 commit comments

Comments
 (0)