Commit ddb7f83

Revert "Remove torchao.quantization.prototype" (#1919)
Revert "Remove torchao.quantization.prototype (#1889)" This reverts commit 576cf6b.
1 parent d456ea1 · commit ddb7f83

File tree

10 files changed: +175 −0 lines changed

test/quantization/test_qat.py

Lines changed: 56 additions & 0 deletions
@@ -1133,6 +1133,62 @@ def embedding_forward_4w(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
         baseline_out = embedding_forward_4w(x2, fq_embedding.weight)
         torch.testing.assert_close(baseline_out, fq_out, atol=0, rtol=0)
 
+    @unittest.skipIf(
+        not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower"
+    )
+    def test_qat_prototype_bc(self):
+        """
+        Just to make sure we can import all the old prototype paths.
+        We will remove this test in the near future when we actually break BC.
+        """
+        from torchao.quantization.prototype.qat import (  # noqa: F401, F811, I001
+            disable_4w_fake_quant,
+            disable_8da4w_fake_quant,
+            enable_4w_fake_quant,
+            enable_8da4w_fake_quant,
+            ComposableQATQuantizer,
+            Int8DynActInt4WeightQATLinear,
+            Int4WeightOnlyEmbeddingQATQuantizer,
+            Int4WeightOnlyQATQuantizer,
+            Int8DynActInt4WeightQATQuantizer,
+        )
+        from torchao.quantization.prototype.qat._module_swap_api import (  # noqa: F401, F811
+            disable_4w_fake_quant_module_swap,
+            enable_4w_fake_quant_module_swap,
+            disable_8da4w_fake_quant_module_swap,
+            enable_8da4w_fake_quant_module_swap,
+            Int4WeightOnlyQATQuantizerModuleSwap,
+            Int8DynActInt4WeightQATQuantizerModuleSwap,
+        )
+        from torchao.quantization.prototype.qat.affine_fake_quantized_tensor import (  # noqa: F401, F811
+            AffineFakeQuantizedTensor,
+            to_affine_fake_quantized,
+        )
+        from torchao.quantization.prototype.qat.api import (  # noqa: F401, F811
+            ComposableQATQuantizer,
+            FakeQuantizeConfig,
+        )
+        from torchao.quantization.prototype.qat.embedding import (  # noqa: F401, F811
+            FakeQuantizedEmbedding,
+            Int4WeightOnlyEmbeddingQATQuantizer,
+            Int4WeightOnlyEmbedding,
+            Int4WeightOnlyQATEmbedding,
+        )
+        from torchao.quantization.prototype.qat.fake_quantizer import (  # noqa: F401, F811
+            FakeQuantizer,
+        )
+        from torchao.quantization.prototype.qat.linear import (  # noqa: F401, F811
+            disable_4w_fake_quant,
+            disable_8da4w_fake_quant,
+            enable_4w_fake_quant,
+            enable_8da4w_fake_quant,
+            FakeQuantizedLinear,
+            Int4WeightOnlyQATLinear,
+            Int4WeightOnlyQATQuantizer,
+            Int8DynActInt4WeightQATLinear,
+            Int8DynActInt4WeightQATQuantizer,
+        )
+
     @unittest.skipIf(
         not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch version is 2.4 or lower"
     )
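
Aside: since every restored prototype module below simply re-exports from torchao.quantization.qat, the legacy names are the same objects as the canonical ones, not copies. A minimal sketch of that property, assuming a torchao build that includes this commit:

from torchao.quantization.prototype.qat import (
    Int8DynActInt4WeightQATQuantizer as LegacyQuantizer,
)
from torchao.quantization.qat import (
    Int8DynActInt4WeightQATQuantizer as CanonicalQuantizer,
)

# The shim re-exports, so both paths resolve to the identical class object.
assert LegacyQuantizer is CanonicalQuantizer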

torchao/quantization/prototype/__init__.py

Whitespace-only changes.
torchao/quantization/prototype/qat/README.md

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+Note: QAT has been moved to torchao/quantization/qat.
+This is a legacy folder only for backward compatibility
+and will be removed in the near future.
torchao/quantization/prototype/qat/__init__.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+from torchao.quantization.qat import (
+    ComposableQATQuantizer,
+    Int4WeightOnlyEmbeddingQATQuantizer,
+    Int4WeightOnlyQATQuantizer,
+    Int8DynActInt4WeightQATQuantizer,
+)
+from torchao.quantization.qat.linear import (
+    Int8DynActInt4WeightQATLinear,
+    disable_4w_fake_quant,
+    disable_8da4w_fake_quant,
+    enable_4w_fake_quant,
+    enable_8da4w_fake_quant,
+)
+
+__all__ = [
+    "disable_4w_fake_quant",
+    "disable_8da4w_fake_quant",
+    "enable_4w_fake_quant",
+    "enable_8da4w_fake_quant",
+    "ComposableQATQuantizer",
+    "Int4WeightOnlyQATQuantizer",
+    "Int4WeightOnlyEmbeddingQATQuantizer",
+    "Int8DynActInt4WeightQATQuantizer",
+    "Int8DynActInt4WeightQATLinear",
+]
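
With this shim in place, pre-move user code that composes quantizers through the old namespace keeps working. A hedged sketch of the usual flow (the composition pattern follows the torchao QAT documentation; the toy model and sizes are illustrative):

import torch

from torchao.quantization.prototype.qat import (
    ComposableQATQuantizer,
    Int4WeightOnlyEmbeddingQATQuantizer,
    Int8DynActInt4WeightQATQuantizer,
)

# Toy model (illustrative): an embedding feeding a linear layer.
model = torch.nn.Sequential(
    torch.nn.Embedding(1024, 256),
    torch.nn.Linear(256, 256),
)

quantizer = ComposableQATQuantizer(
    [
        Int8DynActInt4WeightQATQuantizer(),
        Int4WeightOnlyEmbeddingQATQuantizer(),
    ]
)
model = quantizer.prepare(model)  # swap in fake-quantized modules for training
# ... QAT fine-tuning loop would go here ...
model = quantizer.convert(model)  # swap in actually-quantized modules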
torchao/quantization/prototype/qat/_module_swap_api.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+# For backward compatibility only
+# These will be removed in the future
+
+from torchao.quantization.qat.linear import (
+    Int4WeightOnlyQATQuantizer as Int4WeightOnlyQATQuantizerModuleSwap,
+)
+from torchao.quantization.qat.linear import (
+    Int8DynActInt4WeightQATQuantizer as Int8DynActInt4WeightQATQuantizerModuleSwap,
+)
+from torchao.quantization.qat.linear import (
+    disable_4w_fake_quant as disable_4w_fake_quant_module_swap,
+)
+from torchao.quantization.qat.linear import (
+    disable_8da4w_fake_quant as disable_8da4w_fake_quant_module_swap,
+)
+from torchao.quantization.qat.linear import (
+    enable_4w_fake_quant as enable_4w_fake_quant_module_swap,
+)
+from torchao.quantization.qat.linear import (
+    enable_8da4w_fake_quant as enable_8da4w_fake_quant_module_swap,
+)
+
+__all__ = [
+    "Int8DynActInt4WeightQATQuantizerModuleSwap",
+    "Int4WeightOnlyQATQuantizerModuleSwap",
+    "enable_8da4w_fake_quant_module_swap",
+    "disable_8da4w_fake_quant_module_swap",
+    "enable_4w_fake_quant_module_swap",
+    "disable_4w_fake_quant_module_swap",
+]
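
Because these are plain "import ... as ..." aliases rather than separate implementations, the old module-swap names compare identical to the canonical ones:

from torchao.quantization.prototype.qat._module_swap_api import (
    Int8DynActInt4WeightQATQuantizerModuleSwap,
)
from torchao.quantization.qat.linear import Int8DynActInt4WeightQATQuantizer

# Same class object under both names.
assert Int8DynActInt4WeightQATQuantizerModuleSwap is Int8DynActInt4WeightQATQuantizer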
torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+from torchao.quantization.qat.affine_fake_quantized_tensor import (
+    AffineFakeQuantizedTensor,
+    to_affine_fake_quantized,
+)
+
+__all__ = [
+    "AffineFakeQuantizedTensor",
+    "to_affine_fake_quantized",
+]
torchao/quantization/prototype/qat/api.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+from torchao.quantization.qat.api import (
+    ComposableQATQuantizer,
+    FakeQuantizeConfig,
+)
+
+__all__ = [
+    "ComposableQATQuantizer",
+    "FakeQuantizeConfig",
+]
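
FakeQuantizeConfig, re-exported here, describes how a tensor should be fake-quantized. A short sketch of constructing configs through the legacy path (the dtype and granularity choices below follow the torchao QAT docs and are illustrative):

import torch

from torchao.quantization.prototype.qat.api import FakeQuantizeConfig

# Asymmetric per-token int8 for activations, symmetric per-group int4 for weights.
activation_config = FakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
weight_config = FakeQuantizeConfig(torch.int4, group_size=32)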
torchao/quantization/prototype/qat/embedding.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+from torchao.quantization.qat.embedding import (
+    FakeQuantizedEmbedding,
+    Int4WeightOnlyEmbedding,
+    Int4WeightOnlyEmbeddingQATQuantizer,
+    Int4WeightOnlyQATEmbedding,
+)
+
+__all__ = [
+    "FakeQuantizedEmbedding",
+    "Int4WeightOnlyEmbeddingQATQuantizer",
+    "Int4WeightOnlyEmbedding",
+    "Int4WeightOnlyQATEmbedding",
+]
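
FakeQuantizedEmbedding is a drop-in replacement for torch.nn.Embedding whose weight is fake-quantized on the fly. A hedged sketch (the weight_config keyword and the sizes are assumptions based on its FakeQuantizedLinear sibling in the torchao QAT docs):

import torch

from torchao.quantization.prototype.qat.api import FakeQuantizeConfig
from torchao.quantization.prototype.qat.embedding import FakeQuantizedEmbedding

weight_config = FakeQuantizeConfig(torch.int4, group_size=32)
emb = FakeQuantizedEmbedding(1024, 256, weight_config=weight_config)

tokens = torch.randint(0, 1024, (8,))
out = emb(tokens)  # embeddings computed from the fake-quantized weight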
torchao/quantization/prototype/qat/fake_quantizer.py

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+from torchao.quantization.qat.fake_quantizer import (
+    FakeQuantizer,
+)
+
+__all__ = [
+    "FakeQuantizer",
+]
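
FakeQuantizer is the nn.Module that applies a FakeQuantizeConfig to a tensor; the fake-quantized linear and embedding modules build on it. A minimal sketch (shape and group size are illustrative):

import torch

from torchao.quantization.prototype.qat.api import FakeQuantizeConfig
from torchao.quantization.prototype.qat.fake_quantizer import FakeQuantizer

fake_quantizer = FakeQuantizer(FakeQuantizeConfig(torch.int4, group_size=32))
w = torch.randn(64, 64)
w_fq = fake_quantizer(w)  # same shape and float dtype, values snapped to an int4 grid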
torchao/quantization/prototype/qat/linear.py

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+from torchao.quantization.qat.linear import (
+    FakeQuantizedLinear,
+    Int4WeightOnlyQATLinear,
+    Int4WeightOnlyQATQuantizer,
+    Int8DynActInt4WeightQATLinear,
+    Int8DynActInt4WeightQATQuantizer,
+    disable_4w_fake_quant,
+    disable_8da4w_fake_quant,
+    enable_4w_fake_quant,
+    enable_8da4w_fake_quant,
+)
+
+__all__ = [
+    "disable_4w_fake_quant",
+    "disable_8da4w_fake_quant",
+    "enable_4w_fake_quant",
+    "enable_8da4w_fake_quant",
+    "FakeQuantizedLinear",
+    "Int4WeightOnlyQATLinear",
+    "Int4WeightOnlyQATQuantizer",
+    "Int8DynActInt4WeightQATLinear",
+    "Int8DynActInt4WeightQATQuantizer",
+]
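
The enable/disable helpers re-exported here are designed for torch.nn.Module.apply, which visits every submodule. A hedged sketch of the usual flow (toy model; the toggle pattern follows the torchao QAT docs):

import torch

from torchao.quantization.prototype.qat.linear import (
    Int8DynActInt4WeightQATQuantizer,
    disable_8da4w_fake_quant,
    enable_8da4w_fake_quant,
)

model = torch.nn.Sequential(torch.nn.Linear(256, 256))  # toy model (illustrative)
quantizer = Int8DynActInt4WeightQATQuantizer()
model = quantizer.prepare(model)

model.apply(disable_8da4w_fake_quant)  # e.g. run some steps without fake quant
model.apply(enable_8da4w_fake_quant)   # re-enable fake quant for QAT fine-tuning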
