
Commit 709e1da

Move moe quant to better prototype dir (#2192)
* Move moe quant to better prototype dir

  Summary: The old quantization/prototype dir is being deprecated, so moe_quant is moved out into the correct one.

  Test Plan: see CI

* actually adding new folder

* ruff format
1 parent 45b39b1 commit 709e1da
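
In practice this commit is a pure import-path move: everything that lived under `torchao.quantization.prototype.moe_quant` now lives under `torchao.prototype.moe_quant`, and the touched files only update those imports. A minimal migration sketch for user code, assuming a model that contains the quantizable MoE modules (the commented call mirrors the README example further down in this diff):

```python
# Old (deprecated) import location, removed by this commit:
#   from torchao.quantization.prototype.moe_quant.utils import MoEQuantConfig, cond_ffn_filter

# New location after this commit:
from torchao.prototype.moe_quant.quantizable_moe_modules import MOEFeedForwardAOQuantizable
from torchao.prototype.moe_quant.utils import MoEQuantConfig, cond_ffn_filter
from torchao.quantization.quant_api import Int8WeightOnlyConfig, quantize_

# Usage as in the README below; `model` is assumed to be a network whose
# MOEFeedForwardAOQuantizable layers are matched by cond_ffn_filter.
# quantize_(model, MoEQuantConfig(Int8WeightOnlyConfig()), filter_fn=cond_ffn_filter)
```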

File tree

9 files changed: 12 additions & 12 deletions


test/quantization/test_moe_quant.py

Lines changed: 2 additions & 2 deletions
@@ -6,10 +6,10 @@
 from torchao.dtypes.floatx.float8_layout import Float8AQTTensorImpl
 from torchao.dtypes.uintx.plain_layout import PlainAQTTensorImpl
 from torchao.dtypes.uintx.tensor_core_tiled_layout import TensorCoreTiledAQTTensorImpl
-from torchao.quantization.prototype.moe_quant.quantizable_moe_modules import (
+from torchao.prototype.moe_quant.quantizable_moe_modules import (
     MOEFeedForwardAOQuantizable,
 )
-from torchao.quantization.prototype.moe_quant.utils import (
+from torchao.prototype.moe_quant.utils import (
     FakeExtraDimTensor,
     MoEQuantConfig,
     UseFakeExtraDimTensor,

torchao/_models/mixtral-moe/generate.py

Lines changed: 1 addition & 1 deletion
@@ -236,7 +236,7 @@ def main(
 ]
 )

-from torchao.quantization.prototype.moe_quant.utils import (
+from torchao.prototype.moe_quant.utils import (
     MoEQuantConfig,
     UseFakeExtraDimTensor,
     cond_ffn_filter,

torchao/_models/mixtral-moe/model.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 from torch import Tensor
 from torch.nn import functional as F

-from torchao.quantization.prototype.moe_quant.utils import FakeExtraDimTensor
+from torchao.prototype.moe_quant.utils import FakeExtraDimTensor


 def find_multiple(n: int, k: int) -> int:

torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py

Lines changed: 2 additions & 2 deletions
@@ -630,10 +630,10 @@ def test_identical_to_Int8DynActInt4WeightQATQuantizer(
         self.assertTrue(sqnr2 == float("inf"))

     def test_moe_quant_intx(self):
-        from torchao.quantization.prototype.moe_quant.quantizable_moe_modules import (
+        from torchao.prototype.moe_quant.quantizable_moe_modules import (
             MOEFeedForwardAOQuantizable,
         )
-        from torchao.quantization.prototype.moe_quant.utils import (
+        from torchao.prototype.moe_quant.utils import (
             FakeExtraDimTensor,
             MoEQuantConfig,
             UseFakeExtraDimTensor,

torchao/quantization/prototype/moe_quant/README.md renamed to torchao/prototype/moe_quant/README.md

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ The API for moe quantization is very similar to linear quantization, given a moe

 ```python

-from torchao.quantization.prototype.moe_quant.utils import cond_ffn_filter,
+from torchao.prototype.moe_quant.utils import cond_ffn_filter,
 from torchao.quantization.quant_api import quantize_, Int8WeightOnlyConfig

 quantize_(model, MoEQuantConfig(Int8WeightOnlyConfig()), filter_fn=cond_ffn_filter)
@@ -27,7 +27,7 @@ To make the above api work, each tensor subclass had to be edited to work as 3D

 ```python

-from torchao.quantization.prototype.moe_quant.utils import cond_ffn_filter, MoEQuantConfig, UseFakeExtraDimTensor
+from torchao.prototype.moe_quant.utils import cond_ffn_filter, MoEQuantConfig, UseFakeExtraDimTensor
 from torchao.quantization.quant_api import quantize_, Int8DynamicActivationIntxWeightConfig

 config = MoEQuantConfig(
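
The second hunk above is cut off at `config = MoEQuantConfig(` by the page view. Purely as a hedged sketch of what that fallback configuration looks like: the keyword name `use_fake_extra_dim_tensor` and the enum member `TRUE` below are assumptions inferred from the `UseFakeExtraDimTensor` import, not something this diff shows.

```python
from torchao.prototype.moe_quant.utils import (
    MoEQuantConfig,
    UseFakeExtraDimTensor,
    cond_ffn_filter,
)
from torchao.quantization.quant_api import Int8DynamicActivationIntxWeightConfig, quantize_

# Assumed wiring: force the FakeExtraDimTensor fallback for a base config
# whose tensor subclass has no native 3D support (kwarg/member names assumed).
config = MoEQuantConfig(
    Int8DynamicActivationIntxWeightConfig(),
    use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE,
)
# quantize_(model, config, filter_fn=cond_ffn_filter)  # `model` assumed as in the README
```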

torchao/quantization/prototype/moe_quant/llama4_quant.py renamed to torchao/prototype/moe_quant/llama4_quant.py

Lines changed: 3 additions & 3 deletions
@@ -16,7 +16,7 @@
 from transformers import AutoTokenizer, Llama4ForCausalLM
 from transformers.models.llama4.modeling_llama4 import Llama4TextMoe

-from torchao.quantization.prototype.moe_quant.quantizable_moe_modules import (
+from torchao.prototype.moe_quant.quantizable_moe_modules import (
     MOEFeedForwardAOQuantizable,
 )
 from torchao.quantization.quant_api import _replace_with_custom_fn_if_matches_filter
@@ -69,11 +69,11 @@ def convert_fn(module):

 model = model

-from torchao.quantization import Int4WeightOnlyConfig, quantize_
-from torchao.quantization.prototype.moe_quant.utils import (
+from torchao.prototype.moe_quant.utils import (
     MoEQuantConfig,
     cond_ffn_filter,
 )
+from torchao.quantization import Int4WeightOnlyConfig, quantize_

 quantize_(model, MoEQuantConfig(Int4WeightOnlyConfig()), cond_ffn_filter, device="cuda")

torchao/quantization/prototype/moe_quant/quantizable_moe_modules.py renamed to torchao/prototype/moe_quant/quantizable_moe_modules.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 import torch.nn.functional as F
 from torch import Tensor, nn

-from torchao.quantization.prototype.moe_quant.utils import FakeExtraDimTensor
+from torchao.prototype.moe_quant.utils import FakeExtraDimTensor


 class MOEFeedForwardAOQuantizable(nn.Module):
