
Commit 709e1da

Move moe quant to better prototype dir (#2192)
* Move moe quant to better prototype dir

  Summary: The old quantization/prototype dir is being deprecated, so moe_quant is moved out into the correct one.

  Test Plan: see CI

* actually adding new folder

* ruff format
1 parent 45b39b1 commit 709e1da
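
In practice this commit is a pure import-path move: everything that lived under `torchao.quantization.prototype.moe_quant` now lives under `torchao.prototype.moe_quant`, and the touched files only update those imports. A minimal migration sketch for user code, assuming a model that contains the quantizable MoE modules (the commented call mirrors the README example further down in this diff):

```python
# Old (deprecated) import location, removed by this commit:
#   from torchao.quantization.prototype.moe_quant.utils import MoEQuantConfig, cond_ffn_filter

# New location after this commit:
from torchao.prototype.moe_quant.quantizable_moe_modules import MOEFeedForwardAOQuantizable
from torchao.prototype.moe_quant.utils import MoEQuantConfig, cond_ffn_filter
from torchao.quantization.quant_api import Int8WeightOnlyConfig, quantize_

# Usage as in the README below; `model` is assumed to be a network whose
# MOEFeedForwardAOQuantizable layers are matched by cond_ffn_filter.
# quantize_(model, MoEQuantConfig(Int8WeightOnlyConfig()), filter_fn=cond_ffn_filter)
```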

File tree

9 files changed: 12 additions & 12 deletions


test/quantization/test_moe_quant.py

Lines changed: 2 additions & 2 deletions
@@ -6,10 +6,10 @@
 from torchao.dtypes.floatx.float8_layout import Float8AQTTensorImpl
 from torchao.dtypes.uintx.plain_layout import PlainAQTTensorImpl
 from torchao.dtypes.uintx.tensor_core_tiled_layout import TensorCoreTiledAQTTensorImpl
-from torchao.quantization.prototype.moe_quant.quantizable_moe_modules import (
+from torchao.prototype.moe_quant.quantizable_moe_modules import (
     MOEFeedForwardAOQuantizable,
 )
-from torchao.quantization.prototype.moe_quant.utils import (
+from torchao.prototype.moe_quant.utils import (
     FakeExtraDimTensor,
     MoEQuantConfig,
     UseFakeExtraDimTensor,

torchao/_models/mixtral-moe/generate.py

Lines changed: 1 addition & 1 deletion
@@ -236,7 +236,7 @@ def main(
 ]
 )

-from torchao.quantization.prototype.moe_quant.utils import (
+from torchao.prototype.moe_quant.utils import (
     MoEQuantConfig,
     UseFakeExtraDimTensor,
     cond_ffn_filter,

torchao/_models/mixtral-moe/model.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 from torch import Tensor
 from torch.nn import functional as F

-from torchao.quantization.prototype.moe_quant.utils import FakeExtraDimTensor
+from torchao.prototype.moe_quant.utils import FakeExtraDimTensor


 def find_multiple(n: int, k: int) -> int:

torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py

Lines changed: 2 additions & 2 deletions
@@ -630,10 +630,10 @@ def test_identical_to_Int8DynActInt4WeightQATQuantizer(
         self.assertTrue(sqnr2 == float("inf"))

     def test_moe_quant_intx(self):
-        from torchao.quantization.prototype.moe_quant.quantizable_moe_modules import (
+        from torchao.prototype.moe_quant.quantizable_moe_modules import (
             MOEFeedForwardAOQuantizable,
         )
-        from torchao.quantization.prototype.moe_quant.utils import (
+        from torchao.prototype.moe_quant.utils import (
             FakeExtraDimTensor,
             MoEQuantConfig,
             UseFakeExtraDimTensor,

torchao/quantization/prototype/moe_quant/README.md renamed to torchao/prototype/moe_quant/README.md

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ The API for moe quantization is very similar to linear quantization, given a moe

 ```python

-from torchao.quantization.prototype.moe_quant.utils import cond_ffn_filter,
+from torchao.prototype.moe_quant.utils import cond_ffn_filter,
 from torchao.quantization.quant_api import quantize_, Int8WeightOnlyConfig

 quantize_(model, MoEQuantConfig(Int8WeightOnlyConfig()), filter_fn=cond_ffn_filter)
@@ -27,7 +27,7 @@ To make the above api work, each tensor subclass had to be edited to work as 3D

 ```python

-from torchao.quantization.prototype.moe_quant.utils import cond_ffn_filter, MoEQuantConfig, UseFakeExtraDimTensor
+from torchao.prototype.moe_quant.utils import cond_ffn_filter, MoEQuantConfig, UseFakeExtraDimTensor
 from torchao.quantization.quant_api import quantize_, Int8DynamicActivationIntxWeightConfig

 config = MoEQuantConfig(
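
The second hunk above is cut off at `config = MoEQuantConfig(` by the page view. Purely as a hedged sketch of what that fallback configuration looks like: the keyword name `use_fake_extra_dim_tensor` and the enum member `TRUE` below are assumptions inferred from the `UseFakeExtraDimTensor` import, not something this diff shows.

```python
from torchao.prototype.moe_quant.utils import (
    MoEQuantConfig,
    UseFakeExtraDimTensor,
    cond_ffn_filter,
)
from torchao.quantization.quant_api import Int8DynamicActivationIntxWeightConfig, quantize_

# Assumed wiring: force the FakeExtraDimTensor fallback for a base config
# whose tensor subclass has no native 3D support (kwarg/member names assumed).
config = MoEQuantConfig(
    Int8DynamicActivationIntxWeightConfig(),
    use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE,
)
# quantize_(model, config, filter_fn=cond_ffn_filter)  # `model` assumed as in the README
```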

torchao/quantization/prototype/moe_quant/llama4_quant.py renamed to torchao/prototype/moe_quant/llama4_quant.py

Lines changed: 3 additions & 3 deletions
@@ -16,7 +16,7 @@
 from transformers import AutoTokenizer, Llama4ForCausalLM
 from transformers.models.llama4.modeling_llama4 import Llama4TextMoe

-from torchao.quantization.prototype.moe_quant.quantizable_moe_modules import (
+from torchao.prototype.moe_quant.quantizable_moe_modules import (
     MOEFeedForwardAOQuantizable,
 )
 from torchao.quantization.quant_api import _replace_with_custom_fn_if_matches_filter
@@ -69,11 +69,11 @@ def convert_fn(module):

 model = model

-from torchao.quantization import Int4WeightOnlyConfig, quantize_
-from torchao.quantization.prototype.moe_quant.utils import (
+from torchao.prototype.moe_quant.utils import (
     MoEQuantConfig,
     cond_ffn_filter,
 )
+from torchao.quantization import Int4WeightOnlyConfig, quantize_

 quantize_(model, MoEQuantConfig(Int4WeightOnlyConfig()), cond_ffn_filter, device="cuda")

torchao/quantization/prototype/moe_quant/quantizable_moe_modules.py renamed to torchao/prototype/moe_quant/quantizable_moe_modules.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 import torch.nn.functional as F
 from torch import Tensor, nn

-from torchao.quantization.prototype.moe_quant.utils import FakeExtraDimTensor
+from torchao.prototype.moe_quant.utils import FakeExtraDimTensor


 class MOEFeedForwardAOQuantizable(nn.Module):
