
Commit 44b1543

ruff format
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent f9a218f commit 44b1543

File tree

5 files changed: +49 −22 lines


torchao/_models/mixtral-moe/generate.py

Lines changed: 20 additions & 6 deletions
@@ -238,8 +238,8 @@ def main(
 
         from torchao.quantization.prototype.moe_quant.utils import (
             MoEQuantConfig,
+            UseFakeExtraDimTensor,
             cond_ffn_filter,
-            UseFakeExtraDimTensor
         )
         from torchao.quantization.quant_api import (
             Float8DynamicActivationFloat8WeightConfig,

@@ -260,28 +260,42 @@ def main(
             config = MoEQuantConfig(Int8WeightOnlyConfig())
 
         elif "int8wo" in moe_quant:
-            config = MoEQuantConfig(Int8WeightOnlyConfig(), use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE)
+            config = MoEQuantConfig(
+                Int8WeightOnlyConfig(),
+                use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE,
+            )
 
         elif "int8dq-base" in moe_quant:
             config = MoEQuantConfig(Int8DynamicActivationInt8WeightConfig())
 
         elif "int8dq" in moe_quant:
-            config = MoEQuantConfig(Int8DynamicActivationInt8WeightConfig(), use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE)
+            config = MoEQuantConfig(
+                Int8DynamicActivationInt8WeightConfig(),
+                use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE,
+            )
 
         elif "int4wo-base" in moe_quant:
             config = MoEQuantConfig(Int4WeightOnlyConfig())
 
         elif "int4wo" in moe_quant:
-            config = MoEQuantConfig(Int4WeightOnlyConfig(), use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE)
+            config = MoEQuantConfig(
+                Int4WeightOnlyConfig(),
+                use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE,
+            )
 
         elif "fp8wo-base" in moe_quant:
             config = MoEQuantConfig(Float8WeightOnlyConfig())
 
         elif "fp8wo" in moe_quant:
-            config = MoEQuantConfig(Float8WeightOnlyConfig(), use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE)
+            config = MoEQuantConfig(
+                Float8WeightOnlyConfig(),
+                use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE,
+            )
 
         elif "fp8dq-base" in moe_quant:
-            config = MoEQuantConfig(Float8DynamicActivationFloat8WeightConfig(granularity=PerRow()))
+            config = MoEQuantConfig(
+                Float8DynamicActivationFloat8WeightConfig(granularity=PerRow())
+            )
 
         elif "fp8dq" in moe_quant:
             config = MoEQuantConfig(
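
For context, the call sites reformatted above all build the same kind of config. Below is a minimal usage sketch (not part of this commit) using only names imported in the diff; `model` is assumed to be an already-loaded MoE model whose expert FFN modules match cond_ffn_filter.

# Sketch only: `model` is a hypothetical, already-loaded MoE model.
from torchao.quantization import quantize_
from torchao.quantization.prototype.moe_quant.utils import (
    MoEQuantConfig,
    UseFakeExtraDimTensor,
    cond_ffn_filter,
)
from torchao.quantization.quant_api import Int8WeightOnlyConfig

config = MoEQuantConfig(
    Int8WeightOnlyConfig(),
    use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE,  # force the FakeExtraDimTensor path
)
quantize_(model, config, cond_ffn_filter)  # quantize only the expert FFN weights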

torchao/dtypes/floatx/float8_layout.py

Lines changed: 7 additions & 4 deletions
@@ -55,8 +55,10 @@ class Float8Layout(Layout):
 
     mm_config: Optional[Float8MMConfig] = None
 
+
 _fallback_warning_shown = False
 
+
 @register_layout(Float8Layout)
 class Float8AQTTensorImpl(AQTTensorImpl):
     """

@@ -102,7 +104,7 @@ def __init__(
     def _apply_fn_to_data(self, fn):
         """Applys a fn to all tensor components stored on this class"""
         global _fallback_warning_shown
-
+
         try:
             return self.__class__(
                 fn(self.float8_data),

@@ -114,14 +116,15 @@ def _apply_fn_to_data(self, fn):
             if '"index_cuda" not implemented for ' in str(e):
                 if not _fallback_warning_shown:
                     import warnings
+
                     warnings.warn(
                         f"When trying to index Float8AQTTensorImpl, got known error {e}, will use slower fallback but "
                         + "note: You can torch.compile the model to avoid this problem.",
-                        UserWarning
+                        UserWarning,
                     )
                     _fallback_warning_shown = True
-
-                return self.__class__( # do indexing in bfloat16 then convert back
+
+                return self.__class__(  # do indexing in bfloat16 then convert back
                     fn(self.float8_data.to(torch.bfloat16)).to(self.float8_data.dtype),
                     fn(self.scale),
                     self.transposed,
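
The float8_layout.py changes only reformat an existing warn-once fallback. As a reference, here is a self-contained sketch of that pattern with generic, hypothetical names (`indexed_with_fallback` is not torchao code); it mirrors the module-level flag plus bfloat16 retry seen in the diff.

import warnings

import torch

_fallback_warning_shown = False  # module-level flag so the warning fires only once


def indexed_with_fallback(data, fn):
    """Try fn on data directly; on NotImplementedError, warn once and retry in bfloat16."""
    global _fallback_warning_shown
    try:
        return fn(data)
    except NotImplementedError as e:
        if not _fallback_warning_shown:
            warnings.warn(
                f"Got known error {e}; using a slower fallback path.",
                UserWarning,
            )
            _fallback_warning_shown = True
        # fallback: convert to a widely supported dtype, apply fn, convert back
        return fn(data.to(torch.bfloat16)).to(data.dtype)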

torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py

Lines changed: 4 additions & 2 deletions
@@ -636,8 +636,8 @@ def test_moe_quant_intx(self):
         from torchao.quantization.prototype.moe_quant.utils import (
             FakeExtraDimTensor,
             MoEQuantConfig,
-            cond_ffn_filter,
             UseFakeExtraDimTensor,
+            cond_ffn_filter,
         )
         from torchao.quantization.quant_api import (
             Int8DynamicActivationIntxWeightConfig,

@@ -657,7 +657,9 @@ def test_moe_quant_intx(self):
         base_config = Int8DynamicActivationIntxWeightConfig(
             layout=PackedLinearInt8DynamicActivationIntxWeightLayout()
         )
-        moe_config = MoEQuantConfig(base_config, use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE)
+        moe_config = MoEQuantConfig(
+            base_config, use_fake_extra_dim_tensor=UseFakeExtraDimTensor.TRUE
+        )
 
         quantize_(model, moe_config, cond_ffn_filter)

torchao/quantization/prototype/moe_quant/llama4_quant.py

Lines changed: 4 additions & 1 deletion
@@ -70,7 +70,10 @@ def convert_fn(module):
 model = model
 
 from torchao.quantization import Int4WeightOnlyConfig, quantize_
-from torchao.quantization.prototype.moe_quant.utils import cond_ffn_filter, MoEQuantConfig
+from torchao.quantization.prototype.moe_quant.utils import (
+    MoEQuantConfig,
+    cond_ffn_filter,
+)
 
 quantize_(model, MoEQuantConfig(Int4WeightOnlyConfig()), cond_ffn_filter, device="cuda")

torchao/quantization/prototype/moe_quant/utils.py

Lines changed: 14 additions & 9 deletions
@@ -5,16 +5,16 @@
 
 aten = torch.ops.aten
 
+from enum import Enum, auto
 from typing import List, Optional, Tuple, Union
 
 from torchao.quantization.quant_api import (
+    _QUANTIZE_CONFIG_HANDLER,
     AOBaseConfig,
     dataclass,
     register_quantize_module_handler,
 )
 from torchao.utils import fill_defaults
-from enum import Enum, auto
-from torchao.quantization.quant_api import _QUANTIZE_CONFIG_HANDLER
 
 
 class DummyModule(torch.nn.Module):

@@ -213,9 +213,10 @@ def __torch_dispatch__(cls, func, types, args, kwargs):
             )
             raise e
 
+
 class UseFakeExtraDimTensor(Enum):
-    """Enum that indicate whether to use FakeExtraDimTensor
-    """
+    """Enum that indicate whether to use FakeExtraDimTensor"""
+
     TRUE = auto()
     FALSE = auto()
     AS_FALLBACK = auto()

@@ -230,12 +231,13 @@ class MoEQuantConfig(AOBaseConfig):
 
     base_config: AOBaseConfig
     use_fake_extra_dim_tensor: UseFakeExtraDimTensor = UseFakeExtraDimTensor.AS_FALLBACK
-    set_inductor_config: bool=True
+    set_inductor_config: bool = True
 
 
 # Module-level flag to track if we've already printed the error
 _moe_quant_tensor_has_printed_error = False
 
+
 def _moe_quant_tensor(weight, config):
     def _moe_quant_tensor_base(weight, config):
         base_config_handler = _QUANTIZE_CONFIG_HANDLER[type(config.base_config)]

@@ -250,13 +252,15 @@ def _moe_quant_tensor_fake_extra_dim_tensor(weight, config):
         # put tensors into modules since the handlers target modules not tensors
         dummy_modules = [DummyModule(tensor) for tensor in tensors]
         # apply handler to each module
-        quant_mods = list(map(lambda x: base_config_handler(x, config.base_config), dummy_modules))
+        quant_mods = list(
+            map(lambda x: base_config_handler(x, config.base_config), dummy_modules)
+        )
         # pack quantized subclasses into FakeExtraDimTensor
         quant_weight = FakeExtraDimTensor([mod.weight for mod in quant_mods])
         return quant_weight
 
     global _moe_quant_tensor_has_printed_error
-
+
     use_fake = config.use_fake_extra_dim_tensor
     if use_fake == UseFakeExtraDimTensor.FALSE:
         return _moe_quant_tensor_base(weight, config)

@@ -272,7 +276,6 @@ def _moe_quant_tensor_fake_extra_dim_tensor(weight, config):
         return _moe_quant_tensor_fake_extra_dim_tensor(weight, config)
 
 
-
 @register_quantize_module_handler(MoEQuantConfig)
 def moe_quant_fn(module, config: MoEQuantConfig):
     import warnings

@@ -283,7 +286,9 @@ def moe_quant_fn(module, config: MoEQuantConfig):
 
     for weight_attr in ["w1", "w2", "w3"]:
         param = getattr(module, weight_attr)
-        assert param.dim() == 3, f"when applying moe_quant to {module} expected 3D tensor for {weight_attr} but got {param.dim()}"
+        assert param.dim() == 3, (
+            f"when applying moe_quant to {module} expected 3D tensor for {weight_attr} but got {param.dim()}"
+        )
     assert isinstance(config.base_config, AOBaseConfig), (
         f"MoEQuantConfig expected to be initialized with an AOBaseConfig but got {type(config.base_config)}"
         + "this can happen if you initiaze with MoEQuantConfig(AOConfig) rather than MoEQuantConfig(AOConfig())"

0 commit comments
