@@ -1,15 +1,19 @@
-import unittest
-import functools
 import copy
-import torch
-import torchao
-import os
+import functools
+import unittest

+import torch
+from torch.distributed._tensor import DeviceMesh, DTensor, Replicate, Shard
 from torch.testing._internal import common_utils
-from torchao.dtypes import AffineQuantizedTensor
-from torchao.dtypes import to_affine_quantized_intx
+from torch.testing._internal.distributed._tensor.common_dtensor import (
+    DTensorTestBase,
+    with_comms,
+)
+
+import torchao
+from torchao.dtypes import AffineQuantizedTensor, to_affine_quantized_intx
+from torchao.quantization import int8_weight_only, quantize_
 from torchao.quantization.quant_primitives import MappingType
-from torchao.quantization import quantize_, int8_weight_only
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 """
@@ -36,10 +40,9 @@ class MyTestCase(TorchAOBasicTestCase):
 unittest.main()
 """

+
 # copied from https://github.com/pytorch/pytorch/blob/941d094dd1b507dacf06ddc6ed3485a9537e09b7/test/inductor/test_torchinductor.py#L11389
-def copy_tests(
-    my_cls, other_cls, suffix, test_failures=None, xfail_prop=None
-):  # noqa: B902
+def copy_tests(my_cls, other_cls, suffix, test_failures=None, xfail_prop=None):  # noqa: B902
     for name, value in my_cls.__dict__.items():
         if name.startswith("test_"):
             # You cannot copy functions in Python, so we use closures here to
@@ -70,7 +73,6 @@ def new_test(self, value=value):
             setattr(other_cls, f"{name}_{suffix}", new_test)


-
 class TorchAOBasicTestCase(common_utils.TestCase):
     COMMON_DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
     COMMON_DTYPES = [torch.float32, torch.float16, torch.bfloat16]
@@ -90,17 +92,21 @@ def test_flatten_unflatten(self):
         hp_tensor = torch.randn(4, 128)
         lp_tensor = self.FACTORY_FN(hp_tensor, **self.kwargs)
         tensor_data_name_dict, tensor_attributes = lp_tensor.__tensor_flatten__()
-        tensor_data_dict = {name: getattr(lp_tensor, name) for name in tensor_data_name_dict}
+        tensor_data_dict = {
+            name: getattr(lp_tensor, name) for name in tensor_data_name_dict
+        }
         outer_size = lp_tensor.size()
         outer_stride = lp_tensor.stride()
-        reconstructed = self.TENSOR_SUBCLASS.__tensor_unflatten__(tensor_data_dict, tensor_attributes, outer_size, outer_stride)
+        reconstructed = self.TENSOR_SUBCLASS.__tensor_unflatten__(
+            tensor_data_dict, tensor_attributes, outer_size, outer_stride
+        )
         self.assertEqual(lp_tensor.dequantize(), reconstructed.dequantize())

     @common_utils.parametrize("device", COMMON_DEVICES)
     @common_utils.parametrize("dtype", COMMON_DTYPES)
     def test_hp_tensor_device_dtype(self, device, dtype):
         hp_tensor = torch.randn(4, 128, device=device, dtype=dtype)
-        lp_tensor = self.FACTORY_FN(hp_tensor, **self.kwargs)
+        self.FACTORY_FN(hp_tensor, **self.kwargs)

     @common_utils.parametrize("device1", COMMON_DEVICES)
     @common_utils.parametrize("device2", COMMON_DEVICES)
@@ -141,7 +147,10 @@ def test_linear(self, device, dtype):
         hp_act_tensor = torch.randn(32, 128, device=device, dtype=dtype)
         hp_res = torch.nn.functional.linear(hp_act_tensor, hp_tensor)
         lp_res = torch.nn.functional.linear(hp_act_tensor, lp_tensor)
-        self.assertGreater(torchao.quantization.utils.compute_error(hp_res, lp_res), self.LINEAR_MIN_SQNR)
+        self.assertGreater(
+            torchao.quantization.utils.compute_error(hp_res, lp_res),
+            self.LINEAR_MIN_SQNR,
+        )


 class TorchAOCompileTestCase(common_utils.TestCase):
@@ -165,6 +174,7 @@ class TorchAOCompileTestCase(common_utils.TestCase):
     def test_input_output_tensor_subclass(self, device, dtype):
         hp_tensor = torch.randn(4, 128, device=device, dtype=dtype)
         lp_tensor = self.FACTORY_FN(hp_tensor, **self.kwargs)
+
         def f(tensor):
             return tensor

@@ -179,6 +189,7 @@ def f(tensor):
     def test_input_tensor_subclass(self, device, dtype):
         hp_tensor = torch.randn(4, 128, device=device, dtype=dtype)
         lp_tensor = self.FACTORY_FN(hp_tensor, **self.kwargs)
+
         def f(tensor):
             return tensor.dequantize()

@@ -192,6 +203,7 @@ def f(tensor):
     @common_utils.parametrize("dtype", COMMON_DTYPES)
     def test_output_tensor_subclass(self, device, dtype):
         hp_tensor = torch.randn(4, 128, device=device, dtype=dtype)
+
         def f(hp_tensor):
             return self.FACTORY_FN(hp_tensor, **self.kwargs)

@@ -201,7 +213,12 @@ def f(hp_tensor):
         self.assertTrue(isinstance(f(hp_tensor), self.TENSOR_SUBCLASS))
         # bfloat16 seems to result in much larger numerical differences
         if dtype != torch.bfloat16:
-            self.assertGreater(torchao.quantization.utils.compute_error(ref.dequantize(), compiled.dequantize()), self.COMPILE_MIN_SQNR)
+            self.assertGreater(
+                torchao.quantization.utils.compute_error(
+                    ref.dequantize(), compiled.dequantize()
+                ),
+                self.COMPILE_MIN_SQNR,
+            )

     @common_utils.parametrize("device", COMMON_DEVICES)
     @common_utils.parametrize("dtype", COMMON_DTYPES)
@@ -211,22 +228,18 @@ def test_linear_compile(self, device, dtype):

         hp_act_tensor = torch.randn(32, 128, device=device, dtype=dtype)
         hp_res = torch.nn.functional.linear(hp_act_tensor, hp_tensor)
-        l = torch.nn.Linear(128, 4, bias=False, device=device, dtype=dtype)
-        l.weight = torch.nn.Parameter(lp_tensor)
-        lp_res = torch.compile(l)(hp_act_tensor)
-        self.assertGreater(torchao.quantization.utils.compute_error(hp_res, lp_res), self.LINEAR_MIN_SQNR)
+        linear = torch.nn.Linear(128, 4, bias=False, device=device, dtype=dtype)
+        linear.weight = torch.nn.Parameter(lp_tensor)
+        lp_res = torch.compile(linear)(hp_act_tensor)
+        self.assertGreater(
+            torchao.quantization.utils.compute_error(hp_res, lp_res),
+            self.LINEAR_MIN_SQNR,
+        )

-import torch.distributed as dist
-from torch.distributed._tensor import DTensor, Replicate, Shard, DeviceMesh
-from torch.testing._internal.distributed._tensor.common_dtensor import (
-    DTensorTestBase,
-    with_comms,
-    NUM_DEVICES,
-)

 class TorchAOTensorParallelTestCase(DTensorTestBase):
-    """Basic test case for tensor subclasses
-    """
+    """Basic test case for tensor subclasses"""
+
     COMMON_DTYPES = [torch.float32, torch.float16, torch.bfloat16]

     TENSOR_SUBCLASS = AffineQuantizedTensor
@@ -247,9 +260,7 @@ def colwise_shard(m: torch.nn.Module, mesh: DeviceMesh) -> torch.nn.Module:
         # Construct DTensor from local shard
         dtensor = DTensor.from_local(local_shard, mesh, [Shard(0)])
         # Replace parameter in module
-        m.linear.weight = torch.nn.Parameter(
-            dtensor, requires_grad=False
-        )
+        m.linear.weight = torch.nn.Parameter(dtensor, requires_grad=False)
         return m

     @staticmethod
@@ -266,9 +277,7 @@ def rowwise_shard(m: torch.nn.Module, mesh: DeviceMesh) -> torch.nn.Module:
         # Construct DTensor from local shard
         dtensor = DTensor.from_local(local_shard, mesh, [Shard(1)])
         # Replace parameter in module
-        m.linear.weight = torch.nn.Parameter(
-            dtensor, requires_grad=False
-        )
+        m.linear.weight = torch.nn.Parameter(dtensor, requires_grad=False)
         return m

     def quantize(self, m: torch.nn.Module) -> torch.nn.Module:
@@ -289,7 +298,9 @@ def test_tp(self, dtype):
         class M(torch.nn.Module):
             def __init__(self, in_features, out_features, **kwargs) -> None:
                 super().__init__(**kwargs)
-                self.linear = torch.nn.Linear(in_features, out_features, bias=False, device="cuda")
+                self.linear = torch.nn.Linear(
+                    in_features, out_features, bias=False, device="cuda"
+                )

             def forward(self, x: torch.Tensor) -> torch.Tensor:
                 return self.linear(x)
@@ -301,12 +312,12 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         proj_up = M(1024, 2048).to(device).to(dtype)
         proj_dn = M(2048, 1024).to(device).to(dtype)
         example_input = 100 * torch.randn(128, 1024, device=device, dtype=dtype)
-        y = proj_dn(proj_up(example_input))
+        proj_dn(proj_up(example_input))

         # Quantize the model
         up_quant = self.quantize(proj_up)
         dn_quant = self.quantize(proj_dn)
-        y_q = dn_quant(up_quant(example_input))
+        dn_quant(up_quant(example_input))

         mesh = self.build_device_mesh()
         mesh.device_type = "cuda"
@@ -316,11 +327,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         dn_dist = self.rowwise_shard(dn_quant, mesh)

         # We need to turn inputs into DTensor form as well -- just a format change
-        input_dtensor = DTensor.from_local(
-            example_input, mesh, [Replicate()]
-        )
+        input_dtensor = DTensor.from_local(example_input, mesh, [Replicate()])

-        y_d = dn_dist(up_dist(input_dtensor))
+        dn_dist(up_dist(input_dtensor))

         if not TORCH_VERSION_AT_LEAST_2_6:
             # Need torch 2.6 to support compiled tensor parallelism
@@ -329,7 +338,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         up_compiled = torch.compile(up_dist)
         y_up = up_compiled(input_dtensor)
         dn_compiled = torch.compile(dn_dist)
-        y_dn = dn_compiled(y_up)
+        dn_compiled(y_up)
+

 common_utils.instantiate_parametrized_tests(TorchAOBasicTestCase)
 common_utils.instantiate_parametrized_tests(TorchAOCompileTestCase)
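
For context, the module docstring referenced in the second hunk (class MyTestCase(TorchAOBasicTestCase)) shows how these base classes are meant to be consumed. Below is a minimal sketch of such a subclass, assuming the class attributes exercised by the tests above (TENSOR_SUBCLASS, FACTORY_FN, kwargs, LINEAR_MIN_SQNR, COMPILE_MIN_SQNR); the torchao.testing.utils import path, the factory arguments, and the threshold values are illustrative assumptions, not taken from this diff.

import unittest

import torch
from torch.testing._internal import common_utils

from torchao.dtypes import AffineQuantizedTensor, to_affine_quantized_intx
from torchao.quantization.quant_primitives import MappingType
from torchao.testing.utils import TorchAOBasicTestCase  # assumed import path


class MyTestCase(TorchAOBasicTestCase):
    # Tensor subclass under test and the factory invoked as FACTORY_FN(hp_tensor, **kwargs)
    TENSOR_SUBCLASS = AffineQuantizedTensor
    FACTORY_FN = to_affine_quantized_intx
    kwargs = {
        "mapping_type": MappingType.SYMMETRIC,  # illustrative quantization settings
        "block_size": (1, 128),
        "target_dtype": torch.int8,
    }
    # SQNR thresholds checked by test_linear and the compile tests; placeholder values
    LINEAR_MIN_SQNR = 40
    COMPILE_MIN_SQNR = 50


# Expand the device/dtype parametrizations into concrete test methods, then run.
common_utils.instantiate_parametrized_tests(MyTestCase)

if __name__ == "__main__":
    unittest.main()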