@@ -1,19 +1,18 @@
-import logging
-import unittest
 import copy
+import unittest
 
 import torch
-import torch.nn.functional as F
 from torch import nn
 from torch.testing._internal.common_utils import TestCase
 
 from torchao.sparsity.training import (
+    SemiSparseLinear,
     swap_linear_with_semi_sparse_linear,
     swap_semi_sparse_linear_with_linear,
-    SemiSparseLinear
 )
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_4, is_fbcode
 
+
 class ToyModel(nn.Module):
     def __init__(self):
         super().__init__()
@@ -26,23 +25,26 @@ def forward(self, x):
         x = self.linear2(x)
         return x
 
-class TestRuntimeSemiStructuredSparsity(TestCase):
 
+class TestRuntimeSemiStructuredSparsity(TestCase):
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "pytorch 2.4+ feature")
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     @unittest.skipIf(is_fbcode(), "broken in fbcode")
     @unittest.skip("Temporarily skipping to unpin nightlies")
     def test_runtime_weight_sparsification(self):
         # need this import inside to not break 2.2 tests
         from torch.sparse import SparseSemiStructuredTensorCUSPARSELT
+
         input = torch.rand((128, 128)).half().cuda()
         grad = torch.rand((128, 128)).half().cuda()
         model = ToyModel().half().cuda()
         model_c = copy.deepcopy(model)
 
         for name, mod in model.named_modules():
             if isinstance(mod, torch.nn.Linear):
-                sparse = SparseSemiStructuredTensorCUSPARSELT.prune_dense_static_sort(mod.weight.detach()).to_dense()
+                sparse = SparseSemiStructuredTensorCUSPARSELT.prune_dense_static_sort(
+                    mod.weight.detach()
+                ).to_dense()
                 mod.weight = nn.Parameter(sparse)
 
         dense_result = model(input)
@@ -62,8 +64,12 @@ def test_runtime_weight_sparsification(self):
         sparse_result.backward(grad)
 
         # check grad
-        assert torch.allclose(model.linear1.weight.grad, model_c.linear1.weight.grad, rtol=1e-1, atol=1e-1)
-        assert torch.allclose(model.linear2.weight.grad, model_c.linear2.weight.grad, rtol=1e-1, atol=1e-1)
+        assert torch.allclose(
+            model.linear1.weight.grad, model_c.linear1.weight.grad, rtol=1e-1, atol=1e-1
+        )
+        assert torch.allclose(
+            model.linear2.weight.grad, model_c.linear2.weight.grad, rtol=1e-1, atol=1e-1
+        )
 
         # check that swap back works
         swap_semi_sparse_linear_with_linear(model_c)
@@ -77,14 +83,17 @@ def test_runtime_weight_sparsification(self):
     def test_runtime_weight_sparsification_compile(self):
         # need this import inside to not break 2.2 tests
         from torch.sparse import SparseSemiStructuredTensorCUSPARSELT
+
         input = torch.rand((128, 128)).half().cuda()
         grad = torch.rand((128, 128)).half().cuda()
         model = ToyModel().half().cuda()
         model_c = copy.deepcopy(model)
 
         for name, mod in model.named_modules():
             if isinstance(mod, torch.nn.Linear):
-                sparse = SparseSemiStructuredTensorCUSPARSELT.prune_dense_static_sort(mod.weight.detach()).to_dense()
+                sparse = SparseSemiStructuredTensorCUSPARSELT.prune_dense_static_sort(
+                    mod.weight.detach()
+                ).to_dense()
                 mod.weight = nn.Parameter(sparse)
 
         model = torch.compile(model, fullgraph=True)
@@ -106,8 +115,12 @@ def test_runtime_weight_sparsification_compile(self):
         sparse_result.backward(grad)
 
         # check grad
-        assert torch.allclose(model.linear1.weight.grad, model_c.linear1.weight.grad, rtol=1e-1, atol=1e-1)
-        assert torch.allclose(model.linear2.weight.grad, model_c.linear2.weight.grad, rtol=1e-1, atol=1e-1)
+        assert torch.allclose(
+            model.linear1.weight.grad, model_c.linear1.weight.grad, rtol=1e-1, atol=1e-1
+        )
+        assert torch.allclose(
+            model.linear2.weight.grad, model_c.linear2.weight.grad, rtol=1e-1, atol=1e-1
+        )
 
         # check that swap back works
         swap_semi_sparse_linear_with_linear(model_c)
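
Context for the API under test: swap_linear_with_semi_sparse_linear and swap_semi_sparse_linear_with_linear convert selected nn.Linear modules to SemiSparseLinear for accelerated 2:4 sparse training and back again. A minimal usage sketch follows; the shape of the config argument (a dict mapping module FQNs to SemiSparseLinear) is an assumption from typical torchao usage, since only the imported names are confirmed by this diff.

# Hedged sketch of the runtime sparsity training flow (assumed config shape:
# {module_fqn: SemiSparseLinear}); requires a CUDA device and fp16 weights.
import torch
from torch import nn

from torchao.sparsity.training import (
    SemiSparseLinear,
    swap_linear_with_semi_sparse_linear,
    swap_semi_sparse_linear_with_linear,
)

model = nn.Sequential(nn.Linear(128, 128), nn.Linear(128, 128)).half().cuda()

# Swap the selected nn.Linear modules for SemiSparseLinear during training.
sparse_config = {"0": SemiSparseLinear, "1": SemiSparseLinear}
swap_linear_with_semi_sparse_linear(model, sparse_config)

out = model(torch.rand((128, 128)).half().cuda())
out.backward(torch.rand_like(out))

# Swap back to plain nn.Linear once training is finished.
swap_semi_sparse_linear_with_linear(model)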