
Commit b762e56

Update: CompressedLinear to decompress once (#266)
* Update: CompressedLinear to decompress once
* Update name!

Signed-off-by: Rahul Tuli <rahul@neuralmagic.com>
1 parent ea8848b commit b762e56

File tree

1 file changed: +9 −2 lines changed

src/compressed_tensors/linear/compressed_linear.py

Lines changed: 9 additions & 2 deletions
@@ -38,6 +38,10 @@ class CompressedLinear(Linear):
     :param quantization_format: compression format module is stored as
     """
 
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self._is_compressed = True
+
     @classmethod
     @torch.no_grad()
     def from_linear(
@@ -86,5 +90,8 @@ def forward(self, input: Tensor) -> Tensor:
         """
         Decompresses the weight, then runs the wrapped forward pass
         """
-        uncompressed_weight = self.compressor.decompress_module(self)
-        return linear(input, uncompressed_weight, self.bias)
+        if self._is_compressed:
+            self.weight = self.compressor.decompress_module(self)
+            self._is_compressed = False
+
+        return linear(input, self.weight, self.bias)
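
The change makes decompression a one-time cost: before this commit, decompress_module ran on every forward call, while now the first call decompresses the weight, caches it on self.weight, and clears the _is_compressed flag so later calls reuse the dense weight directly. Below is a minimal, self-contained sketch of that decompress-once pattern; DummyCompressor and its decompress_module() are hypothetical stand-ins for illustration, not the compressors shipped with compressed_tensors.

# Minimal sketch of the decompress-once pattern from this commit.
# DummyCompressor is a hypothetical stand-in; the real CompressedLinear
# obtains its compressor from the model's compression/quantization config.
import torch
from torch import Tensor
from torch.nn import Linear, Parameter
from torch.nn.functional import linear


class DummyCompressor:
    def decompress_module(self, module: Linear) -> Parameter:
        # A real compressor would rebuild a dense weight from the
        # compressed parameters stored on the module.
        return Parameter(module.weight.data.clone(), requires_grad=False)


class DecompressOnceLinear(Linear):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.compressor = DummyCompressor()
        self._is_compressed = True

    def forward(self, input: Tensor) -> Tensor:
        # Decompress only on the first call, then reuse the cached weight.
        if self._is_compressed:
            self.weight = self.compressor.decompress_module(self)
            self._is_compressed = False
        return linear(input, self.weight, self.bias)


layer = DecompressOnceLinear(4, 2)
out = layer(torch.randn(3, 4))   # first call pays the decompression cost
out = layer(torch.randn(3, 4))   # subsequent calls skip decompression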
