Skip to content

Commit d9333aa

Browse files
Improvement for torch.compile support on Params4bit (#1673)
1 parent 11df723 commit d9333aa

File tree

2 files changed

+1
-11
lines changed

2 files changed

+1
-11
lines changed

bitsandbytes/nn/modules.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,13 +291,6 @@ def from_prequantized(
291291

292292
return self
293293

294-
@classmethod
295-
def __torch_function__(cls, func, types, args=(), kwargs=None):
296-
if kwargs is None:
297-
kwargs = {}
298-
with torch._C.DisableTorchFunctionSubclass():
299-
return func(*args, **kwargs)
300-
301294
def _quantize(self, device):
302295
w = self.data.contiguous().to(device)
303296
w_4bit, quant_state = bnb.functional.quantize_4bit(

tests/test_linear4bit.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,7 @@ def test_params4bit_real_serialization(device, quant_type, blocksize, compress_s
270270
@pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode"))
271271
@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4")
272272
def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_statistics, bias, fullgraph, mode):
273-
if device == "cpu" and quant_type == "fp4":
274-
pytest.skip("FP4 is not supported for CPU")
275-
276-
if fullgraph and torch.__version__ < (2, 8):
273+
if fullgraph and torch.__version__ < (2, 8, 0, "dev"):
277274
pytest.skip("fullgraph mode requires torch 2.8 or higher")
278275

279276
if device == "cuda" and platform.system() == "Windows":

0 commit comments

Comments (0)