Update NF4 tests to use to_nf4; move block-size defaults from NF4Tensor.from_tensor to to_nf4

cpuhrsch · cpuhrsch · commit 5b3ee6e40429 · 2024-03-12T02:39:56.000Z
diff --git a/test/dtypes/test_uint4.py b/test/dtypes/test_uint4.py
@@ -18,7 +18,7 @@
     compute_error,
 )
 from torchao.quantization.quant_api import (
-    replace_with_custom_fn_if_matches_filter,
+    _replace_with_custom_fn_if_matches_filter,
 )
 from torch.ao.quantization.observer import ObserverBase
 from torch import nn
@@ -36,7 +36,7 @@ def fn(mod):
         mod.weight = torch.nn.Parameter(PerChannelSymmetricWeightUInt4Tensor.from_float(mod.weight), requires_grad=False)
         return mod
 
-    replace_with_custom_fn_if_matches_filter(
+    _replace_with_custom_fn_if_matches_filter(
         model,
         lambda mod: fn(mod),
         lambda mod, fqn: isinstance(mod, torch.nn.Linear),
diff --git a/test/modules/test_nf4_linear.py b/test/modules/test_nf4_linear.py
@@ -4,7 +4,7 @@
 import torch
 from torch import nn
 from torch.testing._internal.common_utils import TestCase
-from torchao.dtypes.nf4tensor import linear_nf4, NF4Tensor
+from torchao.dtypes.nf4tensor import linear_nf4, NF4Tensor, to_nf4
 import torch.nn.functional as F
 import io
 from collections import OrderedDict
@@ -48,7 +48,7 @@ class TestNF4Linear(TestCase):
     class TestMod(nn.Module):
         def __init__(self, tensor, block_size, scaler_block_size):
             super().__init__()
-            self.param = torch.nn.Parameter(NF4Tensor.from_tensor(tensor, block_size, scaler_block_size))
+            self.param = torch.nn.Parameter(to_nf4(tensor, block_size, scaler_block_size))
 
     def save_state_dict_to_buffer(self, state_dict: OrderedDict):
         buffer = io.BytesIO()
@@ -57,9 +57,7 @@ def save_state_dict_to_buffer(self, state_dict: OrderedDict):
         return buffer
 
     def test_register_nf4_as_param(self):
-        nf4_tensor = NF4Tensor.from_tensor(
-            inpt_tensor=torch.randn(512, 512, dtype=torch.bfloat16)
-        )
+        nf4_tensor = to_nf4(torch.randn(512, 512, dtype=torch.bfloat16))
 
         # Would raise if nn.Parameter registration fails, such as no detach()
         # impl when calling __torch_dispatch__
@@ -69,18 +67,14 @@ def test_register_nf4_as_param(self):
     def test_output_bf16(self):
         # Test to ensure W4 A16 produces A16
         inp = torch.randn(2, 512, dtype=torch.bfloat16, requires_grad=True)
-        nf4_tensor = NF4Tensor.from_tensor(
-            inpt_tensor=torch.randn(512, 512, dtype=torch.bfloat16)
-        )
+        nf4_tensor = to_nf4(torch.randn(512, 512, dtype=torch.bfloat16))
         out = linear_nf4(input=inp, weight=nf4_tensor)
         assert out.dtype == torch.bfloat16
 
     def test_backward_bf16(self):
         # Test to ensure backward pass gives activation a bf16 gradient and no gradient
         # to the linear's weight, as it is frozen.
-        nf4_tensor = NF4Tensor.from_tensor(
-            inpt_tensor=torch.randn(512, 512, dtype=torch.bfloat16)
-        )
+        nf4_tensor = to_nf4(torch.randn(512, 512, dtype=torch.bfloat16))
         inp = torch.randn(2, 512, dtype=torch.bfloat16, requires_grad=True)
         linear_nf4(inp, nf4_tensor).sum().backward()
         assert inp.grad is not None and inp.grad.dtype == torch.bfloat16
@@ -94,7 +88,7 @@ def test_reconstruction_qlora_vs_bnb(self):
         device = "cuda"
         embed_dim = 512
         input_weight = _build_input_weight(embed_dim, device)
-        nf4_weight = NF4Tensor.from_tensor(input_weight)
+        nf4_weight = to_nf4(input_weight)
         bnb_linear = _build_bnb_linear(input_weight, device)
         bnb_reconstruction = bnb_linear(
             torch.eye(embed_dim, embed_dim, dtype=torch.bfloat16, device=device)
@@ -118,7 +112,7 @@ def test_nf4_bnb_linear(self):
         dim = 512
         device = "cuda"
         input_weight = _build_input_weight(dim, device)
-        nf4_weight = NF4Tensor.from_tensor(input_weight)
+        nf4_weight = to_nf4(input_weight)
         bnb_linear = _build_bnb_linear(input_weight, device)
 
         inp = torch.randn(2, 512, dtype=torch.bfloat16, device="cuda")
diff --git a/torchao/dtypes/nf4tensor.py b/torchao/dtypes/nf4tensor.py
@@ -168,8 +168,8 @@ def __init__(
     def from_tensor(
         cls,
         inpt_tensor: torch.Tensor,
-        block_size: int = 64,
-        scaler_block_size: int = 256,
+        block_size: int,
+        scaler_block_size: int,
     ):
         assert inpt_tensor.dtype == torch.bfloat16
         assert (
@@ -510,6 +510,8 @@ def linear_nf4(input: torch.Tensor, weight: NF4Tensor) -> torch.Tensor:
     """
     return LinearNF4.apply(input, weight)
 
-def to_nf4(tensor):
+def to_nf4(tensor,
+           block_size: int = 64,
+           scaler_block_size: int = 256):
     tensor1 = tensor.to(torch.bfloat16)
-    return NF4Tensor.from_tensor(tensor1)
+    return NF4Tensor.from_tensor(tensor1, block_size, scaler_block_size)