
Commit 79437ef
update
1 parent 974953c

3 files changed: +19 -19 lines changed

src/compressed_tensors/compressors/model_compressors/model_compressor.py

Lines changed: 3 additions & 3 deletions

@@ -374,9 +374,9 @@ def compress(
 
         compressed_state_dict = state_dict
 
-        quantized_modules_to_args: Dict[
-            str, QuantizationArgs
-        ] = map_modules_to_quant_args(model)
+        quantized_modules_to_args: Dict[str, QuantizationArgs] = (
+            map_modules_to_quant_args(model)
+        )
 
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(

src/compressed_tensors/quantization/lifecycle/initialize.py

Lines changed: 5 additions & 6 deletions

@@ -22,6 +22,8 @@
     wrap_module_forward_quantized,
 )
 from compressed_tensors.quantization.quant_args import (
+    FP4_NVFP4_DATA,
+    FP8_E4M3_DATA,
     ActivationOrdering,
     QuantizationArgs,
     QuantizationStrategy,
@@ -31,8 +33,6 @@
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
 from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme
 from compressed_tensors.utils import (
-    FP4_NVFP4_DATA,
-    FP8_E4M3_DATA,
     disable_hf_hook,
     has_offloaded_params,
     register_offload_parameter,
@@ -176,11 +176,10 @@ def _initialize_scale_zero_point(
         # create and attach nvfp4 data
         tensor_amax = torch.abs(module.weight.data).max().to(torch.float32)
         # Setting data for now - could possibly be handled later in the pipeline
-        values = FP8_E4M3_DATA.max * FP4_NVFP4_DATA.max / tensor_amax
+        value = FP8_E4M3_DATA.max * FP4_NVFP4_DATA.max / tensor_amax
+        value = value.to(torch.float32).to(device)
         # Assuming the global scale can be torch.float16/bfloat16/module weight dtype and not only torch.float32?
-        init_global_scale = Parameter(
-            value, dtype=torch.float32, device=device, requires_grad=False
-        )
+        init_global_scale = Parameter(value, requires_grad=False)
         register_offload_parameter(
             module, f"f{base_name}_global_scale", init_global_scale
         )
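The initialize.py hunk fixes how the NVFP4 global scale is constructed: torch.nn.Parameter accepts only (data, requires_grad), so the removed call passing dtype= and device= keyword arguments could not have worked; the tensor is now cast and moved before being wrapped. A minimal standalone sketch of the computation, assuming FP8_E4M3_DATA.max is 448.0 (the float8_e4m3fn maximum) and using a made-up weight tensor in place of module.weight.data:

    import torch
    from torch.nn import Parameter

    FP8_E4M3_MAX = 448.0  # assumed max of FP8_E4M3_DATA (float8_e4m3fn)
    FP4_NVFP4_MAX = 6.0   # max of FP4_NVFP4_DATA per the quant_args.py diff

    weight = torch.randn(64, 64)  # stand-in for module.weight.data
    device = weight.device

    # The global scale maps the observed absolute maximum of the weights
    # into the combined FP8 (per-block scale) x FP4 (value) dynamic range.
    tensor_amax = torch.abs(weight).max().to(torch.float32)
    value = FP8_E4M3_MAX * FP4_NVFP4_MAX / tensor_amax

    # Cast and place the tensor first; Parameter has no dtype/device kwargs.
    value = value.to(torch.float32).to(device)
    global_scale = Parameter(value, requires_grad=False)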

src/compressed_tensors/quantization/quant_args.py

Lines changed: 11 additions & 10 deletions

@@ -34,6 +34,17 @@
     "ActivationOrdering",
 ]
 
+
+@dataclass
+class FloatArgs:
+    exponent: int
+    mantissa: int
+    bits: int
+    max: float
+    min: float
+    dtype: Optional[torch.dtype] = None
+
+
 # TODO: Remove soon in favour of a more descriptive FloatArgs
 FP8_DTYPE = torch.float8_e4m3fn
 
@@ -48,16 +59,6 @@
 FP4_NVFP4_DATA = FloatArgs(exponent=2, mantissa=1, bits=4, max=6.0, min=-6.0)
 
 
-@dataclass
-class FloatArgs:
-    exponent: int
-    mantissa: int
-    bits: int
-    max: float
-    min: float
-    dtype: Optional[torch.dtype] = None
-
-
 class QuantizationType(str, Enum):
     """
     Enum storing quantization type options
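The quant_args.py hunks move the FloatArgs dataclass above the module-level constants that instantiate it. Python executes a module body top to bottom at import time, so FP8_E4M3_DATA = FloatArgs(...) raises NameError while the class is still defined further down the file; this relocation is also why initialize.py now imports the constants from quantization.quant_args instead of compressed_tensors.utils. A self-contained sketch of the ordering issue, using the definitions from the diff:

    from dataclasses import dataclass
    from typing import Optional

    import torch


    @dataclass
    class FloatArgs:
        exponent: int
        mantissa: int
        bits: int
        max: float
        min: float
        dtype: Optional[torch.dtype] = None


    # Succeeds only because FloatArgs is already defined above; if the class
    # body sat below this line, importing the module would raise NameError.
    FP4_NVFP4_DATA = FloatArgs(exponent=2, mantissa=1, bits=4, max=6.0, min=-6.0)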
