[NVFP4] Small Nits (#341)

dsikka · web-flow · commit d7ce8ec01b07 · 2025-06-04T15:53:07.000-05:00
* small nits

* remove the `QuantizationType` import; remove comment

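* fix test: call `.item()` on `max_tensor_value` before comparing with `pytest.approx` in `test_fused_global_scales`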
diff --git a/src/compressed_tensors/quantization/lifecycle/forward.py b/src/compressed_tensors/quantization/lifecycle/forward.py
@@ -21,7 +21,6 @@
     DynamicType,
     QuantizationArgs,
     QuantizationStrategy,
-    QuantizationType,
     round_to_quantized_type,
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus
@@ -405,7 +404,7 @@ def _quantize(
 
     # if a global scale is optionally provided, use it
     # to further scale the local `scale` parameter
-    if global_scale:
+    if global_scale is not None:
         scale = scale.to(global_scale.dtype) / global_scale
 
     scaled = x / scale
@@ -438,7 +437,7 @@ def _dequantize(
 
     # if a global scale is optionally provided, use it
     # to further scale the local `scale` parameter
-    if global_scale:
+    if global_scale is not None:
         scale = scale.to(global_scale.dtype) / global_scale
 
     dequant_value = x_q.to(scale.dtype)
diff --git a/src/compressed_tensors/quantization/utils/helpers.py b/src/compressed_tensors/quantization/utils/helpers.py
@@ -110,6 +110,7 @@ def calculate_qparams(
         else:
             scales = max_val_pos / (float(bit_range) / 2)
 
+        # TODO: in the case of MoEs, the global_scale may also be 0 and need to be clamped
         if scales.dtype == FP8_E4M3_DATA.dtype:
             # torch.clamp not supported for FP8
             # use the next largest fp8 value from 0
@@ -495,4 +496,4 @@ def generate_gparam(
     max_vals = torch.max(updated_max_val, torch.zeros_like(updated_max_val))
     max_val_pos = torch.max(torch.abs(min_vals), torch.abs(max_vals))
     global_scale = scale_data.max * quant_data.max / max_val_pos
-    return global_scale.to(dtype)
+    return global_scale.to(dtype).reshape([1])
diff --git a/tests/test_quantization/test_utils/test_helpers.py b/tests/test_quantization/test_utils/test_helpers.py
@@ -70,6 +70,6 @@ def test_fused_global_scales():
     min_val, max_val = torch.aminmax(layer.weight)
     global_scale = generate_gparam(min_val.data, max_val.data)
     # max value should be = (448 * 6) / global_scale
-    assert max_tensor_value == pytest.approx(
+    assert max_tensor_value.item() == pytest.approx(
         FP4_E2M1_DATA.max * FP8_E4M3_DATA.max / global_scale, abs=0.001
     )

Original file line number	Diff line number	Diff line change
`@@ -70,6 +70,6 @@ def test_fused_global_scales():`
`70`	`70`	`min_val, max_val = torch.aminmax(layer.weight)`
`71`	`71`	`global_scale = generate_gparam(min_val.data, max_val.data)`
`72`	`72`	`# max value should be = (448 * 6) / global_scale`
`73`		`- assert max_tensor_value == pytest.approx(`
	`73`	`+ assert max_tensor_value.item() == pytest.approx(`
`74`	`74`	`FP4_E2M1_DATA.max * FP8_E4M3_DATA.max / global_scale, abs=0.001`
`75`	`75`	`)`