Skip to content

Commit c1e84cc

Browse files
authored
enforce that MXTensor scale dimensions are consistent with data (#2506)
* Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned] * Update [ghstack-poisoned]
1 parent e675ffd commit c1e84cc

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

test/prototype/mx_formats/test_mx_tensor.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@ def assert_sqnr_gt_threshold(orig, new, threshold):
     else:
         assert_sqnr_gt_threshold(data_hp, data_mx_dq, 13.0)

+    # verify that if data.shape is (M, K) then scale.shape is (M, K // block_size)
+    prev_dims, K = data_hp.shape[:-1], data_hp.shape[-1]
+    if elem_dtype is torch.float4_e2m1fn_x2:
+        assert data_mx._data.shape == (*prev_dims, K // 2)
+    else:
+        assert data_mx._data.shape == (*prev_dims, K)
+    assert data_mx._scale_e8m0.shape == (*prev_dims, K // block_size)
+

 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
 @pytest.mark.parametrize("elem_dtype", SUPPORTED_ELEM_DTYPES)

torchao/prototype/mx_formats/mx_tensor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ def to_mx(
         raise AssertionError("unsupported")

     scale_e8m0_biased = scale_e8m0_biased.view(torch.float8_e8m0fnu)
+    scale_e8m0_biased = scale_e8m0_biased.squeeze(-1)
     return scale_e8m0_biased, data_lp

0 commit comments

Comments
 (0)