Commit 5318604
enforce that MXTensor scale dimensions are consistent with data
Summary:

Ensures that if the data dims are (M, K) then the scale dims are (M, K // block_size). Previously the scale dims were (M, K // block_size, 1). No logic change in surrounding code, but this is definitely more correct.

Test Plan:

```
pytest test/prototype/mx_formats
./test/prototype/mx_formats/test_mx_dtensor.sh
```

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: 50e45ce
ghstack-comment-id: 3049035188
Pull Request resolved: #2506
1 parent dfcfa56 commit 5318604
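For reference, a minimal sketch of the shape invariant this commit enforces, written in plain PyTorch rather than the torchao `MXTensor` API (the names `M`, `K`, `block_size`, and the amax-based scale below are illustrative assumptions, not the actual `to_mx` logic):

```python
import torch

# Minimal sketch (plain PyTorch, not the torchao MXTensor API) of the shape
# invariant this commit enforces: for data of shape (M, K), a per-block scale
# over blocks of size `block_size` along the last dim should have shape
# (M, K // block_size), with no trailing singleton dimension.
M, K, block_size = 4, 64, 32
data_hp = torch.randn(M, K)

# amax over each block; the reduction drops the block dim, so no trailing
# dim of size 1 is left -- analogous to the squeeze(-1) added in to_mx below.
scale = data_hp.reshape(M, K // block_size, block_size).abs().amax(dim=-1)

assert scale.shape == (M, K // block_size)                    # enforced shape
assert scale.unsqueeze(-1).shape == (M, K // block_size, 1)   # previous shape
```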

File tree

2 files changed: +9 −0 lines changed
test/prototype/mx_formats/test_mx_tensor.py

Lines changed: 8 additions & 0 deletions
@@ -70,6 +70,14 @@ def assert_sqnr_gt_threshold(orig, new, threshold):
     else:
         assert_sqnr_gt_threshold(data_hp, data_mx_dq, 13.0)
 
+    # verify that if data.shape is (M, K) then scale.shape is (M, K // block_size)
+    prev_dims, K = data_hp.shape[:-1], data_hp.shape[-1]
+    if elem_dtype is torch.float4_e2m1fn_x2:
+        assert data_mx._data.shape == (*prev_dims, K // 2)
+    else:
+        assert data_mx._data.shape == (*prev_dims, K)
+    assert data_mx._scale_e8m0.shape == (*prev_dims, K // block_size)
+
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
 @pytest.mark.parametrize("elem_dtype", SUPPORTED_ELEM_DTYPES)

torchao/prototype/mx_formats/mx_tensor.py

Lines changed: 1 addition & 0 deletions
@@ -331,6 +331,7 @@ def to_mx(
         raise AssertionError("unsupported")
 
     scale_e8m0_biased = scale_e8m0_biased.view(torch.float8_e8m0fnu)
+    scale_e8m0_biased = scale_e8m0_biased.squeeze(-1)
     return scale_e8m0_biased, data_lp
 
 