Commit 212d912

use correct fp8 quantization dtype for AMD GPU

Differential Revision: D75021458
Pull Request resolved: #2225
Parent: 1bbeed1

2 files changed (+18, -7 lines)
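Background: NVIDIA GPUs implement the OCP fp8 formats (torch.float8_e4m3fn / torch.float8_e5m2), while AMD's MI300-series GPUs implement the "fnuz" variants (torch.float8_e4m3fnuz / torch.float8_e5m2fnuz), which use a different exponent bias and reserve a single encoding for NaN, with no negative zero. Hardcoding torch.float8_e4m3fn therefore produces the wrong quantization dtype on MI300. The diff routes the defaults through e4m3_dtype / e5m2_dtype from torchao.float8.config, which are expected to resolve per platform, roughly along these lines (a minimal sketch; the selection logic shown here is an assumption, not the actual contents of that file):

```python
import torch
from torchao.utils import is_MI300, is_ROCM

# Sketch of platform-conditional fp8 dtype selection, assuming the helpers
# is_ROCM/is_MI300 behave as their names suggest:
if is_ROCM() and is_MI300():
    # AMD MI300 uses the "fnuz" fp8 variants
    e4m3_dtype = torch.float8_e4m3fnuz
    e5m2_dtype = torch.float8_e5m2fnuz
else:
    # OCP fp8 formats used by NVIDIA GPUs
    e4m3_dtype = torch.float8_e4m3fn
    e5m2_dtype = torch.float8_e5m2
```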

test/float8/test_base.py (10 additions, 0 deletions)

```diff
@@ -56,6 +56,7 @@
     tensor_to_scale,
 )
 from torchao.testing.float8.test_utils import get_test_float8_linear_config
+from torchao.utils import is_MI300, is_ROCM
 
 random.seed(0)
 torch.manual_seed(0)
@@ -271,6 +272,15 @@ def test_axiswise_gemm(self, a_shape, a_granularity, b_granularity):
         sqnr = compute_error(c_ref, c_fp8_compute)
         assert sqnr >= 25.0
 
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_fp8_dtype(
+        self,
+    ):
+        if is_ROCM() and is_MI300():
+            assert e4m3_dtype == torch.float8_e4m3fnuz
+        else:
+            assert e4m3_dtype == torch.float8_e4m3fn
+
 
 class TestFloat8Linear:
     def _test_linear_impl(
```
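The new test gates on is_ROCM() and is_MI300() from torchao.utils; their implementations are not part of this diff. A plausible sketch of what such checks could look like (the gcnArchName match in particular is an assumption):

```python
import torch

def is_ROCM() -> bool:
    # PyTorch built against ROCm exposes torch.version.hip
    return torch.version.hip is not None and torch.cuda.is_available()

def is_MI300() -> bool:
    # MI300-series (CDNA3) devices report a gfx94x architecture name
    arch = torch.cuda.get_device_properties(0).gcnArchName if is_ROCM() else ""
    return "gfx94" in arch
```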

torchao/quantization/quant_api.py (8 additions, 7 deletions)

```diff
@@ -52,6 +52,7 @@
     make_packed_linear_int8_dynamic_activation_intx_weight_tensor,
 )
 from torchao.dtypes.utils import Layout
+from torchao.float8.config import e4m3_dtype, e5m2_dtype
 from torchao.float8.float8_linear import Float8Linear
 from torchao.float8.inference import Float8MMConfig
 from torchao.quantization.linear_activation_weight_observed_tensor import (
@@ -1396,7 +1397,7 @@ class Float8WeightOnlyConfig(AOBaseConfig):
     The actual matmul will be computed in original precision of the weight tensor.
     """
 
-    weight_dtype: torch.dtype = torch.float8_e4m3fn
+    weight_dtype: torch.dtype = e4m3_dtype
     set_inductor_config: bool = True
 
 
@@ -1569,8 +1570,8 @@ class Float8DynamicActivationFloat8WeightConfig(AOBaseConfig):
 
     """
 
-    activation_dtype: torch.dtype = torch.float8_e4m3fn
-    weight_dtype: torch.dtype = torch.float8_e4m3fn
+    activation_dtype: torch.dtype = e4m3_dtype
+    weight_dtype: torch.dtype = e4m3_dtype
     granularity: Optional[
         Union[_fp8_granularities, Tuple[_fp8_granularities, _fp8_granularities]]
     ] = None
@@ -1660,8 +1661,8 @@ class Float8DynamicActivationFloat8SemiSparseWeightConfig(AOBaseConfig):
     """
 
     layout: Layout = CutlassSemiSparseLayout()
-    activation_dtype: torch.dtype = torch.float8_e5m2
-    weight_dtype: torch.dtype = torch.float8_e4m3fn
+    activation_dtype: torch.dtype = e5m2_dtype
+    weight_dtype: torch.dtype = e4m3_dtype
 
 
 @register_quantize_module_handler(Float8DynamicActivationFloat8SemiSparseWeightConfig)
@@ -1706,8 +1707,8 @@ class Float8StaticActivationFloat8WeightConfig(AOBaseConfig):
     """
 
     scale: torch.Tensor
-    activation_dtype: torch.dtype = torch.float8_e4m3fn
-    weight_dtype: torch.dtype = torch.float8_e4m3fn
+    activation_dtype: torch.dtype = e4m3_dtype
+    weight_dtype: torch.dtype = e4m3_dtype
     granularity: Optional[
         Union[_fp8_granularities, Tuple[_fp8_granularities, _fp8_granularities]]
     ] = None
```
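With the defaults routed through e4m3_dtype / e5m2_dtype, code that constructs these configs without explicit dtypes picks up the platform-appropriate format automatically. An illustrative usage sketch (the model and shapes are made up; assumes a recent torchao where quantize_ accepts a config object):

```python
import torch
import torch.nn as nn
from torchao.quantization import quantize_
from torchao.quantization.quant_api import Float8DynamicActivationFloat8WeightConfig

# Toy model; any module containing nn.Linear layers works
model = nn.Sequential(nn.Linear(64, 64)).to(device="cuda", dtype=torch.bfloat16)

# No dtypes passed: activation_dtype/weight_dtype default to e4m3_dtype,
# i.e. float8_e4m3fnuz on ROCm MI300 and float8_e4m3fn elsewhere
quantize_(model, Float8DynamicActivationFloat8WeightConfig())
```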
