Add gfx950 to rocm 7.0 (#4443)

xw285cornell · facebook-github-bot · commit c409ff6a81f7 · 2025-07-03T12:13:34.000-07:00
Summary: Pull Request resolved: #4443 X-link: facebookresearch/FBGEMM#1505 A few changes are needed to make ROCm 7.0 compile: * Add gfx950 when compiling with ROCm 7.0 * Depending on HIP_FP8_TYPE_OCP (which is set according to the GPU arch), use either the non-uz (OCP) or the uz (fnuz) version of the fp8 format: when HIP_FP8_TYPE_OCP is nonzero the non-uz types are used, otherwise the uz types are used https://www.internalfb.com/code/fbsource/[32c6a3c14fdf287629ada43ec53cc7eb1d1ed55d]/third-party/tp2/rocm/7.0.0/src/include/hip/amd_detail/amd_hip_fp8.h?lines=41-47 Reviewed By: q10 Differential Revision: D77711510 fbshipit-source-id: b847c58c6095e417cf8a6b101cfbb1d9cf670509
diff --git a/fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cu b/fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cu
@@ -89,16 +89,25 @@ namespace fbgemm_gpu {
 // outputs are of size float[D]
 
 #if (defined(USE_ROCM) && ROCM_VERSION >= 60200)
+#if HIP_FP8_TYPE_OCP
+using __nv_fp8x4_e4m3 = __hip_fp8x4_e4m3;
+using __nv_fp8x2_e4m3 = __hip_fp8x2_e4m3;
+using __nv_fp8_e4m3 = __hip_fp8_e4m3;
+using __nv_fp8_e5m2 = __hip_fp8_e5m2;
+#define torch_fp8_e4m3 at::kFloat8_e4m3fn
+#define torch_fp8_e5m2 at::kFloat8_e5m2
+#else // HIP_FP8_TYPE_OCP
 using __nv_fp8x4_e4m3 = __hip_fp8x4_e4m3_fnuz;
 using __nv_fp8x2_e4m3 = __hip_fp8x2_e4m3_fnuz;
 using __nv_fp8_e4m3 = __hip_fp8_e4m3_fnuz;
 using __nv_fp8_e5m2 = __hip_fp8_e5m2_fnuz;
 #define torch_fp8_e4m3 at::kFloat8_e4m3fnuz
 #define torch_fp8_e5m2 at::kFloat8_e5m2fnuz
-#else
+#endif // HIP_FP8_TYPE_OCP
+#else // USE_ROCM
 #define torch_fp8_e4m3 at::kFloat8_e4m3fn
 #define torch_fp8_e5m2 at::kFloat8_e5m2
-#endif
+#endif // USE_ROCM
 
 #if defined(CUDA_VERSION) && (CUDA_VERSION >= 12080)
 #include <torch/all.h>
diff --git a/fbgemm_gpu/include/fbgemm_gpu/utils/vec_quant.cuh b/fbgemm_gpu/include/fbgemm_gpu/utils/vec_quant.cuh
@@ -32,8 +32,12 @@
 #endif
 
 #if (defined(USE_ROCM) && ROCM_VERSION >= 60200)
+#if HIP_FP8_TYPE_OCP
+using __nv_fp8_e4m3 = __hip_fp8_e4m3;
+#else // HIP_FP8_TYPE_OCP
 using __nv_fp8_e4m3 = __hip_fp8_e4m3_fnuz;
-#endif
+#endif // HIP_FP8_TYPE_OCP
+#endif // (defined(USE_ROCM) && ROCM_VERSION >= 60200)
 
 namespace fbgemm_gpu {