Skip to content

Commit c409ff6

Browse files
xw285cornell and facebook-github-bot
authored and committed
Add gfx950 to rocm 7.0 (#4443)
Summary: Pull Request resolved: #4443 X-link: facebookresearch/FBGEMM#1505 A few changes are needed to make ROCm 7.0 compile: * Add gfx950 when compiling with ROCm 7.0. * Select the FP8 format based on HIP_FP8_TYPE_OCP (whose value depends on the GPU arch): when it is set, use the OCP (non-fnuz) FP8 types; otherwise use the fnuz FP8 types. See https://www.internalfb.com/code/fbsource/[32c6a3c14fdf287629ada43ec53cc7eb1d1ed55d]/third-party/tp2/rocm/7.0.0/src/include/hip/amd_detail/amd_hip_fp8.h?lines=41-47 Reviewed By: q10 Differential Revision: D77711510 fbshipit-source-id: b847c58c6095e417cf8a6b101cfbb1d9cf670509
1 parent 5938645 commit c409ff6

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cu

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,16 +89,25 @@ namespace fbgemm_gpu {
8989
// outputs are of size float[D]
9090

9191
#if (defined(USE_ROCM) && ROCM_VERSION >= 60200)
92+
#if HIP_FP8_TYPE_OCP
93+
using __nv_fp8x4_e4m3 = __hip_fp8x4_e4m3;
94+
using __nv_fp8x2_e4m3 = __hip_fp8x2_e4m3;
95+
using __nv_fp8_e4m3 = __hip_fp8_e4m3;
96+
using __nv_fp8_e5m2 = __hip_fp8_e5m2;
97+
#define torch_fp8_e4m3 at::kFloat8_e4m3fn
98+
#define torch_fp8_e5m2 at::kFloat8_e5m2
99+
#else // HIP_FP8_TYPE_OCP
92100
using __nv_fp8x4_e4m3 = __hip_fp8x4_e4m3_fnuz;
93101
using __nv_fp8x2_e4m3 = __hip_fp8x2_e4m3_fnuz;
94102
using __nv_fp8_e4m3 = __hip_fp8_e4m3_fnuz;
95103
using __nv_fp8_e5m2 = __hip_fp8_e5m2_fnuz;
96104
#define torch_fp8_e4m3 at::kFloat8_e4m3fnuz
97105
#define torch_fp8_e5m2 at::kFloat8_e5m2fnuz
98-
#else
106+
#endif // HIP_FP8_TYPE_OCP
107+
#else // USE_ROCM
99108
#define torch_fp8_e4m3 at::kFloat8_e4m3fn
100109
#define torch_fp8_e5m2 at::kFloat8_e5m2
101-
#endif
110+
#endif // USE_ROCM
102111

103112
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12080)
104113
#include <torch/all.h>

fbgemm_gpu/include/fbgemm_gpu/utils/vec_quant.cuh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@
3232
#endif
3333

3434
#if (defined(USE_ROCM) && ROCM_VERSION >= 60200)
35+
#if HIP_FP8_TYPE_OCP
36+
using __nv_fp8_e4m3 = __hip_fp8_e4m3;
37+
#else // HIP_FP8_TYPE_OCP
3538
using __nv_fp8_e4m3 = __hip_fp8_e4m3_fnuz;
36-
#endif
39+
#endif // HIP_FP8_TYPE_OCP
40+
#endif // (defined(USE_ROCM) && ROCM_VERSION >= 60200)
3741

3842
namespace fbgemm_gpu {
3943

0 commit comments

Comments
 (0)