Skip to content

Commit 25ae593

Browse files
gshtras authored and hj-mistral committed
[ROCm] warpSize is being made non constexpr in ROCm 7.0 (vllm-project#20330)
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com> Signed-off-by: Himanshu Jaju <hj@mistral.ai>
1 parent 8527cd5 commit 25ae593

File tree

4 files changed

+6
-24
lines changed

4 files changed

+6
-24
lines changed

csrc/attention/attention_kernels.cuh

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424

2525
#include "attention_dtypes.h"
2626
#include "attention_utils.cuh"
27+
#include "cuda_compat.h"
2728

2829
#ifdef USE_ROCM
2930
#include <hip/hip_bf16.h>
@@ -33,12 +34,6 @@ typedef __hip_bfloat16 __nv_bfloat16;
3334
#include "../quantization/fp8/nvidia/quant_utils.cuh"
3435
#endif
3536

36-
#ifndef USE_ROCM
37-
#define WARP_SIZE 32
38-
#else
39-
#define WARP_SIZE warpSize
40-
#endif
41-
4237
#define MAX(a, b) ((a) > (b) ? (a) : (b))
4338
#define MIN(a, b) ((a) < (b) ? (a) : (b))
4439
#define DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
@@ -670,7 +665,6 @@ __global__ void paged_attention_v2_reduce_kernel(
670665

671666
} // namespace vllm
672667

673-
#undef WARP_SIZE
674668
#undef MAX
675669
#undef MIN
676670
#undef DIVIDE_ROUND_UP

csrc/attention/paged_attention_v1.cu

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,7 @@
1818
*/
1919

2020
#include "attention_kernels.cuh"
21-
22-
#ifndef USE_ROCM
23-
#define WARP_SIZE 32
24-
#else
25-
#define WARP_SIZE warpSize
26-
#endif
21+
#include "cuda_compat.h"
2722

2823
#define MAX(a, b) ((a) > (b) ? (a) : (b))
2924
#define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -187,7 +182,6 @@ void paged_attention_v1(
187182
CALL_V1_LAUNCHER_BLOCK_SIZE)
188183
}
189184

190-
#undef WARP_SIZE
191185
#undef MAX
192186
#undef MIN
193187
#undef DIVIDE_ROUND_UP

csrc/attention/paged_attention_v2.cu

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,7 @@
1818
*/
1919

2020
#include "attention_kernels.cuh"
21-
22-
#ifndef USE_ROCM
23-
#define WARP_SIZE 32
24-
#else
25-
#define WARP_SIZE warpSize
26-
#endif
21+
#include "cuda_compat.h"
2722

2823
#define MAX(a, b) ((a) > (b) ? (a) : (b))
2924
#define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -197,7 +192,6 @@ void paged_attention_v2(
197192
CALL_V2_LAUNCHER_BLOCK_SIZE)
198193
}
199194

200-
#undef WARP_SIZE
201195
#undef MAX
202196
#undef MIN
203197
#undef DIVIDE_ROUND_UP

csrc/cuda_compat.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,10 @@
44
#include <hip/hip_runtime.h>
55
#endif
66

7-
#ifndef USE_ROCM
8-
#define WARP_SIZE 32
7+
#if defined(USE_ROCM) && defined(__GFX9__)
8+
#define WARP_SIZE 64
99
#else
10-
#define WARP_SIZE warpSize
10+
#define WARP_SIZE 32
1111
#endif
1212

1313
#ifndef USE_ROCM

0 commit comments

Comments
 (0)