Skip to content

Commit 59abbd8

Browse files
[Fix] Allow kernel compilation for CUDA capability 8.7 (#19328)
Signed-off-by: Conroy Cheers <conroy@corncheese.org>
1 parent 95a6568 commit 59abbd8

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
308308
# Keep building Marlin for 9.0 as there are some group sizes and shapes that
309309
# are not supported by Machete yet.
310310
# 9.0 for latest bf16 atomicAdd PTX
311-
cuda_archs_loose_intersection(MARLIN_ARCHS "8.0;9.0+PTX" "${CUDA_ARCHS}")
311+
cuda_archs_loose_intersection(MARLIN_ARCHS "8.0;8.7;9.0+PTX" "${CUDA_ARCHS}")
312312
if (MARLIN_ARCHS)
313313

314314
#
@@ -454,7 +454,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
454454
# kernels for the remaining archs that are not already built for 3x.
455455
# (Build 8.9 for FP8)
456456
cuda_archs_loose_intersection(SCALED_MM_2X_ARCHS
457-
"7.5;8.0;8.9+PTX" "${CUDA_ARCHS}")
457+
"7.5;8.0;8.7;8.9+PTX" "${CUDA_ARCHS}")
458458
# subtract out the archs that are already built for 3x
459459
list(REMOVE_ITEM SCALED_MM_2X_ARCHS ${SCALED_MM_3X_ARCHS})
460460
if (SCALED_MM_2X_ARCHS)
@@ -684,7 +684,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
684684

685685
list(APPEND VLLM_MOE_EXT_SRC "${VLLM_MOE_WNA16_SRC}")
686686
# 9.0 for latest bf16 atomicAdd PTX
687-
cuda_archs_loose_intersection(MARLIN_MOE_ARCHS "8.0;9.0+PTX" "${CUDA_ARCHS}")
687+
cuda_archs_loose_intersection(MARLIN_MOE_ARCHS "8.0;8.7;9.0+PTX" "${CUDA_ARCHS}")
688688
if (MARLIN_MOE_ARCHS)
689689

690690
#

0 commit comments

Comments
 (0)