@@ -488,22 +488,27 @@ def __init__(self, quant_config: Fp8Config):
488
488
logger .warning_once ("Failed to import DeepGemm kernels." )
489
489
elif not self .block_quant :
490
490
logger .warning_once ("Model is not block quantized. Not using "
491
- " DeepGemm kernels" )
491
+ "DeepGemm kernels" )
492
492
elif (current_platform .is_cuda ()
493
- and current_platform .has_device_capability (90 )):
493
+ and current_platform .is_device_capability (90 )):
494
494
logger .info_once ("Using DeepGemm kernels for Fp8MoEMethod." )
495
495
self .allow_deep_gemm = True
496
+ elif (current_platform .is_cuda ()
497
+ and is_blackwell_deep_gemm_used ()):
498
+ logger .info_once ("Using DeepGemm SM100 kernels for "
499
+ "Fp8MoEMethod." )
500
+ self .allow_deep_gemm = True
496
501
else :
497
502
logger .warning_once (
498
503
"DeepGemm not supported on the current platform." )
499
504
500
505
# Check for CutlassBlockScaledGroupedGemm support.
501
506
self .allow_cutlass_block_scaled_grouped_gemm = False
502
507
if not self .block_quant :
503
- logger .warning_once ("Model is not block quantized. Not using "
504
- "CutlassBlockScaledGroupedGemm kernels" )
508
+ logger .debug_once ("Model is not block quantized. Not using "
509
+ "CutlassBlockScaledGroupedGemm kernels" )
505
510
elif (current_platform .is_cuda ()
506
- and current_platform .has_device_capability (100 )):
511
+ and current_platform .is_device_capability (100 )):
507
512
logger .info_once (
508
513
"Using CutlassBlockScaledGroupedGemm kernels for Fp8MoEMethod."
509
514
)
0 commit comments