We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 468e240, commit 37bd8d6
vllm/utils/deep_gemm.py
@@ -99,7 +99,7 @@ def fp8_m_grouped_gemm_nt_masked(*args, **kwargs):
99
100
def per_block_cast_to_fp8(x, *args, **kwargs):
101
if _per_block_cast_impl is not None and is_blackwell_deep_gemm_used():
102
- return _per_block_cast_impl(x)
+ return _per_block_cast_impl(x, use_ue8m0=True)
103
# TODO: refactor the `per_block_cast_to_fp8` from tests to vllm utils
104
from tests.kernels.quant_utils import per_block_cast_to_fp8 as _pbcf
105
return _pbcf(x, *args, **kwargs)
0 commit comments