We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fc61195 · commit ae102ba — Copy full SHA for ae102ba
vllm/model_executor/layers/quantization/fp8.py
@@ -799,8 +799,10 @@ def select_gemm_impl(
799
self.quant_config.weight_block_size, False)
800
return BatchedTritonOrDeepGemmExperts(
801
max_num_tokens=max_num_tokens_per_rank,
802
- world_size=prepare_finalize.world_size,
803
- dp_size=prepare_finalize.dp_size,
+ world_size=prepare_finalize.
+ world_size, # type: ignore [attr-defined]
804
+ dp_size=prepare_finalize.
805
+ dp_size, # type: ignore [attr-defined]
806
use_fp8_w8a8=True,
807
block_shape=self.quant_config.weight_block_size,
808
per_act_token_quant=False,
0 commit comments