We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8d8ed0a, commit 9a9b8e9 (Copy full SHA for 9a9b8e9)
vllm/model_executor/layers/quantization/fp8.py
@@ -800,8 +800,10 @@ def select_gemm_impl(
  self.quant_config.weight_block_size, False)
  return BatchedTritonOrDeepGemmExperts(
  max_num_tokens=max_num_tokens_per_rank,
- world_size=prepare_finalize.world_size,
- dp_size=prepare_finalize.dp_size,
+ world_size=prepare_finalize.
+ world_size, # type: ignore [attr-defined]
+ dp_size=prepare_finalize.
+ dp_size, # type: ignore [attr-defined]
  use_fp8_w8a8=True,
  block_shape=self.quant_config.weight_block_size,
  per_act_token_quant=False,
0 commit comments