Skip to content

Commit fc61195

Browse files
committed
fix LM Eval Small Models test failure
Signed-off-by: Bill Nell <bnell@redhat.com>
1 parent 07d96ca commit fc61195

File tree

2 files changed

+16
-10
lines changed

2 files changed

+16
-10
lines changed

vllm/model_executor/layers/fused_moe/config.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -382,14 +382,20 @@ def make(
382382
per_out_ch_quant = (
383383
weight_quant.strategy == QuantizationStrategy.CHANNEL)
384384

385-
assert quant_dtype is not None
386-
387-
_quant_config = FusedMoEQuantConfig(
388-
quant_dtype=quant_dtype,
389-
per_act_token_quant=per_act_token_quant,
390-
per_out_ch_quant=per_out_ch_quant,
391-
block_shape=block_shape,
392-
)
385+
if quant_dtype is not None:
386+
_quant_config = FusedMoEQuantConfig(
387+
quant_dtype=quant_dtype,
388+
per_act_token_quant=per_act_token_quant,
389+
per_out_ch_quant=per_out_ch_quant,
390+
block_shape=block_shape,
391+
)
392+
else:
393+
logger.warning_once("MoE DP setup unable to determine "
394+
"quantization scheme or unsupported "
395+
"quantization type. This model will "
396+
"not run with DP enabled.")
397+
398+
_quant_config = FusedMoEQuantConfig()
393399
else:
394400
_quant_config = quant_config
395401

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -799,8 +799,8 @@ def select_gemm_impl(
799799
self.quant_config.weight_block_size, False)
800800
return BatchedTritonOrDeepGemmExperts(
801801
max_num_tokens=max_num_tokens_per_rank,
802-
world_size=moe.world_size,
803-
dp_size=moe.dp_size,
802+
world_size=prepare_finalize.world_size,
803+
dp_size=prepare_finalize.dp_size,
804804
use_fp8_w8a8=True,
805805
block_shape=self.quant_config.weight_block_size,
806806
per_act_token_quant=False,

0 commit comments

Comments (0)