File tree Expand file tree Collapse file tree 2 files changed +16
-10
lines changed
vllm/model_executor/layers Expand file tree Collapse file tree 2 files changed +16
-10
lines changed Original file line number Diff line number Diff line change @@ -382,14 +382,20 @@ def make(
382
382
per_out_ch_quant = (
383
383
weight_quant .strategy == QuantizationStrategy .CHANNEL )
384
384
385
- assert quant_dtype is not None
386
-
387
- _quant_config = FusedMoEQuantConfig (
388
- quant_dtype = quant_dtype ,
389
- per_act_token_quant = per_act_token_quant ,
390
- per_out_ch_quant = per_out_ch_quant ,
391
- block_shape = block_shape ,
392
- )
385
+ if quant_dtype is not None :
386
+ _quant_config = FusedMoEQuantConfig (
387
+ quant_dtype = quant_dtype ,
388
+ per_act_token_quant = per_act_token_quant ,
389
+ per_out_ch_quant = per_out_ch_quant ,
390
+ block_shape = block_shape ,
391
+ )
392
+ else :
393
+ logger .warning_once ("MoE DP setup unable to determine "
394
+ "quantization scheme or unsupported "
395
+ "quantization type. This model will "
396
+ "not run with DP enabled." )
397
+
398
+ _quant_config = FusedMoEQuantConfig ()
393
399
else :
394
400
_quant_config = quant_config
395
401
Original file line number Diff line number Diff line change @@ -799,8 +799,8 @@ def select_gemm_impl(
799
799
self .quant_config .weight_block_size , False )
800
800
return BatchedTritonOrDeepGemmExperts (
801
801
max_num_tokens = max_num_tokens_per_rank ,
802
- world_size = moe .world_size ,
803
- dp_size = moe .dp_size ,
802
+ world_size = prepare_finalize .world_size ,
803
+ dp_size = prepare_finalize .dp_size ,
804
804
use_fp8_w8a8 = True ,
805
805
block_shape = self .quant_config .weight_block_size ,
806
806
per_act_token_quant = False ,
You can’t perform that action at this time.
0 commit comments