File tree: 1 file changed (+4 -1 lines changed)
vllm/model_executor/layers/fused_moe: 1 file changed (+4 -1 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -322,7 +322,7 @@ def cutlass_moe_fp8(
322
322
topk_ids : torch .Tensor ,
323
323
w1_scale : torch .Tensor ,
324
324
w2_scale : torch .Tensor ,
325
- per_act_token : bool ,
325
+ per_act_token : Optional [ bool ] = None ,
326
326
activation : str = "silu" ,
327
327
a1_scale : Optional [torch .Tensor ] = None ,
328
328
a2_scale : Optional [torch .Tensor ] = None ,
@@ -366,6 +366,9 @@ def cutlass_moe_fp8(
366
366
Returns:
367
367
- torch.Tensor: The fp16 output tensor after applying the MoE layer.
368
368
"""
369
+ if per_act_token is None :
370
+ per_act_token = a1_scale .numel () != 1 if a1_scale is not None else (
371
+ a2_scale .numel () != 1 if a2_scale is not None else False )
369
372
per_out_ch = w1_scale .numel () != w1_q .size (0 )
370
373
371
374
num_experts = global_num_experts if global_num_experts != - 1 else w1_q .size (
You can’t perform that action at this time.
0 commit comments