Fix fused MoE state selection: use All2AllSeq for prefill when VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ is set

harygo22 · weijinqian_v1 · commit d76c4fba50ad · 2025-07-09T16:28:08.000+08:00
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py
@@ -22,7 +22,8 @@ def get_fused_moe_state(ep_size: int, with_prefill: bool):
     if ep_size == 1:
         return FusedMoEState.AllGather
     elif envs_ascend.VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ:
-        return FusedMoEState.All2AllSeq if ep_size < 16 else FusedMoEState.MC2
+        # MC2 Dispatch/Combine performs better than alltoall_seq in decoding stage.
+        return FusedMoEState.All2AllSeq if (ep_size < 16 or with_prefill) else FusedMoEState.MC2
     # NOTE: mc2 need ep_size >= 16 & all2all can't use in torchair graph.
     elif ep_size < 16 or with_prefill:
         return FusedMoEState.All2All