[Feature] MoE alltoallv communication optimization for unquantized RL training scenarios & alltoallv support for dpo

weijinqian_v1 · weijinqian_v1 · commit cf3f1c803b6b · 2025-07-01T13:36:45.000+08:00
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
diff --git a/vllm_ascend/ops/fused_moe.py b/vllm_ascend/ops/fused_moe.py
@@ -565,10 +565,8 @@ def fused_experts_with_all2allv(token_dispatcher, probs, routing_map, hidden_sta
     (share_experts_output, dispatched_input, tokens_per_expert) = token_dispatcher.token_permutation(
         hidden_states, probs, routing_map
     )
-    hidden_states_wrapper = [dispatched_input]
-    del dispatched_input
 
-    expert_output = apply_mlp(hidden_states_wrapper,
+    expert_output = apply_mlp(hidden_states,
                               w1,
                               w2,
                               tokens_per_expert)