
Commit 706de02

[fix] fix compatibility for non-EPLB scenarios (#1142)

### What this PR does / why we need it?
Fix the incompatibility with non-EPLB scenarios introduced in #1116.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Tested with online serving and e2e CI.

Signed-off-by: linfeng-yuan <1102311262@qq.com>

1 parent 571f88f commit 706de02

File tree: 1 file changed, +4 −3 lines


vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 4 additions & 3 deletions
@@ -150,8 +150,8 @@ def fused_experts_with_mc2(hidden_states: torch.Tensor,
                            log2phy: torch.Tensor = None,
                            global_redundant_expert_num: int = 0,
                            **kwargs) -> torch.Tensor:
-
-    topk_ids = log2phy[topk_ids]
+    if log2phy:
+        topk_ids = log2phy[topk_ids]
     global_bs = 0
     moe_expert_num = len(expert_map) + global_redundant_expert_num
     # hidden_states = hidden_states.bfloat16()
@@ -278,7 +278,8 @@ def fused_experts_with_all2all(
         log2phy: torch.Tensor = None,
         global_redundant_expert_num: int = 0,
 ):
-    topk_ids = log2phy[topk_ids]
+    if log2phy:
+        topk_ids = log2phy[topk_ids]
     original_shape = hidden_states.shape
     if len(original_shape) == 3:
         hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
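For context, a minimal, self-contained sketch of the guarded remap both hunks introduce (the helper name and sample tensors are hypothetical, not from the patched file). In non-EPLB runs `log2phy` stays `None`, so the old unconditional indexing failed; the guard turns the remap into a no-op. The sketch spells the check as `is not None`, since truthiness on a multi-element tensor is ambiguous in PyTorch, whereas the patch's `if log2phy:` relies on `log2phy` being exactly `None` when EPLB is disabled.

```python
import torch

def remap_topk_ids(topk_ids: torch.Tensor,
                   log2phy: torch.Tensor = None) -> torch.Tensor:
    # Hypothetical helper mirroring the patched logic: only remap when an
    # EPLB logical-to-physical expert table is actually provided.
    if log2phy is not None:
        # Gather physical expert ids for each selected logical expert.
        topk_ids = log2phy[topk_ids]
    return topk_ids

# Non-EPLB: no table, ids pass through unchanged.
ids = torch.tensor([[0, 2], [1, 3]])
assert torch.equal(remap_topk_ids(ids), ids)

# EPLB: logical expert ids are remapped to physical ones.
log2phy = torch.tensor([4, 5, 6, 7])
assert torch.equal(remap_topk_ids(ids, log2phy),
                   torch.tensor([[4, 6], [5, 7]]))
```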
