Fix enable_multistream_moe for unquantized scenario

sdmyzlp · sdmyzlp · commit 066ea109a65b · 2025-07-07T11:42:22.000+08:00
Signed-off-by: sdmyzlp <lrwei2@petalmail.com>
diff --git a/vllm_ascend/ops/fused_moe.py b/vllm_ascend/ops/fused_moe.py
@@ -1144,9 +1144,11 @@ def forward(self,
         if self.enable_multistream_moe:
             assert gate is not None
             router_logits, _ = gate(hidden_states)
-            if isinstance(self.quant_method.quant_method,
-                          AscendW8A8DynamicFusedMoEMethod
-                          ) and fused_moe_state == FusedMoEState.MC2:
+            if not isinstance(self.quant_method,
+                              AscendUnquantizedFusedMoEMethod) and isinstance(
+                                  self.quant_method.quant_method,
+                                  AscendW8A8DynamicFusedMoEMethod
+                              ) and fused_moe_state == FusedMoEState.MC2:
                 with npu_stream_switch("moe_secondary", 0):
                     quantized_x_for_share, dynamic_scale_for_share = torch_npu.npu_dynamic_quant(
                         hidden_states)