Skip to content

Commit 57d570a

Browse files
committed
Fix enable_multistream_moe for unquantized scenario
Signed-off-by: sdmyzlp <lrwei2@petalmail.com>
1 parent 9e099a5 commit 57d570a

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

vllm_ascend/ops/fused_moe.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ascend_forward_context import FusedMoEState
 from vllm_ascend.ops.expert_load_balancer import ExpertLoadBalancer
+from vllm_ascend.quantization.quant_config import AscendFusedMoEMethod
 from vllm_ascend.utils import (AscendSocVersion, dispose_tensor,
                                get_ascend_soc_version, npu_stream_switch,
                                npu_wait_tensor)
@@ -1144,9 +1145,10 @@ def forward(self,
         if self.enable_multistream_moe:
             assert gate is not None
             router_logits, _ = gate(hidden_states)
-            if isinstance(self.quant_method.quant_method,
-                          AscendW8A8DynamicFusedMoEMethod
-                          ) and fused_moe_state == FusedMoEState.MC2:
+            quant_method = self.quant_method
+            if isinstance(quant_method, AscendFusedMoEMethod) and isinstance(
+                    quant_method.quant_method, AscendW8A8DynamicFusedMoEMethod
+            ) and fused_moe_state == FusedMoEState.MC2:
                 with npu_stream_switch("moe_secondary", 0):
                     quantized_x_for_share, dynamic_scale_for_share = torch_npu.npu_dynamic_quant(
                         hidden_states)

0 commit comments

Comments
 (0)