File tree Expand file tree Collapse file tree 1 file changed +5
-3
lines changed Expand file tree Collapse file tree 1 file changed +5
-3
lines changed Original file line number Diff line number Diff line change 40
40
from vllm_ascend .ascend_config import get_ascend_config
41
41
from vllm_ascend .ascend_forward_context import FusedMoEState
42
42
from vllm_ascend .ops .expert_load_balancer import ExpertLoadBalancer
43
+ from vllm_ascend .quantization .quant_config import AscendFusedMoEMethod
43
44
from vllm_ascend .utils import (AscendSocVersion , dispose_tensor ,
44
45
get_ascend_soc_version , npu_stream_switch ,
45
46
npu_wait_tensor )
@@ -1144,9 +1145,10 @@ def forward(self,
1144
1145
if self .enable_multistream_moe :
1145
1146
assert gate is not None
1146
1147
router_logits , _ = gate (hidden_states )
1147
- if isinstance (self .quant_method .quant_method ,
1148
- AscendW8A8DynamicFusedMoEMethod
1149
- ) and fused_moe_state == FusedMoEState .MC2 :
1148
+ quant_method = self .quant_method
1149
+ if isinstance (quant_method , AscendFusedMoEMethod ) and isinstance (
1150
+ quant_method .quant_method , AscendW8A8DynamicFusedMoEMethod
1151
+ ) and fused_moe_state == FusedMoEState .MC2 :
1150
1152
with npu_stream_switch ("moe_secondary" , 0 ):
1151
1153
quantized_x_for_share , dynamic_scale_for_share = torch_npu .npu_dynamic_quant (
1152
1154
hidden_states )
You can’t perform that action at this time.
0 commit comments