We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9e099a5, commit 066ea10 (Copy full SHA for 066ea10)
vllm_ascend/ops/fused_moe.py
@@ -1144,9 +1144,11 @@ def forward(self,
1144
if self.enable_multistream_moe:
1145
assert gate is not None
1146
router_logits, _ = gate(hidden_states)
1147
- if isinstance(self.quant_method.quant_method,
1148
- AscendW8A8DynamicFusedMoEMethod
1149
- ) and fused_moe_state == FusedMoEState.MC2:
+ if not isinstance(self.quant_method,
+ AscendUnquantizedFusedMoEMethod) and isinstance(
+ self.quant_method.quant_method,
1150
+ AscendW8A8DynamicFusedMoEMethod
1151
+ ) and fused_moe_state == FusedMoEState.MC2:
1152
with npu_stream_switch("moe_secondary", 0):
1153
quantized_x_for_share, dynamic_scale_for_share = torch_npu.npu_dynamic_quant(
1154
hidden_states)
0 commit comments