
Commit 415394c

remove cv parallel for float model
Signed-off-by: David9857 <985700846@qq.com>
1 parent 3511331 commit 415394c

2 files changed: +2 −17 lines changed

vllm_ascend/models/deepseek_v2.py

Lines changed: 2 additions & 2 deletions

@@ -286,10 +286,10 @@ def forward(
             top_k=CustomDeepseekV2MoE.top_k,
             enable_force_load_balance=enable_force_load_balance,
             **kwargs)
-
+
         if multistream:
             hidden_states, shared_output = hidden_states
-
+
         hidden_states = hidden_states * self.routed_scaling_factor

         if self.tp_size > 1:
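For context on the hunk above: when multistream overlap is enabled, the experts call returns a (routed hidden states, shared-expert output) pair that the model-side forward unpacks before applying routed_scaling_factor. Below is a minimal sketch of that consumption pattern, assuming a tuple-or-tensor return; moe_forward and experts_out are illustrative names for this sketch, not vllm-ascend identifiers.

import torch

def moe_forward(experts_out, multistream: bool, routed_scaling_factor: float):
    # experts_out is assumed to be a (hidden_states, shared_output) tuple when
    # multistream overlap is active, otherwise a plain tensor.
    shared_output = None
    if multistream:
        hidden_states, shared_output = experts_out
    else:
        hidden_states = experts_out
    # Scale only the routed-expert contribution, as in the context lines above.
    hidden_states = hidden_states * routed_scaling_factor
    return hidden_states, shared_output

# Example: routed and shared outputs for two tokens with hidden size 4.
routed, shared = moe_forward((torch.ones(2, 4), torch.zeros(2, 4)),
                             multistream=True,
                             routed_scaling_factor=2.5)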

vllm_ascend/ops/fused_moe.py

Lines changed: 0 additions & 15 deletions

@@ -20,7 +20,6 @@
 import torch
 import torch.distributed as dist
 import torch_npu
-import torchair as tng  # type: ignore
 from vllm.config import get_current_vllm_config
 from vllm.distributed import (GroupCoordinator,
                               get_tensor_model_parallel_world_size,
@@ -87,13 +86,6 @@ def fused_experts_with_mc2(hidden_states: torch.Tensor,
     expand_x, dynamic_scale, expand_idx, expert_token_nums, ep_recv_counts = output[
         0:5]

-    shared_experts = kwargs.get('shared_experts', None)
-    if shared_experts:
-        shared_gate_up = kwargs.get('shared_gate_up', None)
-        with tng.scope.npu_stream_switch('cv'):
-            tng.scope.npu_wait_tensor(shared_gate_up, expand_x)
-            shared_x = shared_experts.act_fn(shared_gate_up)
-
     w1 = w1.transpose(1, 2)
     expert_token_nums = torch.cumsum(expert_token_nums,
                                      dim=0,
@@ -122,11 +114,6 @@ def fused_experts_with_mc2(hidden_states: torch.Tensor,
         group_list=group_list,
     )

-    if shared_experts:
-        with tng.scope.npu_stream_switch('cv'):
-            tng.scope.npu_wait_tensor(shared_x, down_out_list)
-            shared_output = shared_experts.down_proj(shared_x)
-
     down_out_list = torch.cat(down_out_list, dim=0)

     # moeCombine
@@ -156,8 +143,6 @@ def fused_experts_with_mc2(hidden_states: torch.Tensor,

     hidden_states = torch_npu.npu_moe_distribute_combine(**kwargs_mc2)

-    if shared_experts:
-        return hidden_states, shared_output
     return hidden_states
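For reference, the deleted lines above are the "cv parallel" that the commit message removes: the shared-expert MLP was issued on a secondary torchair stream so it could overlap with the routed-expert MC2 grouped matmuls. Below is a minimal sketch of that (now removed) pattern, assuming a torchair/torch_npu graph-mode environment; overlapped_shared_experts is a hypothetical wrapper around the deleted inline code, and shared_experts stands for any module exposing act_fn and down_proj.

import torchair as tng  # type: ignore

def overlapped_shared_experts(shared_experts, shared_gate_up, expand_x, down_out_list):
    # Shared-expert activation on the secondary 'cv' stream, overlapping the
    # routed-expert computation that runs on the default stream.
    with tng.scope.npu_stream_switch('cv'):
        # Do not start until the dispatched tokens (expand_x) are ready.
        tng.scope.npu_wait_tensor(shared_gate_up, expand_x)
        shared_x = shared_experts.act_fn(shared_gate_up)

    # Shared-expert down-projection, again on the 'cv' stream.
    with tng.scope.npu_stream_switch('cv'):
        # Order after the routed-expert down-projection outputs.
        tng.scope.npu_wait_tensor(shared_x, down_out_list)
        shared_output = shared_experts.down_proj(shared_x)

    return shared_output

After this commit, fused_experts_with_mc2 no longer takes that path for the float model: the shared-expert work is not overlapped inside it, and the function always returns a single tensor, with the (hidden_states, shared_output) tuple return removed as shown in the last hunk.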
