
Commit a9da140

[BugFix] Fix the bug when enabling both DBO and MoE multistream for DeepSeek. (#1759)
When both DBO and MoE multistream are enabled for the DeepSeek model, a bug occurs. This PR fixes that case and adds an e2e test for the scenario. Signed-off-by: whx-sjtu <2952154980@qq.com>
1 parent: aeec295
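The scenario the commit describes can be sketched as a short vLLM script. This is illustrative only: the model name is a stand-in, and the two switches (the `VLLM_ASCEND_ENABLE_DBO` environment variable and the `additional_config` key for multistream MoE) are assumptions about vllm-ascend's configuration surface, not flags confirmed by this commit.

# Hypothetical reproduction of the DBO + MoE multistream scenario on Ascend.
# Flag names below are assumptions, not taken from this commit; consult the
# vllm-ascend docs for the switches matching your installed version.
import os

os.environ["VLLM_ASCEND_ENABLE_DBO"] = "1"  # assumed env var enabling DBO

from vllm import LLM, SamplingParams

llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite",  # stand-in DeepSeek MoE checkpoint
    additional_config={
        # assumed key enabling the MoE multistream path in vllm-ascend
        "torchair_graph_config": {"enable_multistream_moe": True},
    },
)
outputs = llm.generate(["Hello, DeepSeek!"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)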

File tree: 1 file changed, +0 −35 lines


vllm_ascend/models/deepseek_dbo.py

Lines changed: 0 additions & 35 deletions
@@ -149,41 +149,6 @@ def __init__(
         )
         CustomDeepseekDBOMoE.top_k = config.num_experts_per_tok
 
-    def forward(
-            self,
-            hidden_states: torch.Tensor,
-            attn_metadata: Optional[AttentionMetadata] = None) -> torch.Tensor:
-        forward_context = get_forward_context()
-        if attn_metadata is None:
-            attn_metadata = forward_context.attn_metadata
-
-        # when profile runs, force experts to load balanced tokens
-        # to avoid high memory consumption on a single rank.
-        enable_force_load_balance = forward_context.in_profile_run
-
-        is_prefill = forward_context.with_prefill
-        # If this node is kv_consumer, we force the moe always runs in decode path to make sure
-        # the behaviour aligned between dummy_run and normal model_execute.
-        if self.kv_consumer:
-            is_prefill = False
-
-        # router_logits: (num_tokens, n_experts)
-        router_logits, _ = self.gate(hidden_states)
-
-        experts_hidden_states = self.experts(
-            hidden_states=hidden_states,
-            router_logits=router_logits,
-            is_prefill=is_prefill,
-            top_k=CustomDeepseekDBOMoE.top_k,
-            enable_force_load_balance=enable_force_load_balance,
-            shared_experts=self.shared_experts)
-
-        hidden_states = (
-            experts_hidden_states[0] * self.routed_scaling_factor +
-            experts_hidden_states[1])
-
-        return hidden_states
-
     # ----------------------------------------- TBO-related --------------------------------------------
     def _forward_ms_op_shared_expert(
         self,
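Why deleting the override can fix the bug (an inference from the diff, not stated in the commit message): `CustomDeepseekDBOMoE` subclasses the non-DBO MoE module, so with the duplicated `forward` gone, plain calls resolve to the parent's implementation, while the DBO schedule keeps driving the `_forward_ms_op_*` methods directly. A minimal sketch of the method-resolution effect, with stand-in class bodies (only the names mirror vllm_ascend/models/deepseek_dbo.py):

# Minimal sketch of the inheritance effect; class bodies are stand-ins.
import torch


class CustomDeepseekV2MoE:
    """Stand-in for the parent MoE module in vllm_ascend."""

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # The real code routes tokens through self.experts here.
        return hidden_states


class CustomDeepseekDBOMoE(CustomDeepseekV2MoE):
    # forward() override removed by this commit: plain calls now fall
    # through to CustomDeepseekV2MoE.forward, while the DBO schedule
    # invokes the _forward_ms_op_* methods directly.
    pass


moe = CustomDeepseekDBOMoE()
out = moe.forward(torch.zeros(1, 8))  # resolves to the parent's forward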
