Commit a9bccf8

Author: weijinqian_v1 (committed)
Commit message: add moe_block: AscendSparseMoeBlock
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
Parent: 49e9771

File tree: 1 file changed (+6, -3 lines)

vllm_ascend/models/moe_block.py

Lines changed: 6 additions & 3 deletions
@@ -18,6 +18,8 @@
 from typing import Optional
 
 import torch
+import vllm.model_executor.models.qwen3_moe as qwen3
+
 from torch import nn
 from vllm.attention import AttentionMetadata
 from vllm.distributed import (get_tensor_model_parallel_world_size,
@@ -91,13 +93,12 @@ def forward(
         attn_metadata = get_forward_context().attn_metadata
         # when profile runs, force experts to load balanced tokens
         # to avoid high memory consumption on a single rank.
-        is_prefill = True
         if attn_metadata is None:
             # for profile run
             is_prefill = True
             enable_force_load_balance = True
         else:
-            # is_prefill = attn_metadata.num_prefills > 0 is_prefill or
+            is_prefill = False
             enable_force_load_balance = False
             if hasattr(attn_metadata, 'with_prefill_across_dp'):
                 is_prefill = attn_metadata.with_prefill_across_dp
@@ -114,4 +115,6 @@ def forward(
             shared_experts=None,
         )
 
-        return hidden_states
+        return hidden_states
+
+qwen3.Qwen3MoeSparseMoeBlock = AscendSparseMoeBlock
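
The last added line rebinds Qwen3MoeSparseMoeBlock inside vllm.model_executor.models.qwen3_moe, so code that later resolves the class through that module attribute gets the Ascend block instead. Below is a minimal, self-contained sketch of this module-level patching pattern; the fake_qwen3 namespace and both toy classes are hypothetical stand-ins, not the real vLLM or vllm-ascend implementations.

# Toy illustration of the attribute-rebinding pattern used in this commit.
# "fake_qwen3" stands in for vllm.model_executor.models.qwen3_moe; the two
# block classes are hypothetical placeholders.
import types

fake_qwen3 = types.SimpleNamespace()


class Qwen3MoeSparseMoeBlock:
    def forward(self, hidden_states):
        return "upstream forward"


class AscendSparseMoeBlock:
    def forward(self, hidden_states):
        return "ascend forward"


# Upstream state: the module exposes its own block class.
fake_qwen3.Qwen3MoeSparseMoeBlock = Qwen3MoeSparseMoeBlock

# The patch: rebind the module attribute, as the commit does with the real
# qwen3_moe module. Lookups made through the module after this point see the
# Ascend class; names imported before the patch keep the original binding.
fake_qwen3.Qwen3MoeSparseMoeBlock = AscendSparseMoeBlock

block = fake_qwen3.Qwen3MoeSparseMoeBlock()
print(block.forward(None))  # prints "ascend forward"

Because Python resolves the name at lookup time, this kind of patch only takes effect for objects constructed after the rebinding, so a module like this one has to be imported before the Qwen3 MoE model is built (assuming the model instantiates the block through the patched module attribute).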
