
Commit c978b89

Author: weijinqian_v1
Commit message: handle code clean
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
Parent: de97cde

File tree: 6 files changed (+12, -12 lines)


tests/ut/test_moe_util.py

Lines changed: 1 addition & 2 deletions

@@ -10,8 +10,7 @@
     get_capacity, group_limited_topk, permute, sort_chunks_by_idxs,
     topk_softmax_with_capacity, unpermute)

-import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
-
+import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa


 class TestMoeUtils:

tests/ut/test_token_dispatcher.py

Lines changed: 1 addition & 3 deletions

@@ -10,9 +10,7 @@
     MoEAlltoAllSeqOverLapDispatcher, MoeDispatcherConfig)
 from vllm_ascend.utils import adapt_patch  # noqa E402

-import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
-
-
+import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa

 adapt_patch(True)

vllm_ascend/envs.py

Lines changed: 1 addition & 1 deletion

@@ -142,7 +142,7 @@
     # 1: enable moe all2all seq.
     "VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ":
     lambda: bool(int(os.getenv('VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ', '0'))),
-    # ENABLE chunk mc2
+    # ENABLE chunk mc2
     "VLLM_ASCEND_ENABLE_CHUNK_MC2":
     lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_CHUNK_MC2", "0"))),
     # Batch MC2 in prefill: The number of tokens in each batch
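Note: the entries above follow the file's lazy env-flag pattern, where each value is a lambda so the variable is read only when the flag is looked up. A minimal, self-contained sketch of how that parsing behaves (the standalone table below is illustrative, not part of this commit):

import os

# Each flag is stored as a lambda so the environment is read lazily at lookup time.
env_flags = {
    "VLLM_ASCEND_ENABLE_CHUNK_MC2":
    lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_CHUNK_MC2", "0"))),
}

os.environ["VLLM_ASCEND_ENABLE_CHUNK_MC2"] = "1"
print(env_flags["VLLM_ASCEND_ENABLE_CHUNK_MC2"]())  # True  ("1" -> 1 -> True)

del os.environ["VLLM_ASCEND_ENABLE_CHUNK_MC2"]
print(env_flags["VLLM_ASCEND_ENABLE_CHUNK_MC2"]())  # False (falls back to the "0" default)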

vllm_ascend/models/deepseek_dbo.py

Lines changed: 7 additions & 3 deletions

@@ -147,7 +147,8 @@ def __init__(
                 intermediate_size=intermediate_size,
                 hidden_act=config.hidden_act,
                 quant_config=quant_config,
-                reduce_results=True if not envs_ascend.VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ else False,
+                reduce_results=True if
+                not envs_ascend.VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ else False,
                 prefix=f"{prefix}.shared_experts",
             )
         CustomDeepseekDBOMoE.top_k = config.num_experts_per_tok
@@ -232,7 +233,9 @@ def _forward_op_gating(
             chunk_hidden_states = torch.tensor_split(hidden_states,
                                                      self.tp_size,
                                                      dim=0)
-            chunked_hidden_states_sizes = [x.shape[0] for x in chunk_hidden_states]
+            chunked_hidden_states_sizes = [
+                x.shape[0] for x in chunk_hidden_states
+            ]
             local_hidden_states = chunk_hidden_states[self.tp_rank]
         else:
             local_hidden_states = hidden_states
@@ -275,7 +278,8 @@ def _forward_op_gating(
         # to avoid accumulating too much tokens on a single rank.
         # currently it is only activated when doing profile runs.
         if enable_force_load_balance:
-            topk_ids = torch.randint_like(topk_ids, 0, self.config.n_routed_experts)
+            topk_ids = torch.randint_like(topk_ids, 0,
+                                          self.config.n_routed_experts)

         return topk_weights, topk_ids, local_hidden_states, chunked_hidden_states_sizes
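For reference, the reflowed gating code splits the token batch across tensor-parallel ranks with torch.tensor_split and records each chunk's token count; a standalone sketch of that pattern with made-up shapes and rank values (not taken from the commit):

import torch

hidden_states = torch.randn(10, 8)  # 10 tokens, hidden size 8 (illustrative)
tp_size, tp_rank = 4, 1             # pretend TP world size 4, local rank 1

# torch.tensor_split allows uneven splits: 10 tokens over 4 ranks -> sizes 3, 3, 2, 2.
chunk_hidden_states = torch.tensor_split(hidden_states, tp_size, dim=0)
chunked_hidden_states_sizes = [
    x.shape[0] for x in chunk_hidden_states
]
local_hidden_states = chunk_hidden_states[tp_rank]

print(chunked_hidden_states_sizes)  # [3, 3, 2, 2]
print(local_hidden_states.shape)    # torch.Size([3, 8])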

vllm_ascend/multistream/ms_split.py

Lines changed: 2 additions & 2 deletions

@@ -294,8 +294,8 @@ def model_input_split_v1_attn(
                                          token_index)

     is_only_prefill_pre = is_only_prefill_post = attn_metadata.is_only_prefill
-    has_prefill_pre, _ = torch.any(
-        query_lens_pre > 1).item(), torch.any(query_lens_post > 1).item()
+    has_prefill_pre, _ = torch.any(query_lens_pre > 1).item(), torch.any(
+        query_lens_post > 1).item()

     if not attn_metadata.is_only_prefill:
         is_only_prefill_post = torch.all(query_lens_post > 1).item()
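The rewrapped line above is just two torch.any(...).item() checks packed into one tuple assignment; a small sketch of the underlying prefill test, with invented query lengths:

import torch

# A request counts as prefill when it still has more than one query token in this step.
query_lens_pre = torch.tensor([1, 1, 4])   # third request is prefilling
query_lens_post = torch.tensor([1, 1, 1])  # all decode

has_prefill_pre, _ = torch.any(query_lens_pre > 1).item(), torch.any(
    query_lens_post > 1).item()

print(has_prefill_pre)                        # True
print(torch.all(query_lens_post > 1).item())  # False -> not "only prefill"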

vllm_ascend/ops/moe_dispatcher/token_dispatcher.py

Lines changed: 0 additions & 1 deletion

@@ -33,7 +33,6 @@
 from vllm_ascend.ops.comm_utils import async_all_to_all
 from vllm_ascend.ops.moe_dispatcher.moe_utils import (
     get_capacity, permute, topk_softmax_with_capacity, unpermute)
-
 """ We use the following notation throughout this file:
     H: hidden size
     B: micro batch size
