
Commit c978b89

Author: weijinqian_v1
Commit message: handle code clean
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
Parent: de97cde

File tree: 6 files changed (+12, -12 lines)


tests/ut/test_moe_util.py

Lines changed: 1 addition & 2 deletions

@@ -10,8 +10,7 @@
     get_capacity, group_limited_topk, permute, sort_chunks_by_idxs,
     topk_softmax_with_capacity, unpermute)

-import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
-
+import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa


 class TestMoeUtils:

tests/ut/test_token_dispatcher.py

Lines changed: 1 addition & 3 deletions

@@ -10,9 +10,7 @@
     MoEAlltoAllSeqOverLapDispatcher, MoeDispatcherConfig)
 from vllm_ascend.utils import adapt_patch  # noqa E402

-import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
-
-
+import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa

 adapt_patch(True)

vllm_ascend/envs.py

Lines changed: 1 addition & 1 deletion

@@ -142,7 +142,7 @@
     # 1: enable moe all2all seq.
     "VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ":
     lambda: bool(int(os.getenv('VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ', '0'))),
-    # ENABLE chunk mc2
+    # ENABLE chunk mc2
     "VLLM_ASCEND_ENABLE_CHUNK_MC2":
     lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_CHUNK_MC2", "0"))),
     # Batch MC2 in prefill: The number of tokens in each batch
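Note: the entries above follow the file's lazy env-flag pattern, where each value is a lambda so the variable is read only when the flag is looked up. A minimal, self-contained sketch of how that parsing behaves (the standalone table below is illustrative, not part of this commit):

import os

# Each flag is stored as a lambda so the environment is read lazily at lookup time.
env_flags = {
    "VLLM_ASCEND_ENABLE_CHUNK_MC2":
    lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_CHUNK_MC2", "0"))),
}

os.environ["VLLM_ASCEND_ENABLE_CHUNK_MC2"] = "1"
print(env_flags["VLLM_ASCEND_ENABLE_CHUNK_MC2"]())  # True  ("1" -> 1 -> True)

del os.environ["VLLM_ASCEND_ENABLE_CHUNK_MC2"]
print(env_flags["VLLM_ASCEND_ENABLE_CHUNK_MC2"]())  # False (falls back to the "0" default)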

vllm_ascend/models/deepseek_dbo.py

Lines changed: 7 additions & 3 deletions

@@ -147,7 +147,8 @@ def __init__(
                 intermediate_size=intermediate_size,
                 hidden_act=config.hidden_act,
                 quant_config=quant_config,
-                reduce_results=True if not envs_ascend.VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ else False,
+                reduce_results=True if
+                not envs_ascend.VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ else False,
                 prefix=f"{prefix}.shared_experts",
             )
         CustomDeepseekDBOMoE.top_k = config.num_experts_per_tok
@@ -232,7 +233,9 @@ def _forward_op_gating(
             chunk_hidden_states = torch.tensor_split(hidden_states,
                                                      self.tp_size,
                                                      dim=0)
-            chunked_hidden_states_sizes = [x.shape[0] for x in chunk_hidden_states]
+            chunked_hidden_states_sizes = [
+                x.shape[0] for x in chunk_hidden_states
+            ]
             local_hidden_states = chunk_hidden_states[self.tp_rank]
         else:
             local_hidden_states = hidden_states
@@ -275,7 +278,8 @@ def _forward_op_gating(
         # to avoid accumulating too much tokens on a single rank.
         # currently it is only activated when doing profile runs.
         if enable_force_load_balance:
-            topk_ids = torch.randint_like(topk_ids, 0, self.config.n_routed_experts)
+            topk_ids = torch.randint_like(topk_ids, 0,
+                                          self.config.n_routed_experts)

         return topk_weights, topk_ids, local_hidden_states, chunked_hidden_states_sizes
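For reference, the reflowed gating code splits the token batch across tensor-parallel ranks with torch.tensor_split and records each chunk's token count; a standalone sketch of that pattern with made-up shapes and rank values (not taken from the commit):

import torch

hidden_states = torch.randn(10, 8)  # 10 tokens, hidden size 8 (illustrative)
tp_size, tp_rank = 4, 1             # pretend TP world size 4, local rank 1

# torch.tensor_split allows uneven splits: 10 tokens over 4 ranks -> sizes 3, 3, 2, 2.
chunk_hidden_states = torch.tensor_split(hidden_states, tp_size, dim=0)
chunked_hidden_states_sizes = [
    x.shape[0] for x in chunk_hidden_states
]
local_hidden_states = chunk_hidden_states[tp_rank]

print(chunked_hidden_states_sizes)  # [3, 3, 2, 2]
print(local_hidden_states.shape)    # torch.Size([3, 8])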

vllm_ascend/multistream/ms_split.py

Lines changed: 2 additions & 2 deletions

@@ -294,8 +294,8 @@ def model_input_split_v1_attn(
                                          token_index)

     is_only_prefill_pre = is_only_prefill_post = attn_metadata.is_only_prefill
-    has_prefill_pre, _ = torch.any(
-        query_lens_pre > 1).item(), torch.any(query_lens_post > 1).item()
+    has_prefill_pre, _ = torch.any(query_lens_pre > 1).item(), torch.any(
+        query_lens_post > 1).item()

     if not attn_metadata.is_only_prefill:
         is_only_prefill_post = torch.all(query_lens_post > 1).item()
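The rewrapped line above is just two torch.any(...).item() checks packed into one tuple assignment; a small sketch of the underlying prefill test, with invented query lengths:

import torch

# A request counts as prefill when it still has more than one query token in this step.
query_lens_pre = torch.tensor([1, 1, 4])   # third request is prefilling
query_lens_post = torch.tensor([1, 1, 1])  # all decode

has_prefill_pre, _ = torch.any(query_lens_pre > 1).item(), torch.any(
    query_lens_post > 1).item()

print(has_prefill_pre)                        # True
print(torch.all(query_lens_post > 1).item())  # False -> not "only prefill"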

vllm_ascend/ops/moe_dispatcher/token_dispatcher.py

Lines changed: 0 additions & 1 deletion

@@ -33,7 +33,6 @@
 from vllm_ascend.ops.comm_utils import async_all_to_all
 from vllm_ascend.ops.moe_dispatcher.moe_utils import (
     get_capacity, permute, topk_softmax_with_capacity, unpermute)
-
 """ We use the following notation throughout this file:
     H: hidden size
     B: micro batch size
