@@ -201,6 +201,8 @@ def __init__(self, config: MoEDispatcherConfig):
        self.cached_global_input_tokens = None
        self.cached_shared_expert_output = None
        self.tokens_per_expert = None
+       self.perm1_finish_event = None
+       self.global_input_tokens_local_experts_indices = None

        if MoEAlltoAllSeqOverLapDispatcher.overlap_stream is None:
            MoEAlltoAllSeqOverLapDispatcher.overlap_stream = torch.npu.Stream()
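
Context note (not part of the diff): overlap_stream is a class-level attribute created lazily on first construction, so every dispatcher instance shares one side stream, and the two new attributes are simply pre-declared as None in __init__. A minimal sketch of that pattern, using torch.cuda.Stream as a stand-in for torch.npu.Stream (the real code needs the torch_npu Ascend plugin, and this sketch needs a CUDA device):

    import torch

    class OverlapDispatcherSketch:
        # Shared side stream for all instances; created once, on first __init__.
        overlap_stream = None

        def __init__(self):
            # Mirrors the lazily initialized attributes added in the diff.
            self.perm1_finish_event = None
            self.global_input_tokens_local_experts_indices = None
            if OverlapDispatcherSketch.overlap_stream is None:
                OverlapDispatcherSketch.overlap_stream = torch.cuda.Stream()
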
@@ -280,7 +282,7 @@ def preprocess(self,
                    "num_global_tokens_per_local_expert must be set before operations."
                )
            self.device_sync_point = "no_sync"
-           self.global_input_tokens_local_experts_indices: Tensor = torch.repeat_interleave(
+           self.global_input_tokens_local_experts_indices = torch.repeat_interleave(
                self.expert_ids_per_ep_rank,
                self.num_global_tokens_per_local_expert.ravel())

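For reference (an illustrative sketch with made-up shapes and counts, not the dispatcher's real tensors): torch.repeat_interleave expands the per-(rank, local-expert) expert ids by the corresponding token counts, producing one expert index per received token.

    import torch

    # Hypothetical setup: two EP ranks x two local experts, flattened row-major as .ravel() does.
    expert_ids_per_ep_rank = torch.tensor([0, 1, 0, 1])
    num_global_tokens_per_local_expert = torch.tensor([[2, 1],   # rank 0: 2 tokens for expert 0, 1 for expert 1
                                                       [0, 3]])  # rank 1: 0 tokens for expert 0, 3 for expert 1

    indices = torch.repeat_interleave(expert_ids_per_ep_rank,
                                      num_global_tokens_per_local_expert.ravel())
    print(indices)  # tensor([0, 0, 1, 1, 1, 1]) -> one expert id per incoming token
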
@@ -426,7 +428,7 @@ def preprocess_and_permtute1(self,
            raise ValueError(
                "num_global_tokens_per_local_expert must be set before operations."
            )
-       self.global_input_tokens_local_experts_indices: Tensor = torch.repeat_interleave(
+       self.global_input_tokens_local_experts_indices = torch.repeat_interleave(
            self.expert_ids_per_ep_rank,
            self.num_global_tokens_per_local_expert.ravel())

@@ -462,6 +464,7 @@ def permute2(self):
        global_input_tokens, self.reversed_global_input_permutation_mapping = torch_npu.npu_moe_token_permute(
            self.cached_global_input_tokens,
            self.global_input_tokens_local_experts_indices)
+       assert self.cached_global_input_tokens is not None
        self.cached_global_input_tokens.untyped_storage().resize_(0)
        self.cached_global_input_tokens = None

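The added assert narrows cached_global_input_tokens to a non-None Tensor before its storage is released; the release itself is the usual early-free pattern of shrinking the backing storage to zero once the cached activation has been consumed. A rough illustration with a hypothetical tensor (not the dispatcher's state):

    import torch

    cached = torch.randn(8, 16)           # stands in for cached_global_input_tokens
    consumed = cached * 2                 # downstream op that consumes the cache into a new tensor
    assert cached is not None             # same narrowing as in the diff
    cached.untyped_storage().resize_(0)   # free the backing memory immediately
    cached = None                         # drop the remaining Python reference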