import torch
import torch_npu
+ from torch import Tensor
from vllm.distributed.parallel_state import get_ep_group

from vllm_ascend.distributed.tensor_parallel import (
@@ -279,7 +280,7 @@ def preprocess(self,
                "num_global_tokens_per_local_expert must be set before operations."
            )
            self.device_sync_point = "no_sync"
-           self.global_input_tokens_local_experts_indices = torch.repeat_interleave(
+           self.global_input_tokens_local_experts_indices: Tensor = torch.repeat_interleave(
                self.expert_ids_per_ep_rank,
                self.num_global_tokens_per_local_expert.ravel())
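For context, a minimal sketch (not part of the patch, with made-up counts) of what this `repeat_interleave` call produces: it expands the per-(rank, local expert) token counts into one local-expert id per received token.

    import torch

    # Hypothetical setup: 2 EP ranks x 2 local experts, so the expert-id
    # pattern repeats once per rank, and the count matrix says how many
    # tokens each rank sends to each local expert.
    expert_ids_per_ep_rank = torch.tensor([0, 1, 0, 1])
    num_global_tokens_per_local_expert = torch.tensor([[2, 1], [0, 3]])

    indices = torch.repeat_interleave(
        expert_ids_per_ep_rank,
        num_global_tokens_per_local_expert.ravel())
    # indices == tensor([0, 0, 1, 1, 1, 1]): one local-expert id per token,
    # which is what global_input_tokens_local_experts_indices holds.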
@@ -314,6 +315,7 @@ def token_permutation(

        # Permutation 1: input to AlltoAll input
        def alltoall_token_permutation1(hidden_states, routing_map):
+           assert self.hidden_shape is not None
            hidden_states = hidden_states.view(-1, self.hidden_shape[-1])
            tokens_per_expert = self.preprocess(routing_map)
            if self.tp_ep_size > 1:
@@ -390,6 +392,7 @@ def preprocess_and_permtute1(self,
        self.top_indices = routing_map
        assert probs.dim() == 2, "Expected 2D tensor for probs"
        assert routing_map.dim() == 2, "Expected 2D tensor for routing map"
+       assert self.hidden_shape is not None

        hidden_states = hidden_states.view(-1, self.hidden_shape[-1])
        tokens_per_expert = self.preprocess(routing_map, with_sync=False)
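Note (an assumption about intent): these `assert ... is not None` lines mainly narrow Optional attributes for mypy before they are indexed. A stripped-down sketch with a hypothetical class:

    from typing import Optional

    import torch

    class _Sketch:
        hidden_shape: Optional[torch.Size] = None

        def _flatten(self, hidden_states: torch.Tensor) -> torch.Tensor:
            # Without the assert, mypy flags `self.hidden_shape[-1]` because
            # Optional[torch.Size] is not indexable; the assert narrows it.
            assert self.hidden_shape is not None
            return hidden_states.view(-1, self.hidden_shape[-1])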
@@ -401,6 +404,7 @@ def preprocess_and_permtute1(self,
        event = torch.npu.current_stream().record_event()
        self.perm1_finish_event = torch.npu.Event()
        with torch.npu.stream(self.overlap_stream):
+           assert self.overlap_stream is not None
            self.overlap_stream.wait_event(event)

            if shared_experts is not None:
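For readers unfamiliar with the stream pattern in this hunk, a minimal sketch (an assumption mirroring the calls above, not code from this PR): an event recorded on the current stream gates the work queued on the secondary stream, so the permuted-token and shared-expert work can overlap safely.

    import torch
    import torch_npu  # noqa: F401  (provides the torch.npu namespace)

    overlap_stream = torch.npu.Stream()
    event = torch.npu.current_stream().record_event()
    with torch.npu.stream(overlap_stream):
        # Kernels launched here run on overlap_stream, but only after the
        # work captured by `event` on the default stream has completed.
        overlap_stream.wait_event(event)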
@@ -418,7 +422,11 @@ def preprocess_and_permtute1(self,
        # repeat_interleave will launch a sync on current_stream.
        if self.num_local_experts > 1:
            self.device_sync_point = "no_sync"
-           self.global_input_tokens_local_experts_indices = torch.repeat_interleave(
+           if self.num_global_tokens_per_local_expert is None:
+               raise ValueError(
+                   "num_global_tokens_per_local_expert must be set before operations."
+               )
+           self.global_input_tokens_local_experts_indices: Tensor = torch.repeat_interleave(
                self.expert_ids_per_ep_rank,
                self.num_global_tokens_per_local_expert.ravel())
@@ -441,6 +449,10 @@ def dispatch_alltoall(self):
            ep_group,
        )
        permute1_ep_all_to_all_handle.wait()
+       if self.cached_permutated_local_input_tokens is None:
+           raise ValueError(
+               "cached_permutated_local_input_tokens must be set before operations."
+           )
        self.cached_permutated_local_input_tokens.untyped_storage().resize_(0)
        self.cached_permutated_local_input_tokens = None
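One more note (an illustration, not from the patch): `untyped_storage().resize_(0)` releases the cached tensor's device memory immediately instead of waiting for the Python reference to be dropped, which is why the None check has to sit right before it.

    import torch

    t = torch.empty(1024)            # stand-in for the cached permuted tokens
    t.untyped_storage().resize_(0)   # frees the backing storage right away
    t = None                         # the Python reference is cleared afterwards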