Commit 7058853

fix lint

Signed-off-by: lidenghui <lidenghui1110@gmail.com>

1 parent c848786

4 files changed: +24 -21 lines

vllm_ascend/attention/attention_v1.py

Lines changed: 2 additions & 2 deletions

@@ -274,8 +274,8 @@ def forward(
         shape = [batch_size * seq_len, num_heads, head_size]
         """
         num_tokens = query.shape[0]
-        use_kv_cache_int8 = len(kv_cache
-                                ) > 0 and kv_cache[0].dtype == torch.int8
+        use_kv_cache_int8 = len(
+            kv_cache) > 0 and kv_cache[0].dtype == torch.int8
         if output is None:
             output = torch.empty(num_tokens,
                                  self.num_heads,
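
This is a pure yapf reflow: the break now falls right after len( instead of before the closing parenthesis, and the predicate is unchanged. A minimal runnable sketch of that check, using a hypothetical single-tensor cache tuple (the shape is illustrative, not vllm-ascend's actual KV layout):

    import torch

    # Hypothetical int8-quantized KV cache for one layer; shape is made up.
    kv_cache = (torch.zeros(4, 128, 8, 64, dtype=torch.int8), )

    # Same predicate as the diff: non-empty cache whose first tensor is int8.
    use_kv_cache_int8 = len(
        kv_cache) > 0 and kv_cache[0].dtype == torch.int8
    print(use_kv_cache_int8)  # True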

vllm_ascend/attention/mla_v1.py

Lines changed: 1 addition & 2 deletions

@@ -1020,8 +1020,7 @@ def _forward_decode(
             num_kv_heads=self.num_kv_heads,
             num_heads=self.num_heads,
             scale_value=self.scale,
-            block_table=attn_metadata.decode.
-            block_table,  # type:ignore
+            block_table=attn_metadata.decode.block_table,  # type:ignore
             context_lens=attn_metadata.decode.seq_lens,  # type:ignore
             mla_vheadsize=self.kv_lora_rank,
             out=attn_output)
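
Joining the attribute chain is cosmetic: inside a call's parentheses Python lets a dotted chain break after the dot, so both spellings resolve identically, and the joined form keeps the # type:ignore comment on one logical line. A tiny sketch with stand-in objects (names are illustrative, not the real metadata classes):

    from types import SimpleNamespace

    # Stand-ins for the real decode metadata; fields are illustrative only.
    attn_metadata = SimpleNamespace(
        decode=SimpleNamespace(block_table=[[0, 1]], seq_lens=[2]))

    # Both forms resolve to the same attribute.
    split = (attn_metadata.decode.
             block_table)
    joined = attn_metadata.decode.block_table
    assert split is joined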

vllm_ascend/ops/attention.py

Lines changed: 3 additions & 3 deletions

@@ -138,7 +138,8 @@ def vanilla_chunked_prefill(
 def vanilla_chunked_prefill_mla(
         output: torch.Tensor,  # (num_tokens, num_heads, v_head_dim)
         query: torch.Tensor,  # (num_tokens, num_heads, nope_dim + rope_dim)
-        kv_c_and_k_pe_cache: tuple[torch.Tensor],  # (num_blocks, block_size, latent_kv/rope_dim)
+        kv_c_and_k_pe_cache: tuple[
+            torch.Tensor],  # (num_blocks, block_size, latent_kv/rope_dim)
         block_tables: torch.Tensor,  # (batch_size, max_num_blocks_per_seq)
         query_lens: torch.Tensor,  # (batch_size)
         context_lens: torch.Tensor,  # (batch_size)

@@ -156,10 +157,9 @@ def vanilla_chunked_prefill_mla(
     num_heads = query.size(1)
     cache_kv_c = kv_c_and_k_pe_cache[0].squeeze()
     cache_k_pe = kv_c_and_k_pe_cache[1].squeeze()
-
+
     # cached_kv_c: [batch_size, max_context_len, latent_kv]
     # cached_k_pe: [batch_size, max_context_len, rope_dim]
-    batch_size = query_lens.size(0)
     block_size, latent_kv_dim = cache_kv_c.size(1), cache_kv_c.size(-1)
     max_num_blocks_per_seq = block_tables.size(1)
     cache_kv_c = cache_kv_c[block_tables].view(
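
Besides the signature reflow, the second hunk drops the batch_size = query_lens.size(0) binding (presumably flagged as unused by the linter). The diff cuts off at the block-table gather on its last line; a minimal sketch of that indexing pattern under assumed toy shapes (names reused from the signature above, sizes made up):

    import torch

    # Toy cache: (num_blocks, block_size, latent_kv_dim).
    num_blocks, block_size, latent_kv_dim = 8, 4, 16
    cache_kv_c = torch.randn(num_blocks, block_size, latent_kv_dim)

    # Per-sequence block tables: (batch_size, max_num_blocks_per_seq).
    block_tables = torch.tensor([[0, 1], [2, 3]])
    batch_size, max_num_blocks_per_seq = block_tables.shape

    # Advanced indexing gathers each sequence's blocks, then view flattens
    # the block dimension into a per-sequence context axis.
    gathered = cache_kv_c[block_tables].view(
        batch_size, max_num_blocks_per_seq * block_size, latent_kv_dim)
    print(gathered.shape)  # torch.Size([2, 8, 16])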

vllm_ascend/worker/model_runner_v1.py

Lines changed: 18 additions & 14 deletions

@@ -2138,19 +2138,23 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
                     nope_dim = self.model_config.hf_text_config.kv_lora_rank
                     assert head_dim == rope_dim + nope_dim, \
                         f"head_dim({head_dim}) != rope_dim({rope_dim}) + nope_dim({nope_dim})"
-                    nope_cache_shape = (num_blocks, block_size,
-                                        num_kv_heads, nope_dim)
-                    rope_cache_shape = (num_blocks, block_size,
-                                        num_kv_heads, rope_dim)
-                    nope_cache = torch.zeros(
-                        nope_cache_shape, dtype=dtype, device=self.device,
-                        pin_memory=True)
-                    rope_cache = torch.zeros(
-                        rope_cache_shape, dtype=dtype, device=self.device,
-                        pin_memory=True)
+                    layer_kv_cache_nope_shape = (num_blocks, block_size,
+                                                 num_kv_heads, nope_dim)
+                    layer_kv_cache_pe_shape = (num_blocks, block_size,
+                                               num_kv_heads, rope_dim)
+                    layer_kv_cache_nope = torch.zeros(
+                        layer_kv_cache_nope_shape,
+                        dtype=dtype,
+                        device=self.device)
+                    layer_kv_cache_pe = torch.zeros(
+                        layer_kv_cache_pe_shape,
+                        dtype=dtype,
+                        device=self.device)
                     kv_caches[layer_name] = (
-                        torch_npu.npu_format_cast(nope_cache, acl_format),
-                        torch_npu.npu_format_cast(rope_cache, acl_format),
+                        torch_npu.npu_format_cast(layer_kv_cache_nope,
+                                                  acl_format),
+                        torch_npu.npu_format_cast(layer_kv_cache_pe,
+                                                  acl_format),
                     )
                 else:
                     num_caches = kv_cache_shape[0]

@@ -2160,8 +2164,8 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
                         kv_cache = torch.zeros(cache_shape,
                                                dtype=dtype,
                                                device=self.device)
-                        kv_cache = torch_npu.npu_format_cast(kv_cache,
-                                                             acl_format)
+                        kv_cache = torch_npu.npu_format_cast(
+                            kv_cache, acl_format)
                         kv_cache_list.append(kv_cache)
                     kv_caches[layer_name] = kv_cache_list
                 else:
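
Beyond the reflow, the first hunk renames nope_cache/rope_cache to layer_kv_cache_nope/layer_kv_cache_pe and drops pin_memory=True from the device allocations. A hedged, CPU-runnable sketch of the two-part MLA cache layout with made-up sizes (the layer key is hypothetical, and torch_npu.npu_format_cast is NPU-only, so it is skipped here):

    import torch

    # Illustrative sizes; nope_dim/rope_dim mirror a DeepSeek-style MLA
    # split (kv_lora_rank and rope head dim), not values from a real config.
    num_blocks, block_size, num_kv_heads = 16, 128, 1
    nope_dim, rope_dim = 512, 64
    dtype = torch.float16

    layer_kv_cache_nope = torch.zeros(
        (num_blocks, block_size, num_kv_heads, nope_dim), dtype=dtype)
    layer_kv_cache_pe = torch.zeros(
        (num_blocks, block_size, num_kv_heads, rope_dim), dtype=dtype)

    # Each MLA layer maps to a (nope, pe) pair; the key below is made up.
    kv_caches = {
        "model.layers.0.self_attn.attn":
        (layer_kv_cache_nope, layer_kv_cache_pe),
    }
    assert kv_caches["model.layers.0.self_attn.attn"][0].shape[-1] == nope_dim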
