
Commit 1cbd312

Fix lint
Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
1 parent: 0ac11a8

File tree (3 files changed: +6 −5 lines)

tests/v1/e2e/test_kv_sharing_skip_prefill.py
vllm/config.py
vllm/v1/attention/backends/flash_attn.py

tests/v1/e2e/test_kv_sharing_skip_prefill.py

Lines changed: 4 additions & 3 deletions
@@ -3,7 +3,7 @@
 
 import gc
 from collections.abc import Iterable
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 import pytest
 import torch
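
The `List` → `list` change below follows PEP 585: since Python 3.9, built-in containers are directly subscriptable in annotations, so the `typing.List` import can be dropped. A minimal illustration (the function name here is illustrative, not from the test):

    from torch import nn

    # PEP 585 (Python 3.9+): built-in generics replace their typing aliases,
    # so list[nn.Module] works without importing typing.List.
    def stack_layers(layers: list[nn.Module]) -> nn.Sequential:
        return nn.Sequential(*layers)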
@@ -112,7 +112,7 @@ def __init__(
         *,
         vllm_config: VllmConfig,
         prefix: str = "",
-        layers: List[nn.Module],
+        layers: list[nn.Module],
     ):
         super().__init__()
         self.layers = layers
@@ -162,7 +162,8 @@ def __init__(self,
         )
 
         # Pre-allocate static buffers for CUDA graph
-        self.max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+        self.max_num_tokens = \
+            vllm_config.scheduler_config.max_num_batched_tokens
         self.dtype = vllm_config.model_config.dtype
         self.device = next(self.parameters()).device
         self.hidden_size = vllm_config.model_config.get_hidden_size()
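
The "pre-allocate static buffers" comment points at a standard CUDA graph constraint: replay reads and writes fixed memory addresses, so buffers are allocated once at the maximum size and smaller batches use a prefix slice. A hedged sketch of the idea (illustrative names and sizes, not vLLM's implementation):

    import torch

    max_num_tokens = 8192   # stands in for scheduler_config.max_num_batched_tokens
    hidden_size = 4096

    # Allocate once at the maximum size; a captured graph always sees this
    # same storage regardless of the actual batch size at replay time.
    static_hidden = torch.zeros(max_num_tokens, hidden_size,
                                dtype=torch.float16, device="cuda")

    def stage_inputs(hidden_states: torch.Tensor) -> torch.Tensor:
        n = hidden_states.shape[0]
        # Copy the live batch into a prefix slice of the static buffer.
        static_hidden[:n].copy_(hidden_states)
        return static_hidden[:n]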

vllm/config.py

Lines changed: 1 addition & 1 deletion
@@ -4066,7 +4066,7 @@ class CompilationConfig:
     - None (default): capture sizes are inferred from vllm config.
     - list[int]: capture sizes are specified as given."""
     cudagraph_share_memory_pool: bool = True
-    """Whether to share a single global memory pool for each CUDA graph captured"""
+    """Whether to share a single global memory pool for each graph capture"""
     cudagraph_copy_inputs: bool = False
     """Whether to copy input tensors for
     cudagraph. If the caller can guarantee that the same input buffers
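
For context on what the reworded docstring describes: PyTorch exposes a public API for letting multiple CUDA graph captures draw from one allocator pool rather than each reserving its own. A hedged sketch using that public API (not vLLM's internals):

    import torch

    # One shared pool handle for all captures.
    pool = torch.cuda.graph_pool_handle()

    x = torch.zeros(1024, device="cuda")
    y = torch.zeros(2048, device="cuda")

    g1 = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g1, pool=pool):
        out1 = x * 2

    g2 = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g2, pool=pool):  # same pool as g1, so memory is reused
        out2 = y * 2

Sharing one pool trades some allocator flexibility for a smaller total memory footprint when many graphs are captured.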

vllm/v1/attention/backends/flash_attn.py

Lines changed: 1 addition & 1 deletion
@@ -223,7 +223,7 @@ def build_skip_prefill(
         # num_decode_tokens: [1, 2, 1]
         num_decode_tokens = torch.bincount(request_ids, minlength=num_reqs)
 
-        # Calculate new query_start_loc only considering tokens in decode_indices
+        # Calculate new query_start_loc with tokens in decode_indices
         # decode_query_start_loc: [0, 1, 3, 4]
         decode_query_start_loc = torch.empty(num_reqs + 1,
                                              device=query_start_loc.device,
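
The comments in this hunk trace a small piece of arithmetic: per-request decode-token counts come from a bincount over request ids, and the new query start offsets are the exclusive cumulative sum of those counts. A standalone sketch reproducing the example values from the comments (assumed input shapes; this is not the `build_skip_prefill` implementation itself):

    import torch

    num_reqs = 3
    request_ids = torch.tensor([0, 1, 1, 2])  # request id of each kept decode token

    num_decode_tokens = torch.bincount(request_ids, minlength=num_reqs)
    # -> tensor([1, 2, 1])

    # Exclusive cumsum turns per-request counts into start offsets.
    decode_query_start_loc = torch.zeros(num_reqs + 1, dtype=torch.long)
    decode_query_start_loc[1:] = torch.cumsum(num_decode_tokens, dim=0)
    # -> tensor([0, 1, 3, 4])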
