
Commit 5268d6a

minor cleanups
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
1 parent: c99f908 · commit: 5268d6a

2 files changed: +2 −5 lines


tests/v1/attention/test_attention_backends.py

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ def create_and_prepopulate_kv_cache(
         device: torch.device,
         num_blocks: int,
         common_attn_metadata: CommonAttentionMetadata,
-        randomize_blocks: bool = True) -> tuple[torch.Tensor, torch.Tensor]:
+        randomize_blocks: bool = True) -> torch.Tensor:
     """Create and prepopulate a KV cache with context data.
 
     Args:
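
The only functional change in this file is the narrowed return annotation: the helper now hands back a single KV cache tensor rather than a pair. Below is a minimal, self-contained sketch of the contract after this change, assuming only the parameters visible in the hunk; the real helper takes more arguments and actually populates the cache, so the stand-in definitions here are illustrative only.

import torch
from dataclasses import dataclass

@dataclass
class CommonAttentionMetadata:
    """Stand-in for the real metadata class used by the test helper."""
    num_reqs: int = 0

def create_and_prepopulate_kv_cache(
        device: torch.device,
        num_blocks: int,
        common_attn_metadata: CommonAttentionMetadata,
        randomize_blocks: bool = True) -> torch.Tensor:
    # Stand-in body: the real helper fills the cache with context data;
    # this sketch only illustrates the single-tensor return contract.
    return torch.empty(num_blocks, 16, device=device)

kv_cache = create_and_prepopulate_kv_cache(
    device=torch.device("cpu"),
    num_blocks=4,
    common_attn_metadata=CommonAttentionMetadata())
# Before this commit the annotation promised tuple[torch.Tensor, torch.Tensor],
# so a caller would have unpacked two values here instead of one.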

vllm/v1/attention/backends/flash_attn.py

Lines changed: 1 addition & 4 deletions
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Attention layer with FlashAttention."""
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, ClassVar, Optional
+from typing import Any, ClassVar, Optional
 
 import numpy as np
 import torch
@@ -30,9 +30,6 @@
     make_local_attention_virtual_batches)
 from vllm.v1.kv_cache_interface import AttentionSpec
 
-if TYPE_CHECKING:
-    pass
-
 logger = init_logger(__name__)
 
 # NOTE(woosuk): This is an arbitrary number. Tune it if needed.
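
The removed "if TYPE_CHECKING: pass" block guarded nothing, so it was dead code, and dropping it lets TYPE_CHECKING come out of the typing import as well. For contrast, here is a minimal sketch of the pattern the guard is normally kept for; the guarded import below is illustrative, not taken from this file.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Visible only to static type checkers; never executed at runtime,
    # which avoids circular imports and import-time cost.
    from vllm.config import VllmConfig  # illustrative import

def describe(config: "VllmConfig") -> str:
    # The string annotation keeps this valid even though VllmConfig
    # is not bound at runtime.
    return type(config).__name__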
