A few more changes to fix other pre-commit hook failures

christian-pinto · christian-pinto · commit 0dba4cd1c052 · 2025-07-04T09:28:14.000Z
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
diff --git a/tests/models/multimodal/pooling/test_prithvi_mae.py b/tests/models/multimodal/pooling/test_prithvi_mae.py
@@ -6,31 +6,35 @@
 
 from ....conftest import VllmRunner
 
+
 def generate_test_mm_data():
     mm_data = {
         "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
         "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
     }
     return mm_data
-     
+
+
 def _run_test(
     vllm_runner: type[VllmRunner],
     model: str,
-) -> None:   
+) -> None:
 
     mm_data = generate_test_mm_data()
     prompt = {
         # This model deals with no text input
         "prompt_token_ids": [1],
         "multi_modal_data": mm_data
     }
-    with vllm_runner(model, task="embed",
-                    dtype=torch.float16,
-                    enforce_eager=True,
-                    skip_tokenizer_init=True) as vllm_model:
-        output = vllm_model.encode(prompt)
-            
-MODELS=["christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM"]
+    with vllm_runner(model,
+                     task="embed",
+                     dtype=torch.float16,
+                     enforce_eager=True,
+                     skip_tokenizer_init=True) as vllm_model:
+        vllm_model.encode(prompt)
+
+MODELS = ["christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM"]
+
 @pytest.mark.parametrize("model", MODELS)
 def test_models_image(
     hf_runner,
@@ -41,4 +45,4 @@ def test_models_image(
     _run_test(
         vllm_runner,
         model,
-    )
+    )
diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from abc import ABC, abstractmethod
 from collections import defaultdict
 from dataclasses import dataclass
 from typing import Optional
@@ -64,7 +65,72 @@ def new_empty(self) -> "KVCacheBlocks":
         return KVCacheBlocks(tuple([] for _ in range(len(self.blocks))))
 
 
-class DummyKVCacheManager:
+class KVCacheManagerInterface(ABC):
+
+    @abstractmethod
+    def usage(self) -> float:
+        raise NotImplementedError
+
+    @abstractmethod
+    def make_prefix_cache_stats(self) -> Optional[PrefixCacheStats]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_computed_blocks(self,
+                            request: Request) -> tuple[KVCacheBlocks, int]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def allocate_slots(
+        self,
+        request: Request,
+        num_new_tokens: int,
+        num_new_computed_tokens: int = 0,
+        new_computed_blocks: Optional[KVCacheBlocks] = None,
+        num_draft_tokens: int = 0,
+        num_lookahead_tokens: int = 0,
+        delay_cache_blocks: bool = False,
+    ) -> Optional[KVCacheBlocks]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def free(self, request: Request) -> None:
+        raise NotImplementedError
+
+    @abstractmethod
+    def reset_prefix_cache(self) -> bool:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_num_common_prefix_blocks(
+        self,
+        request: Request,
+        num_running_requests: int,
+    ) -> list[int]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def free_block_hashes(self, request: Request) -> None:
+        raise NotImplementedError
+
+    @abstractmethod
+    def take_events(self) -> list[KVCacheEvent]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_block_ids(self, request_id: str) -> tuple[list[int], ...]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
+        raise NotImplementedError
+
+    @abstractmethod
+    def create_empty_block_list(self) -> KVCacheBlocks:
+        raise NotImplementedError
+
+
+class DummyKVCacheManager(KVCacheManagerInterface):
 
     @property
     def usage(self) -> float:
@@ -88,7 +154,7 @@ def allocate_slots(
         delay_cache_blocks: bool = False,
     ) -> Optional[KVCacheBlocks]:
         #if we do not return a KV cache block requests are unschedulable
-        return KVCacheBlocks([KVCacheBlock(block_id=0)])
+        return KVCacheBlocks(tuple([KVCacheBlock(block_id=0)]))
 
     def free(self, request: Request) -> None:
         pass
@@ -109,20 +175,20 @@ def free_block_hashes(self, request: Request) -> None:
     def take_events(self) -> list[KVCacheEvent]:
         return []
 
-    def get_block_ids(self, request_id: str) -> list[list[int]]:
+    def get_block_ids(self, request_id: str) -> tuple[list[int], ...]:
         """Get the block ids of a request."""
-        return []
+        return tuple([])
 
     def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
         """Cache the blocks for the request, if enabled."""
         pass
 
     def create_empty_block_list(self) -> KVCacheBlocks:
         """Creates a new KVCacheBlocks instance with no blocks."""
-        return (KVCacheBlocks([]), 0)
+        return KVCacheBlocks(tuple([]))
 
 
-class KVCacheManager:
+class KVCacheManager(KVCacheManagerInterface):
 
     def __init__(
         self,
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
@@ -493,8 +493,8 @@ def schedule(self) -> SchedulerOutput:
 
                 if self.lora_config and request.lora_request:
                     scheduled_loras.add(request.lora_request.lora_int_id)
-                req_to_new_block_ids[request.request_id] = (
-                    self.kv_cache_manager.get_block_ids(request.request_id))
+                req_to_new_block_ids[request.request_id] = \
+                    self.kv_cache_manager.get_block_ids(request.request_id)
                 num_scheduled_tokens[request.request_id] = num_new_tokens
                 token_budget -= num_new_tokens
                 request.status = RequestStatus.RUNNING
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
@@ -140,7 +140,7 @@ def _initialize_kv_caches(
             kv_cache_specs = []
             kv_cache_configs = [
                 KVCacheConfig(num_blocks=0,
-                              kv_cache_tensors={},
+                              kv_cache_tensors=[],
                               kv_cache_groups=[])
             ]
         else:

Original file line number	Diff line number	Diff line change
`@@ -140,7 +140,7 @@ def _initialize_kv_caches(`
`140`	`140`	`kv_cache_specs = []`
`141`	`141`	`kv_cache_configs = [`
`142`	`142`	`KVCacheConfig(num_blocks=0,`
`143`		`- kv_cache_tensors={},`
	`143`	`+ kv_cache_tensors=[],`
`144`	`144`	`kv_cache_groups=[])`
`145`	`145`	`]`
`146`	`146`	`else:`